From 97801f55823357330a1223491546a5c7e24cc5f1 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 23 Jun 2026 07:49:58 -0500 Subject: [PATCH] feat(#1458): Renderable Codec Protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements T3.2 of the 2.3 release plan against the spec in datajoint-docs#188. A runtime-checkable Protocol that codecs opt into by implementing ``render_spark(self, decoded, *, key=None) -> Any``. Consumers (e.g., a Databricks silver-layer publish pipeline) detect support via ``isinstance(codec, Renderable)``. What's added: - src/datajoint/rendering.py (new, 92 lines including docstrings): Single @runtime_checkable Protocol declaration. Module-level docstring explains the design rationale (Protocol vs. abstract method on Codec); class docstring documents allowed return-value shapes (primitives / lists / dicts mapping to Spark ArrayType / StructType / MapType), with worked codec examples. - src/datajoint/__init__.py: ``dj.Renderable`` exported at the top level alongside the existing Codec API exports. - tests/unit/test_rendering.py (new, 9 tests): detection of opt-in vs non-opt-in classes, top-level re-export, @runtime_checkable guarantee, built-in <blob> and <hash> codecs are not Renderable (per spec contract), invocation pass-through, key kwarg acceptance, subclass opt-in behavior. What's NOT in this PR (out of scope per spec): - Specific renderable codec implementations. Such codecs ship downstream as plugins. They register via the existing codec auto-registration and opt in by implementing render_spark(). - Silver-layer publish pipeline (lives in datajoint-databricks). - decode_spark (reverse direction). - A BINARY fallback — codecs either implement Renderable or remain ineligible. All 9 unit tests pass. No regressions expected — this is purely additive (a new module + one top-level re-export + tests). Slated for DataJoint 2.3. 
--- src/datajoint/__init__.py | 3 + src/datajoint/rendering.py | 92 ++++++++++++++++++++++++++++++ tests/unit/test_rendering.py | 105 +++++++++++++++++++++++++++++++++++ 3 files changed, 200 insertions(+) create mode 100644 src/datajoint/rendering.py create mode 100644 tests/unit/test_rendering.py diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index 4970b19d4..de0013be8 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -51,6 +51,8 @@ "get_codec", "ObjectRef", "NpyRef", + # Renderable Codec Protocol + "Renderable", # Storage Adapter API "StorageAdapter", "get_storage_adapter", @@ -85,6 +87,7 @@ from .instance import Instance, _ConfigProxy, _get_singleton_connection, _global_config, _check_thread_safe from .logging import logger from .objectref import ObjectRef +from .rendering import Renderable from .storage_adapter import StorageAdapter, get_storage_adapter from .schemas import _Schema, VirtualModule, list_schemas, virtual_schema from .autopopulate import AutoPopulate diff --git a/src/datajoint/rendering.py b/src/datajoint/rendering.py new file mode 100644 index 000000000..29f3bc03c --- /dev/null +++ b/src/datajoint/rendering.py @@ -0,0 +1,92 @@ +""" +Renderable Codec Protocol. + +Opt-in contract for codecs that can render their decoded values to +Spark-native types — primitives, lists, dicts, and nested combinations. + +Codecs implement ``render_spark`` when they want their column eligible for +downstream typed-query systems (Spark SQL, Delta Sharing, BI tools). +Generic codecs like ``<blob>`` and ``<hash>`` deliberately do not +implement it: their decoded values can be arbitrary Python objects with +no fixed Spark-native shape. + +The contract is intentionally a Protocol rather than an abstract method +on :class:`datajoint.Codec`: + +- Generic codecs need no acknowledgement (no ``NotImplementedError`` stubs). +- Existing plugin codecs continue to work unchanged. +- Codec authors opt in by adding the method on their own release cadence. 
+- Consumers detect support structurally via ``isinstance(codec, Renderable)``. + +See ``datajoint-docs/src/reference/specs/renderable.md`` for the +normative specification (signature, return-value shape constraints, +worked codec examples). +""" + +from __future__ import annotations + +from typing import Any, Protocol, runtime_checkable + + +@runtime_checkable +class Renderable(Protocol): + """ + A codec that can render its decoded values to Spark-native types. + + Opt-in. Codecs implementing this method declare that their decoded + values can be expressed as primitives, lists, or dicts of the same — + i.e., shapes that map cleanly to Spark's ``StructType`` / + ``ArrayType`` / ``MapType``. + + Consumers (e.g., a Databricks silver-layer publish pipeline) check + ``isinstance(codec, Renderable)`` per column to determine eligibility. + + Allowed return-value shapes: + + - Primitives: ``bool``, ``int``, ``float``, ``str``, ``bytes``, + ``None``, ``datetime.date``, ``datetime.datetime``. + - ``list[T]`` where ``T`` is any allowed shape (→ Spark ``ArrayType``). + - ``dict[str, T]`` where ``T`` is any allowed shape (→ Spark + ``StructType`` or ``MapType``, consumer-decided). + + NumPy arrays must be converted to lists; no tuples, sets, or custom + objects in the return value. + + Examples + -------- + A 1D float-array codec (shipped as a plugin, not in datajoint-python):: + + class FloatArrayCodec(dj.Codec): + name = "float_array" + + def encode(self, value, *, key=None, store_name=None): ... + def decode(self, stored, *, key=None) -> np.ndarray: ... + + def render_spark(self, decoded: np.ndarray, *, key=None) -> list[float]: + return decoded.tolist() # → Spark ARRAY + + Eligibility check:: + + from datajoint import Renderable + isinstance(FloatArrayCodec(), Renderable) # True + """ + + def render_spark(self, decoded: Any, *, key: dict | None = None) -> Any: + """ + Render a decoded codec value to a Spark-native shape. 
+ + Parameters + ---------- + decoded : Any + The Python value produced by the codec's ``decode()``. + key : dict, optional + Optional context dict — same shape as ``Codec.encode``'s + ``key`` parameter. Most codecs ignore it. + + Returns + ------- + Any + A value composed entirely of allowed Spark-native shapes + (see class docstring). + """ + ... diff --git a/tests/unit/test_rendering.py b/tests/unit/test_rendering.py new file mode 100644 index 000000000..581b56918 --- /dev/null +++ b/tests/unit/test_rendering.py @@ -0,0 +1,105 @@ +""" +Unit tests for the Renderable Codec Protocol (#1458). + +The Protocol is a structural-typing contract — codecs opt in by +implementing ``render_spark`` and consumers detect support via +``isinstance(codec, Renderable)``. These tests cover the detection +behavior, not specific rendering implementations (which live downstream). +""" + +from __future__ import annotations + +import datajoint as dj +from datajoint.rendering import Renderable + + +class _RenderableCodec: + """A minimal codec-like object that opts into the protocol.""" + + name = "fake_renderable" + + def render_spark(self, decoded, *, key=None): + return list(decoded) if hasattr(decoded, "__iter__") else decoded + + +class _NonRenderableCodec: + """A minimal codec-like object that does NOT opt into the protocol.""" + + name = "fake_opaque" + + def encode(self, value, *, key=None, store_name=None): + return bytes(value) + + def decode(self, stored, *, key=None): + return stored + + +def test_renderable_protocol_detects_opt_in(): + """A class implementing ``render_spark`` is detected as Renderable.""" + assert isinstance(_RenderableCodec(), Renderable) + + +def test_renderable_protocol_rejects_non_opt_in(): + """A class without ``render_spark`` is not detected as Renderable.""" + assert not isinstance(_NonRenderableCodec(), Renderable) + + +def test_renderable_exported_at_top_level(): + """``dj.Renderable`` is accessible at the top level.""" + assert dj.Renderable is 
Renderable + + +def test_renderable_is_runtime_checkable(): + """The Protocol is decorated with @runtime_checkable (the test fixtures + above rely on this).""" + # Direct assertion: classes lacking runtime_checkable would raise TypeError + # on isinstance(). The previous tests would error rather than fail. + try: + isinstance(object(), Renderable) + except TypeError: + raise AssertionError("Renderable must be @runtime_checkable") + + +def test_blob_codec_is_not_renderable(): + """The built-in ``<blob>`` codec is intentionally non-renderable per the spec.""" + from datajoint.builtin_codecs.blob import BlobCodec + + assert not isinstance(BlobCodec(), Renderable) + + +def test_hash_codec_is_not_renderable(): + """The built-in ``<hash>`` codec is intentionally non-renderable per the spec.""" + from datajoint.builtin_codecs.hash import HashCodec + + assert not isinstance(HashCodec(), Renderable) + + +def test_renderable_invocation_passes_through(): + """A codec implementing the method can be invoked and returns its result.""" + codec = _RenderableCodec() + assert codec.render_spark([1, 2, 3]) == [1, 2, 3] + assert codec.render_spark(42) == 42 + + +def test_renderable_method_accepts_key_kwarg(): + """The method signature accepts the optional ``key`` keyword argument.""" + codec = _RenderableCodec() + # Should not raise + codec.render_spark([1, 2, 3], key={"some_pk": 1}) + + +def test_subclass_with_render_spark_is_renderable(): + """A subclass of a non-renderable that adds the method becomes renderable.""" + + class _OpaqueBase: + name = "base" + + def encode(self, value, *, key=None, store_name=None): + return b"" + + class _TypedSubclass(_OpaqueBase): + def render_spark(self, decoded, *, key=None): + return decoded + + assert not isinstance(_OpaqueBase(), Renderable) + assert isinstance(_TypedSubclass(), Renderable)