Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/datajoint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
"get_codec",
"ObjectRef",
"NpyRef",
# Renderable Codec Protocol
"Renderable",
# Storage Adapter API
"StorageAdapter",
"get_storage_adapter",
Expand Down Expand Up @@ -85,6 +87,7 @@
from .instance import Instance, _ConfigProxy, _get_singleton_connection, _global_config, _check_thread_safe
from .logging import logger
from .objectref import ObjectRef
from .rendering import Renderable
from .storage_adapter import StorageAdapter, get_storage_adapter
from .schemas import _Schema, VirtualModule, list_schemas, virtual_schema
from .autopopulate import AutoPopulate
Expand Down
92 changes: 92 additions & 0 deletions src/datajoint/rendering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""
Renderable Codec Protocol.

Opt-in contract for codecs that can render their decoded values to
Spark-native types — primitives, lists, dicts, and nested combinations.

Codecs implement a ``render_spark`` method when they want their column
eligible for downstream typed-query systems (Spark SQL, Delta Sharing,
BI tools). Generic codecs like ``<blob@>`` and ``<hash@>`` deliberately
do not implement it: their decoded values can be arbitrary Python objects
with no fixed Spark-native shape.

The contract is intentionally a Protocol rather than an abstract method
on :class:`datajoint.Codec`:

- Generic codecs need no acknowledgement (no ``NotImplementedError`` stubs).
- Existing plugin codecs continue to work unchanged.
- Codec authors opt in by adding the method on their own release cadence.
- Consumers detect support structurally via ``isinstance(codec, Renderable)``.

See ``datajoint-docs/src/reference/specs/renderable.md`` for the
normative specification (signature, return-value shape constraints,
worked codec examples).
"""

from __future__ import annotations

from typing import Any, Protocol, runtime_checkable


@runtime_checkable
class Renderable(Protocol):
    """
    Structural protocol for codecs that can express decoded values as
    Spark-native data.

    Participation is opt-in: a codec qualifies by defining a
    ``render_spark`` method; no base class or registration is involved.
    Defining the method declares that the codec's decoded values can be
    written as primitives, lists, or dicts of those — shapes that
    correspond to Spark's ``StructType`` / ``ArrayType`` / ``MapType``.

    A consumer (for example, a Databricks silver-layer publish pipeline)
    decides per-column eligibility with ``isinstance(codec, Renderable)``.
    Because this class is ``@runtime_checkable``, that check only confirms
    that a ``render_spark`` attribute is present; it does not validate the
    method's signature or the shape of the value it returns.

    Permitted return-value shapes:

    - Primitives: ``bool``, ``int``, ``float``, ``str``, ``bytes``,
      ``None``, ``datetime.date``, ``datetime.datetime``.
    - ``list[T]`` with ``T`` any permitted shape (→ Spark ``ArrayType``).
    - ``dict[str, T]`` with ``T`` any permitted shape (→ Spark
      ``StructType`` or ``MapType``; the consumer decides which).

    NumPy arrays have to be converted to lists, and tuples, sets, and
    custom objects must not appear anywhere in the returned value.

    Examples
    --------
    A 1D float-array codec (shipped as a plugin, not in datajoint-python)::

        class FloatArrayCodec(dj.Codec):
            name = "float_array"

            def encode(self, value, *, key=None, store_name=None): ...
            def decode(self, stored, *, key=None) -> np.ndarray: ...

            def render_spark(self, decoded: np.ndarray, *, key=None) -> list[float]:
                return decoded.tolist()  # → Spark ARRAY<DOUBLE>

    Eligibility check::

        from datajoint import Renderable
        isinstance(FloatArrayCodec(), Renderable)  # True
    """

    def render_spark(self, decoded: Any, *, key: dict | None = None) -> Any:
        """
        Convert one decoded codec value into a Spark-native shape.

        Parameters
        ----------
        decoded : Any
            The Python value that the codec's ``decode()`` produced.
        key : dict, optional
            Optional context dict, shaped like the ``key`` parameter of
            ``Codec.encode``. Most codecs ignore it.

        Returns
        -------
        Any
            A value built only from the permitted Spark-native shapes
            listed in the class docstring.
        """
105 changes: 105 additions & 0 deletions tests/unit/test_rendering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
"""
Unit tests for the Renderable Codec Protocol (#1458).

The Protocol is a structural-typing contract — codecs opt in by
implementing ``render_spark`` and consumers detect support via
``isinstance(codec, Renderable)``. These tests cover the detection
behavior, not specific rendering implementations (which live downstream).
"""

from __future__ import annotations

import datajoint as dj
from datajoint.rendering import Renderable


class _RenderableCodec:
"""A minimal codec-like object that opts into the protocol."""

name = "fake_renderable"

def render_spark(self, decoded, *, key=None):
return list(decoded) if hasattr(decoded, "__iter__") else decoded


class _NonRenderableCodec:
"""A minimal codec-like object that does NOT opt into the protocol."""

name = "fake_opaque"

def encode(self, value, *, key=None, store_name=None):
return bytes(value)

def decode(self, stored, *, key=None):
return stored


def test_renderable_protocol_detects_opt_in():
    """An object whose class defines ``render_spark`` passes the Renderable check."""
    candidate = _RenderableCodec()
    assert isinstance(candidate, Renderable)


def test_renderable_protocol_rejects_non_opt_in():
    """An object whose class lacks ``render_spark`` fails the Renderable check."""
    candidate = _NonRenderableCodec()
    assert not isinstance(candidate, Renderable)


def test_renderable_exported_at_top_level():
    """The package-level ``dj.Renderable`` is the very same object as the protocol class."""
    exported = dj.Renderable
    assert exported is Renderable


def test_renderable_is_runtime_checkable():
    """The Protocol is decorated with @runtime_checkable (the test fixtures
    above rely on this)."""
    # A Protocol that is not runtime-checkable raises TypeError on
    # isinstance(). Converting that into an AssertionError makes this test
    # report a failure instead of an error.
    try:
        isinstance(object(), Renderable)
    except TypeError as exc:
        # Chain explicitly so the original TypeError is reported as the cause.
        raise AssertionError("Renderable must be @runtime_checkable") from exc


def test_blob_codec_is_not_renderable():
    """A ``BlobCodec`` instance (built-in <blob@>) must not satisfy Renderable, per the spec."""
    from datajoint.builtin_codecs.blob import BlobCodec

    blob_codec = BlobCodec()
    assert not isinstance(blob_codec, Renderable)


def test_hash_codec_is_not_renderable():
    """A ``HashCodec`` instance (built-in <hash@>) must not satisfy Renderable, per the spec."""
    from datajoint.builtin_codecs.hash import HashCodec

    hash_codec = HashCodec()
    assert not isinstance(hash_codec, Renderable)


def test_renderable_invocation_passes_through():
    """Calling ``render_spark`` on an opted-in codec yields the method's own result."""
    render = _RenderableCodec().render_spark
    assert render([1, 2, 3]) == [1, 2, 3]
    assert render(42) == 42


def test_renderable_method_accepts_key_kwarg():
    """``render_spark`` can be called with the optional ``key`` keyword argument."""
    primary_key = {"some_pk": 1}
    # Passing ``key`` must not raise; the return value is irrelevant here.
    _RenderableCodec().render_spark([1, 2, 3], key=primary_key)


def test_subclass_with_render_spark_is_renderable():
    """Adding ``render_spark`` in a subclass makes it Renderable while its base is not."""

    class _OpaqueBase:
        # Base offers only ``encode``, so it must not satisfy the protocol.
        name = "base"

        def encode(self, value, *, key=None, store_name=None):
            return b""

    class _TypedSubclass(_OpaqueBase):
        # The subclass opts in by supplying the protocol method.
        def render_spark(self, decoded, *, key=None):
            return decoded

    parent, child = _OpaqueBase(), _TypedSubclass()
    assert not isinstance(parent, Renderable)
    assert isinstance(child, Renderable)
Loading