From 67af1befc67fbb184fa6e5cc8dd2cfdf773b5f8d Mon Sep 17 00:00:00 2001 From: WyattBlue Date: Wed, 24 Jun 2026 16:42:15 -0400 Subject: [PATCH 1/2] Encode GPU frames with pix_fmt=cuda, expose Encode GPU frames with pix_fmt=cuda, expose VideoFrame.sw_format A hardware frame (e.g. a CUDA frame from DLPack) carries its own frames context. nvenc and friends require hw_frames_ctx to be set before avcodec_open2, so adopt the frame's before opening the encoder. This lets GPU frames encode on-device with no download/upload round-trip. Also expose VideoFrame.sw_format, which returns the underlying software pixel format (nv12, yuv444p, p010le, ...) for a hardware frame and None otherwise. closes #2199 --- CHANGELOG.rst | 1 + av/codec/context.py | 11 +++++++++++ av/video/frame.py | 16 ++++++++++++++++ av/video/frame.pyi | 2 ++ tests/test_dlpack.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 73 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 85107acc3..25fd9e9ac 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -47,6 +47,7 @@ Features: Fixes: - Fix ``add_mux_stream`` producing unwritable Matroska files by extracting codec extradata from the bitstream before the header is written by :gh-user:`WyattBlue` (:issue:`2198`). +- Encode GPU frames (e.g. CUDA frames from DLPack) directly with ``pix_fmt="cuda"`` by adopting the frame's ``hw_frames_ctx`` before opening the encoder by :gh-user:`WyattBlue` (:issue:`2199`). v17.1.0 ------- diff --git a/av/codec/context.py b/av/codec/context.py index f842f80c1..27ab8bbef 100644 --- a/av/codec/context.py +++ b/av/codec/context.py @@ -481,6 +481,17 @@ def _prepare_and_time_rebase_frames_for_encode(self, frame: Frame): if self.ptr.codec_type not in [lib.AVMEDIA_TYPE_VIDEO, lib.AVMEDIA_TYPE_AUDIO]: raise NotImplementedError("Encoding is only supported for audio and video.") + # A hardware frame (e.g. a CUDA frame from DLPack) carries its own frames + # context. Encoders like h264_nvenc require hw_frames_ctx to be set before + # avcodec_open2, so adopt the frame's if we don't already have one. + if ( + not self.is_open + and frame is not None + and frame.ptr.hw_frames_ctx != cython.NULL + and self.ptr.hw_frames_ctx == cython.NULL + ): + self.ptr.hw_frames_ctx = lib.av_buffer_ref(frame.ptr.hw_frames_ctx) + self.open(strict=False) frames = self._prepare_frames_for_encode(frame) diff --git a/av/video/frame.py b/av/video/frame.py index 78b97a550..6d6c3f044 100644 --- a/av/video/frame.py +++ b/av/video/frame.py @@ -516,6 +516,22 @@ def _init_user_attributes(self): def __dealloc__(self): lib.av_frame_unref(self.ptr) + @property + def sw_format(self): + """ + For a hardware frame (e.g. ``format.name == "cuda"``), the underlying + software pixel format (``nv12``, ``yuv444p``, ``p010le``, ...). ``None`` + for a regular software frame. + + :type: VideoFormat | None + """ + if not self.ptr.hw_frames_ctx: + return None + frames_ctx: cython.pointer[lib.AVHWFramesContext] = cython.cast( + cython.pointer[lib.AVHWFramesContext], self.ptr.hw_frames_ctx.data + ) + return get_video_format(frames_ctx.sw_format, self.ptr.width, self.ptr.height) + def __repr__(self): return ( f" VideoFormat | None: ... @property def time(self) -> float: ... @property diff --git a/tests/test_dlpack.py b/tests/test_dlpack.py index 13e05fbbc..2e74f342f 100644 --- a/tests/test_dlpack.py +++ b/tests/test_dlpack.py @@ -1,4 +1,6 @@ import gc +from fractions import Fraction +from typing import cast import numpy import pytest @@ -6,6 +8,7 @@ import av from av import VideoFrame from av.codec.hwaccel import HWAccel +from av.video.codeccontext import VideoCodecContext from .common import assertNdarraysEqual, fate_png @@ -283,6 +286,10 @@ def test_video_plane_dlpack_unsupported_format_raises() -> None: frame.planes[0].__dlpack__() +def test_sw_format_none_for_software_frame() -> None: + assert VideoFrame(16, 16, "yuv420p").sw_format is None + + def test_video_frame_from_dlpack_requires_two_planes() -> None: y = numpy.zeros((4, 4), dtype=numpy.uint8) with pytest.raises(ValueError, match="2-plane"): @@ -612,3 +619,39 @@ def test_video_frame_from_dlpack_cuda_hw_frame_behavior_if_available() -> None: frame.to_ndarray(format="cuda") except av.FFmpegError as e: pytest.skip(f"CUDA hwcontext not available in this build/runtime: {e}") + + +def test_encode_cuda_frame_with_nvenc_if_available() -> None: + # Issue #2199: a CUDA frame from DLPack should encode on the GPU directly. + # Its hw_frames_ctx must propagate to the encoder before avcodec_open2. + backend = _get_cuda_backend() + if backend is None: + pytest.skip("CUDA backend (cupy/torch) not available.") + + name, mod = backend + width, height = 256, 256 + + try: + if name == "torch": + y = mod.zeros((height, width), dtype=mod.uint8, device="cuda") + uv = mod.zeros((height // 2, width // 2, 2), dtype=mod.uint8, device="cuda") + else: + y = mod.zeros((height, width), dtype=mod.uint8) + uv = mod.zeros((height // 2, width // 2, 2), dtype=mod.uint8) + + frame = VideoFrame.from_dlpack((y, uv), format="nv12") + assert frame.format.name == "cuda" + assert frame.sw_format is not None and frame.sw_format.name == "nv12" + + cc = cast(VideoCodecContext, av.CodecContext.create("h264_nvenc", "w")) + cc.width = width + cc.height = height + cc.time_base = Fraction(1, 24) + cc.framerate = Fraction(24, 1) + cc.pix_fmt = "cuda" + + packets = cc.encode(frame) + packets += cc.encode(None) # flush + assert any(p.size for p in packets) + except av.FFmpegError as e: + pytest.skip(f"nvenc/CUDA not available in this build/runtime: {e}") From 852fb02b340abcce321d9fdd81d1d3f1aa2b1eae Mon Sep 17 00:00:00 2001 From: WyattBlue Date: Wed, 24 Jun 2026 16:53:50 -0400 Subject: [PATCH 2/2] Avoid reassigns --- av/container/pyio.py | 2 +- av/opaque.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/av/container/pyio.py b/av/container/pyio.py index 34a497246..0f7af6eb7 100644 --- a/av/container/pyio.py +++ b/av/container/pyio.py @@ -38,7 +38,7 @@ def __cinit__(self, file, buffer_size, writeable=None): and self.ftell is not None and (seekable is None or seekable()) ): - seek_func: seek_func_t = pyio_seek + seek_func = pyio_seek if writeable is None: writeable = self.fwrite is not None diff --git a/av/opaque.py b/av/opaque.py index e55468c44..281033258 100644 --- a/av/opaque.py +++ b/av/opaque.py @@ -55,4 +55,4 @@ def pop(self, name) -> object: return self._objects.pop(key, None) -opaque_container: OpaqueContainer = OpaqueContainer() +opaque_container = OpaqueContainer()