From 67af1befc67fbb184fa6e5cc8dd2cfdf773b5f8d Mon Sep 17 00:00:00 2001
From: WyattBlue <wyattblue@auto-editor.com>
Date: Wed, 24 Jun 2026 16:42:15 -0400
Subject: [PATCH 1/2] Encode GPU frames with pix_fmt=cuda, expose
 VideoFrame.sw_format

Encode GPU frames with pix_fmt=cuda, expose VideoFrame.sw_format

A hardware frame (e.g. a CUDA frame from DLPack) carries its own frames
context. Hardware encoders such as h264_nvenc require hw_frames_ctx to be
set before avcodec_open2, so when the encoder is not yet open and has no
hw_frames_ctx of its own, it now adopts the frame's hw_frames_ctx before
opening. This lets GPU frames encode on-device with no download/upload
round-trip.

Also expose VideoFrame.sw_format, which returns the underlying software
pixel format (nv12, yuv444p, p010le, ...) for a hardware frame and None
otherwise.

closes #2199
---
 CHANGELOG.rst        |  1 +
 av/codec/context.py  | 11 +++++++++++
 av/video/frame.py    | 16 ++++++++++++++++
 av/video/frame.pyi   |  2 ++
 tests/test_dlpack.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 73 insertions(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 85107acc3..25fd9e9ac 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -47,6 +47,7 @@ Features:
 Fixes:
 
 - Fix ``add_mux_stream`` producing unwritable Matroska files by extracting codec extradata from the bitstream before the header is written by :gh-user:`WyattBlue` (:issue:`2198`).
+- Encode GPU frames (e.g. CUDA frames from DLPack) directly with ``pix_fmt="cuda"`` by adopting the frame's ``hw_frames_ctx`` before opening the encoder by :gh-user:`WyattBlue` (:issue:`2199`).
 
 v17.1.0
 -------
diff --git a/av/codec/context.py b/av/codec/context.py
index f842f80c1..27ab8bbef 100644
--- a/av/codec/context.py
+++ b/av/codec/context.py
@@ -481,6 +481,17 @@ def _prepare_and_time_rebase_frames_for_encode(self, frame: Frame):
         if self.ptr.codec_type not in [lib.AVMEDIA_TYPE_VIDEO, lib.AVMEDIA_TYPE_AUDIO]:
             raise NotImplementedError("Encoding is only supported for audio and video.")
 
+        # A hardware frame (e.g. a CUDA frame from DLPack) carries its own frames
+        # context. Encoders like h264_nvenc require hw_frames_ctx to be set before
+        # avcodec_open2, so adopt the frame's context if we don't already have one.
+        if (
+            not self.is_open
+            and frame is not None
+            and frame.ptr.hw_frames_ctx != cython.NULL
+            and self.ptr.hw_frames_ctx == cython.NULL
+        ):
+            self.ptr.hw_frames_ctx = lib.av_buffer_ref(frame.ptr.hw_frames_ctx)
+
         self.open(strict=False)
 
         frames = self._prepare_frames_for_encode(frame)
diff --git a/av/video/frame.py b/av/video/frame.py
index 78b97a550..6d6c3f044 100644
--- a/av/video/frame.py
+++ b/av/video/frame.py
@@ -516,6 +516,22 @@ def _init_user_attributes(self):
     def __dealloc__(self):
         lib.av_frame_unref(self.ptr)
 
+    @property
+    def sw_format(self):
+        """
+        For a hardware frame (e.g. ``format.name == "cuda"``), the underlying
+        software pixel format (``nv12``, ``yuv444p``, ``p010le``, ...). ``None``
+        for a regular software frame.
+
+        :type: VideoFormat | None
+        """
+        if not self.ptr.hw_frames_ctx:
+            return None
+        frames_ctx: cython.pointer[lib.AVHWFramesContext] = cython.cast(
+            cython.pointer[lib.AVHWFramesContext], self.ptr.hw_frames_ctx.data
+        )
+        return get_video_format(frames_ctx.sw_format, self.ptr.width, self.ptr.height)
+
     def __repr__(self):
         return (
             f"<av.{self.__class__.__name__}, pts={self.pts} {self.format.name} "
diff --git a/av/video/frame.pyi b/av/video/frame.pyi
index 9e98a7033..3c8ef6a79 100644
--- a/av/video/frame.pyi
+++ b/av/video/frame.pyi
@@ -45,6 +45,8 @@ class VideoFrame(Frame):
     color_trc: int
     color_primaries: int
 
+    @property
+    def sw_format(self) -> VideoFormat | None: ...
     @property
     def time(self) -> float: ...
     @property
diff --git a/tests/test_dlpack.py b/tests/test_dlpack.py
index 13e05fbbc..2e74f342f 100644
--- a/tests/test_dlpack.py
+++ b/tests/test_dlpack.py
@@ -1,4 +1,6 @@
 import gc
+from fractions import Fraction
+from typing import cast
 
 import numpy
 import pytest
@@ -6,6 +8,7 @@
 import av
 from av import VideoFrame
 from av.codec.hwaccel import HWAccel
+from av.video.codeccontext import VideoCodecContext
 
 from .common import assertNdarraysEqual, fate_png
 
@@ -283,6 +286,10 @@ def test_video_plane_dlpack_unsupported_format_raises() -> None:
         frame.planes[0].__dlpack__()
 
 
+def test_sw_format_none_for_software_frame() -> None:
+    assert VideoFrame(16, 16, "yuv420p").sw_format is None
+
+
 def test_video_frame_from_dlpack_requires_two_planes() -> None:
     y = numpy.zeros((4, 4), dtype=numpy.uint8)
     with pytest.raises(ValueError, match="2-plane"):
@@ -612,3 +619,39 @@ def test_video_frame_from_dlpack_cuda_hw_frame_behavior_if_available() -> None:
                 frame.to_ndarray(format="cuda")
     except av.FFmpegError as e:
         pytest.skip(f"CUDA hwcontext not available in this build/runtime: {e}")
+
+
+def test_encode_cuda_frame_with_nvenc_if_available() -> None:
+    # Issue #2199: a CUDA frame from DLPack should encode on the GPU directly.
+    # Its hw_frames_ctx must propagate to the encoder before avcodec_open2.
+    backend = _get_cuda_backend()
+    if backend is None:
+        pytest.skip("CUDA backend (cupy/torch) not available.")
+
+    name, mod = backend
+    width, height = 256, 256
+
+    try:
+        if name == "torch":
+            y = mod.zeros((height, width), dtype=mod.uint8, device="cuda")
+            uv = mod.zeros((height // 2, width // 2, 2), dtype=mod.uint8, device="cuda")
+        else:
+            y = mod.zeros((height, width), dtype=mod.uint8)
+            uv = mod.zeros((height // 2, width // 2, 2), dtype=mod.uint8)
+
+        frame = VideoFrame.from_dlpack((y, uv), format="nv12")
+        assert frame.format.name == "cuda"
+        assert frame.sw_format is not None and frame.sw_format.name == "nv12"
+
+        cc = cast(VideoCodecContext, av.CodecContext.create("h264_nvenc", "w"))
+        cc.width = width
+        cc.height = height
+        cc.time_base = Fraction(1, 24)
+        cc.framerate = Fraction(24, 1)
+        cc.pix_fmt = "cuda"
+
+        packets = cc.encode(frame)
+        packets += cc.encode(None)  # flush
+        assert any(p.size for p in packets)
+    except av.FFmpegError as e:
+        pytest.skip(f"nvenc/CUDA not available in this build/runtime: {e}")

From 852fb02b340abcce321d9fdd81d1d3f1aa2b1eae Mon Sep 17 00:00:00 2001
From: WyattBlue <wyattblue@auto-editor.com>
Date: Wed, 24 Jun 2026 16:53:50 -0400
Subject: [PATCH 2/2] Avoid reassigns

---
 av/container/pyio.py | 2 +-
 av/opaque.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/av/container/pyio.py b/av/container/pyio.py
index 34a497246..0f7af6eb7 100644
--- a/av/container/pyio.py
+++ b/av/container/pyio.py
@@ -38,7 +38,7 @@ def __cinit__(self, file, buffer_size, writeable=None):
             and self.ftell is not None
             and (seekable is None or seekable())
         ):
-            seek_func: seek_func_t = pyio_seek
+            seek_func = pyio_seek
 
         if writeable is None:
             writeable = self.fwrite is not None
diff --git a/av/opaque.py b/av/opaque.py
index e55468c44..281033258 100644
--- a/av/opaque.py
+++ b/av/opaque.py
@@ -55,4 +55,4 @@ def pop(self, name) -> object:
         return self._objects.pop(key, None)
 
 
-opaque_container: OpaqueContainer = OpaqueContainer()
+opaque_container = OpaqueContainer()