Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Features:
Fixes:

- Fix ``add_mux_stream`` producing unwritable Matroska files by extracting codec extradata from the bitstream before the header is written by :gh-user:`WyattBlue` (:issue:`2198`).
- Fix encoding of GPU frames (e.g. CUDA frames from DLPack) with ``pix_fmt="cuda"``: the encoder now adopts the frame's ``hw_frames_ctx`` before it is opened, by :gh-user:`WyattBlue` (:issue:`2199`).

v17.1.0
-------
Expand Down
11 changes: 11 additions & 0 deletions av/codec/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,17 @@ def _prepare_and_time_rebase_frames_for_encode(self, frame: Frame):
if self.ptr.codec_type not in [lib.AVMEDIA_TYPE_VIDEO, lib.AVMEDIA_TYPE_AUDIO]:
raise NotImplementedError("Encoding is only supported for audio and video.")

# A hardware frame (e.g. a CUDA frame from DLPack) carries its own frames
# context. Encoders like h264_nvenc require hw_frames_ctx to be set before
# avcodec_open2, so adopt the frame's if we don't already have one.
if (
not self.is_open
and frame is not None
and frame.ptr.hw_frames_ctx != cython.NULL
and self.ptr.hw_frames_ctx == cython.NULL
):
self.ptr.hw_frames_ctx = lib.av_buffer_ref(frame.ptr.hw_frames_ctx)

self.open(strict=False)

frames = self._prepare_frames_for_encode(frame)
Expand Down
2 changes: 1 addition & 1 deletion av/container/pyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __cinit__(self, file, buffer_size, writeable=None):
and self.ftell is not None
and (seekable is None or seekable())
):
seek_func: seek_func_t = pyio_seek
seek_func = pyio_seek

if writeable is None:
writeable = self.fwrite is not None
Expand Down
2 changes: 1 addition & 1 deletion av/opaque.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ def pop(self, name) -> object:
return self._objects.pop(key, None)


opaque_container: OpaqueContainer = OpaqueContainer()
opaque_container = OpaqueContainer()
16 changes: 16 additions & 0 deletions av/video/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,22 @@ def _init_user_attributes(self):
def __dealloc__(self):
lib.av_frame_unref(self.ptr)

@property
def sw_format(self):
    """
    The software pixel format behind a hardware frame.

    When the frame carries a hardware frames context (e.g. a frame whose
    ``format.name`` is ``"cuda"``), this is the format stored in that
    context's ``sw_format`` field (``nv12``, ``yuv444p``, ``p010le``, ...),
    built with this frame's width and height. Frames without a hardware
    frames context yield ``None``.

    :type: VideoFormat | None
    """
    # No hardware frames context attached: nothing to report.
    if self.ptr.hw_frames_ctx == cython.NULL:
        return None

    # The buffer's payload is the AVHWFramesContext itself.
    hw_ctx: cython.pointer[lib.AVHWFramesContext] = cython.cast(
        cython.pointer[lib.AVHWFramesContext], self.ptr.hw_frames_ctx.data
    )
    video_format = get_video_format(
        hw_ctx.sw_format, self.ptr.width, self.ptr.height
    )
    return video_format

def __repr__(self):
return (
f"<av.{self.__class__.__name__}, pts={self.pts} {self.format.name} "
Expand Down
2 changes: 2 additions & 0 deletions av/video/frame.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ class VideoFrame(Frame):
color_trc: int
color_primaries: int

@property
def sw_format(self) -> VideoFormat | None: ...
@property
def time(self) -> float: ...
@property
Expand Down
43 changes: 43 additions & 0 deletions tests/test_dlpack.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import gc
from fractions import Fraction
from typing import cast

import numpy
import pytest

import av
from av import VideoFrame
from av.codec.hwaccel import HWAccel
from av.video.codeccontext import VideoCodecContext

from .common import assertNdarraysEqual, fate_png

Expand Down Expand Up @@ -283,6 +286,10 @@ def test_video_plane_dlpack_unsupported_format_raises() -> None:
frame.planes[0].__dlpack__()


def test_sw_format_none_for_software_frame() -> None:
    """``sw_format`` must be ``None`` on a regular software (yuv420p) frame."""
    software_frame = VideoFrame(16, 16, "yuv420p")
    assert software_frame.sw_format is None


def test_video_frame_from_dlpack_requires_two_planes() -> None:
y = numpy.zeros((4, 4), dtype=numpy.uint8)
with pytest.raises(ValueError, match="2-plane"):
Expand Down Expand Up @@ -612,3 +619,39 @@ def test_video_frame_from_dlpack_cuda_hw_frame_behavior_if_available() -> None:
frame.to_ndarray(format="cuda")
except av.FFmpegError as e:
pytest.skip(f"CUDA hwcontext not available in this build/runtime: {e}")


def test_encode_cuda_frame_with_nvenc_if_available() -> None:
    """
    Encode a DLPack-backed CUDA frame with ``h264_nvenc`` (issue #2199).

    The frame's ``hw_frames_ctx`` has to reach the encoder before
    ``avcodec_open2``, so encoding directly with ``pix_fmt="cuda"`` should
    produce at least one non-empty packet. The test is skipped when no CUDA
    array backend is found or when FFmpeg raises an error along the way.
    """
    backend = _get_cuda_backend()
    if backend is None:
        pytest.skip("CUDA backend (cupy/torch) not available.")

    name, mod = backend
    width = height = 256

    # torch needs an explicit CUDA device; the other backend is called
    # without one.
    alloc_kwargs = {"device": "cuda"} if name == "torch" else {}

    try:
        # nv12 layout: a full-resolution Y plane and a half-resolution
        # interleaved UV plane.
        y = mod.zeros((height, width), dtype=mod.uint8, **alloc_kwargs)
        uv = mod.zeros(
            (height // 2, width // 2, 2), dtype=mod.uint8, **alloc_kwargs
        )

        frame = VideoFrame.from_dlpack((y, uv), format="nv12")
        assert frame.format.name == "cuda"
        sw_format = frame.sw_format
        assert sw_format is not None
        assert sw_format.name == "nv12"

        encoder = cast(VideoCodecContext, av.CodecContext.create("h264_nvenc", "w"))
        encoder.width = width
        encoder.height = height
        encoder.time_base = Fraction(1, 24)
        encoder.framerate = Fraction(24, 1)
        encoder.pix_fmt = "cuda"

        # Encode the single frame, then pass None to flush the encoder.
        packets = [*encoder.encode(frame), *encoder.encode(None)]
        assert any(packet.size for packet in packets)
    except av.FFmpegError as e:
        pytest.skip(f"nvenc/CUDA not available in this build/runtime: {e}")
Loading