Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ v18.0.0 (next)
Breaking:

- Remove Python 3.10
- Support HW encoding via a ``hwaccel`` parameter on ``OutputContainer.add_stream`` (e.g. ``h264_vaapi``, ``h264_nvenc``, ``h264_videotoolbox``); software frames passed to ``encode`` are automatically uploaded to the device, by :gh-user:`WyattBlue` (:issue:`2156`).

Features:

Expand Down
1 change: 1 addition & 0 deletions av/codec/context.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ cdef class CodecContext:
# TODO: Remove the `Packet` from `_setup_decoded_frame` (because flushing packets
# are bogus). It should take all info it needs from the context and/or stream.
cdef _prepare_and_time_rebase_frames_for_encode(self, Frame frame)
cdef void _setup_encode_hwframes(self)
cdef list _prepare_frames_for_encode(self, Frame frame)
cdef _setup_encoded_packet(self, Packet)
cdef _setup_decoded_frame(self, Frame, Packet)
Expand Down
48 changes: 48 additions & 0 deletions av/codec/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,8 @@ def open(self, strict: cython.bint = True):
self.ptr.time_base.num = 1
self.ptr.time_base.den = lib.AV_TIME_BASE

self._setup_encode_hwframes()

err_check(
lib.avcodec_open2(self.ptr, self.codec.ptr, cython.address(options.ptr)),
f'avcodec_open2("{self.codec.name}", {self.options})',
Expand Down Expand Up @@ -380,6 +382,52 @@ def _send_frame_and_recv(self, frame: Frame | None):
yield packet
packet = self._recv_packet()

@cython.cfunc
def _setup_encode_hwframes(self) -> cython.void:
    # Attach a hardware frames context to a hardware-accelerated encoder.
    #
    # The frames context is configured from the codec context's width, height
    # and pixel format. Those are only final once the user has configured the
    # stream, so this is called from open() right before the codec is opened
    # rather than at construction time.

    # Nothing to do without a hardware accelerator, for decoders, or when a
    # frames context has already been attached.
    if self.hwaccel_ctx is None:
        return
    if not self.is_encoder:
        return
    if self.ptr.hw_frames_ctx != cython.NULL:
        return

    device_fmt: lib.AVPixelFormat = self.hwaccel_ctx.config.ptr.pix_fmt
    host_fmt: lib.AVPixelFormat = cython.cast(lib.AVPixelFormat, self.ptr.pix_fmt)

    # pix_fmt on the codec context is read as the *software* format of the
    # frames the user will feed in. When it is unset, or was left as the
    # hardware format, fall back to nv12.
    if host_fmt == lib.AV_PIX_FMT_NONE or host_fmt == device_fmt:
        host_fmt = lib.av_get_pix_fmt(b"nv12")

    pool_ref: cython.pointer[lib.AVBufferRef] = lib.av_hwframe_ctx_alloc(
        self.hwaccel_ctx.ptr
    )
    if pool_ref == cython.NULL:
        raise MemoryError("av_hwframe_ctx_alloc() failed")

    # Fill in the frames context fields before initializing it.
    pool: cython.pointer[lib.AVHWFramesContext] = cython.cast(
        cython.pointer[lib.AVHWFramesContext], pool_ref.data
    )
    pool.format = device_fmt
    pool.sw_format = host_fmt
    pool.width = self.ptr.width
    pool.height = self.ptr.height
    pool.initial_pool_size = 32

    try:
        err_check(lib.av_hwframe_ctx_init(pool_ref))
    except Exception:
        # Initialization failed: drop our reference before propagating.
        lib.av_buffer_unref(cython.address(pool_ref))
        raise

    # From here on the codec context holds the reference. It is deliberately
    # not unreferenced in this function.
    self.ptr.hw_frames_ctx = pool_ref
    self.ptr.sw_pix_fmt = host_fmt
    self.ptr.pix_fmt = device_fmt

@cython.cfunc
def _prepare_frames_for_encode(self, frame: Frame | None) -> list:
    # Base implementation: pass the frame (or None) through unchanged, wrapped
    # in a one-element list.
    prepared: list = [frame]
    return prepared
Expand Down
15 changes: 11 additions & 4 deletions av/codec/hwaccel.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,17 @@ def __init__(
self.ptr = cython.NULL
self.config = None

def _initialize_hw_context(self, codec: Codec):
def _initialize_hw_context(self, codec: Codec, for_encoding: bool = False):
# Decoders advertise the device-context method, while encoders (e.g.
# h264_vaapi) advertise the frames-context method. Accept either one when
# setting up an encoder.
supported_methods: cython.int = lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX
if for_encoding:
supported_methods |= lib.AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX

config: HWConfig
for config in codec.hardware_configs:
if not (config.ptr.methods & lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX):
if not (config.ptr.methods & supported_methods):
continue
if self._device_type and config.device_type != self._device_type:
continue
Expand All @@ -168,7 +175,7 @@ def _initialize_hw_context(self, codec: Codec):
)
)

def create(self, codec: Codec) -> HWAccel:
def create(self, codec: Codec, for_encoding: bool = False) -> HWAccel:
"""Create a new hardware accelerator context with the given codec"""
if self.ptr:
raise RuntimeError("Hardware context already initialized")
Expand All @@ -180,7 +187,7 @@ def create(self, codec: Codec) -> HWAccel:
options=self.options,
is_hw_owned=self.is_hw_owned,
)
ret._initialize_hw_context(codec)
ret._initialize_hw_context(codec, for_encoding=for_encoding)
return ret

def __dealloc__(self):
Expand Down
2 changes: 1 addition & 1 deletion av/codec/hwaccel.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,6 @@ class HWAccel:
flags: int | None = None,
is_hw_owned: bool = False,
) -> None: ...
def create(self, codec: Codec) -> HWAccel: ...
def create(self, codec: Codec, for_encoding: bool = False) -> HWAccel: ...

def hwdevices_available() -> list[str]: ...
18 changes: 15 additions & 3 deletions av/container/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from cython.cimports.av.bitstream import BitStreamFilterContext
from cython.cimports.av.codec.codec import Codec
from cython.cimports.av.codec.context import CodecContext, wrap_codec_context
from cython.cimports.av.codec.hwaccel import HWAccel
from cython.cimports.av.container.streams import StreamContainer
from cython.cimports.av.dictionary import Dictionary
from cython.cimports.av.error import err_check
Expand Down Expand Up @@ -79,15 +80,26 @@ def __dealloc__(self):
with cython.nogil:
lib.av_packet_free(cython.address(self.packet_ptr))

def add_stream(self, codec_name, rate=None, options: dict | None = None, **kwargs):
"""add_stream(codec_name, rate=None)
def add_stream(
self,
codec_name,
rate=None,
options: dict | None = None,
hwaccel: HWAccel | None = None,
**kwargs,
):
"""add_stream(codec_name, rate=None, *, hwaccel=None)

Creates a new stream from a codec name and returns it.
Supports video, audio, and subtitle streams.

:param codec_name: The name of a codec.
:type codec_name: str
:param dict options: Stream options.
:param HWAccel hwaccel: Optional settings for hardware-accelerated encoding.
Only applies to video streams (e.g. ``h264_vaapi``); software frames
passed to :meth:`~av.codec.context.CodecContext.encode` are uploaded to
the device automatically.
:param \\**kwargs: Set attributes for the stream.
:rtype: The new :class:`~av.stream.Stream`.

Expand Down Expand Up @@ -164,7 +176,7 @@ def add_stream(self, codec_name, rate=None, options: dict | None = None, **kwarg
err_check(lib.avcodec_parameters_from_context(stream.codecpar, ctx))

# Construct the user-land stream
py_codec_context: CodecContext = wrap_codec_context(ctx, codec, None)
py_codec_context: CodecContext = wrap_codec_context(ctx, codec, hwaccel)
py_stream: Stream = wrap_stream(self, stream, py_codec_context)
self.streams.add_stream(py_stream)

Expand Down
3 changes: 3 additions & 0 deletions av/container/output.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ from typing import TypeVar, overload

from av.audio import _AudioCodecName
from av.audio.stream import AudioStream
from av.codec.hwaccel import HWAccel
from av.packet import Packet
from av.stream import AttachmentStream, DataStream, Stream
from av.subtitles.stream import SubtitleStream
Expand All @@ -29,6 +30,7 @@ class OutputContainer(Container):
codec_name: _VideoCodecName,
rate: Fraction | int | None = None,
options: dict[str, str] | None = None,
hwaccel: HWAccel | None = None,
**kwargs,
) -> VideoStream: ...
@overload
Expand All @@ -37,6 +39,7 @@ class OutputContainer(Container):
codec_name: str,
rate: Fraction | int | None = None,
options: dict[str, str] | None = None,
hwaccel: HWAccel | None = None,
**kwargs,
) -> VideoStream | AudioStream | SubtitleStream: ...
def add_mux_stream(
Expand Down
1 change: 1 addition & 0 deletions av/video/codeccontext.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ cdef class VideoCodecContext(CodecContext):
cdef AVCodecPrivateData _private_data
cdef readonly VideoReformatter reformatter
cdef VideoFrame next_frame
cdef VideoFrame _encode_upload_frame(self, VideoFrame vframe)
101 changes: 78 additions & 23 deletions av/video/codeccontext.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,36 +49,91 @@ def _init(
):
CodecContext._init(self, ptr, codec, hwaccel)

if hwaccel is not None:
try:
self.hwaccel_ctx = hwaccel.create(self.codec)
self.ptr.hw_device_ctx = lib.av_buffer_ref(self.hwaccel_ctx.ptr)
self.ptr.pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt
self.ptr.get_format = _get_hw_format
self._private_data.hardware_pix_fmt = (
self.hwaccel_ctx.config.ptr.pix_fmt
)
self._private_data.allow_software_fallback = (
self.hwaccel.allow_software_fallback
)
self.ptr.opaque = cython.address(self._private_data)
except NotImplementedError:
# Some streams may not have a hardware decoder. For example, many action
# cam videos have a low resolution mjpeg stream, which is usually not
# compatible with hardware decoders.
# The user may have passed in a hwaccel because they want to decode the main
# stream with it, so we shouldn't abort even if we find a stream that can't
# be HW decoded.
# If the user wants to make sure hwaccel is actually used, they can check with the
# is_hwaccel() function on each stream's codec context.
self.hwaccel_ctx = None
if hwaccel is None:
return

if self.is_encoder:
# Hardware-accelerated encoding. We only attach the device context here;
# the hardware frames context depends on the final width/height/pixel
# format (set by the user after add_stream()), so it is built lazily in
# CodecContext.open() via _setup_encode_hwframes().
self.hwaccel_ctx = hwaccel.create(self.codec, for_encoding=True)
self.ptr.hw_device_ctx = lib.av_buffer_ref(self.hwaccel_ctx.ptr)
return

try:
self.hwaccel_ctx = hwaccel.create(self.codec)
self.ptr.hw_device_ctx = lib.av_buffer_ref(self.hwaccel_ctx.ptr)
self.ptr.pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt
self.ptr.get_format = _get_hw_format
self._private_data.hardware_pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt
self._private_data.allow_software_fallback = (
self.hwaccel.allow_software_fallback
)
self.ptr.opaque = cython.address(self._private_data)
except NotImplementedError:
# Some streams may not have a hardware decoder. For example, many action
# cam videos have a low resolution mjpeg stream, which is usually not
# compatible with hardware decoders.
# The user may have passed in a hwaccel because they want to decode the main
# stream with it, so we shouldn't abort even if we find a stream that can't
# be HW decoded.
# If the user wants to make sure hwaccel is actually used, they can check with the
# is_hwaccel() function on each stream's codec context.
self.hwaccel_ctx = None

@cython.cfunc
def _encode_upload_frame(self, vframe: VideoFrame) -> VideoFrame:
    # Return a frame allocated from this encoder's hardware frames context that
    # carries the contents of ``vframe``. Expects self.ptr.hw_frames_ctx to be
    # set already; it is dereferenced without a NULL check.
    pool: cython.pointer[lib.AVHWFramesContext] = cython.cast(
        cython.pointer[lib.AVHWFramesContext], self.ptr.hw_frames_ctx.data
    )

    # A frame already in the hardware pixel format is handed back untouched.
    if vframe.ptr.format == pool.format:
        return vframe

    # av_hwframe_transfer_data() neither converts the pixel format nor scales,
    # so first bring the frame to the frames context's software format and
    # dimensions when any of them differ.
    if not (
        vframe.ptr.format == pool.sw_format
        and vframe.ptr.width == pool.width
        and vframe.ptr.height == pool.height
    ):
        if not self.reformatter:
            self.reformatter = VideoReformatter()
        vframe = self.reformatter.reformat(
            vframe,
            pool.width,
            pool.height,
            get_video_format(pool.sw_format, pool.width, pool.height),
            threads=self.ptr.thread_count,
        )

    # Take a buffer from the frames context, then copy the pixel data into it.
    uploaded: VideoFrame = alloc_video_frame()
    err_check(lib.av_hwframe_get_buffer(self.ptr.hw_frames_ctx, uploaded.ptr, 0))
    err_check(lib.av_hwframe_transfer_data(uploaded.ptr, vframe.ptr, 0))

    # Carry over the source frame's attributes (without its data layout).
    uploaded._copy_internal_attributes(vframe, data_layout=False)
    uploaded._init_user_attributes()

    # Frames without a timestamp get the codec context's frame counter.
    if uploaded.ptr.pts == lib.AV_NOPTS_VALUE:
        uploaded.ptr.pts = self.ptr.frame_num

    return uploaded

@cython.cfunc
def _prepare_frames_for_encode(self, input: Frame | None) -> list:
if input is None or not input:
return [None]

vframe: VideoFrame = input

# Hardware-accelerated encoding: upload the (software) frame to the device.
if self.ptr.hw_frames_ctx != cython.NULL:
return [self._encode_upload_frame(vframe)]

if (
vframe.format.pix_fmt != self.pix_fmt
or vframe.width != self.ptr.width
Expand Down
1 change: 1 addition & 0 deletions include/avcodec.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ cdef extern from "libavcodec/avcodec.h" nogil:

AVHWAccel *hwaccel
AVBufferRef *hw_device_ctx
AVBufferRef *hw_frames_ctx

int thread_count
int thread_type
Expand Down
2 changes: 2 additions & 0 deletions include/avutil.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -216,12 +216,14 @@ cdef extern from "libavutil/hwcontext.h" nogil:
AVPixelFormat sw_format
int width
int height
int initial_pool_size

cdef int av_hwdevice_ctx_create(AVBufferRef **device_ctx, AVHWDeviceType type, const char *device, AVDictionary *opts, int flags)
cdef AVHWDeviceType av_hwdevice_find_type_by_name(const char *name)
cdef const char *av_hwdevice_get_type_name(AVHWDeviceType type)
cdef AVHWDeviceType av_hwdevice_iterate_types(AVHWDeviceType prev)
cdef int av_hwframe_transfer_data(AVFrame *dst, const AVFrame *src, int flags)
cdef int av_hwframe_get_buffer(AVBufferRef *hwframe_ctx, AVFrame *frame, int flags)

cdef AVBufferRef *av_hwframe_ctx_alloc(AVBufferRef *device_ref)
cdef int av_hwframe_ctx_init(AVBufferRef *ref)
Expand Down
Loading
Loading