PyAV-Org · WyattBlue · Jun 24, 2026 · Jun 24, 2026
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -34,6 +34,7 @@ v18.0.0 (next)
 Breaking:
 
 - Remove Python 3.10
+- Support HW encoding via a ``hwaccel`` parameter on ``OutputContainer.add_stream`` (e.g. ``h264_vaapi``, ``h264_nvenc``, ``h264_videotoolbox``); software frames passed to ``encode`` are uploaded to the device automatically by :gh-user:`WyattBlue` (:issue:`2156`).
 
 Features:
 

diff --git a/av/codec/context.pxd b/av/codec/context.pxd
@@ -41,6 +41,7 @@ cdef class CodecContext:
     # TODO: Remove the `Packet` from `_setup_decoded_frame` (because flushing packets
     # are bogus). It should take all info it needs from the context and/or stream.
     cdef _prepare_and_time_rebase_frames_for_encode(self, Frame frame)
+    cdef void _setup_encode_hwframes(self)
     cdef list _prepare_frames_for_encode(self, Frame frame)
     cdef _setup_encoded_packet(self, Packet)
     cdef _setup_decoded_frame(self, Frame, Packet)

diff --git a/av/codec/context.py b/av/codec/context.py
@@ -246,6 +246,8 @@ def open(self, strict: cython.bint = True):
                 self.ptr.time_base.num = 1
                 self.ptr.time_base.den = lib.AV_TIME_BASE
 
+        self._setup_encode_hwframes()
+
         err_check(
             lib.avcodec_open2(self.ptr, self.codec.ptr, cython.address(options.ptr)),
             f'avcodec_open2("{self.codec.name}", {self.options})',
@@ -380,6 +382,52 @@ def _send_frame_and_recv(self, frame: Frame | None):
             yield packet
             packet = self._recv_packet()
 
+    @cython.cfunc
+    def _setup_encode_hwframes(self) -> cython.void:
+        # Attach a hardware frames context to an encoder that was given a hwaccel.
+        # No-op when there is no hwaccel_ctx, when this context is not an encoder,
+        # or when ptr.hw_frames_ctx is already set. The frames context needs the
+        # final width/height/pixel format, which the user sets after add_stream(),
+        # so this is deferred until just before the codec is opened. Raises
+        # MemoryError if allocation fails; init errors surface through err_check().
+        if self.hwaccel_ctx is None or not self.is_encoder:
+            return
+        if self.ptr.hw_frames_ctx:
+            return  # Already set up.
+
+        hw_format: lib.AVPixelFormat = self.hwaccel_ctx.config.ptr.pix_fmt
+        sw_format: lib.AVPixelFormat = cython.cast(lib.AVPixelFormat, self.ptr.pix_fmt)
+
+        # The codec context's pix_fmt holds the *software* format the user feeds in.
+        # If it equals the hardware format or is AV_PIX_FMT_NONE, fall back to nv12.
+        if sw_format == hw_format or sw_format == lib.AV_PIX_FMT_NONE:
+            sw_format = lib.av_get_pix_fmt(b"nv12")
+
+        frames_ref: cython.pointer[lib.AVBufferRef] = lib.av_hwframe_ctx_alloc(
+            self.hwaccel_ctx.ptr
+        )
+        if frames_ref == cython.NULL:
+            raise MemoryError("av_hwframe_ctx_alloc() failed")
+
+        try:
+            frames_ctx: cython.pointer[lib.AVHWFramesContext] = cython.cast(
+                cython.pointer[lib.AVHWFramesContext], frames_ref.data
+            )
+            frames_ctx.format = hw_format
+            frames_ctx.sw_format = sw_format
+            frames_ctx.width = self.ptr.width
+            frames_ctx.height = self.ptr.height
+            frames_ctx.initial_pool_size = 32  # hard-coded; not caller-configurable
+            err_check(lib.av_hwframe_ctx_init(frames_ref))
+        except Exception:
+            lib.av_buffer_unref(cython.address(frames_ref))  # not yet handed over
+            raise
+
+        # Ownership of frames_ref transfers to the codec context.
+        self.ptr.hw_frames_ctx = frames_ref
+        self.ptr.sw_pix_fmt = sw_format  # format of the frames the user feeds in
+        self.ptr.pix_fmt = hw_format  # the codec is opened with the hardware format
+
     @cython.cfunc
     def _prepare_frames_for_encode(self, frame: Frame | None) -> list:
         return [frame]

diff --git a/av/codec/hwaccel.py b/av/codec/hwaccel.py
@@ -140,10 +140,17 @@ def __init__(
         self.ptr = cython.NULL
         self.config = None
 
-    def _initialize_hw_context(self, codec: Codec):
+    def _initialize_hw_context(self, codec: Codec, for_encoding: bool = False):
+        # Decoders advertise the device-context method, while encoders (e.g.
+        # h264_vaapi) advertise the frames-context method. Accept either one when
+        # setting up an encoder.
+        supported_methods: cython.int = lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX
+        if for_encoding:
+            supported_methods |= lib.AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX
+
         config: HWConfig
         for config in codec.hardware_configs:
-            if not (config.ptr.methods & lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX):
+            if not (config.ptr.methods & supported_methods):
                 continue
             if self._device_type and config.device_type != self._device_type:
                 continue
@@ -168,7 +175,7 @@ def _initialize_hw_context(self, codec: Codec):
             )
         )
 
-    def create(self, codec: Codec) -> HWAccel:
+    def create(self, codec: Codec, for_encoding: bool = False) -> HWAccel:
         """Create a new hardware accelerator context with the given codec"""
         if self.ptr:
             raise RuntimeError("Hardware context already initialized")
@@ -180,7 +187,7 @@ def create(self, codec: Codec) -> HWAccel:
             options=self.options,
             is_hw_owned=self.is_hw_owned,
         )
-        ret._initialize_hw_context(codec)
+        ret._initialize_hw_context(codec, for_encoding=for_encoding)
         return ret
 
     def __dealloc__(self):

diff --git a/av/codec/hwaccel.pyi b/av/codec/hwaccel.pyi
@@ -52,6 +52,6 @@ class HWAccel:
         flags: int | None = None,
         is_hw_owned: bool = False,
     ) -> None: ...
-    def create(self, codec: Codec) -> HWAccel: ...
+    def create(self, codec: Codec, for_encoding: bool = False) -> HWAccel: ...
 
 def hwdevices_available() -> list[str]: ...
diff --git a/av/container/output.py b/av/container/output.py
@@ -5,6 +5,7 @@
 from cython.cimports.av.bitstream import BitStreamFilterContext
 from cython.cimports.av.codec.codec import Codec
 from cython.cimports.av.codec.context import CodecContext, wrap_codec_context
+from cython.cimports.av.codec.hwaccel import HWAccel
 from cython.cimports.av.container.streams import StreamContainer
 from cython.cimports.av.dictionary import Dictionary
 from cython.cimports.av.error import err_check
@@ -79,15 +80,26 @@ def __dealloc__(self):
         with cython.nogil:
             lib.av_packet_free(cython.address(self.packet_ptr))
 
-    def add_stream(self, codec_name, rate=None, options: dict | None = None, **kwargs):
-        """add_stream(codec_name, rate=None)
+    def add_stream(
+        self,
+        codec_name,
+        rate=None,
+        options: dict | None = None,
+        hwaccel: HWAccel | None = None,
+        **kwargs,
+    ):
+        """add_stream(codec_name, rate=None, *, hwaccel=None)
 
         Creates a new stream from a codec name and returns it.
         Supports video, audio, and subtitle streams.
 
         :param codec_name: The name of a codec.
         :type codec_name: str
         :param dict options: Stream options.
+        :param HWAccel hwaccel: Optional settings for hardware-accelerated encoding.
+            Only applies to video streams (e.g. ``h264_vaapi``); software frames
+            passed to :meth:`~av.codec.context.CodecContext.encode` are uploaded to
+            the device automatically.
         :param \\**kwargs: Set attributes for the stream.
         :rtype: The new :class:`~av.stream.Stream`.
 
@@ -164,7 +176,7 @@ def add_stream(self, codec_name, rate=None, options: dict | None = None, **kwarg
         err_check(lib.avcodec_parameters_from_context(stream.codecpar, ctx))
 
         # Construct the user-land stream
-        py_codec_context: CodecContext = wrap_codec_context(ctx, codec, None)
+        py_codec_context: CodecContext = wrap_codec_context(ctx, codec, hwaccel)
         py_stream: Stream = wrap_stream(self, stream, py_codec_context)
         self.streams.add_stream(py_stream)
 

diff --git a/av/container/output.pyi b/av/container/output.pyi
@@ -4,6 +4,7 @@ from typing import TypeVar, overload
 
 from av.audio import _AudioCodecName
 from av.audio.stream import AudioStream
+from av.codec.hwaccel import HWAccel
 from av.packet import Packet
 from av.stream import AttachmentStream, DataStream, Stream
 from av.subtitles.stream import SubtitleStream
@@ -29,6 +30,7 @@ class OutputContainer(Container):
         codec_name: _VideoCodecName,
         rate: Fraction | int | None = None,
         options: dict[str, str] | None = None,
+        hwaccel: HWAccel | None = None,
         **kwargs,
     ) -> VideoStream: ...
     @overload
@@ -37,6 +39,7 @@ class OutputContainer(Container):
         codec_name: str,
         rate: Fraction | int | None = None,
         options: dict[str, str] | None = None,
+        hwaccel: HWAccel | None = None,
         **kwargs,
     ) -> VideoStream | AudioStream | SubtitleStream: ...
     def add_mux_stream(

diff --git a/av/video/codeccontext.pxd b/av/video/codeccontext.pxd
@@ -19,3 +19,4 @@ cdef class VideoCodecContext(CodecContext):
     cdef AVCodecPrivateData _private_data
     cdef readonly VideoReformatter reformatter
     cdef VideoFrame next_frame
+    cdef VideoFrame _encode_upload_frame(self, VideoFrame vframe)
diff --git a/av/video/codeccontext.py b/av/video/codeccontext.py
@@ -49,36 +49,91 @@ def _init(
     ):
         CodecContext._init(self, ptr, codec, hwaccel)
 
-        if hwaccel is not None:
-            try:
-                self.hwaccel_ctx = hwaccel.create(self.codec)
-                self.ptr.hw_device_ctx = lib.av_buffer_ref(self.hwaccel_ctx.ptr)
-                self.ptr.pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt
-                self.ptr.get_format = _get_hw_format
-                self._private_data.hardware_pix_fmt = (
-                    self.hwaccel_ctx.config.ptr.pix_fmt
-                )
-                self._private_data.allow_software_fallback = (
-                    self.hwaccel.allow_software_fallback
-                )
-                self.ptr.opaque = cython.address(self._private_data)
-            except NotImplementedError:
-                # Some streams may not have a hardware decoder. For example, many action
-                # cam videos have a low resolution mjpeg stream, which is usually not
-                # compatible with hardware decoders.
-                # The user may have passed in a hwaccel because they want to decode the main
-                # stream with it, so we shouldn't abort even if we find a stream that can't
-                # be HW decoded.
-                # If the user wants to make sure hwaccel is actually used, they can check with the
-                # is_hwaccel() function on each stream's codec context.
-                self.hwaccel_ctx = None
+        if hwaccel is None:
+            return
+
+        if self.is_encoder:
+            # Hardware-accelerated encoding. We only attach the device context here;
+            # the hardware frames context depends on the final width/height/pixel
+            # format (set by the user after add_stream()), so it is built lazily in
+            # CodecContext.open() via _setup_encode_hwframes().
+            self.hwaccel_ctx = hwaccel.create(self.codec, for_encoding=True)
+            self.ptr.hw_device_ctx = lib.av_buffer_ref(self.hwaccel_ctx.ptr)
+            return
+
+        try:
+            self.hwaccel_ctx = hwaccel.create(self.codec)
+            self.ptr.hw_device_ctx = lib.av_buffer_ref(self.hwaccel_ctx.ptr)
+            self.ptr.pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt
+            self.ptr.get_format = _get_hw_format
+            self._private_data.hardware_pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt
+            self._private_data.allow_software_fallback = (
+                self.hwaccel.allow_software_fallback
+            )
+            self.ptr.opaque = cython.address(self._private_data)
+        except NotImplementedError:
+            # Some streams may not have a hardware decoder. For example, many action
+            # cam videos have a low resolution mjpeg stream, which is usually not
+            # compatible with hardware decoders.
+            # The user may have passed in a hwaccel because they want to decode the main
+            # stream with it, so we shouldn't abort even if we find a stream that can't
+            # be HW decoded.
+            # If the user wants to make sure hwaccel is actually used, they can check with the
+            # is_hwaccel() function on each stream's codec context.
+            self.hwaccel_ctx = None
+
+    @cython.cfunc
+    def _encode_upload_frame(self, vframe: VideoFrame) -> VideoFrame:
+        # Return a frame in ptr.hw_frames_ctx's hardware format, uploading if needed.
+        frames_ctx: cython.pointer[lib.AVHWFramesContext] = cython.cast(
+            cython.pointer[lib.AVHWFramesContext], self.ptr.hw_frames_ctx.data
+        )
+
+        # A frame already in the frames context's hardware format is returned as-is.
+        if vframe.ptr.format == frames_ctx.format:
+            return vframe
+
+        # Convert to the frames context's software format and size before uploading,
+        # since av_hwframe_transfer_data() does not change pixel format or scale.
+        if (
+            vframe.ptr.format != frames_ctx.sw_format
+            or vframe.ptr.width != frames_ctx.width
+            or vframe.ptr.height != frames_ctx.height
+        ):
+            if not self.reformatter:
+                self.reformatter = VideoReformatter()  # created once, then reused
+            vframe = self.reformatter.reformat(
+                vframe,
+                frames_ctx.width,
+                frames_ctx.height,
+                get_video_format(
+                    frames_ctx.sw_format, frames_ctx.width, frames_ctx.height
+                ),
+                threads=self.ptr.thread_count,
+            )
+
+        hwframe: VideoFrame = alloc_video_frame()  # destination for the upload
+        err_check(lib.av_hwframe_get_buffer(self.ptr.hw_frames_ctx, hwframe.ptr, 0))
+        err_check(lib.av_hwframe_transfer_data(hwframe.ptr, vframe.ptr, 0))
+        hwframe._copy_internal_attributes(vframe, data_layout=False)
+        hwframe._init_user_attributes()
+
+        if hwframe.ptr.pts == lib.AV_NOPTS_VALUE:
+            hwframe.ptr.pts = self.ptr.frame_num  # no pts supplied: use frame_num
+
+        return hwframe
 
     @cython.cfunc
     def _prepare_frames_for_encode(self, input: Frame | None) -> list:
         if input is None or not input:
             return [None]
 
         vframe: VideoFrame = input
+
+        # Hardware-accelerated encoding: upload the (software) frame to the device.
+        if self.ptr.hw_frames_ctx != cython.NULL:
+            return [self._encode_upload_frame(vframe)]
+
         if (
             vframe.format.pix_fmt != self.pix_fmt
             or vframe.width != self.ptr.width

diff --git a/include/avcodec.pxd b/include/avcodec.pxd
@@ -274,6 +274,7 @@ cdef extern from "libavcodec/avcodec.h" nogil:
 
         AVHWAccel *hwaccel
         AVBufferRef *hw_device_ctx
+        AVBufferRef *hw_frames_ctx
 
         int thread_count
         int thread_type

diff --git a/include/avutil.pxd b/include/avutil.pxd
@@ -216,12 +216,14 @@ cdef extern from "libavutil/hwcontext.h" nogil:
         AVPixelFormat sw_format
         int width
         int height
+        int initial_pool_size
 
     cdef int av_hwdevice_ctx_create(AVBufferRef **device_ctx, AVHWDeviceType type, const char *device, AVDictionary *opts, int flags)
     cdef AVHWDeviceType av_hwdevice_find_type_by_name(const char *name)
     cdef const char *av_hwdevice_get_type_name(AVHWDeviceType type)
     cdef AVHWDeviceType av_hwdevice_iterate_types(AVHWDeviceType prev)
     cdef int av_hwframe_transfer_data(AVFrame *dst, const AVFrame *src, int flags)
+    cdef int av_hwframe_get_buffer(AVBufferRef *hwframe_ctx, AVFrame *frame, int flags)
 
     cdef AVBufferRef *av_hwframe_ctx_alloc(AVBufferRef *device_ref)
     cdef int av_hwframe_ctx_init(AVBufferRef *ref)