From edfe3514e14747aa3d96784070dcf377c714680d Mon Sep 17 00:00:00 2001 From: Robin RICHARD Date: Wed, 15 Jan 2025 16:13:45 +0100 Subject: [PATCH 1/6] add support for grayf32le and gbrapf32le --- av/video/frame.pyx | 63 ++++++++++++++++++++++++++++++---------- tests/test_videoframe.py | 35 ++++++++++++++++++++++ 2 files changed, 82 insertions(+), 16 deletions(-) diff --git a/av/video/frame.pyx b/av/video/frame.pyx index 02cde3187..67c03b72c 100644 --- a/av/video/frame.pyx +++ b/av/video/frame.pyx @@ -295,6 +295,8 @@ cdef class VideoFrame(Frame): .. note:: For ``pal8``, an ``(image, palette)`` tuple will be returned, with the palette being in ARGB (PyAV will swap bytes if needed). + .. note:: For ``gbrp`` formats, channels are fliped in RGB order. + """ cdef VideoFrame frame = self.reformat(**kwargs) @@ -312,7 +314,7 @@ cdef class VideoFrame(Frame): return np.hstack(( useful_array(frame.planes[0]), useful_array(frame.planes[1]), - useful_array(frame.planes[2]) + useful_array(frame.planes[2]), )).reshape(-1, frame.height, frame.width) elif frame.format.name == "yuyv422": assert frame.width % 2 == 0 @@ -320,21 +322,28 @@ cdef class VideoFrame(Frame): return useful_array(frame.planes[0], 2).reshape(frame.height, frame.width, -1) elif frame.format.name == "gbrp": array = np.empty((frame.height, frame.width, 3), dtype="uint8") - array[:, :, 0] = useful_array(frame.planes[2], 1).reshape(-1, frame.width) - array[:, :, 1] = useful_array(frame.planes[0], 1).reshape(-1, frame.width) - array[:, :, 2] = useful_array(frame.planes[1], 1).reshape(-1, frame.width) + array[:, :, 0] = useful_array(frame.planes[2], 1).reshape(frame.height, frame.width) + array[:, :, 1] = useful_array(frame.planes[0], 1).reshape(frame.height, frame.width) + array[:, :, 2] = useful_array(frame.planes[1], 1).reshape(frame.height, frame.width) return array elif frame.format.name in ("gbrp10be", "gbrp12be", "gbrp14be", "gbrp16be", "gbrp10le", "gbrp12le", "gbrp14le", "gbrp16le"): array = 
np.empty((frame.height, frame.width, 3), dtype="uint16") - array[:, :, 0] = useful_array(frame.planes[2], 2, "uint16").reshape(-1, frame.width) - array[:, :, 1] = useful_array(frame.planes[0], 2, "uint16").reshape(-1, frame.width) - array[:, :, 2] = useful_array(frame.planes[1], 2, "uint16").reshape(-1, frame.width) + array[:, :, 0] = useful_array(frame.planes[2], 2, "uint16").reshape(frame.height, frame.width) + array[:, :, 1] = useful_array(frame.planes[0], 2, "uint16").reshape(frame.height, frame.width) + array[:, :, 2] = useful_array(frame.planes[1], 2, "uint16").reshape(frame.height, frame.width) return byteswap_array(array, frame.format.name.endswith("be")) elif frame.format.name in ("gbrpf32be", "gbrpf32le"): array = np.empty((frame.height, frame.width, 3), dtype="float32") - array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(-1, frame.width) - array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(-1, frame.width) - array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(-1, frame.width) + array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(frame.height, frame.width) + array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width) + array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(frame.height, frame.width) + return byteswap_array(array, frame.format.name.endswith("be")) + elif frame.format.name in ("gbrapf32be", "gbrapf32le"): + array = np.empty((frame.height, frame.width, 4), dtype="float32") + array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(frame.height, frame.width) + array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width) + array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(frame.height, frame.width) + array[:, :, 3] = useful_array(frame.planes[3], 4, "float32").reshape(frame.height, frame.width) return byteswap_array(array, frame.format.name.endswith("be")) 
elif frame.format.name in ("rgb24", "bgr24"): return useful_array(frame.planes[0], 3).reshape(frame.height, frame.width, -1) @@ -345,17 +354,22 @@ cdef class VideoFrame(Frame): elif frame.format.name in ("gray16be", "gray16le"): return byteswap_array( useful_array(frame.planes[0], 2, "uint16").reshape(frame.height, frame.width), - frame.format.name == "gray16be", + frame.format.name.endswith("be"), + ) + elif frame.format.name in ("grayf32be", "grayf32le"): + return byteswap_array( + useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width), + frame.format.name.endswith("be"), ) elif frame.format.name in ("rgb48be", "rgb48le"): return byteswap_array( useful_array(frame.planes[0], 6, "uint16").reshape(frame.height, frame.width, -1), - frame.format.name == "rgb48be", + frame.format.name.endswith("be"), ) elif frame.format.name in ("rgba64be", "rgba64le"): return byteswap_array( useful_array(frame.planes[0], 8, "uint16").reshape(frame.height, frame.width, -1), - frame.format.name == "rgba64be", + frame.format.name.endswith("be"), ) elif frame.format.name == "pal8": image = useful_array(frame.planes[0]).reshape(frame.height, frame.width) @@ -491,6 +505,8 @@ cdef class VideoFrame(Frame): must be in the system's native byte order. .. note:: for ``pal8``, an ``(image, palette)`` pair must be passed. `palette` must have shape (256, 4) and is given in ARGB format (PyAV will swap bytes if needed). + + .. note:: for ``gbrp`` formats, channels are assumed to be given in RGB order. 
""" if format == "pal8": array, palette = array @@ -568,19 +584,34 @@ cdef class VideoFrame(Frame): elif format in ("gray16be", "gray16le"): check_ndarray(array, "uint16", 2) frame = VideoFrame(array.shape[1], array.shape[0], format) - copy_array_to_plane(byteswap_array(array, format == "gray16be"), frame.planes[0], 2) + copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 2) + return frame + elif format in ("grayf32be", "grayf32le"): + check_ndarray(array, "float32", 2) + frame = VideoFrame(array.shape[1], array.shape[0], format) + copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 4) return frame elif format in ("rgb48be", "rgb48le"): check_ndarray(array, "uint16", 3) check_ndarray_shape(array, array.shape[2] == 3) frame = VideoFrame(array.shape[1], array.shape[0], format) - copy_array_to_plane(byteswap_array(array, format == "rgb48be"), frame.planes[0], 6) + copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 6) return frame elif format in ("rgba64be", "rgba64le"): check_ndarray(array, "uint16", 3) check_ndarray_shape(array, array.shape[2] == 4) frame = VideoFrame(array.shape[1], array.shape[0], format) - copy_array_to_plane(byteswap_array(array, format == "rgba64be"), frame.planes[0], 8) + copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 8) + return frame + elif format in ("gbrapf32be", "gbrapf32le"): + check_ndarray(array, "float32", 3) + check_ndarray_shape(array, array.shape[2] == 4) + + frame = VideoFrame(array.shape[1], array.shape[0], format) + copy_array_to_plane(byteswap_array(array[:, :, 1], format.endswith("be")), frame.planes[0], 4) + copy_array_to_plane(byteswap_array(array[:, :, 2], format.endswith("be")), frame.planes[1], 4) + copy_array_to_plane(byteswap_array(array[:, :, 0], format.endswith("be")), frame.planes[2], 4) + copy_array_to_plane(byteswap_array(array[:, :, 3], format.endswith("be")), frame.planes[3], 4) return 
frame elif format == "nv12": check_ndarray(array, "uint8", 2) diff --git a/tests/test_videoframe.py b/tests/test_videoframe.py index f044be949..256fd5035 100644 --- a/tests/test_videoframe.py +++ b/tests/test_videoframe.py @@ -222,6 +222,23 @@ def test_ndarray_gray_align() -> None: assert frame.format.name == "gray" assertNdarraysEqual(frame.to_ndarray(), array) +def test_ndarray_grayf32() -> None: + array = numpy.random.random_sample(size=(480, 640)).astype(numpy.float32) + for format in ("grayf32be", "grayf32le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert frame.width == 640 and frame.height == 480 + assert frame.format.name == format + assertNdarraysEqual(frame.to_ndarray(), array) + + +def test_ndarray_grayf32_align() -> None: + array = numpy.random.random_sample(size=(238, 318)).astype(numpy.float32) + for format in ("grayf32be", "grayf32le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert frame.width == 318 and frame.height == 238 + assert frame.format.name == format + assertNdarraysEqual(frame.to_ndarray(), array) + def test_ndarray_rgb() -> None: array = numpy.random.randint(0, 256, size=(480, 640, 3), dtype=numpy.uint8) @@ -365,6 +382,24 @@ def test_ndarray_gbrpf32_align() -> None: assertNdarraysEqual(frame.to_ndarray(), array) +def test_ndarray_gbrapf32() -> None: + array = numpy.random.random_sample(size=(480, 640, 4)).astype(numpy.float32) + for format in ("gbrapf32be", "gbrapf32le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert frame.width == 640 and frame.height == 480 + assert frame.format.name == format + assertNdarraysEqual(frame.to_ndarray(), array) + + +def test_ndarray_gbrapf32_allign() -> None: + array = numpy.random.random_sample(size=(238, 318, 4)).astype(numpy.float32) + for format in ("gbrapf32be", "gbrapf32le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert frame.width == 318 and frame.height == 238 + assert frame.format.name == format + 
assertNdarraysEqual(frame.to_ndarray(), array) + + def test_ndarray_yuv420p() -> None: array = numpy.random.randint(0, 256, size=(720, 640), dtype=numpy.uint8) frame = VideoFrame.from_ndarray(array, format="yuv420p") From df75ae8c071801141ec7355c173c6a8b2560e497 Mon Sep 17 00:00:00 2001 From: WyattBlue Date: Wed, 15 Jan 2025 12:09:10 -0500 Subject: [PATCH 2/6] Fixes --- CHANGELOG.rst | 1 + av/video/frame.pyx | 2 +- tests/test_videoframe.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index eed7b2528..7baadb4a8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -24,6 +24,7 @@ Features - Add hardware decoding by :gh-user:`matthewlai` and :gh-user:`WyattBlue` in (:pr:`1685`). - Add ``VideoFrame.rotation`` by :gh-user:`lgeiger` in (:pr:`1675`). +- Support grayf32le and gbrapf32le in numpy conversion by :gh-user:`robinechuca` in (:pr:`1712`). v14.0.1 diff --git a/av/video/frame.pyx b/av/video/frame.pyx index 67c03b72c..6e4a1dbdf 100644 --- a/av/video/frame.pyx +++ b/av/video/frame.pyx @@ -295,7 +295,7 @@ cdef class VideoFrame(Frame): .. note:: For ``pal8``, an ``(image, palette)`` tuple will be returned, with the palette being in ARGB (PyAV will swap bytes if needed). - .. note:: For ``gbrp`` formats, channels are fliped in RGB order. + .. note:: For ``gbrp`` formats, channels are flipped to RGB order.
""" cdef VideoFrame frame = self.reformat(**kwargs) diff --git a/tests/test_videoframe.py b/tests/test_videoframe.py index 256fd5035..250641676 100644 --- a/tests/test_videoframe.py +++ b/tests/test_videoframe.py @@ -222,6 +222,7 @@ def test_ndarray_gray_align() -> None: assert frame.format.name == "gray" assertNdarraysEqual(frame.to_ndarray(), array) + def test_ndarray_grayf32() -> None: array = numpy.random.random_sample(size=(480, 640)).astype(numpy.float32) for format in ("grayf32be", "grayf32le"): From 19c1f9d46755f7f70e27c238403833b3b3cee47c Mon Sep 17 00:00:00 2001 From: Robin RICHARD Date: Mon, 20 Jan 2025 15:08:47 +0100 Subject: [PATCH 3/6] add support of yuv[a]p16, and refactor to_ndarray and from_ndarray --- av/video/frame.pyx | 285 +++++++++++++++++++-------------------- tests/test_videoframe.py | 40 +++++- 2 files changed, 180 insertions(+), 145 deletions(-) diff --git a/av/video/frame.pyx b/av/video/frame.pyx index 6e4a1dbdf..c9793c3c7 100644 --- a/av/video/frame.pyx +++ b/av/video/frame.pyx @@ -282,11 +282,17 @@ cdef class VideoFrame(Frame): return Image.frombytes("RGB", (plane.width, plane.height), bytes(o_buf), "raw", "RGB", 0, 1) - def to_ndarray(self, **kwargs): + def to_ndarray(self, skip_channel: bool=True, gbr_to_rgb: bool=True, **kwargs): """Get a numpy array of this frame. Any ``**kwargs`` are passed to :meth:`.VideoReformatter.reformat`. + The array returned is generally of dimension (height, width, channels). + + :param bool skip_channel: If True, squeeze the channel dimension for grayscale frames. + :param bool gbr_to_rgb: If True, for ``gbrp`` formats, + channels are flipped to RGB order for backward compatibility. + .. note:: Numpy must be installed. .. note:: For formats which return an array of ``uint16`, the samples @@ -295,95 +301,97 @@ cdef class VideoFrame(Frame): .. note:: For ``pal8``, an ``(image, palette)`` tuple will be returned, with the palette being in ARGB (PyAV will swap bytes if needed). - .. 
note:: For ``gbrp`` formats, channels are flipped to RGB order. - """ cdef VideoFrame frame = self.reformat(**kwargs) import numpy as np - if frame.format.name in ("yuv420p", "yuvj420p"): - assert frame.width % 2 == 0 - assert frame.height % 2 == 0 - return np.hstack(( - useful_array(frame.planes[0]), - useful_array(frame.planes[1]), - useful_array(frame.planes[2]) - )).reshape(-1, frame.width) - elif frame.format.name in ("yuv444p", "yuvj444p"): - return np.hstack(( + # check size + if frame.format.name in {"yuv420p", "yuvj420p", "yuyv422"}: + assert frame.width % 2 == 0, "the width has to be even for this pixel format" + assert frame.height % 2 == 0, "the height has to be even for this pixel format" + + # cases planes are simply concatenated in shape (height, width, channels) + itemsize, dtype = { + "abgr": (4, "uint8"), + "argb": (4, "uint8"), + "bgr24": (3, "uint8"), + "bgr8": (1, "uint8"), + "bgra": (4, "uint8"), + "gbrapf32be": (4, "float32"), + "gbrapf32le": (4, "float32"), + "gbrp": (1, "uint8"), + "gbrp10be": (2, "uint16"), + "gbrp10le": (2, "uint16"), + "gbrp12be": (2, "uint16"), + "gbrp12le": (2, "uint16"), + "gbrp14be": (2, "uint16"), + "gbrp14le": (2, "uint16"), + "gbrp16be": (2, "uint16"), + "gbrp16le": (2, "uint16"), + "gbrpf32be": (4, "float32"), + "gbrpf32le": (4, "float32"), + "gray": (1, "uint8"), + "gray16be": (2, "uint16"), + "gray16le": (2, "uint16"), + "gray8": (1, "uint8"), + "grayf32be": (4, "float32"), + "grayf32le": (4, "float32"), + "rgb24": (3, "uint8"), + "rgb48be": (6, "uint16"), + "rgb48le": (6, "uint16"), + "rgb8": (1, "uint8"), + "rgba": (4, "uint8"), + "rgba64be": (8, "uint16"), + "rgba64le": (8, "uint16"), + "yuv444p": (1, "uint8"), + "yuv444p16be": (2, "uint16"), + "yuv444p16le": (2, "uint16"), + "yuva444p16be": (2, "uint16"), + "yuva444p16le": (2, "uint16"), + "yuvj444p": (1, "uint8"), + "yuyv422": (2, "uint8"), + }.get(frame.format.name, (None, None)) + if itemsize is not None: + layers = [ + useful_array(plan, itemsize, 
dtype) + .reshape(frame.height, frame.width, -1) + for plan in frame.planes + ] + if len(layers) == 1: # shortcut, avoid memory copy + array = layers[0] + else: # general case + array = np.concatenate(layers, axis=2) + array = byteswap_array(array, frame.format.name.endswith("be")) + if array.shape[2] == 1 and skip_channel: + return array.squeeze(2) + if gbr_to_rgb and frame.format.name.startswith("gbr"): + buffer = array[:, :, 0].copy() + array[:, :, 0] = array[:, :, 2] + array[:, :, 2] = array[:, :, 1] + array[:, :, 1] = buffer + return array + + # special cases + if frame.format.name in {"yuv420p", "yuvj420p"}: + return np.hstack([ useful_array(frame.planes[0]), useful_array(frame.planes[1]), useful_array(frame.planes[2]), - )).reshape(-1, frame.height, frame.width) - elif frame.format.name == "yuyv422": - assert frame.width % 2 == 0 - assert frame.height % 2 == 0 - return useful_array(frame.planes[0], 2).reshape(frame.height, frame.width, -1) - elif frame.format.name == "gbrp": - array = np.empty((frame.height, frame.width, 3), dtype="uint8") - array[:, :, 0] = useful_array(frame.planes[2], 1).reshape(frame.height, frame.width) - array[:, :, 1] = useful_array(frame.planes[0], 1).reshape(frame.height, frame.width) - array[:, :, 2] = useful_array(frame.planes[1], 1).reshape(frame.height, frame.width) - return array - elif frame.format.name in ("gbrp10be", "gbrp12be", "gbrp14be", "gbrp16be", "gbrp10le", "gbrp12le", "gbrp14le", "gbrp16le"): - array = np.empty((frame.height, frame.width, 3), dtype="uint16") - array[:, :, 0] = useful_array(frame.planes[2], 2, "uint16").reshape(frame.height, frame.width) - array[:, :, 1] = useful_array(frame.planes[0], 2, "uint16").reshape(frame.height, frame.width) - array[:, :, 2] = useful_array(frame.planes[1], 2, "uint16").reshape(frame.height, frame.width) - return byteswap_array(array, frame.format.name.endswith("be")) - elif frame.format.name in ("gbrpf32be", "gbrpf32le"): - array = np.empty((frame.height, frame.width, 3), 
dtype="float32") - array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(frame.height, frame.width) - array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width) - array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(frame.height, frame.width) - return byteswap_array(array, frame.format.name.endswith("be")) - elif frame.format.name in ("gbrapf32be", "gbrapf32le"): - array = np.empty((frame.height, frame.width, 4), dtype="float32") - array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(frame.height, frame.width) - array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width) - array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(frame.height, frame.width) - array[:, :, 3] = useful_array(frame.planes[3], 4, "float32").reshape(frame.height, frame.width) - return byteswap_array(array, frame.format.name.endswith("be")) - elif frame.format.name in ("rgb24", "bgr24"): - return useful_array(frame.planes[0], 3).reshape(frame.height, frame.width, -1) - elif frame.format.name in ("argb", "rgba", "abgr", "bgra"): - return useful_array(frame.planes[0], 4).reshape(frame.height, frame.width, -1) - elif frame.format.name in ("gray", "gray8", "rgb8", "bgr8"): - return useful_array(frame.planes[0]).reshape(frame.height, frame.width) - elif frame.format.name in ("gray16be", "gray16le"): - return byteswap_array( - useful_array(frame.planes[0], 2, "uint16").reshape(frame.height, frame.width), - frame.format.name.endswith("be"), - ) - elif frame.format.name in ("grayf32be", "grayf32le"): - return byteswap_array( - useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width), - frame.format.name.endswith("be"), - ) - elif frame.format.name in ("rgb48be", "rgb48le"): - return byteswap_array( - useful_array(frame.planes[0], 6, "uint16").reshape(frame.height, frame.width, -1), - frame.format.name.endswith("be"), - ) - elif frame.format.name 
in ("rgba64be", "rgba64le"): - return byteswap_array( - useful_array(frame.planes[0], 8, "uint16").reshape(frame.height, frame.width, -1), - frame.format.name.endswith("be"), - ) - elif frame.format.name == "pal8": + ]).reshape(-1, frame.width) + if frame.format.name == "pal8": image = useful_array(frame.planes[0]).reshape(frame.height, frame.width) palette = np.frombuffer(frame.planes[1], "i4").astype(">i4").reshape(-1, 1).view(np.uint8) return image, palette - elif frame.format.name == "nv12": - return np.hstack(( + if frame.format.name == "nv12": + return np.hstack([ useful_array(frame.planes[0]), - useful_array(frame.planes[1], 2) - )).reshape(-1, frame.width) - else: - raise ValueError( - f"Conversion to numpy array with format `{frame.format.name}` is not yet supported" - ) + useful_array(frame.planes[1], 2), + ]).reshape(-1, frame.width) + + raise ValueError( + f"Conversion to numpy array with format `{frame.format.name}` is not yet supported" + ) @staticmethod def from_image(img): @@ -497,17 +505,67 @@ cdef class VideoFrame(Frame): self._init_user_attributes() @staticmethod - def from_ndarray(array, format="rgb24"): + def from_ndarray(array, format="rgb24", rgb_to_gbr: bool=True): """ Construct a frame from a numpy array. + :param bool rgb_to_gbr: If True, for ``gbrp`` formats, + channels are assumed to be given in RGB order, for backward compatibility. + .. note:: For formats which expect an array of ``uint16``, the samples must be in the system's native byte order. .. note:: for ``pal8``, an ``(image, palette)`` pair must be passed. `palette` must have shape (256, 4) and is given in ARGB format (PyAV will swap bytes if needed). - .. note:: for ``gbrp`` formats, channels are assumed to be given in RGB order. 
""" + import numpy as np + + # case layers are concatenated + channels, itemsize, dtype = { + "yuv444p": (3, 1, "uint8"), + "yuvj444p": (3, 1, "uint8"), + "gbrp": (3, 1, "uint8"), + "gbrp10be": (3, 2, "uint16"), + "gbrp12be": (3, 2, "uint16"), + "gbrp14be": (3, 2, "uint16"), + "gbrp16be": (3, 2, "uint16"), + "gbrp10le": (3, 2, "uint16"), + "gbrp12le": (3, 2, "uint16"), + "gbrp14le": (3, 2, "uint16"), + "gbrp16le": (3, 2, "uint16"), + "gbrpf32be": (3, 4, "float32"), + "gbrpf32le": (3, 4, "float32"), + "gray": (1, 1, "uint8"), + "gray8": (1, 1, "uint8"), + "rgb8": (1, 1, "uint8"), + "bgr8": (1, 1, "uint8"), + "gray16be": (1, 2, "uint16"), + "gray16le": (1, 2, "uint16"), + "grayf32be": (1, 4, "float32"), + "grayf32le": (1, 4, "float32"), + "gbrapf32be": (4, 4, "float32"), + "gbrapf32le": (4, 4, "float32"), + "yuv444p16be": (3, 2, "uint16"), + "yuv444p16le": (3, 2, "uint16"), + "yuva444p16be": (4, 2, "uint16"), + "yuva444p16le": (4, 2, "uint16"), + }.get(format, (None, None, None)) + if channels is not None: + if array.ndim == 2: # (height, width) -> (height, width, 1) + array = array[:, :, None] + check_ndarray(array, dtype, 3) + check_ndarray_shape(array, array.shape[2] == channels) + array = byteswap_array(array, format.endswith("be")) + frame = VideoFrame(array.shape[1], array.shape[0], format) + if rgb_to_gbr and frame.format.name.startswith("gbr"): + array = np.concatenate([ # not inplace to avoid bad surprises + array[:, :, 1:3], array[:, :, 0:1], array[:, :, 3:], + ], axis=2) + for i in range(channels): + copy_array_to_plane(array[:, :, i], frame.planes[i], itemsize) + return frame + + # other cases if format == "pal8": array, palette = array check_ndarray(array, "uint8", 2) @@ -531,88 +589,29 @@ cdef class VideoFrame(Frame): copy_array_to_plane(flat[u_start:v_start], frame.planes[1], 1) copy_array_to_plane(flat[v_start:], frame.planes[2], 1) return frame - elif format in ("yuv444p", "yuvj444p"): - check_ndarray(array, "uint8", 3) - check_ndarray_shape(array, 
array.shape[0] == 3) - - frame = VideoFrame(array.shape[2], array.shape[1], format) - array = array.reshape(3, -1) - copy_array_to_plane(array[0], frame.planes[0], 1) - copy_array_to_plane(array[1], frame.planes[1], 1) - copy_array_to_plane(array[2], frame.planes[2], 1) - return frame elif format == "yuyv422": check_ndarray(array, "uint8", 3) check_ndarray_shape(array, array.shape[0] % 2 == 0) check_ndarray_shape(array, array.shape[1] % 2 == 0) check_ndarray_shape(array, array.shape[2] == 2) - elif format == "gbrp": - check_ndarray(array, "uint8", 3) - check_ndarray_shape(array, array.shape[2] == 3) - - frame = VideoFrame(array.shape[1], array.shape[0], format) - copy_array_to_plane(array[:, :, 1], frame.planes[0], 1) - copy_array_to_plane(array[:, :, 2], frame.planes[1], 1) - copy_array_to_plane(array[:, :, 0], frame.planes[2], 1) - return frame - elif format in ("gbrp10be", "gbrp12be", "gbrp14be", "gbrp16be", "gbrp10le", "gbrp12le", "gbrp14le", "gbrp16le"): - check_ndarray(array, "uint16", 3) - check_ndarray_shape(array, array.shape[2] == 3) - - frame = VideoFrame(array.shape[1], array.shape[0], format) - copy_array_to_plane(byteswap_array(array[:, :, 1], format.endswith("be")), frame.planes[0], 2) - copy_array_to_plane(byteswap_array(array[:, :, 2], format.endswith("be")), frame.planes[1], 2) - copy_array_to_plane(byteswap_array(array[:, :, 0], format.endswith("be")), frame.planes[2], 2) - return frame - elif format in ("gbrpf32be", "gbrpf32le"): - check_ndarray(array, "float32", 3) - check_ndarray_shape(array, array.shape[2] == 3) - - frame = VideoFrame(array.shape[1], array.shape[0], format) - copy_array_to_plane(byteswap_array(array[:, :, 1], format.endswith("be")), frame.planes[0], 4) - copy_array_to_plane(byteswap_array(array[:, :, 2], format.endswith("be")), frame.planes[1], 4) - copy_array_to_plane(byteswap_array(array[:, :, 0], format.endswith("be")), frame.planes[2], 4) - return frame - elif format in ("rgb24", "bgr24"): + elif format in {"rgb24", 
"bgr24"}: check_ndarray(array, "uint8", 3) check_ndarray_shape(array, array.shape[2] == 3) - elif format in ("argb", "rgba", "abgr", "bgra"): + elif format in {"argb", "rgba", "abgr", "bgra"}: check_ndarray(array, "uint8", 3) check_ndarray_shape(array, array.shape[2] == 4) - elif format in ("gray", "gray8", "rgb8", "bgr8"): - check_ndarray(array, "uint8", 2) - elif format in ("gray16be", "gray16le"): - check_ndarray(array, "uint16", 2) - frame = VideoFrame(array.shape[1], array.shape[0], format) - copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 2) - return frame - elif format in ("grayf32be", "grayf32le"): - check_ndarray(array, "float32", 2) - frame = VideoFrame(array.shape[1], array.shape[0], format) - copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 4) - return frame - elif format in ("rgb48be", "rgb48le"): + elif format in {"rgb48be", "rgb48le"}: check_ndarray(array, "uint16", 3) check_ndarray_shape(array, array.shape[2] == 3) frame = VideoFrame(array.shape[1], array.shape[0], format) copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 6) return frame - elif format in ("rgba64be", "rgba64le"): + elif format in {"rgba64be", "rgba64le"}: check_ndarray(array, "uint16", 3) check_ndarray_shape(array, array.shape[2] == 4) frame = VideoFrame(array.shape[1], array.shape[0], format) copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 8) return frame - elif format in ("gbrapf32be", "gbrapf32le"): - check_ndarray(array, "float32", 3) - check_ndarray_shape(array, array.shape[2] == 4) - - frame = VideoFrame(array.shape[1], array.shape[0], format) - copy_array_to_plane(byteswap_array(array[:, :, 1], format.endswith("be")), frame.planes[0], 4) - copy_array_to_plane(byteswap_array(array[:, :, 2], format.endswith("be")), frame.planes[1], 4) - copy_array_to_plane(byteswap_array(array[:, :, 0], format.endswith("be")), frame.planes[2], 4) - 
copy_array_to_plane(byteswap_array(array[:, :, 3], format.endswith("be")), frame.planes[3], 4) - return frame elif format == "nv12": check_ndarray(array, "uint8", 2) check_ndarray_shape(array, array.shape[0] % 3 == 0) diff --git a/tests/test_videoframe.py b/tests/test_videoframe.py index 250641676..19471b673 100644 --- a/tests/test_videoframe.py +++ b/tests/test_videoframe.py @@ -434,7 +434,7 @@ def test_ndarray_yuyv422() -> None: def test_ndarray_yuv444p() -> None: - array = numpy.random.randint(0, 256, size=(3, 480, 640), dtype=numpy.uint8) + array = numpy.random.randint(0, 256, size=(480, 640, 3), dtype=numpy.uint8) frame = VideoFrame.from_ndarray(array, format="yuv444p") assert frame.width == 640 and frame.height == 480 assert frame.format.name == "yuv444p" @@ -442,13 +442,49 @@ def test_ndarray_yuv444p() -> None: def test_ndarray_yuvj444p() -> None: - array = numpy.random.randint(0, 256, size=(3, 480, 640), dtype=numpy.uint8) + array = numpy.random.randint(0, 256, size=(480, 640, 3), dtype=numpy.uint8) frame = VideoFrame.from_ndarray(array, format="yuvj444p") assert frame.width == 640 and frame.height == 480 assert frame.format.name == "yuvj444p" assertNdarraysEqual(frame.to_ndarray(), array) +def test_ndarray_yuv444p16() -> None: + array = numpy.random.randint(0, 65536, size=(480, 640, 3), dtype=numpy.uint16) + for format in ("yuv444p16be", "yuv444p16le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert frame.width == 640 and frame.height == 480 + assert frame.format.name == format + assertNdarraysEqual(frame.to_ndarray(), array) + + +def test_ndarray_yuv444p16_allign() -> None: + array = numpy.random.randint(0, 65536, size=(238, 318, 3), dtype=numpy.uint16) + for format in ("yuv444p16be", "yuv444p16le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert frame.width == 318 and frame.height == 238 + assert frame.format.name == format + assertNdarraysEqual(frame.to_ndarray(), array) + + +def test_ndarray_yuva444p16() -> None: + 
array = numpy.random.randint(0, 65536, size=(480, 640, 4), dtype=numpy.uint16) + for format in ("yuva444p16be", "yuva444p16le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert frame.width == 640 and frame.height == 480 + assert frame.format.name == format + assertNdarraysEqual(frame.to_ndarray(), array) + + +def test_ndarray_yuva444p16_allign() -> None: + array = numpy.random.randint(0, 65536, size=(238, 318, 4), dtype=numpy.uint16) + for format in ("yuva444p16be", "yuva444p16le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert frame.width == 318 and frame.height == 238 + assert frame.format.name == format + assertNdarraysEqual(frame.to_ndarray(), array) + + def test_ndarray_yuyv422_align() -> None: array = numpy.random.randint(0, 256, size=(238, 318, 2), dtype=numpy.uint8) frame = VideoFrame.from_ndarray(array, format="yuyv422") From 826eb321a53274cb8b12e6246f4360b8781eebf9 Mon Sep 17 00:00:00 2001 From: Robin RICHARD Date: Tue, 21 Jan 2025 09:28:04 +0100 Subject: [PATCH 4/6] fix broken backward compatibility --- av/video/frame.pyx | 22 +++++++++++++++++++--- tests/test_videoframe.py | 4 ++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/av/video/frame.pyx b/av/video/frame.pyx index c9793c3c7..4d8ffced0 100644 --- a/av/video/frame.pyx +++ b/av/video/frame.pyx @@ -282,7 +282,13 @@ cdef class VideoFrame(Frame): return Image.frombytes("RGB", (plane.width, plane.height), bytes(o_buf), "raw", "RGB", 0, 1) - def to_ndarray(self, skip_channel: bool=True, gbr_to_rgb: bool=True, **kwargs): + def to_ndarray( + self, + skip_channel: bool=True, + gbr_to_rgb: bool=True, + yuv444p_channel_first: bool=True, + **kwargs + ): """Get a numpy array of this frame. Any ``**kwargs`` are passed to :meth:`.VideoReformatter.reformat`. @@ -292,6 +298,9 @@ cdef class VideoFrame(Frame): :param bool skip_channel: If True, squeeze the channel dimension for grayscale frames.
:param bool gbr_to_rgb: If True, for ``gbrp`` formats, channels are flipped to RGB order for backward compatibility. + :param bool yuv444p_channel_first: If True, the shape for the yuv444p and yuvj444p + will be (channels, height, width) rather than (height, width, channels) as usual. + This is for backward compatibility. .. note:: Numpy must be installed. @@ -370,6 +379,8 @@ cdef class VideoFrame(Frame): array[:, :, 0] = array[:, :, 2] array[:, :, 2] = array[:, :, 1] array[:, :, 1] = buffer + if yuv444p_channel_first and frame.format.name in {"yuv444p", "yuvj444p"}: + array = np.moveaxis(array, 2, 0) return array # special cases @@ -505,12 +516,15 @@ cdef class VideoFrame(Frame): self._init_user_attributes() @staticmethod - def from_ndarray(array, format="rgb24", rgb_to_gbr: bool=True): + def from_ndarray(array, format: str="rgb24", rgb_to_gbr: bool=True, yuv444p_channel_first: bool=True): """ Construct a frame from a numpy array. :param bool rgb_to_gbr: If True, for ``gbrp`` formats, channels are assumed to be given in RGB order, for backward compatibility. + :param bool yuv444p_channel_first: If True, the shape for the yuv444p and yuvj444p + is given by (channels, height, width) rather than (height, width, channels). + This is for backward compatibility. .. note:: For formats which expect an array of ``uint16``, the samples must be in the system's native byte order. 
@@ -554,6 +568,8 @@ cdef class VideoFrame(Frame): if array.ndim == 2: # (height, width) -> (height, width, 1) array = array[:, :, None] check_ndarray(array, dtype, 3) + if format in {"yuv444p", "yuvj444p"}: + array = np.moveaxis(array, 0, 2) check_ndarray_shape(array, array.shape[2] == channels) array = byteswap_array(array, format.endswith("be")) frame = VideoFrame(array.shape[1], array.shape[0], format) @@ -576,7 +592,7 @@ cdef class VideoFrame(Frame): copy_array_to_plane(array, frame.planes[0], 1) frame.planes[1].update(palette.view(">i4").astype("i4").tobytes()) return frame - elif format in ("yuv420p", "yuvj420p"): + elif format in {"yuv420p", "yuvj420p"}: check_ndarray(array, "uint8", 2) check_ndarray_shape(array, array.shape[0] % 3 == 0) check_ndarray_shape(array, array.shape[1] % 2 == 0) diff --git a/tests/test_videoframe.py b/tests/test_videoframe.py index 19471b673..8cb520b82 100644 --- a/tests/test_videoframe.py +++ b/tests/test_videoframe.py @@ -434,7 +434,7 @@ def test_ndarray_yuyv422() -> None: def test_ndarray_yuv444p() -> None: - array = numpy.random.randint(0, 256, size=(480, 640, 3), dtype=numpy.uint8) + array = numpy.random.randint(0, 256, size=(3, 480, 640), dtype=numpy.uint8) frame = VideoFrame.from_ndarray(array, format="yuv444p") assert frame.width == 640 and frame.height == 480 assert frame.format.name == "yuv444p" @@ -442,7 +442,7 @@ def test_ndarray_yuv444p() -> None: def test_ndarray_yuvj444p() -> None: - array = numpy.random.randint(0, 256, size=(480, 640, 3), dtype=numpy.uint8) + array = numpy.random.randint(0, 256, size=(3, 480, 640), dtype=numpy.uint8) frame = VideoFrame.from_ndarray(array, format="yuvj444p") assert frame.width == 640 and frame.height == 480 assert frame.format.name == "yuvj444p" From 5e3b2570625ffb10f0b81bc1961e75dc9c8bffdd Mon Sep 17 00:00:00 2001 From: Robin RICHARD Date: Wed, 22 Jan 2025 09:34:27 +0100 Subject: [PATCH 5/6] remove rgb vs gbr, remove skip_last and rename channel_last --- av/video/frame.pyi | 4 ++-- 
av/video/frame.pyx | 54 +++++++++++++++++++++------------------------- 2 files changed, 26 insertions(+), 32 deletions(-) diff --git a/av/video/frame.pyi b/av/video/frame.pyi index a3eea373d..8f6e349f0 100644 --- a/av/video/frame.pyi +++ b/av/video/frame.pyi @@ -59,7 +59,7 @@ class VideoFrame(Frame): ) -> VideoFrame: ... def to_rgb(self, **kwargs: Any) -> VideoFrame: ... def to_image(self, **kwargs: Any) -> Image.Image: ... - def to_ndarray(self, **kwargs: Any) -> _SupportedNDarray: ... + def to_ndarray(self, force_channel_last: bool = False, **kwargs: Any) -> _SupportedNDarray: ... @staticmethod def from_image(img: Image.Image) -> VideoFrame: ... @staticmethod @@ -67,7 +67,7 @@ class VideoFrame(Frame): array: _SupportedNDarray, format: str = "rgb24", width: int = 0 ) -> VideoFrame: ... @staticmethod - def from_ndarray(array: _SupportedNDarray, format: str = "rgb24") -> VideoFrame: ... + def from_ndarray(array: _SupportedNDarray, format: str = "rgb24", channel_last: bool = False) -> VideoFrame: ... @staticmethod def from_bytes( data: bytes, diff --git a/av/video/frame.pyx b/av/video/frame.pyx index 4d8ffced0..5cdfb233b 100644 --- a/av/video/frame.pyx +++ b/av/video/frame.pyx @@ -282,33 +282,28 @@ cdef class VideoFrame(Frame): return Image.frombytes("RGB", (plane.width, plane.height), bytes(o_buf), "raw", "RGB", 0, 1) - def to_ndarray( - self, - skip_channel: bool=True, - gbr_to_rgb: bool=True, - yuv444p_channel_first: bool=True, - **kwargs - ): + def to_ndarray(self, force_channel_last=False, **kwargs): """Get a numpy array of this frame. Any ``**kwargs`` are passed to :meth:`.VideoReformatter.reformat`. The array returned is generally of dimension (height, width, channels). - :param bool skip_channel: If True, squeeze the channel dimension for grayscale frames. - :param bool gbr_to_rgb: If True, for ``gbrp`` formats, - channels are flipped to RGB order for backward compatibility. 
- :param bool yuv444p_channel_first: If True, the shape for the yuv444p and yuvj444p - will be (channels, height, width) rather than (height, width, channels) as usual. - This is for backward compatibility. + :param bool force_channel_last: If False (default), the shape for the yuv444p and yuvj444p + will be (channels, height, width) rather than (height, width, channels) as usual. + This is for backward compatibility and also for keeping that + `bytes(to_ndarray(frame))` should be the same as the ffmpeg cli + when returning the pix_fmt with `-c:v rawvideo`. .. note:: Numpy must be installed. - .. note:: For formats which return an array of ``uint16`, the samples - will be in the system's native byte order. + .. note:: For formats which return an array of ``uint16`` or ``float32``, + the samples will be in the system's native byte order. .. note:: For ``pal8``, an ``(image, palette)`` tuple will be returned, - with the palette being in ARGB (PyAV will swap bytes if needed). + with the palette being in ARGB (PyAV will swap bytes if needed). + + .. note:: For ``gbrp`` formats, channels are flipped to RGB order. 
""" cdef VideoFrame frame = self.reformat(**kwargs) @@ -372,14 +367,14 @@ cdef class VideoFrame(Frame): else: # general case array = np.concatenate(layers, axis=2) array = byteswap_array(array, frame.format.name.endswith("be")) - if array.shape[2] == 1 and skip_channel: + if array.shape[2] == 1: # skip last channel for gray images return array.squeeze(2) - if gbr_to_rgb and frame.format.name.startswith("gbr"): + if frame.format.name.startswith("gbr"): # gbr -> rgb buffer = array[:, :, 0].copy() array[:, :, 0] = array[:, :, 2] array[:, :, 2] = array[:, :, 1] array[:, :, 1] = buffer - if yuv444p_channel_first and frame.format.name in {"yuv444p", "yuvj444p"}: + if not force_channel_last and frame.format.name in {"yuv444p", "yuvj444p"}: array = np.moveaxis(array, 2, 0) return array @@ -516,21 +511,20 @@ cdef class VideoFrame(Frame): self._init_user_attributes() @staticmethod - def from_ndarray(array, format: str="rgb24", rgb_to_gbr: bool=True, yuv444p_channel_first: bool=True): + def from_ndarray(array, format="rgb24", channel_last=False): """ Construct a frame from a numpy array. - :param bool rgb_to_gbr: If True, for ``gbrp`` formats, - channels are assumed to be given in RGB order, for backward compatibility. - :param bool yuv444p_channel_first: If True, the shape for the yuv444p and yuvj444p - is given by (channels, height, width) rather than (height, width, channels). - This is for backward compatibility. + :param bool channel_last: If False (default), the shape for the yuv444p and yuvj444p + is given by (channels, height, width) rather than (height, width, channels). - .. note:: For formats which expect an array of ``uint16``, the samples - must be in the system's native byte order. + .. note:: For formats which expect an array of ``uint16``, + the samples must be in the system's native byte order. .. note:: for ``pal8``, an ``(image, palette)`` pair must be passed. `palette` must have shape (256, 4) and is given in ARGB format (PyAV will swap bytes if needed). 
+ .. note:: for ``gbrp`` formats, channels are assumed to be given in RGB order. + """ import numpy as np @@ -568,12 +562,12 @@ cdef class VideoFrame(Frame): if array.ndim == 2: # (height, width) -> (height, width, 1) array = array[:, :, None] check_ndarray(array, dtype, 3) - if format in {"yuv444p", "yuvj444p"}: - array = np.moveaxis(array, 0, 2) + if not channel_last and format in {"yuv444p", "yuvj444p"}: + array = np.moveaxis(array, 0, 2) # (channels, h, w) -> (h, w, channels) check_ndarray_shape(array, array.shape[2] == channels) array = byteswap_array(array, format.endswith("be")) frame = VideoFrame(array.shape[1], array.shape[0], format) - if rgb_to_gbr and frame.format.name.startswith("gbr"): + if frame.format.name.startswith("gbr"): # rgb -> gbr array = np.concatenate([ # not inplace to avoid bad surprises array[:, :, 1:3], array[:, :, 0:1], array[:, :, 3:], ], axis=2) From 531d0ad53e9b9b3d76ae2b662985564c246a751a Mon Sep 17 00:00:00 2001 From: Robin RICHARD Date: Wed, 22 Jan 2025 09:39:07 +0100 Subject: [PATCH 6/6] truncate line at 80 rather than 100 --- av/video/frame.pyi | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/av/video/frame.pyi b/av/video/frame.pyi index 8f6e349f0..f58554b8f 100644 --- a/av/video/frame.pyi +++ b/av/video/frame.pyi @@ -59,7 +59,9 @@ class VideoFrame(Frame): ) -> VideoFrame: ... def to_rgb(self, **kwargs: Any) -> VideoFrame: ... def to_image(self, **kwargs: Any) -> Image.Image: ... - def to_ndarray(self, force_channel_last: bool = False, **kwargs: Any) -> _SupportedNDarray: ... + def to_ndarray( + self, force_channel_last: bool = False, **kwargs: Any + ) -> _SupportedNDarray: ... @staticmethod def from_image(img: Image.Image) -> VideoFrame: ... @staticmethod @@ -67,7 +69,9 @@ class VideoFrame(Frame): array: _SupportedNDarray, format: str = "rgb24", width: int = 0 ) -> VideoFrame: ... 
@staticmethod - def from_ndarray(array: _SupportedNDarray, format: str = "rgb24", channel_last: bool = False) -> VideoFrame: ... + def from_ndarray( + array: _SupportedNDarray, format: str = "rgb24", channel_last: bool = False + ) -> VideoFrame: ... @staticmethod def from_bytes( data: bytes,