Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions av/filter/loudnorm.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from av.audio.stream cimport AudioStream


# Declaration of the loudnorm analysis entry point implemented in loudnorm.pyx:
# takes the loudnorm filter argument string and the audio stream to analyse,
# and returns the filter's JSON statistics as bytes.
cpdef bytes stats(str loudnorm_args, AudioStream stream)
3 changes: 3 additions & 0 deletions av/filter/loudnorm.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from av.audio.stream import AudioStream

# Type stub for av.filter.loudnorm.stats: runs loudnorm analysis on `stream`
# using the filter options in `loudnorm_args` and returns the JSON report as bytes.
def stats(loudnorm_args: str, stream: AudioStream) -> bytes: ...
63 changes: 63 additions & 0 deletions av/filter/loudnorm.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# av/filter/loudnorm.pyx

cimport libav as lib
from cpython.bytes cimport PyBytes_FromString
from libc.stdlib cimport free

from av.audio.codeccontext cimport AudioCodecContext
from av.audio.stream cimport AudioStream
from av.container.core cimport Container
from av.stream cimport Stream


# Opaque struct declarations: only pointers to these types are passed around
# here, so no fields are exposed to Cython.
# NOTE(review): AVCodecContext is not referenced in the visible part of this
# module — confirm whether this declaration is still needed.
cdef extern from "libavcodec/avcodec.h":
ctypedef struct AVCodecContext:
pass

cdef extern from "libavformat/avformat.h":
ctypedef struct AVFormatContext:
pass

# C helper declared in loudnorm_impl.h. It is marked `nogil` so that stats()
# can call it inside a `with nogil:` block. It returns a C string (or NULL)
# that stats() releases with free().
cdef extern from "loudnorm_impl.h":
char* loudnorm_get_stats(
AVFormatContext* fmt_ctx,
int audio_stream_index,
const char* loudnorm_args
) nogil


cpdef bytes stats(str loudnorm_args, AudioStream stream):
    """
    Get loudnorm statistics for an audio stream.

    The stream's container is consumed by this call: its format context is
    handed over to the C helper ``loudnorm_get_stats`` (which closes it) and
    ``container.ptr`` is set to NULL, so the container must not be used for
    further demuxing afterwards.

    Args:
        loudnorm_args (str): Arguments for the loudnorm filter
            (e.g. "i=-24.0:lra=7.0:tp=-2.0"). ``print_format=json`` is
            appended when it is not already present.
        stream (AudioStream): Input audio stream to analyze

    Returns:
        bytes: JSON string containing the loudnorm statistics

    Raises:
        TypeError: if ``stream`` or ``loudnorm_args`` is None.
        RuntimeError: if the container has no open format context, or if the
            C helper did not produce any statistics.
    """
    cdef Container container
    cdef AVFormatContext* format_ptr
    cdef int stream_index
    cdef bytes py_args
    cdef const char* c_args
    cdef char* result

    if stream is None:
        raise TypeError("stream must be an AudioStream, not None")

    if "print_format=json" not in loudnorm_args:
        # Avoid a leading ':' when the caller passes an empty option string.
        if loudnorm_args:
            loudnorm_args = loudnorm_args + ":print_format=json"
        else:
            loudnorm_args = "print_format=json"

    # Do every Python-level operation that can raise *before* taking the
    # format context away from the container, so an exception cannot leak it.
    py_args = loudnorm_args.encode("utf-8")
    c_args = py_args
    stream_index = stream.index
    container = stream.container

    format_ptr = container.ptr
    if format_ptr == NULL:
        raise RuntimeError("Container is closed; cannot compute loudnorm stats")

    # Ownership of the format context moves to loudnorm_get_stats, which
    # closes it; clear our pointer to prevent a double-free.
    container.ptr = NULL

    with nogil:
        result = loudnorm_get_stats(format_ptr, stream_index, c_args)

    if result == NULL:
        raise RuntimeError("Failed to get loudnorm stats")

    try:
        # Copy the NUL-terminated C string into a Python bytes object.
        return PyBytes_FromString(result)
    finally:
        free(result)  # The C string was heap-allocated by the helper
211 changes: 211 additions & 0 deletions av/filter/loudnorm_impl.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h>
#else
#include <pthread.h>
#endif

#ifdef _WIN32
static CRITICAL_SECTION json_mutex;
static CONDITION_VARIABLE json_cond;
static int mutex_initialized = 0;
#else
static pthread_mutex_t json_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t json_cond = PTHREAD_COND_INITIALIZER;
#endif

static char json_buffer[2048] = {0};
static int json_captured = 0;

// Custom logging callback
/*
 * Log callback installed while loudnorm statistics are being collected.
 *
 * Formats the log message into a local buffer and, if it contains a '{',
 * copies everything from that brace onwards into json_buffer, sets
 * json_captured and signals json_cond, all while holding json_mutex.
 * Messages without a '{' are dropped. A later matching message overwrites
 * an earlier one.
 *
 * ptr and level are not used; messages are inspected regardless of level.
 */
static void logging_callback(void *ptr, int level, const char *fmt, va_list vl) {
    char line[2048];

    (void)ptr;
    (void)level;

    /* On a formatting error the contents of line are unspecified: bail out
     * instead of scanning it. */
    if (vsnprintf(line, sizeof(line), fmt, vl) < 0) {
        return;
    }

    const char *json_start = strchr(line, '{');
    if (!json_start) {
        return;
    }

#ifdef _WIN32
    EnterCriticalSection(&json_mutex);
#else
    pthread_mutex_lock(&json_mutex);
#endif

    /* snprintf always NUL-terminates, independent of the buffer's previous
     * contents; output longer than the buffer is truncated. */
    snprintf(json_buffer, sizeof(json_buffer), "%s", json_start);
    json_captured = 1;

#ifdef _WIN32
    WakeConditionVariable(&json_cond);
    LeaveCriticalSection(&json_mutex);
#else
    pthread_cond_signal(&json_cond);
    pthread_mutex_unlock(&json_mutex);
#endif
}

/*
 * Decode one audio stream, run it through an abuffer -> loudnorm ->
 * abuffersink filter graph and return the JSON text captured from the log
 * output by logging_callback.
 *
 * fmt_ctx            Opened input. It is closed with avformat_close_input()
 *                    on every path, so the caller must not use it afterwards.
 * audio_stream_index Index of the stream in fmt_ctx->streams to analyse.
 * loudnorm_args      Option string passed to the loudnorm filter.
 *
 * Returns a heap-allocated, NUL-terminated copy of the captured text (from
 * strdup/_strdup; release it with free()), or NULL if the decoder or filter
 * graph could not be set up or no JSON was captured.
 *
 * Side effects: uses the file-scope json_buffer/json_captured state, installs
 * logging_callback for the duration of the call and then installs
 * av_log_default_callback, and sets the libav log level to AV_LOG_INFO.
 * NOTE(review): a log callback that was installed before this call is not
 * restored — confirm this is acceptable for the embedding application.
 */
char* loudnorm_get_stats(
    AVFormatContext* fmt_ctx,
    int audio_stream_index,
    const char* loudnorm_args
) {
    char *result = NULL;
    int setup_ok = 0;  // becomes 1 once decoder, graph and buffers are ready
    int ret;
    int written;

    AVFilterGraph *filter_graph = NULL;
    AVFilterContext *src_ctx = NULL, *sink_ctx = NULL, *loudnorm_ctx = NULL;
    const AVFilter *abuffer = NULL, *abuffersink = NULL, *loudnorm = NULL;

    const AVCodec *codec = NULL;
    AVCodecContext *codec_ctx = NULL;
    AVCodecParameters *codecpar = NULL;
    AVStream *stream = NULL;

    AVPacket *packet = NULL;
    AVFrame *frame = NULL;
    AVFrame *filt_frame = NULL;

    const char *sample_fmt_name = NULL;
    char ch_layout_str[64];
    char args[512];

    json_captured = 0;  // Reset the captured flag
    memset(json_buffer, 0, sizeof(json_buffer));  // Clear the buffer

#ifdef _WIN32
    // Initialize synchronization objects if needed
    if (!mutex_initialized) {
        InitializeCriticalSection(&json_mutex);
        InitializeConditionVariable(&json_cond);
        mutex_initialized = 1;
    }
#endif

    av_log_set_callback(logging_callback);

    // Validate inputs before dereferencing anything.
    if (!fmt_ctx || !loudnorm_args || audio_stream_index < 0 ||
        (unsigned int)audio_stream_index >= fmt_ctx->nb_streams) {
        goto end;
    }

    // --- Decoder setup ---
    stream = fmt_ctx->streams[audio_stream_index];
    codecpar = stream->codecpar;

    codec = avcodec_find_decoder(codecpar->codec_id);
    if (!codec) goto end;

    codec_ctx = avcodec_alloc_context3(codec);
    if (!codec_ctx) goto end;

    if (avcodec_parameters_to_context(codec_ctx, codecpar) < 0) goto end;
    if (avcodec_open2(codec_ctx, codec, NULL) < 0) goto end;

    // --- Filter graph setup ---
    if (av_channel_layout_describe(&codecpar->ch_layout, ch_layout_str,
                                   sizeof(ch_layout_str)) < 0) {
        goto end;
    }

    // An unknown sample format yields NULL, which must not reach "%s".
    sample_fmt_name = av_get_sample_fmt_name(codec_ctx->sample_fmt);
    if (!sample_fmt_name) goto end;

    written = snprintf(args, sizeof(args),
                       "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=%s",
                       stream->time_base.num,
                       stream->time_base.den,
                       codecpar->sample_rate,
                       sample_fmt_name,
                       ch_layout_str);
    if (written < 0 || (size_t)written >= sizeof(args)) goto end;

    abuffer = avfilter_get_by_name("abuffer");
    abuffersink = avfilter_get_by_name("abuffersink");
    loudnorm = avfilter_get_by_name("loudnorm");
    if (!abuffer || !abuffersink || !loudnorm) goto end;

    filter_graph = avfilter_graph_alloc();
    if (!filter_graph) goto end;

    if (avfilter_graph_create_filter(&src_ctx, abuffer,
                                     "src", args, NULL, filter_graph) < 0) {
        goto end;
    }
    if (avfilter_graph_create_filter(&sink_ctx, abuffersink,
                                     "sink", NULL, NULL, filter_graph) < 0) {
        goto end;
    }
    // Fails for option strings the loudnorm filter rejects.
    if (avfilter_graph_create_filter(&loudnorm_ctx, loudnorm,
                                     "loudnorm", loudnorm_args, NULL, filter_graph) < 0) {
        goto end;
    }

    if (avfilter_link(src_ctx, 0, loudnorm_ctx, 0) < 0) goto end;
    if (avfilter_link(loudnorm_ctx, 0, sink_ctx, 0) < 0) goto end;
    if (avfilter_graph_config(filter_graph, NULL) < 0) goto end;

    packet = av_packet_alloc();
    frame = av_frame_alloc();
    filt_frame = av_frame_alloc();
    if (!packet || !frame || !filt_frame) goto end;

    setup_ok = 1;

    // --- Demux, decode and feed the graph ---
    while ((ret = av_read_frame(fmt_ctx, packet)) >= 0) {
        if (packet->stream_index != audio_stream_index) {
            av_packet_unref(packet);
            continue;
        }

        ret = avcodec_send_packet(codec_ctx, packet);
        if (ret < 0) {
            // Skip packets the decoder refuses and keep going.
            av_packet_unref(packet);
            continue;
        }

        while (ret >= 0) {
            ret = avcodec_receive_frame(codec_ctx, frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break;
            if (ret < 0) goto end;

            ret = av_buffersrc_add_frame_flags(src_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
            if (ret < 0) goto end;

            // Drain the sink; the filtered audio itself is discarded.
            while (1) {
                ret = av_buffersink_get_frame(sink_ctx, filt_frame);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break;
                if (ret < 0) goto end;
                av_frame_unref(filt_frame);
            }
        }
        av_packet_unref(packet);
    }

    // Flush decoder
    avcodec_send_packet(codec_ctx, NULL);
    while (avcodec_receive_frame(codec_ctx, frame) >= 0) {
        av_buffersrc_add_frame(src_ctx, frame);
    }

    // Flush filter
    av_buffersrc_add_frame(src_ctx, NULL);
    while (av_buffersink_get_frame(sink_ctx, filt_frame) >= 0) {
        av_frame_unref(filt_frame);
    }

    // Force stats print
    // NOTE(review): return values of the calls below are deliberately not
    // checked, matching the previous behaviour — confirm which of them are
    // actually required to get the JSON emitted.
    if (loudnorm_ctx) {
        av_log_set_level(AV_LOG_INFO);
        av_opt_set(loudnorm_ctx, "print_format", "json", AV_OPT_SEARCH_CHILDREN);
        av_opt_set(loudnorm_ctx, "measured_i", NULL, AV_OPT_SEARCH_CHILDREN);
        av_opt_set(loudnorm_ctx, "measured_lra", NULL, AV_OPT_SEARCH_CHILDREN);
        av_opt_set(loudnorm_ctx, "measured_tp", NULL, AV_OPT_SEARCH_CHILDREN);
        av_opt_set(loudnorm_ctx, "measured_thresh", NULL, AV_OPT_SEARCH_CHILDREN);
        avfilter_init_str(loudnorm_ctx, NULL);
    }

    avfilter_graph_request_oldest(filter_graph);

end:
    // All of these accept pointers that are still NULL.
    avcodec_free_context(&codec_ctx);
    avfilter_graph_free(&filter_graph);
    avformat_close_input(&fmt_ctx);
    av_frame_free(&filt_frame);
    av_frame_free(&frame);
    av_packet_free(&packet);

    // Only look for JSON when the pipeline was actually set up; a setup
    // failure returns NULL immediately instead of stalling on the timeout.
    if (setup_ok) {
#ifdef _WIN32
        EnterCriticalSection(&json_mutex);
        while (!json_captured) {
            if (!SleepConditionVariableCS(&json_cond, &json_mutex, 5000)) {  // 5 second timeout
                fprintf(stderr, "Timeout waiting for JSON data\n");
                break;
            }
        }
        if (json_captured) {
            result = _strdup(json_buffer);  // Use _strdup on Windows
        }
        LeaveCriticalSection(&json_mutex);
#else
        struct timespec timeout;
        clock_gettime(CLOCK_REALTIME, &timeout);
        timeout.tv_sec += 5;  // 5 second timeout

        pthread_mutex_lock(&json_mutex);
        while (json_captured == 0) {
            int wait_rc = pthread_cond_timedwait(&json_cond, &json_mutex, &timeout);
            if (wait_rc != 0) {
                // Stop on any failure, not just a timeout, so a persistent
                // error cannot spin this loop forever.
                if (wait_rc == ETIMEDOUT) {
                    fprintf(stderr, "Timeout waiting for JSON data\n");
                }
                break;
            }
        }
        if (json_captured) {
            result = strdup(json_buffer);
        }
        pthread_mutex_unlock(&json_mutex);
#endif
    }

    av_log_set_callback(av_log_default_callback);
    return result;
}
12 changes: 12 additions & 0 deletions av/filter/loudnorm_impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#ifndef AV_FILTER_LOUDNORM_H
#define AV_FILTER_LOUDNORM_H

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>  /* AVFormatContext */

/**
 * Run the loudnorm filter over one audio stream and return the JSON
 * statistics it logs.
 *
 * @param fmt_ctx            Opened input; it is closed by this function, so
 *                           the caller must not use or close it afterwards.
 * @param audio_stream_index Index of the audio stream in fmt_ctx->streams.
 * @param loudnorm_args      Option string passed to the loudnorm filter.
 * @return Heap-allocated NUL-terminated string that the caller releases with
 *         free(), or NULL if no statistics were captured.
 */
char* loudnorm_get_stats(
    AVFormatContext* fmt_ctx,
    int audio_stream_index,
    const char* loudnorm_args
);

#endif // AV_FILTER_LOUDNORM_H
28 changes: 27 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,36 @@ def parse_cflags(raw_flags):
"library_dirs": [],
}

# The loudnorm module is built from its Cython source plus a hand-written C
# file, so it gets its own Extension instead of going through the generic
# directory walk below.
loudnorm_extension = Extension(
    "av.filter.loudnorm",
    sources=["av/filter/loudnorm.pyx", "av/filter/loudnorm_impl.c"],
    include_dirs=["av/filter", *extension_extra["include_dirs"]],
    libraries=extension_extra["libraries"],
    library_dirs=extension_extra["library_dirs"],
)

# Cythonize the loudnorm extension and keep the result in ext_modules.
ext_modules = cythonize(
    loudnorm_extension,
    compiler_directives=dict(
        c_string_type="str",
        c_string_encoding="ascii",
        embedsignature=True,
        language_level=3,
    ),
    build_dir="src",
    include_path=["include"],
)

# Construct the modules that we find in the "av" directory.
ext_modules = []
for dirname, dirnames, filenames in os.walk("av"):
for filename in filenames:
if filename == "loudnorm.pyx":
continue

# We are looking for Cython sources.
if filename.startswith(".") or os.path.splitext(filename)[1] != ".pyx":
continue
Expand Down
11 changes: 11 additions & 0 deletions tests/test_streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@ def test_stream_tuples(self) -> None:
audio_streams = tuple([s for s in container.streams if s.type == "audio"])
assert audio_streams == container.streams.audio

def test_loudnorm(self) -> None:
    # Analyse the first audio stream of a FATE sample and sanity-check the
    # JSON report returned by the loudnorm helper.
    sample_path = fate_suite(
        "amv/MTV_high_res_320x240_sample_Penguin_Joke_MTV_from_WMV.amv"
    )
    container = av.open(sample_path)
    audio_stream = container.streams.audio[0]

    report = av.filter.loudnorm.stats("i=-24.0:lra=7.0:tp=-2.0", audio_stream)

    assert isinstance(report, bytes)
    assert len(report) > 30
    assert b"inf" not in report
    assert b'"input_i"' in report

def test_selection(self) -> None:
container = av.open(
fate_suite("amv/MTV_high_res_320x240_sample_Penguin_Joke_MTV_from_WMV.amv")
Expand Down
Loading