From a50071a4062cd98433ca64d59a3b27d27be0c492 Mon Sep 17 00:00:00 2001 From: WyattBlue Date: Wed, 20 Nov 2024 17:36:55 -0500 Subject: [PATCH 1/2] Add function for getting loudnorm stats Getting these stats is otherwise impossible. --- av/filter/loudnorm.pxd | 4 + av/filter/loudnorm.pyi | 3 + av/filter/loudnorm.pyx | 63 ++++++++++++++ av/filter/loudnorm_impl.c | 168 ++++++++++++++++++++++++++++++++++++++ av/filter/loudnorm_impl.h | 12 +++ setup.py | 28 ++++++- tests/test_streams.py | 11 +++ 7 files changed, 288 insertions(+), 1 deletion(-) create mode 100644 av/filter/loudnorm.pxd create mode 100644 av/filter/loudnorm.pyi create mode 100644 av/filter/loudnorm.pyx create mode 100644 av/filter/loudnorm_impl.c create mode 100644 av/filter/loudnorm_impl.h diff --git a/av/filter/loudnorm.pxd b/av/filter/loudnorm.pxd new file mode 100644 index 000000000..b08d3502f --- /dev/null +++ b/av/filter/loudnorm.pxd @@ -0,0 +1,4 @@ +from av.audio.stream cimport AudioStream + + +cpdef bytes stats(str loudnorm_args, AudioStream stream) diff --git a/av/filter/loudnorm.pyi b/av/filter/loudnorm.pyi new file mode 100644 index 000000000..fcd8ac22d --- /dev/null +++ b/av/filter/loudnorm.pyi @@ -0,0 +1,3 @@ +from av.stream.audio import AudioStream + +def stats(loudnorm_args: str, AudioStream: stream) -> bytes: ... diff --git a/av/filter/loudnorm.pyx b/av/filter/loudnorm.pyx new file mode 100644 index 000000000..662bbd230 --- /dev/null +++ b/av/filter/loudnorm.pyx @@ -0,0 +1,63 @@ +# av/filter/loudnorm.pyx + +cimport libav as lib +from cpython.bytes cimport PyBytes_FromString +from libc.stdlib cimport free + +from av.audio.codeccontext cimport AudioCodecContext +from av.audio.stream cimport AudioStream +from av.container.core cimport Container +from av.stream cimport Stream + + +cdef extern from "libavcodec/avcodec.h": + ctypedef struct AVCodecContext: + pass + +cdef extern from "libavformat/avformat.h": + ctypedef struct AVFormatContext: + pass + +cdef extern from "loudnorm_impl.h": + char* loudnorm_get_stats( + AVFormatContext* fmt_ctx, + int audio_stream_index, + const char* loudnorm_args + ) nogil + + +cpdef bytes stats(str loudnorm_args, AudioStream stream): + """ + Get loudnorm statistics for an audio stream. + + Args: + loudnorm_args (str): Arguments for the loudnorm filter (e.g. "i=-24.0:lra=7.0:tp=-2.0") + stream (AudioStream): Input audio stream to analyze + + Returns: + bytes: JSON string containing the loudnorm statistics + """ + + if "print_format=json" not in loudnorm_args: + loudnorm_args = loudnorm_args + ":print_format=json" + + cdef Container container = stream.container + cdef AVFormatContext* format_ptr = container.ptr + + container.ptr = NULL # Prevent double-free + + cdef int stream_index = stream.index + cdef bytes py_args = loudnorm_args.encode("utf-8") + cdef const char* c_args = py_args + cdef char* result + + with nogil: + result = loudnorm_get_stats(format_ptr, stream_index, c_args) + + if result == NULL: + raise RuntimeError("Failed to get loudnorm stats") + + py_result = result[:] # Make a copy of the string + free(result) # Free the C string + + return py_result diff --git a/av/filter/loudnorm_impl.c b/av/filter/loudnorm_impl.c new file mode 100644 index 000000000..79872874f --- /dev/null +++ b/av/filter/loudnorm_impl.c @@ -0,0 +1,168 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +static pthread_mutex_t json_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t json_cond = PTHREAD_COND_INITIALIZER; + +static char json_buffer[2048] = {0}; +static int json_captured = 0; + +// Custom logging callback +static void logging_callback(void *ptr, int level, const char *fmt, va_list vl) { + char line[2048]; + vsnprintf(line, sizeof(line), fmt, vl); + + const char *json_start = strstr(line, "{"); + if (json_start) { + pthread_mutex_lock(&json_mutex); + strncpy(json_buffer, json_start, sizeof(json_buffer) - 1); + json_captured = 1; + pthread_cond_signal(&json_cond); // Signal that we have the JSON + pthread_mutex_unlock(&json_mutex); + } +} + +char* loudnorm_get_stats( + AVFormatContext* fmt_ctx, + int audio_stream_index, + const char* loudnorm_args +) { + char* result = NULL; + json_captured = 0; // Reset the captured flag + memset(json_buffer, 0, sizeof(json_buffer)); // Clear the buffer + av_log_set_callback(logging_callback); + + AVFilterGraph *filter_graph = NULL; + AVFilterContext *src_ctx = NULL, *sink_ctx = NULL, *loudnorm_ctx = NULL; + + AVCodec *codec = NULL; + AVCodecContext *codec_ctx = NULL; + int ret; + + AVCodecParameters *codecpar = fmt_ctx->streams[audio_stream_index]->codecpar; + codec = (AVCodec *)avcodec_find_decoder(codecpar->codec_id); + codec_ctx = avcodec_alloc_context3(codec); + avcodec_parameters_to_context(codec_ctx, codecpar); + avcodec_open2(codec_ctx, codec, NULL); + + char ch_layout_str[64]; + av_channel_layout_describe(&codecpar->ch_layout, ch_layout_str, sizeof(ch_layout_str)); + + filter_graph = avfilter_graph_alloc(); + + char args[512]; + snprintf(args, sizeof(args), + "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=%s", + fmt_ctx->streams[audio_stream_index]->time_base.num, + fmt_ctx->streams[audio_stream_index]->time_base.den, + codecpar->sample_rate, + av_get_sample_fmt_name(codec_ctx->sample_fmt), + ch_layout_str); + + + + avfilter_graph_create_filter(&src_ctx, avfilter_get_by_name("abuffer"), + "src", args, NULL, filter_graph); + avfilter_graph_create_filter(&sink_ctx, avfilter_get_by_name("abuffersink"), + "sink", NULL, NULL, filter_graph); + avfilter_graph_create_filter(&loudnorm_ctx, avfilter_get_by_name("loudnorm"), + "loudnorm", loudnorm_args, NULL, filter_graph); + + avfilter_link(src_ctx, 0, loudnorm_ctx, 0); + avfilter_link(loudnorm_ctx, 0, sink_ctx, 0); + avfilter_graph_config(filter_graph, NULL); + + AVPacket *packet = av_packet_alloc(); + AVFrame *frame = av_frame_alloc(); + AVFrame *filt_frame = av_frame_alloc(); + + while ((ret = av_read_frame(fmt_ctx, packet)) >= 0) { + if (packet->stream_index != audio_stream_index) { + av_packet_unref(packet); + continue; + } + + ret = avcodec_send_packet(codec_ctx, packet); + if (ret < 0) { + av_packet_unref(packet); + continue; + } + + while (ret >= 0) { + ret = avcodec_receive_frame(codec_ctx, frame); + if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break; + if (ret < 0) goto end; + + ret = av_buffersrc_add_frame_flags(src_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF); + if (ret < 0) goto end; + + while (1) { + ret = av_buffersink_get_frame(sink_ctx, filt_frame); + if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break; + if (ret < 0) goto end; + av_frame_unref(filt_frame); + } + } + av_packet_unref(packet); + } + + // Flush decoder + avcodec_send_packet(codec_ctx, NULL); + while (avcodec_receive_frame(codec_ctx, frame) >= 0) { + av_buffersrc_add_frame(src_ctx, frame); + } + + // Flush filter + av_buffersrc_add_frame(src_ctx, NULL); + while (av_buffersink_get_frame(sink_ctx, filt_frame) >= 0) { + av_frame_unref(filt_frame); + } + + // Force stats print + if (loudnorm_ctx) { + av_log_set_level(AV_LOG_INFO); // Make sure log level is high enough + // Trigger stats print + av_opt_set(loudnorm_ctx, "print_format", "json", AV_OPT_SEARCH_CHILDREN); + av_opt_set(loudnorm_ctx, "measured_i", NULL, AV_OPT_SEARCH_CHILDREN); + av_opt_set(loudnorm_ctx, "measured_lra", NULL, AV_OPT_SEARCH_CHILDREN); + av_opt_set(loudnorm_ctx, "measured_tp", NULL, AV_OPT_SEARCH_CHILDREN); + av_opt_set(loudnorm_ctx, "measured_thresh", NULL, AV_OPT_SEARCH_CHILDREN); + avfilter_init_str(loudnorm_ctx, NULL); + } + + // Flush the filter graph to ensure all processing is done + avfilter_graph_request_oldest(filter_graph); + +end: + avcodec_free_context(&codec_ctx); + avfilter_graph_free(&filter_graph); + avformat_close_input(&fmt_ctx); + av_frame_free(&filt_frame); + av_frame_free(&frame); + av_packet_free(&packet); + + struct timespec timeout; + clock_gettime(CLOCK_REALTIME, &timeout); + //timeout.tv_sec += 0; + + pthread_mutex_lock(&json_mutex); + while (json_captured == 0) { + int ret = pthread_cond_timedwait(&json_cond, &json_mutex, &timeout); + if (ret == ETIMEDOUT) { + fprintf(stderr, "Timeout waiting for JSON data\n"); + break; + } + } + if (json_captured) { + result = strdup(json_buffer); + } + pthread_mutex_unlock(&json_mutex); + av_log_set_callback(av_log_default_callback); + return result; +} \ No newline at end of file diff --git a/av/filter/loudnorm_impl.h b/av/filter/loudnorm_impl.h new file mode 100644 index 000000000..7357e4668 --- /dev/null +++ b/av/filter/loudnorm_impl.h @@ -0,0 +1,12 @@ +#ifndef AV_FILTER_LOUDNORM_H +#define AV_FILTER_LOUDNORM_H + +#include + +char* loudnorm_get_stats( + AVFormatContext* fmt_ctx, + int audio_stream_index, + const char* loudnorm_args +); + +#endif // AV_FILTER_LOUDNORM_H \ No newline at end of file diff --git a/setup.py b/setup.py index 935d233aa..685412b99 100644 --- a/setup.py +++ b/setup.py @@ -153,10 +153,36 @@ def parse_cflags(raw_flags): "library_dirs": [], } +loudnorm_extension = Extension( + "av.filter.loudnorm", + sources=[ + "av/filter/loudnorm.pyx", + "av/filter/loudnorm_impl.c", + ], + include_dirs=["av/filter"] + extension_extra["include_dirs"], + libraries=extension_extra["libraries"], + library_dirs=extension_extra["library_dirs"], +) + +# Add the cythonized loudnorm extension to ext_modules +ext_modules = cythonize( + loudnorm_extension, + compiler_directives={ + "c_string_type": "str", + "c_string_encoding": "ascii", + "embedsignature": True, + "language_level": 3, + }, + build_dir="src", + include_path=["include"], +) + # Construct the modules that we find in the "av" directory. -ext_modules = [] for dirname, dirnames, filenames in os.walk("av"): for filename in filenames: + if filename == "loudnorm.pyx": + continue + # We are looking for Cython sources. if filename.startswith(".") or os.path.splitext(filename)[1] != ".pyx": continue diff --git a/tests/test_streams.py b/tests/test_streams.py index b7699e622..1db466652 100644 --- a/tests/test_streams.py +++ b/tests/test_streams.py @@ -26,6 +26,17 @@ def test_stream_tuples(self) -> None: audio_streams = tuple([s for s in container.streams if s.type == "audio"]) assert audio_streams == container.streams.audio + def test_loudnorm(self) -> None: + container = av.open( + fate_suite("amv/MTV_high_res_320x240_sample_Penguin_Joke_MTV_from_WMV.amv") + ) + audio = container.streams.audio[0] + stats = av.filter.loudnorm.stats("i=-24.0:lra=7.0:tp=-2.0", audio) + + assert isinstance(stats, bytes) and len(stats) > 30 + assert b"inf" not in stats + assert b"\"input_i\"" in stats + def test_selection(self) -> None: container = av.open( fate_suite("amv/MTV_high_res_320x240_sample_Penguin_Joke_MTV_from_WMV.amv") From 46c07c4ac18b96cd47907dc2803485cb940171b8 Mon Sep 17 00:00:00 2001 From: WyattBlue Date: Wed, 20 Nov 2024 19:14:50 -0500 Subject: [PATCH 2/2] Make a Windows port --- av/filter/loudnorm.pyi | 4 +-- av/filter/loudnorm_impl.c | 63 ++++++++++++++++++++++++++++++++------- tests/test_streams.py | 2 +- 3 files changed, 56 insertions(+), 13 deletions(-) diff --git a/av/filter/loudnorm.pyi b/av/filter/loudnorm.pyi index fcd8ac22d..c680f638d 100644 --- a/av/filter/loudnorm.pyi +++ b/av/filter/loudnorm.pyi @@ -1,3 +1,3 @@ -from av.stream.audio import AudioStream +from av.audio.stream import AudioStream -def stats(loudnorm_args: str, AudioStream: stream) -> bytes: ... +def stats(loudnorm_args: str, stream: AudioStream) -> bytes: ... diff --git a/av/filter/loudnorm_impl.c b/av/filter/loudnorm_impl.c index 79872874f..e8d56ddba 100644 --- a/av/filter/loudnorm_impl.c +++ b/av/filter/loudnorm_impl.c @@ -4,11 +4,22 @@ #include #include #include -#include #include -static pthread_mutex_t json_mutex = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t json_cond = PTHREAD_COND_INITIALIZER; +#ifdef _WIN32 + #include +#else + #include +#endif + +#ifdef _WIN32 + static CRITICAL_SECTION json_mutex; + static CONDITION_VARIABLE json_cond; + static int mutex_initialized = 0; +#else + static pthread_mutex_t json_mutex = PTHREAD_MUTEX_INITIALIZER; + static pthread_cond_t json_cond = PTHREAD_COND_INITIALIZER; +#endif static char json_buffer[2048] = {0}; static int json_captured = 0; @@ -20,11 +31,22 @@ static void logging_callback(void *ptr, int level, const char *fmt, va_list vl) const char *json_start = strstr(line, "{"); if (json_start) { + #ifdef _WIN32 + EnterCriticalSection(&json_mutex); + #else pthread_mutex_lock(&json_mutex); + #endif + strncpy(json_buffer, json_start, sizeof(json_buffer) - 1); json_captured = 1; - pthread_cond_signal(&json_cond); // Signal that we have the JSON + + #ifdef _WIN32 + WakeConditionVariable(&json_cond); + LeaveCriticalSection(&json_mutex); + #else + pthread_cond_signal(&json_cond); pthread_mutex_unlock(&json_mutex); + #endif } } @@ -36,6 +58,16 @@ char* loudnorm_get_stats( char* result = NULL; json_captured = 0; // Reset the captured flag memset(json_buffer, 0, sizeof(json_buffer)); // Clear the buffer + + #ifdef _WIN32 + // Initialize synchronization objects if needed + if (!mutex_initialized) { + InitializeCriticalSection(&json_mutex); + InitializeConditionVariable(&json_cond); + mutex_initialized = 1; + } + #endif + av_log_set_callback(logging_callback); AVFilterGraph *filter_graph = NULL; @@ -65,8 +97,6 @@ char* loudnorm_get_stats( av_get_sample_fmt_name(codec_ctx->sample_fmt), ch_layout_str); - - avfilter_graph_create_filter(&src_ctx, avfilter_get_by_name("abuffer"), "src", args, NULL, filter_graph); avfilter_graph_create_filter(&sink_ctx, avfilter_get_by_name("abuffersink"), @@ -126,8 +156,7 @@ char* loudnorm_get_stats( // Force stats print if (loudnorm_ctx) { - av_log_set_level(AV_LOG_INFO); // Make sure log level is high enough - // Trigger stats print + av_log_set_level(AV_LOG_INFO); av_opt_set(loudnorm_ctx, "print_format", "json", AV_OPT_SEARCH_CHILDREN); av_opt_set(loudnorm_ctx, "measured_i", NULL, AV_OPT_SEARCH_CHILDREN); av_opt_set(loudnorm_ctx, "measured_lra", NULL, AV_OPT_SEARCH_CHILDREN); @@ -136,7 +165,6 @@ char* loudnorm_get_stats( avfilter_init_str(loudnorm_ctx, NULL); } - // Flush the filter graph to ensure all processing is done avfilter_graph_request_oldest(filter_graph); end: @@ -147,9 +175,22 @@ char* loudnorm_get_stats( av_frame_free(&frame); av_packet_free(&packet); + #ifdef _WIN32 + EnterCriticalSection(&json_mutex); + while (!json_captured) { + if (!SleepConditionVariableCS(&json_cond, &json_mutex, 5000)) { // 5 second timeout + fprintf(stderr, "Timeout waiting for JSON data\n"); + break; + } + } + if (json_captured) { + result = _strdup(json_buffer); // Use _strdup on Windows + } + LeaveCriticalSection(&json_mutex); + #else struct timespec timeout; clock_gettime(CLOCK_REALTIME, &timeout); - //timeout.tv_sec += 0; + timeout.tv_sec += 5; // 5 second timeout pthread_mutex_lock(&json_mutex); while (json_captured == 0) { @@ -163,6 +204,8 @@ char* loudnorm_get_stats( result = strdup(json_buffer); } pthread_mutex_unlock(&json_mutex); + #endif + av_log_set_callback(av_log_default_callback); return result; } \ No newline at end of file diff --git a/tests/test_streams.py b/tests/test_streams.py index 1db466652..c7b234d48 100644 --- a/tests/test_streams.py +++ b/tests/test_streams.py @@ -35,7 +35,7 @@ def test_loudnorm(self) -> None: assert isinstance(stats, bytes) and len(stats) > 30 assert b"inf" not in stats - assert b"\"input_i\"" in stats + assert b'"input_i"' in stats def test_selection(self) -> None: container = av.open(