From 52bf43eb4959918f75a7e3c9678812521ef2efff Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Thu, 9 Jan 2020 20:06:57 +0100 Subject: [PATCH] avfilter/af_afir: add support for switching impulse response streams at runtime Currently, switching is not free of artifacts, to be resolved later. --- doc/filters.texi | 17 ++- libavfilter/af_afir.c | 275 ++++++++++++++++++++++++++---------------- libavfilter/af_afir.h | 4 +- 3 files changed, 184 insertions(+), 112 deletions(-) diff --git a/doc/filters.texi b/doc/filters.texi index 9d18880913..9ff7bc2814 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -1183,7 +1183,7 @@ afftfilt="real='hypot(re,im)*cos((random(0)*2-1)*2*3.14)':imag='hypot(re,im)*sin @anchor{afir} @section afir -Apply an arbitrary Frequency Impulse Response filter. +Apply an arbitrary Finite Impulse Response filter. This filter is designed for applying long FIR filters, up to 60 seconds long. @@ -1192,10 +1192,10 @@ It can be used as component for digital crossover filters, room equalization, cross talk cancellation, wavefield synthesis, auralization, ambiophonics, ambisonics and spatialization. -This filter uses the second stream as FIR coefficients. -If the second stream holds a single channel, it will be used +This filter uses the streams after the first one as FIR coefficients. +If such a stream holds a single channel, it will be used for all input channels in the first stream, otherwise -the number of channels in the second stream must be same as +the number of channels in that stream must be the same as the number of channels in the first stream. It accepts the following parameters: @@ -1264,6 +1264,15 @@ Lower values decreases latency at cost of higher CPU usage. Set maximal partition size used for convolution. Default is @var{8192}. Allowed range is from @var{8} to @var{32768}. Lower values may increase CPU usage. + +@item nbirs +Set the number of input impulse response streams which will be switchable at runtime. 
+Allowed range is from @var{1} to @var{32}. Default is @var{1}. + +@item ir +Set IR stream which will be used for convolution, starting from @var{0}, should always be +lower than supplied value by @code{nbirs} option. Default is @var{0}. +This option can be changed at runtime via @ref{commands}. @end table @subsection Examples diff --git a/libavfilter/af_afir.c b/libavfilter/af_afir.c index 2545039a9e..077f9c7962 100644 --- a/libavfilter/af_afir.c +++ b/libavfilter/af_afir.c @@ -25,6 +25,7 @@ #include +#include "libavutil/avstring.h" #include "libavutil/common.h" #include "libavutil/float_dsp.h" #include "libavutil/intreadwrite.h" @@ -298,9 +299,9 @@ static void draw_response(AVFilterContext *ctx, AVFrame *out) if (!mag || !phase || !delay) goto end; - channel = av_clip(s->ir_channel, 0, s->ir[0]->channels - 1); + channel = av_clip(s->ir_channel, 0, s->ir[s->selir]->channels - 1); for (i = 0; i < s->w; i++) { - const float *src = (const float *)s->ir[0]->extended_data[channel]; + const float *src = (const float *)s->ir[s->selir]->extended_data[channel]; double w = i * M_PI / (s->w - 1); double div, real_num = 0., imag_num = 0., real = 0., imag = 0.; @@ -403,7 +404,7 @@ static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg, seg->sum = ff_get_audio_buffer(ctx->inputs[0], seg->fft_length); seg->block = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->block_size); seg->buffer = ff_get_audio_buffer(ctx->inputs[0], seg->part_size); - seg->coeff = ff_get_audio_buffer(ctx->inputs[1], seg->nb_partitions * seg->coeff_size * 2); + seg->coeff = ff_get_audio_buffer(ctx->inputs[1 + s->selir], seg->nb_partitions * seg->coeff_size * 2); seg->input = ff_get_audio_buffer(ctx->inputs[0], seg->input_size); seg->output = ff_get_audio_buffer(ctx->inputs[0], seg->part_size); if (!seg->buffer || !seg->sum || !seg->block || !seg->coeff || !seg->input || !seg->output) @@ -412,79 +413,116 @@ static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg, return 
0; } +static void uninit_segment(AVFilterContext *ctx, AudioFIRSegment *seg) +{ + AudioFIRContext *s = ctx->priv; + + if (seg->rdft) { + for (int ch = 0; ch < s->nb_channels; ch++) { + av_rdft_end(seg->rdft[ch]); + } + } + av_freep(&seg->rdft); + + if (seg->irdft) { + for (int ch = 0; ch < s->nb_channels; ch++) { + av_rdft_end(seg->irdft[ch]); + } + } + av_freep(&seg->irdft); + + av_freep(&seg->output_offset); + av_freep(&seg->part_index); + + av_frame_free(&seg->block); + av_frame_free(&seg->sum); + av_frame_free(&seg->buffer); + av_frame_free(&seg->coeff); + av_frame_free(&seg->input); + av_frame_free(&seg->output); + seg->input_size = 0; +} + static int convert_coeffs(AVFilterContext *ctx) { AudioFIRContext *s = ctx->priv; - int left, offset = 0, part_size, max_part_size; - int ret, i, ch, n; + int ret, i, ch, n, cur_nb_taps; float power = 0; - s->nb_taps = ff_inlink_queued_samples(ctx->inputs[1]); - if (s->nb_taps <= 0) - return AVERROR(EINVAL); + if (!s->nb_taps) { + int part_size, max_part_size; + int left, offset = 0; - if (s->minp > s->maxp) { - s->maxp = s->minp; + s->nb_taps = ff_inlink_queued_samples(ctx->inputs[1 + s->selir]); + if (s->nb_taps <= 0) + return AVERROR(EINVAL); + + if (s->minp > s->maxp) { + s->maxp = s->minp; + } + + left = s->nb_taps; + part_size = 1 << av_log2(s->minp); + max_part_size = 1 << av_log2(s->maxp); + + s->min_part_size = part_size; + + for (i = 0; left > 0; i++) { + int step = part_size == max_part_size ? 
INT_MAX : 1 + (i == 0); + int nb_partitions = FFMIN(step, (left + part_size - 1) / part_size); + + s->nb_segments = i + 1; + ret = init_segment(ctx, &s->seg[i], offset, nb_partitions, part_size); + if (ret < 0) + return ret; + offset += nb_partitions * part_size; + left -= nb_partitions * part_size; + part_size *= 2; + part_size = FFMIN(part_size, max_part_size); + } } - left = s->nb_taps; - part_size = 1 << av_log2(s->minp); - max_part_size = 1 << av_log2(s->maxp); - - s->min_part_size = part_size; - - for (i = 0; left > 0; i++) { - int step = part_size == max_part_size ? INT_MAX : 1 + (i == 0); - int nb_partitions = FFMIN(step, (left + part_size - 1) / part_size); - - s->nb_segments = i + 1; - ret = init_segment(ctx, &s->seg[i], offset, nb_partitions, part_size); + if (!s->ir[s->selir]) { + ret = ff_inlink_consume_samples(ctx->inputs[1 + s->selir], s->nb_taps, s->nb_taps, &s->ir[s->selir]); if (ret < 0) return ret; - offset += nb_partitions * part_size; - left -= nb_partitions * part_size; - part_size *= 2; - part_size = FFMIN(part_size, max_part_size); + if (ret == 0) + return AVERROR_BUG; } - ret = ff_inlink_consume_samples(ctx->inputs[1], s->nb_taps, s->nb_taps, &s->ir[0]); - if (ret < 0) - return ret; - if (ret == 0) - return AVERROR_BUG; - if (s->response) draw_response(ctx, s->video); s->gain = 1; + cur_nb_taps = s->ir[s->selir]->nb_samples; switch (s->gtype) { case -1: /* nothing to do */ break; case 0: - for (ch = 0; ch < ctx->inputs[1]->channels; ch++) { - float *time = (float *)s->ir[0]->extended_data[!s->one2many * ch]; + for (ch = 0; ch < ctx->inputs[1 + s->selir]->channels; ch++) { + float *time = (float *)s->ir[s->selir]->extended_data[!s->one2many * ch]; - for (i = 0; i < s->nb_taps; i++) + for (i = 0; i < cur_nb_taps; i++) power += FFABS(time[i]); } - s->gain = ctx->inputs[1]->channels / power; + s->gain = ctx->inputs[1 + s->selir]->channels / power; break; case 1: - for (ch = 0; ch < ctx->inputs[1]->channels; ch++) { - float *time = (float 
*)s->ir[0]->extended_data[!s->one2many * ch]; + for (ch = 0; ch < ctx->inputs[1 + s->selir]->channels; ch++) { + float *time = (float *)s->ir[s->selir]->extended_data[!s->one2many * ch]; - for (i = 0; i < s->nb_taps; i++) + for (i = 0; i < cur_nb_taps; i++) power += time[i]; } - s->gain = ctx->inputs[1]->channels / power; + s->gain = ctx->inputs[1 + s->selir]->channels / power; break; case 2: - for (ch = 0; ch < ctx->inputs[1]->channels; ch++) { - float *time = (float *)s->ir[0]->extended_data[!s->one2many * ch]; + for (ch = 0; ch < ctx->inputs[1 + s->selir]->channels; ch++) { + float *time = (float *)s->ir[s->selir]->extended_data[!s->one2many * ch]; - for (i = 0; i < s->nb_taps; i++) + for (i = 0; i < cur_nb_taps; i++) power += time[i] * time[i]; } s->gain = sqrtf(ch / power); @@ -495,17 +533,17 @@ static int convert_coeffs(AVFilterContext *ctx) s->gain = FFMIN(s->gain * s->ir_gain, 1.f); av_log(ctx, AV_LOG_DEBUG, "power %f, gain %f\n", power, s->gain); - for (ch = 0; ch < ctx->inputs[1]->channels; ch++) { - float *time = (float *)s->ir[0]->extended_data[!s->one2many * ch]; + for (ch = 0; ch < ctx->inputs[1 + s->selir]->channels; ch++) { + float *time = (float *)s->ir[s->selir]->extended_data[!s->one2many * ch]; - s->fdsp->vector_fmul_scalar(time, time, s->gain, FFALIGN(s->nb_taps, 4)); + s->fdsp->vector_fmul_scalar(time, time, s->gain, FFALIGN(cur_nb_taps, 4)); } - av_log(ctx, AV_LOG_DEBUG, "nb_taps: %d\n", s->nb_taps); + av_log(ctx, AV_LOG_DEBUG, "nb_taps: %d\n", cur_nb_taps); av_log(ctx, AV_LOG_DEBUG, "nb_segments: %d\n", s->nb_segments); - for (ch = 0; ch < ctx->inputs[1]->channels; ch++) { - float *time = (float *)s->ir[0]->extended_data[!s->one2many * ch]; + for (ch = 0; ch < ctx->inputs[1 + s->selir]->channels; ch++) { + float *time = (float *)s->ir[s->selir]->extended_data[!s->one2many * ch]; int toffset = 0; for (i = FFMAX(1, s->length * s->nb_taps); i < s->nb_taps; i++) @@ -561,7 +599,6 @@ static int convert_coeffs(AVFilterContext *ctx) } } - 
av_frame_free(&s->ir[0]); s->have_coeffs = 1; return 0; @@ -594,26 +631,26 @@ static int activate(AVFilterContext *ctx) FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx); if (s->response) FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[1], ctx); - if (!s->eof_coeffs) { + if (!s->eof_coeffs[s->selir]) { AVFrame *ir = NULL; - ret = check_ir(ctx->inputs[1], ir); + ret = check_ir(ctx->inputs[1 + s->selir], ir); if (ret < 0) return ret; - if (ff_outlink_get_status(ctx->inputs[1]) == AVERROR_EOF) - s->eof_coeffs = 1; + if (ff_outlink_get_status(ctx->inputs[1 + s->selir]) == AVERROR_EOF) + s->eof_coeffs[s->selir] = 1; - if (!s->eof_coeffs) { + if (!s->eof_coeffs[s->selir]) { if (ff_outlink_frame_wanted(ctx->outputs[0])) - ff_inlink_request_frame(ctx->inputs[1]); + ff_inlink_request_frame(ctx->inputs[1 + s->selir]); else if (s->response && ff_outlink_frame_wanted(ctx->outputs[1])) - ff_inlink_request_frame(ctx->inputs[1]); + ff_inlink_request_frame(ctx->inputs[1 + s->selir]); return 0; } } - if (!s->have_coeffs && s->eof_coeffs) { + if (!s->have_coeffs && s->eof_coeffs[s->selir]) { ret = convert_coeffs(ctx); if (ret < 0) return ret; @@ -709,8 +746,10 @@ static int query_formats(AVFilterContext *ctx) return ret; if ((ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts)) < 0) return ret; - if ((ret = ff_channel_layouts_ref(mono, &ctx->inputs[1]->out_channel_layouts)) < 0) - return ret; + for (int i = 1; i < ctx->nb_inputs; i++) { + if ((ret = ff_channel_layouts_ref(mono, &ctx->inputs[i]->out_channel_layouts)) < 0) + return ret; + } } formats = ff_make_format_list(sample_fmts); @@ -726,49 +765,19 @@ static int config_output(AVFilterLink *outlink) AVFilterContext *ctx = outlink->src; AudioFIRContext *s = ctx->priv; - s->one2many = ctx->inputs[1]->channels == 1; + s->one2many = ctx->inputs[1 + s->selir]->channels == 1; outlink->sample_rate = ctx->inputs[0]->sample_rate; outlink->time_base = ctx->inputs[0]->time_base; outlink->channel_layout = 
ctx->inputs[0]->channel_layout; outlink->channels = ctx->inputs[0]->channels; s->nb_channels = outlink->channels; - s->nb_coef_channels = ctx->inputs[1]->channels; + s->nb_coef_channels = ctx->inputs[1 + s->selir]->channels; s->pts = AV_NOPTS_VALUE; return 0; } -static void uninit_segment(AVFilterContext *ctx, AudioFIRSegment *seg) -{ - AudioFIRContext *s = ctx->priv; - - if (seg->rdft) { - for (int ch = 0; ch < s->nb_channels; ch++) { - av_rdft_end(seg->rdft[ch]); - } - } - av_freep(&seg->rdft); - - if (seg->irdft) { - for (int ch = 0; ch < s->nb_channels; ch++) { - av_rdft_end(seg->irdft[ch]); - } - } - av_freep(&seg->irdft); - - av_freep(&seg->output_offset); - av_freep(&seg->part_index); - - av_frame_free(&seg->block); - av_frame_free(&seg->sum); - av_frame_free(&seg->buffer); - av_frame_free(&seg->coeff); - av_frame_free(&seg->input); - av_frame_free(&seg->output); - seg->input_size = 0; -} - static av_cold void uninit(AVFilterContext *ctx) { AudioFIRContext *s = ctx->priv; @@ -778,7 +787,13 @@ static av_cold void uninit(AVFilterContext *ctx) } av_freep(&s->fdsp); - av_frame_free(&s->ir[0]); + + for (int i = 0; i < s->nb_irs; i++) { + av_frame_free(&s->ir[i]); + } + + for (int i = 0; i < ctx->nb_inputs; i++) + av_freep(&ctx->input_pads[i].name); for (int i = 0; i < ctx->nb_outputs; i++) av_freep(&ctx->output_pads[i].name); @@ -818,7 +833,37 @@ static av_cold int init(AVFilterContext *ctx) AVFilterPad pad, vpad; int ret; - pad = (AVFilterPad){ + pad = (AVFilterPad) { + .name = av_strdup("main"), + .type = AVMEDIA_TYPE_AUDIO, + }; + + if (!pad.name) + return AVERROR(ENOMEM); + + ret = ff_insert_inpad(ctx, 0, &pad); + if (ret < 0) { + av_freep(&pad.name); + return ret; + } + + for (int n = 0; n < s->nb_irs; n++) { + pad = (AVFilterPad) { + .name = av_asprintf("ir%d", n), + .type = AVMEDIA_TYPE_AUDIO, + }; + + if (!pad.name) + return AVERROR(ENOMEM); + + ret = ff_insert_inpad(ctx, n + 1, &pad); + if (ret < 0) { + av_freep(&pad.name); + return ret; + } + } + + pad 
= (AVFilterPad) { .name = av_strdup("default"), .type = AVMEDIA_TYPE_AUDIO, .config_props = config_output, @@ -860,18 +905,31 @@ static av_cold int init(AVFilterContext *ctx) return 0; } -static const AVFilterPad afir_inputs[] = { - { - .name = "main", - .type = AVMEDIA_TYPE_AUDIO, - },{ - .name = "ir", - .type = AVMEDIA_TYPE_AUDIO, - }, - { NULL } -}; +static int process_command(AVFilterContext *ctx, + const char *cmd, + const char *arg, + char *res, + int res_len, + int flags) +{ + AudioFIRContext *s = ctx->priv; + int prev_ir = s->selir; + int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags); + + if (ret < 0) + return ret; + + s->selir = FFMIN(s->nb_irs - 1, s->selir); + + if (prev_ir != s->selir) { + s->have_coeffs = 0; + } + + return 0; +} #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM +#define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM #define VF AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM #define OFFSET(x) offsetof(AudioFIRContext, x) @@ -895,6 +953,8 @@ static const AVOption afir_options[] = { { "rate", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str = "25"}, 0, INT32_MAX, VF }, { "minp", "set min partition size", OFFSET(minp), AV_OPT_TYPE_INT, {.i64=8192}, 1, 32768, AF }, { "maxp", "set max partition size", OFFSET(maxp), AV_OPT_TYPE_INT, {.i64=8192}, 8, 32768, AF }, + { "nbirs", "set number of input IRs",OFFSET(nb_irs),AV_OPT_TYPE_INT, {.i64=1}, 1, 32, AF }, + { "ir", "select IR", OFFSET(selir), AV_OPT_TYPE_INT, {.i64=0}, 0, 31, AFR }, { NULL } }; @@ -902,14 +962,15 @@ AVFILTER_DEFINE_CLASS(afir); AVFilter ff_af_afir = { .name = "afir", - .description = NULL_IF_CONFIG_SMALL("Apply Finite Impulse Response filter with supplied coefficients in 2nd stream."), + .description = NULL_IF_CONFIG_SMALL("Apply Finite Impulse Response filter with supplied coefficients in additional stream(s)."), .priv_size = sizeof(AudioFIRContext), .priv_class = &afir_class, 
.query_formats = query_formats, .init = init, .activate = activate, .uninit = uninit, - .inputs = afir_inputs, - .flags = AVFILTER_FLAG_DYNAMIC_OUTPUTS | + .process_command = process_command, + .flags = AVFILTER_FLAG_DYNAMIC_INPUTS | + AVFILTER_FLAG_DYNAMIC_OUTPUTS | AVFILTER_FLAG_SLICE_THREADS, }; diff --git a/libavfilter/af_afir.h b/libavfilter/af_afir.h index 1b59d85175..4f44675848 100644 --- a/libavfilter/af_afir.h +++ b/libavfilter/af_afir.h @@ -74,10 +74,12 @@ typedef struct AudioFIRContext { int ir_channel; int minp; int maxp; + int nb_irs; + int selir; float gain; - int eof_coeffs; + int eof_coeffs[32]; int have_coeffs; int nb_taps; int nb_channels;