avfilter/af_afir: add support for switching impulse response streams at runtime

Currently, switching is not free of artifacts; this is to be resolved later.
Paul B Mahol 2020-01-09 20:06:57 +01:00
parent 03a7240a73
commit 52bf43eb49
3 changed files with 184 additions and 112 deletions

doc/filters.texi

@@ -1183,7 +1183,7 @@ afftfilt="real='hypot(re,im)*cos((random(0)*2-1)*2*3.14)':imag='hypot(re,im)*sin
 @anchor{afir}
 @section afir
 
-Apply an arbitrary Frequency Impulse Response filter.
+Apply an arbitrary Finite Impulse Response filter.
 
 This filter is designed for applying long FIR filters,
 up to 60 seconds long.
@@ -1192,10 +1192,10 @@ It can be used as component for digital crossover filters,
 room equalization, cross talk cancellation, wavefield synthesis,
 auralization, ambiophonics, ambisonics and spatialization.
 
-This filter uses the second stream as FIR coefficients.
-If the second stream holds a single channel, it will be used
+This filter uses streams beyond the first one as FIR coefficients.
+If the non-first stream holds a single channel, it will be used
 for all input channels in the first stream, otherwise
-the number of channels in the second stream must be same as
+the number of channels in the non-first stream must be the same as
 the number of channels in the first stream.
 
 It accepts the following parameters:
@@ -1264,6 +1264,15 @@ Lower values decreases latency at cost of higher CPU usage.
 Set maximal partition size used for convolution. Default is @var{8192}.
 Allowed range is from @var{8} to @var{32768}.
 Lower values may increase CPU usage.
+
+@item nbirs
+Set number of input impulse response streams which will be switchable at runtime.
+Allowed range is from @var{1} to @var{32}. Default is @var{1}.
+
+@item ir
+Set IR stream which will be used for convolution, starting from @var{0}.
+It must always be lower than the value set with the @code{nbirs} option.
+Default is @var{0}. This option can be changed at runtime via @ref{commands}.
 @end table
 
 @subsection Examples
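
As a usage sketch of the new options (the file names and the switch time are placeholders, not from the commit), the runtime `ir' command pairs naturally with the asendcmd filter: feed several IR streams to afir and switch to the second one five seconds in:

    ffmpeg -i input.wav -i ir0.wav -i ir1.wav -filter_complex \
      "[0:a]asendcmd=c='5.0 afir ir 1'[a];[a][1:a][2:a]afir=nbirs=2:ir=0[out]" \
      -map "[out]" output.wav

Here afir starts convolving with the first IR stream (ir=0) and asendcmd selects the second IR stream once playback reaches five seconds.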

libavfilter/af_afir.c

@@ -25,6 +25,7 @@
 #include <float.h>
 
+#include "libavutil/avstring.h"
 #include "libavutil/common.h"
 #include "libavutil/float_dsp.h"
 #include "libavutil/intreadwrite.h"
@@ -298,9 +299,9 @@ static void draw_response(AVFilterContext *ctx, AVFrame *out)
     if (!mag || !phase || !delay)
         goto end;
 
-    channel = av_clip(s->ir_channel, 0, s->ir[0]->channels - 1);
+    channel = av_clip(s->ir_channel, 0, s->ir[s->selir]->channels - 1);
     for (i = 0; i < s->w; i++) {
-        const float *src = (const float *)s->ir[0]->extended_data[channel];
+        const float *src = (const float *)s->ir[s->selir]->extended_data[channel];
         double w = i * M_PI / (s->w - 1);
         double div, real_num = 0., imag_num = 0., real = 0., imag = 0.;
@@ -403,7 +404,7 @@ static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg,
     seg->sum    = ff_get_audio_buffer(ctx->inputs[0], seg->fft_length);
     seg->block  = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->block_size);
     seg->buffer = ff_get_audio_buffer(ctx->inputs[0], seg->part_size);
-    seg->coeff  = ff_get_audio_buffer(ctx->inputs[1], seg->nb_partitions * seg->coeff_size * 2);
+    seg->coeff  = ff_get_audio_buffer(ctx->inputs[1 + s->selir], seg->nb_partitions * seg->coeff_size * 2);
     seg->input  = ff_get_audio_buffer(ctx->inputs[0], seg->input_size);
     seg->output = ff_get_audio_buffer(ctx->inputs[0], seg->part_size);
     if (!seg->buffer || !seg->sum || !seg->block || !seg->coeff || !seg->input || !seg->output)
@@ -412,14 +413,47 @@ static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg,
     return 0;
 }
 
+static void uninit_segment(AVFilterContext *ctx, AudioFIRSegment *seg)
+{
+    AudioFIRContext *s = ctx->priv;
+
+    if (seg->rdft) {
+        for (int ch = 0; ch < s->nb_channels; ch++) {
+            av_rdft_end(seg->rdft[ch]);
+        }
+    }
+    av_freep(&seg->rdft);
+
+    if (seg->irdft) {
+        for (int ch = 0; ch < s->nb_channels; ch++) {
+            av_rdft_end(seg->irdft[ch]);
+        }
+    }
+    av_freep(&seg->irdft);
+
+    av_freep(&seg->output_offset);
+    av_freep(&seg->part_index);
+
+    av_frame_free(&seg->block);
+    av_frame_free(&seg->sum);
+    av_frame_free(&seg->buffer);
+    av_frame_free(&seg->coeff);
+    av_frame_free(&seg->input);
+    av_frame_free(&seg->output);
+    seg->input_size = 0;
+}
+
 static int convert_coeffs(AVFilterContext *ctx)
 {
     AudioFIRContext *s = ctx->priv;
-    int left, offset = 0, part_size, max_part_size;
-    int ret, i, ch, n;
+    int ret, i, ch, n, cur_nb_taps;
     float power = 0;
 
-    s->nb_taps = ff_inlink_queued_samples(ctx->inputs[1]);
-    if (s->nb_taps <= 0)
-        return AVERROR(EINVAL);
+    if (!s->nb_taps) {
+        int part_size, max_part_size;
+        int left, offset = 0;
+
+        s->nb_taps = ff_inlink_queued_samples(ctx->inputs[1 + s->selir]);
+        if (s->nb_taps <= 0)
+            return AVERROR(EINVAL);
@@ -446,45 +480,49 @@ static int convert_coeffs(AVFilterContext *ctx)
             part_size *= 2;
             part_size = FFMIN(part_size, max_part_size);
         }
+    }
 
-    ret = ff_inlink_consume_samples(ctx->inputs[1], s->nb_taps, s->nb_taps, &s->ir[0]);
-    if (ret < 0)
-        return ret;
-    if (ret == 0)
-        return AVERROR_BUG;
+    if (!s->ir[s->selir]) {
+        ret = ff_inlink_consume_samples(ctx->inputs[1 + s->selir], s->nb_taps, s->nb_taps, &s->ir[s->selir]);
+        if (ret < 0)
+            return ret;
+        if (ret == 0)
+            return AVERROR_BUG;
+    }
 
     if (s->response)
         draw_response(ctx, s->video);
 
     s->gain = 1;
+    cur_nb_taps = s->ir[s->selir]->nb_samples;
 
     switch (s->gtype) {
     case -1:
         /* nothing to do */
         break;
     case 0:
-        for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
-            float *time = (float *)s->ir[0]->extended_data[!s->one2many * ch];
-            for (i = 0; i < s->nb_taps; i++)
+        for (ch = 0; ch < ctx->inputs[1 + s->selir]->channels; ch++) {
+            float *time = (float *)s->ir[s->selir]->extended_data[!s->one2many * ch];
+            for (i = 0; i < cur_nb_taps; i++)
                 power += FFABS(time[i]);
         }
-        s->gain = ctx->inputs[1]->channels / power;
+        s->gain = ctx->inputs[1 + s->selir]->channels / power;
         break;
     case 1:
-        for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
-            float *time = (float *)s->ir[0]->extended_data[!s->one2many * ch];
-            for (i = 0; i < s->nb_taps; i++)
+        for (ch = 0; ch < ctx->inputs[1 + s->selir]->channels; ch++) {
+            float *time = (float *)s->ir[s->selir]->extended_data[!s->one2many * ch];
+            for (i = 0; i < cur_nb_taps; i++)
                 power += time[i];
         }
-        s->gain = ctx->inputs[1]->channels / power;
+        s->gain = ctx->inputs[1 + s->selir]->channels / power;
         break;
     case 2:
-        for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
-            float *time = (float *)s->ir[0]->extended_data[!s->one2many * ch];
-            for (i = 0; i < s->nb_taps; i++)
+        for (ch = 0; ch < ctx->inputs[1 + s->selir]->channels; ch++) {
+            float *time = (float *)s->ir[s->selir]->extended_data[!s->one2many * ch];
+            for (i = 0; i < cur_nb_taps; i++)
                 power += time[i] * time[i];
         }
         s->gain = sqrtf(ch / power);
@@ -495,17 +533,17 @@ static int convert_coeffs(AVFilterContext *ctx)
     s->gain = FFMIN(s->gain * s->ir_gain, 1.f);
     av_log(ctx, AV_LOG_DEBUG, "power %f, gain %f\n", power, s->gain);
-    for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
-        float *time = (float *)s->ir[0]->extended_data[!s->one2many * ch];
+    for (ch = 0; ch < ctx->inputs[1 + s->selir]->channels; ch++) {
+        float *time = (float *)s->ir[s->selir]->extended_data[!s->one2many * ch];
 
-        s->fdsp->vector_fmul_scalar(time, time, s->gain, FFALIGN(s->nb_taps, 4));
+        s->fdsp->vector_fmul_scalar(time, time, s->gain, FFALIGN(cur_nb_taps, 4));
     }
 
-    av_log(ctx, AV_LOG_DEBUG, "nb_taps: %d\n", s->nb_taps);
+    av_log(ctx, AV_LOG_DEBUG, "nb_taps: %d\n", cur_nb_taps);
     av_log(ctx, AV_LOG_DEBUG, "nb_segments: %d\n", s->nb_segments);
 
-    for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
-        float *time = (float *)s->ir[0]->extended_data[!s->one2many * ch];
+    for (ch = 0; ch < ctx->inputs[1 + s->selir]->channels; ch++) {
+        float *time = (float *)s->ir[s->selir]->extended_data[!s->one2many * ch];
         int toffset = 0;
 
         for (i = FFMAX(1, s->length * s->nb_taps); i < s->nb_taps; i++)
@@ -561,7 +599,6 @@ static int convert_coeffs(AVFilterContext *ctx)
         }
     }
 
-    av_frame_free(&s->ir[0]);
     s->have_coeffs = 1;
 
     return 0;
@@ -594,26 +631,26 @@ static int activate(AVFilterContext *ctx)
     FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);
     if (s->response)
         FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[1], ctx);
-    if (!s->eof_coeffs) {
+    if (!s->eof_coeffs[s->selir]) {
         AVFrame *ir = NULL;
 
-        ret = check_ir(ctx->inputs[1], ir);
+        ret = check_ir(ctx->inputs[1 + s->selir], ir);
         if (ret < 0)
             return ret;
 
-        if (ff_outlink_get_status(ctx->inputs[1]) == AVERROR_EOF)
-            s->eof_coeffs = 1;
+        if (ff_outlink_get_status(ctx->inputs[1 + s->selir]) == AVERROR_EOF)
+            s->eof_coeffs[s->selir] = 1;
 
-        if (!s->eof_coeffs) {
+        if (!s->eof_coeffs[s->selir]) {
             if (ff_outlink_frame_wanted(ctx->outputs[0]))
-                ff_inlink_request_frame(ctx->inputs[1]);
+                ff_inlink_request_frame(ctx->inputs[1 + s->selir]);
             else if (s->response && ff_outlink_frame_wanted(ctx->outputs[1]))
-                ff_inlink_request_frame(ctx->inputs[1]);
+                ff_inlink_request_frame(ctx->inputs[1 + s->selir]);
             return 0;
         }
     }
 
-    if (!s->have_coeffs && s->eof_coeffs) {
+    if (!s->have_coeffs && s->eof_coeffs[s->selir]) {
         ret = convert_coeffs(ctx);
         if (ret < 0)
             return ret;
@@ -709,9 +746,11 @@ static int query_formats(AVFilterContext *ctx)
             return ret;
         if ((ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts)) < 0)
             return ret;
-        if ((ret = ff_channel_layouts_ref(mono, &ctx->inputs[1]->out_channel_layouts)) < 0)
-            return ret;
+        for (int i = 1; i < ctx->nb_inputs; i++) {
+            if ((ret = ff_channel_layouts_ref(mono, &ctx->inputs[i]->out_channel_layouts)) < 0)
+                return ret;
+        }
     }
 
     formats = ff_make_format_list(sample_fmts);
     if ((ret = ff_set_common_formats(ctx, formats)) < 0)
@@ -726,49 +765,19 @@ static int config_output(AVFilterLink *outlink)
     AVFilterContext *ctx = outlink->src;
     AudioFIRContext *s = ctx->priv;
 
-    s->one2many = ctx->inputs[1]->channels == 1;
+    s->one2many = ctx->inputs[1 + s->selir]->channels == 1;
     outlink->sample_rate = ctx->inputs[0]->sample_rate;
     outlink->time_base = ctx->inputs[0]->time_base;
     outlink->channel_layout = ctx->inputs[0]->channel_layout;
     outlink->channels = ctx->inputs[0]->channels;
     s->nb_channels = outlink->channels;
-    s->nb_coef_channels = ctx->inputs[1]->channels;
+    s->nb_coef_channels = ctx->inputs[1 + s->selir]->channels;
     s->pts = AV_NOPTS_VALUE;
 
     return 0;
 }
 
-static void uninit_segment(AVFilterContext *ctx, AudioFIRSegment *seg)
-{
-    AudioFIRContext *s = ctx->priv;
-
-    if (seg->rdft) {
-        for (int ch = 0; ch < s->nb_channels; ch++) {
-            av_rdft_end(seg->rdft[ch]);
-        }
-    }
-    av_freep(&seg->rdft);
-
-    if (seg->irdft) {
-        for (int ch = 0; ch < s->nb_channels; ch++) {
-            av_rdft_end(seg->irdft[ch]);
-        }
-    }
-    av_freep(&seg->irdft);
-
-    av_freep(&seg->output_offset);
-    av_freep(&seg->part_index);
-
-    av_frame_free(&seg->block);
-    av_frame_free(&seg->sum);
-    av_frame_free(&seg->buffer);
-    av_frame_free(&seg->coeff);
-    av_frame_free(&seg->input);
-    av_frame_free(&seg->output);
-    seg->input_size = 0;
-}
-
 static av_cold void uninit(AVFilterContext *ctx)
 {
     AudioFIRContext *s = ctx->priv;
@@ -778,7 +787,13 @@ static av_cold void uninit(AVFilterContext *ctx)
     }
 
     av_freep(&s->fdsp);
-    av_frame_free(&s->ir[0]);
+
+    for (int i = 0; i < s->nb_irs; i++) {
+        av_frame_free(&s->ir[i]);
+    }
+
+    for (int i = 0; i < ctx->nb_inputs; i++)
+        av_freep(&ctx->input_pads[i].name);
 
     for (int i = 0; i < ctx->nb_outputs; i++)
         av_freep(&ctx->output_pads[i].name);
@@ -818,6 +833,36 @@ static av_cold int init(AVFilterContext *ctx)
     AVFilterPad pad, vpad;
     int ret;
 
+    pad = (AVFilterPad) {
+        .name = av_strdup("main"),
+        .type = AVMEDIA_TYPE_AUDIO,
+    };
+
+    if (!pad.name)
+        return AVERROR(ENOMEM);
+
+    ret = ff_insert_inpad(ctx, 0, &pad);
+    if (ret < 0) {
+        av_freep(&pad.name);
+        return ret;
+    }
+
+    for (int n = 0; n < s->nb_irs; n++) {
+        pad = (AVFilterPad) {
+            .name = av_asprintf("ir%d", n),
+            .type = AVMEDIA_TYPE_AUDIO,
+        };
+
+        if (!pad.name)
+            return AVERROR(ENOMEM);
+
+        ret = ff_insert_inpad(ctx, n + 1, &pad);
+        if (ret < 0) {
+            av_freep(&pad.name);
+            return ret;
+        }
+    }
+
     pad = (AVFilterPad) {
         .name = av_strdup("default"),
         .type = AVMEDIA_TYPE_AUDIO,
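
With the input pads now built dynamically in init(), nbirs=2 for example yields three input pads named main, ir0 and ir1; this is also why query_formats above iterates over every input past the first instead of referencing only ctx->inputs[1].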
@@ -860,18 +905,31 @@ static av_cold int init(AVFilterContext *ctx)
     return 0;
 }
 
-static const AVFilterPad afir_inputs[] = {
-    {
-        .name = "main",
-        .type = AVMEDIA_TYPE_AUDIO,
-    },{
-        .name = "ir",
-        .type = AVMEDIA_TYPE_AUDIO,
-    },
-    { NULL }
-};
+static int process_command(AVFilterContext *ctx,
+                           const char *cmd,
+                           const char *arg,
+                           char *res,
+                           int res_len,
+                           int flags)
+{
+    AudioFIRContext *s = ctx->priv;
+    int prev_ir = s->selir;
+    int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags);
+
+    if (ret < 0)
+        return ret;
+
+    s->selir = FFMIN(s->nb_irs - 1, s->selir);
+
+    if (prev_ir != s->selir) {
+        s->have_coeffs = 0;
+    }
+
+    return 0;
+}
 
 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+#define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
 #define VF AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 #define OFFSET(x) offsetof(AudioFIRContext, x)
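
Resetting have_coeffs in process_command() is what forces the next activate() call back through convert_coeffs() for the newly selected IR stream. From application code, the same command can be delivered through the public graph API; a minimal sketch, assuming graph is a configured AVFilterGraph containing an afir instance (the switch_ir() helper name is hypothetical, the rest is the real libavfilter API):

    #include <stdio.h>
    #include <libavfilter/avfilter.h>

    /* Ask every afir instance in the graph to switch to IR stream
     * ir_index. avfilter_graph_send_command() routes the string command
     * to the filter's process_command() callback shown above. */
    static int switch_ir(AVFilterGraph *graph, int ir_index)
    {
        char arg[16], res[256] = { 0 };

        snprintf(arg, sizeof(arg), "%d", ir_index);
        return avfilter_graph_send_command(graph, "afir", "ir", arg,
                                           res, sizeof(res), 0);
    }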
@@ -895,6 +953,8 @@ static const AVOption afir_options[] = {
     { "rate", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str = "25"}, 0, INT32_MAX, VF },
     { "minp", "set min partition size", OFFSET(minp), AV_OPT_TYPE_INT, {.i64=8192}, 1, 32768, AF },
     { "maxp", "set max partition size", OFFSET(maxp), AV_OPT_TYPE_INT, {.i64=8192}, 8, 32768, AF },
+    { "nbirs", "set number of input IRs", OFFSET(nb_irs), AV_OPT_TYPE_INT, {.i64=1}, 1, 32, AF },
+    { "ir", "select IR", OFFSET(selir), AV_OPT_TYPE_INT, {.i64=0}, 0, 31, AFR },
     { NULL }
 };
@@ -902,14 +962,15 @@ AVFILTER_DEFINE_CLASS(afir);
 
 AVFilter ff_af_afir = {
     .name          = "afir",
-    .description   = NULL_IF_CONFIG_SMALL("Apply Finite Impulse Response filter with supplied coefficients in 2nd stream."),
+    .description   = NULL_IF_CONFIG_SMALL("Apply Finite Impulse Response filter with supplied coefficients in additional stream(s)."),
     .priv_size     = sizeof(AudioFIRContext),
     .priv_class    = &afir_class,
     .query_formats = query_formats,
     .init          = init,
     .activate      = activate,
     .uninit        = uninit,
-    .inputs        = afir_inputs,
-    .flags         = AVFILTER_FLAG_DYNAMIC_OUTPUTS |
+    .process_command = process_command,
+    .flags         = AVFILTER_FLAG_DYNAMIC_INPUTS |
+                     AVFILTER_FLAG_DYNAMIC_OUTPUTS |
                      AVFILTER_FLAG_SLICE_THREADS,
 };

libavfilter/af_afir.h

@@ -74,10 +74,12 @@ typedef struct AudioFIRContext {
     int ir_channel;
     int minp;
     int maxp;
+    int nb_irs;
+    int selir;
 
     float gain;
 
-    int eof_coeffs;
+    int eof_coeffs[32];
     int have_coeffs;
     int nb_taps;
     int nb_channels;
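
The fixed size of eof_coeffs matches the upper bound of the nbirs option (32), so each selectable IR stream tracks its EOF state independently.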