avfilter/af_dynaudnorm: add support for commands

This commit is contained in:
Paul B Mahol 2020-01-05 12:09:01 +01:00
parent 1e3f4b5f19
commit 27ec72db06
2 changed files with 102 additions and 62 deletions

View File

@ -3448,6 +3448,10 @@ to 0, which means all input frames will be normalized.
This option is mostly useful if digital noise is not wanted to be amplified. This option is mostly useful if digital noise is not wanted to be amplified.
@end table @end table
@subsection Commands
This filter supports all the above options as @ref{commands}.
@section earwax @section earwax
Make audio easier to listen to on headphones. Make audio easier to listen to on headphones.

View File

@ -29,7 +29,10 @@
#include "libavutil/avassert.h" #include "libavutil/avassert.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#define FF_BUFQUEUE_SIZE 302 #define MIN_FILTER_SIZE 3
#define MAX_FILTER_SIZE 301
#define FF_BUFQUEUE_SIZE (MAX_FILTER_SIZE + 1)
#include "libavfilter/bufferqueue.h" #include "libavfilter/bufferqueue.h"
#include "audio.h" #include "audio.h"
@ -45,8 +48,8 @@ typedef struct local_gain {
typedef struct cqueue { typedef struct cqueue {
double *elements; double *elements;
int size; int size;
int max_size;
int nb_elements; int nb_elements;
int first;
} cqueue; } cqueue;
typedef struct DynamicAudioNormalizerContext { typedef struct DynamicAudioNormalizerContext {
@ -69,7 +72,6 @@ typedef struct DynamicAudioNormalizerContext {
double *prev_amplification_factor; double *prev_amplification_factor;
double *dc_correction_value; double *dc_correction_value;
double *compress_threshold; double *compress_threshold;
double *fade_factors[2];
double *weights; double *weights;
int channels; int channels;
@ -85,7 +87,7 @@ typedef struct DynamicAudioNormalizerContext {
} DynamicAudioNormalizerContext; } DynamicAudioNormalizerContext;
#define OFFSET(x) offsetof(DynamicAudioNormalizerContext, x) #define OFFSET(x) offsetof(DynamicAudioNormalizerContext, x)
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
static const AVOption dynaudnorm_options[] = { static const AVOption dynaudnorm_options[] = {
{ "framelen", "set the frame length in msec", OFFSET(frame_len_msec), AV_OPT_TYPE_INT, {.i64 = 500}, 10, 8000, FLAGS }, { "framelen", "set the frame length in msec", OFFSET(frame_len_msec), AV_OPT_TYPE_INT, {.i64 = 500}, 10, 8000, FLAGS },
@ -161,30 +163,22 @@ static inline int frame_size(int sample_rate, int frame_len_msec)
return frame_size + (frame_size % 2); return frame_size + (frame_size % 2);
} }
static void precalculate_fade_factors(double *fade_factors[2], int frame_len) static cqueue *cqueue_create(int size, int max_size)
{
const double step_size = 1.0 / frame_len;
int pos;
for (pos = 0; pos < frame_len; pos++) {
fade_factors[0][pos] = 1.0 - (step_size * (pos + 1.0));
fade_factors[1][pos] = 1.0 - fade_factors[0][pos];
}
}
static cqueue *cqueue_create(int size)
{ {
cqueue *q; cqueue *q;
if (max_size < size)
return NULL;
q = av_malloc(sizeof(cqueue)); q = av_malloc(sizeof(cqueue));
if (!q) if (!q)
return NULL; return NULL;
q->max_size = max_size;
q->size = size; q->size = size;
q->nb_elements = 0; q->nb_elements = 0;
q->first = 0;
q->elements = av_malloc_array(size, sizeof(double)); q->elements = av_malloc_array(max_size, sizeof(double));
if (!q->elements) { if (!q->elements) {
av_free(q); av_free(q);
return NULL; return NULL;
@ -207,17 +201,14 @@ static int cqueue_size(cqueue *q)
static int cqueue_empty(cqueue *q) static int cqueue_empty(cqueue *q)
{ {
return !q->nb_elements; return q->nb_elements <= 0;
} }
static int cqueue_enqueue(cqueue *q, double element) static int cqueue_enqueue(cqueue *q, double element)
{ {
int i; av_assert2(q->nb_elements < q->max_size);
av_assert2(q->nb_elements != q->size); q->elements[q->nb_elements] = element;
i = (q->first + q->nb_elements) % q->size;
q->elements[i] = element;
q->nb_elements++; q->nb_elements++;
return 0; return 0;
@ -226,15 +217,15 @@ static int cqueue_enqueue(cqueue *q, double element)
static double cqueue_peek(cqueue *q, int index) static double cqueue_peek(cqueue *q, int index)
{ {
av_assert2(index < q->nb_elements); av_assert2(index < q->nb_elements);
return q->elements[(q->first + index) % q->size]; return q->elements[index];
} }
static int cqueue_dequeue(cqueue *q, double *element) static int cqueue_dequeue(cqueue *q, double *element)
{ {
av_assert2(!cqueue_empty(q)); av_assert2(!cqueue_empty(q));
*element = q->elements[q->first]; *element = q->elements[0];
q->first = (q->first + 1) % q->size; memmove(&q->elements[0], &q->elements[1], (q->nb_elements - 1) * sizeof(double));
q->nb_elements--; q->nb_elements--;
return 0; return 0;
@ -244,12 +235,34 @@ static int cqueue_pop(cqueue *q)
{ {
av_assert2(!cqueue_empty(q)); av_assert2(!cqueue_empty(q));
q->first = (q->first + 1) % q->size; memmove(&q->elements[0], &q->elements[1], (q->nb_elements - 1) * sizeof(double));
q->nb_elements--; q->nb_elements--;
return 0; return 0;
} }
/**
 * Change the logical window size of a queue while keeping the buffered
 * history roughly centred.
 *
 * Growing (new_size > nb_elements): the stored elements are shifted up by
 * half of the missing amount and the gap opened at the front is padded with
 * copies of the oldest element.
 * Shrinking: about half of the size difference is dropped from the front.
 * An empty queue only has its size updated.
 *
 * @param q        queue to resize; cqueue_create() allocates max_size slots,
 *                 so no reallocation is needed here
 * @param new_size new window size, MIN_FILTER_SIZE <= new_size <= q->max_size
 */
static void cqueue_resize(cqueue *q, int new_size)
{
    av_assert2(q->max_size >= new_size);
    av_assert2(MIN_FILTER_SIZE <= new_size);

    if (q->nb_elements <= 0) {
        /* Nothing is buffered yet, so there is no oldest element to
         * replicate. Padding here would copy uninitialised storage into the
         * queue and make it look non-empty, so only record the new size. */
        q->size = new_size;
        return;
    }

    if (new_size > q->nb_elements) {
        const int side = (new_size - q->nb_elements) / 2;

        /* Make room at the front, then fill it with the oldest element,
         * which now lives at index 'side'. */
        memmove(q->elements + side, q->elements, sizeof(double) * q->nb_elements);
        for (int i = 0; i < side; i++)
            q->elements[i] = q->elements[side];
        q->nb_elements = new_size - 1 - side;
    } else {
        int count = (q->size - new_size + 1) / 2;

        /* Never pop more than is stored: cqueue_pop() on an empty queue
         * would pass a negative length to memmove() once the assert is
         * compiled out. */
        if (count > q->nb_elements)
            count = q->nb_elements;

        while (count-- > 0)
            cqueue_pop(q);
    }

    q->size = new_size;
}
static void init_gaussian_filter(DynamicAudioNormalizerContext *s) static void init_gaussian_filter(DynamicAudioNormalizerContext *s)
{ {
double total_weight = 0.0; double total_weight = 0.0;
@ -285,8 +298,6 @@ static av_cold void uninit(AVFilterContext *ctx)
av_freep(&s->prev_amplification_factor); av_freep(&s->prev_amplification_factor);
av_freep(&s->dc_correction_value); av_freep(&s->dc_correction_value);
av_freep(&s->compress_threshold); av_freep(&s->compress_threshold);
av_freep(&s->fade_factors[0]);
av_freep(&s->fade_factors[1]);
for (c = 0; c < s->channels; c++) { for (c = 0; c < s->channels; c++) {
if (s->gain_history_original) if (s->gain_history_original)
@ -324,9 +335,6 @@ static int config_input(AVFilterLink *inlink)
s->frame_len = frame_size(inlink->sample_rate, s->frame_len_msec); s->frame_len = frame_size(inlink->sample_rate, s->frame_len_msec);
av_log(ctx, AV_LOG_DEBUG, "frame len %d\n", s->frame_len); av_log(ctx, AV_LOG_DEBUG, "frame len %d\n", s->frame_len);
s->fade_factors[0] = av_malloc_array(s->frame_len, sizeof(*s->fade_factors[0]));
s->fade_factors[1] = av_malloc_array(s->frame_len, sizeof(*s->fade_factors[1]));
s->prev_amplification_factor = av_malloc_array(inlink->channels, sizeof(*s->prev_amplification_factor)); s->prev_amplification_factor = av_malloc_array(inlink->channels, sizeof(*s->prev_amplification_factor));
s->dc_correction_value = av_calloc(inlink->channels, sizeof(*s->dc_correction_value)); s->dc_correction_value = av_calloc(inlink->channels, sizeof(*s->dc_correction_value));
s->compress_threshold = av_calloc(inlink->channels, sizeof(*s->compress_threshold)); s->compress_threshold = av_calloc(inlink->channels, sizeof(*s->compress_threshold));
@ -334,10 +342,10 @@ static int config_input(AVFilterLink *inlink)
s->gain_history_minimum = av_calloc(inlink->channels, sizeof(*s->gain_history_minimum)); s->gain_history_minimum = av_calloc(inlink->channels, sizeof(*s->gain_history_minimum));
s->gain_history_smoothed = av_calloc(inlink->channels, sizeof(*s->gain_history_smoothed)); s->gain_history_smoothed = av_calloc(inlink->channels, sizeof(*s->gain_history_smoothed));
s->threshold_history = av_calloc(inlink->channels, sizeof(*s->threshold_history)); s->threshold_history = av_calloc(inlink->channels, sizeof(*s->threshold_history));
s->weights = av_malloc_array(s->filter_size, sizeof(*s->weights)); s->weights = av_malloc_array(MAX_FILTER_SIZE, sizeof(*s->weights));
s->is_enabled = cqueue_create(s->filter_size); s->is_enabled = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
if (!s->prev_amplification_factor || !s->dc_correction_value || if (!s->prev_amplification_factor || !s->dc_correction_value ||
!s->compress_threshold || !s->fade_factors[0] || !s->fade_factors[1] || !s->compress_threshold ||
!s->gain_history_original || !s->gain_history_minimum || !s->gain_history_original || !s->gain_history_minimum ||
!s->gain_history_smoothed || !s->threshold_history || !s->gain_history_smoothed || !s->threshold_history ||
!s->is_enabled || !s->weights) !s->is_enabled || !s->weights)
@ -346,26 +354,27 @@ static int config_input(AVFilterLink *inlink)
for (c = 0; c < inlink->channels; c++) { for (c = 0; c < inlink->channels; c++) {
s->prev_amplification_factor[c] = 1.0; s->prev_amplification_factor[c] = 1.0;
s->gain_history_original[c] = cqueue_create(s->filter_size); s->gain_history_original[c] = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
s->gain_history_minimum[c] = cqueue_create(s->filter_size); s->gain_history_minimum[c] = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
s->gain_history_smoothed[c] = cqueue_create(s->filter_size); s->gain_history_smoothed[c] = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
s->threshold_history[c] = cqueue_create(s->filter_size); s->threshold_history[c] = cqueue_create(s->filter_size, MAX_FILTER_SIZE);
if (!s->gain_history_original[c] || !s->gain_history_minimum[c] || if (!s->gain_history_original[c] || !s->gain_history_minimum[c] ||
!s->gain_history_smoothed[c] || !s->threshold_history[c]) !s->gain_history_smoothed[c] || !s->threshold_history[c])
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
precalculate_fade_factors(s->fade_factors, s->frame_len);
init_gaussian_filter(s); init_gaussian_filter(s);
return 0; return 0;
} }
static inline double fade(double prev, double next, int pos, static inline double fade(double prev, double next, int pos, int length)
double *fade_factors[2])
{ {
return fade_factors[0][pos] * prev + fade_factors[1][pos] * next; const double step_size = 1.0 / length;
const double f0 = 1.0 - (step_size * (pos + 1.0));
const double f1 = 1.0 - f0;
return f0 * prev + f1 * next;
} }
static inline double pow_2(const double value) static inline double pow_2(const double value)
@ -473,8 +482,7 @@ static double gaussian_filter(DynamicAudioNormalizerContext *s, cqueue *q, cqueu
static void update_gain_history(DynamicAudioNormalizerContext *s, int channel, static void update_gain_history(DynamicAudioNormalizerContext *s, int channel,
local_gain gain) local_gain gain)
{ {
if (cqueue_empty(s->gain_history_original[channel]) || if (cqueue_empty(s->gain_history_original[channel])) {
cqueue_empty(s->gain_history_minimum[channel])) {
const int pre_fill_size = s->filter_size / 2; const int pre_fill_size = s->filter_size / 2;
const double initial_value = s->alt_boundary_mode ? gain.max_gain : s->peak_value; const double initial_value = s->alt_boundary_mode ? gain.max_gain : s->peak_value;
@ -487,11 +495,9 @@ static void update_gain_history(DynamicAudioNormalizerContext *s, int channel,
} }
cqueue_enqueue(s->gain_history_original[channel], gain.max_gain); cqueue_enqueue(s->gain_history_original[channel], gain.max_gain);
cqueue_enqueue(s->threshold_history[channel], gain.threshold);
while (cqueue_size(s->gain_history_original[channel]) >= s->filter_size) { while (cqueue_size(s->gain_history_original[channel]) >= s->filter_size) {
double minimum; double minimum;
av_assert0(cqueue_size(s->gain_history_original[channel]) == s->filter_size);
if (cqueue_empty(s->gain_history_minimum[channel])) { if (cqueue_empty(s->gain_history_minimum[channel])) {
const int pre_fill_size = s->filter_size / 2; const int pre_fill_size = s->filter_size / 2;
@ -509,12 +515,14 @@ static void update_gain_history(DynamicAudioNormalizerContext *s, int channel,
cqueue_enqueue(s->gain_history_minimum[channel], minimum); cqueue_enqueue(s->gain_history_minimum[channel], minimum);
cqueue_enqueue(s->threshold_history[channel], gain.threshold);
cqueue_pop(s->gain_history_original[channel]); cqueue_pop(s->gain_history_original[channel]);
} }
while (cqueue_size(s->gain_history_minimum[channel]) >= s->filter_size) { while (cqueue_size(s->gain_history_minimum[channel]) >= s->filter_size) {
double smoothed; double smoothed;
av_assert0(cqueue_size(s->gain_history_minimum[channel]) == s->filter_size);
smoothed = gaussian_filter(s, s->gain_history_minimum[channel], s->threshold_history[channel]); smoothed = gaussian_filter(s, s->gain_history_minimum[channel], s->threshold_history[channel]);
smoothed = FFMIN(smoothed, cqueue_peek(s->gain_history_minimum[channel], s->filter_size / 2)); smoothed = FFMIN(smoothed, cqueue_peek(s->gain_history_minimum[channel], s->filter_size / 2));
@ -549,7 +557,7 @@ static void perform_dc_correction(DynamicAudioNormalizerContext *s, AVFrame *fra
s->dc_correction_value[c] = is_first_frame ? current_average_value : update_value(current_average_value, s->dc_correction_value[c], 0.1); s->dc_correction_value[c] = is_first_frame ? current_average_value : update_value(current_average_value, s->dc_correction_value[c], 0.1);
for (i = 0; i < frame->nb_samples; i++) { for (i = 0; i < frame->nb_samples; i++) {
dst_ptr[i] -= fade(prev_value, s->dc_correction_value[c], i, s->fade_factors); dst_ptr[i] -= fade(prev_value, s->dc_correction_value[c], i, frame->nb_samples);
} }
} }
} }
@ -622,7 +630,7 @@ static void perform_compression(DynamicAudioNormalizerContext *s, AVFrame *frame
for (c = 0; c < s->channels; c++) { for (c = 0; c < s->channels; c++) {
double *const dst_ptr = (double *)frame->extended_data[c]; double *const dst_ptr = (double *)frame->extended_data[c];
for (i = 0; i < frame->nb_samples; i++) { for (i = 0; i < frame->nb_samples; i++) {
const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, s->fade_factors); const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, frame->nb_samples);
dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]); dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]);
} }
} }
@ -641,7 +649,7 @@ static void perform_compression(DynamicAudioNormalizerContext *s, AVFrame *frame
dst_ptr = (double *)frame->extended_data[c]; dst_ptr = (double *)frame->extended_data[c];
for (i = 0; i < frame->nb_samples; i++) { for (i = 0; i < frame->nb_samples; i++) {
const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, s->fade_factors); const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, frame->nb_samples);
dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]); dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]);
} }
} }
@ -685,12 +693,9 @@ static void amplify_frame(DynamicAudioNormalizerContext *s, AVFrame *frame, int
for (i = 0; i < frame->nb_samples && enabled; i++) { for (i = 0; i < frame->nb_samples && enabled; i++) {
const double amplification_factor = fade(s->prev_amplification_factor[c], const double amplification_factor = fade(s->prev_amplification_factor[c],
current_amplification_factor, i, current_amplification_factor, i,
s->fade_factors); frame->nb_samples);
dst_ptr[i] *= amplification_factor; dst_ptr[i] *= amplification_factor;
if (fabs(dst_ptr[i]) > s->peak_value)
dst_ptr[i] = copysign(s->peak_value, dst_ptr[i]);
} }
s->prev_amplification_factor[c] = current_amplification_factor; s->prev_amplification_factor[c] = current_amplification_factor;
@ -704,9 +709,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
AVFilterLink *outlink = ctx->outputs[0]; AVFilterLink *outlink = ctx->outputs[0];
int ret = 1; int ret = 1;
if (!cqueue_empty(s->gain_history_smoothed[0])) { while (((s->queue.available >= s->filter_size) ||
double is_enabled; (s->eof && s->queue.available)) &&
!cqueue_empty(s->gain_history_smoothed[0])) {
AVFrame *out = ff_bufqueue_get(&s->queue); AVFrame *out = ff_bufqueue_get(&s->queue);
double is_enabled;
cqueue_dequeue(s->is_enabled, &is_enabled); cqueue_dequeue(s->is_enabled, &is_enabled);
@ -715,13 +722,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
} }
av_frame_make_writable(in); av_frame_make_writable(in);
if (!s->eof)
cqueue_enqueue(s->is_enabled, !ctx->is_disabled);
analyze_frame(s, in); analyze_frame(s, in);
if (!s->eof) if (!s->eof) {
ff_bufqueue_add(ctx, &s->queue, in); ff_bufqueue_add(ctx, &s->queue, in);
else cqueue_enqueue(s->is_enabled, !ctx->is_disabled);
} else {
av_frame_free(&in); av_frame_free(&in);
}
return ret; return ret;
} }
@ -814,6 +821,34 @@ static int activate(AVFilterContext *ctx)
return FFERROR_NOT_READY; return FFERROR_NOT_READY;
} }
/**
 * Runtime command handler for the filter.
 *
 * Forwards the command to ff_filter_process_command() and, on success,
 * re-derives the state that depends on the options: the filter size is
 * forced to be odd, and if it changed, init_gaussian_filter() is called again
 * and the per-channel history queues are resized. The frame length is
 * recomputed from frame_len_msec after every successful command.
 *
 * @return 0 on success, otherwise the negative error code returned by
 *         ff_filter_process_command()
 */
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
                           char *res, int res_len, int flags)
{
    DynamicAudioNormalizerContext *s = ctx->priv;
    AVFilterLink *inlink = ctx->inputs[0];
    const int old_filter_size = s->filter_size;
    const int err = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);

    if (err < 0)
        return err;

    /* Setting the lowest bit rounds an even size up to the next odd one. */
    s->filter_size |= 1;

    if (s->filter_size != old_filter_size) {
        int ch = 0;

        init_gaussian_filter(s);

        while (ch < s->channels) {
            cqueue_resize(s->gain_history_original[ch], s->filter_size);
            cqueue_resize(s->gain_history_minimum[ch], s->filter_size);
            cqueue_resize(s->threshold_history[ch], s->filter_size);
            ch++;
        }
    }

    s->frame_len = frame_size(inlink->sample_rate, s->frame_len_msec);

    return 0;
}
static const AVFilterPad avfilter_af_dynaudnorm_inputs[] = { static const AVFilterPad avfilter_af_dynaudnorm_inputs[] = {
{ {
.name = "default", .name = "default",
@ -843,4 +878,5 @@ AVFilter ff_af_dynaudnorm = {
.outputs = avfilter_af_dynaudnorm_outputs, .outputs = avfilter_af_dynaudnorm_outputs,
.priv_class = &dynaudnorm_class, .priv_class = &dynaudnorm_class,
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
.process_command = process_command,
}; };