avfilter/af_silenceremove: add peak detector

Signed-off-by: Paul B Mahol <onemda@gmail.com>
This commit is contained in:
Paul B Mahol 2015-12-27 20:45:46 +01:00
parent 3215342121
commit 1809894b2a
2 changed files with 77 additions and 33 deletions

View File

@ -2969,6 +2969,10 @@ at the beginning of each period of silence.
For example, if you want to remove long pauses between words but do not want
to remove the pauses completely. Default value is @code{0}.
@item detection
Set how silence is detected. Can be @code{rms} or @code{peak}. The latter is faster
and works better with digital silence, which is exactly 0.
Default value is @code{rms}.
@end table
@subsection Examples

View File

@ -65,11 +65,15 @@ typedef struct SilenceRemoveContext {
double *window_current;
double *window_end;
int window_size;
double rms_sum;
double sum;
int leave_silence;
int restart;
int64_t next_pts;
int detection;
void (*update)(struct SilenceRemoveContext *s, double sample);
double(*compute)(struct SilenceRemoveContext *s, double sample);
} SilenceRemoveContext;
#define OFFSET(x) offsetof(SilenceRemoveContext, x)
@ -82,11 +86,58 @@ static const AVOption silenceremove_options[] = {
{ "stop_duration", NULL, OFFSET(stop_duration), AV_OPT_TYPE_DURATION, {.i64=0}, 0, 9000, FLAGS },
{ "stop_threshold", NULL, OFFSET(stop_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, FLAGS },
{ "leave_silence", NULL, OFFSET(leave_silence), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
{ "detection", NULL, OFFSET(detection), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS, "detection" },
{ "peak", 0, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "detection" },
{ "rms", 0, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "detection" },
{ NULL }
};
AVFILTER_DEFINE_CLASS(silenceremove);
/*
 * Preview the "peak" detector level that would result from feeding
 * `sample` into the window, without modifying any state.
 *
 * The value at s->window_current is removed from the running sum, the
 * absolute value of `sample` is added in its place, and the result is
 * divided by the window size. Despite the name, the returned quantity is
 * therefore a windowed mean of absolute values, not a maximum.
 *
 * s       filter state; only read here (sum, window_current, window_size)
 * sample  candidate input sample
 * returns the projected mean absolute level
 */
static double compute_peak(SilenceRemoveContext *s, double sample)
{
    double projected = s->sum;

    /* Drop the slot about to be replaced, then account for the candidate. */
    projected = projected - *s->window_current;
    projected = projected + fabs(sample);

    return projected / s->window_size;
}
/*
 * Commit `sample` to the "peak" detector window.
 *
 * The slot at s->window_current is replaced by the absolute value of
 * `sample`, s->sum is adjusted by the difference (old value out, new value
 * in), and the cursor advances by one slot, wrapping back to s->window
 * once it reaches s->window_end.
 *
 * s       filter state; sum, the current slot and window_current are written
 * sample  input sample being consumed
 */
static void update_peak(SilenceRemoveContext *s, double sample)
{
    double *slot = s->window_current;

    /* Swap the stored value and keep the running sum in step with it. */
    s->sum -= *slot;
    *slot   = fabs(sample);
    s->sum += *slot;

    /* Advance the circular cursor. */
    slot++;
    if (slot >= s->window_end) {
        slot = s->window;
    }
    s->window_current = slot;
}
/*
 * Preview the RMS level that would result from feeding `sample` into the
 * window, without modifying any state.
 *
 * The value at s->window_current is removed from the running sum, the
 * square of `sample` is added in its place, and the square root of the
 * resulting mean is returned.
 *
 * s       filter state; only read here (sum, window_current, window_size)
 * sample  candidate input sample
 * returns the projected RMS level, never NaN for finite inputs
 *         (NOTE(review): assumes s->window_size is non-zero; the code that
 *         sets it is not visible here)
 */
static double compute_rms(SilenceRemoveContext *s, double sample)
{
    double new_sum;

    new_sum  = s->sum;
    new_sum -= *s->window_current;
    new_sum += sample * sample;

    /*
     * s->sum is maintained incrementally by repeated subtract/add, so
     * floating-point rounding can leave it a hair below zero once the
     * window holds (near-)silence. sqrt() of a negative value is a domain
     * error that yields NaN, so clamp to zero first.
     */
    if (new_sum < 0.0)
        new_sum = 0.0;

    return sqrt(new_sum / s->window_size);
}
/*
 * Commit `sample` to the RMS detector window.
 *
 * The slot at s->window_current is replaced by the square of `sample`,
 * s->sum is adjusted accordingly (old value out, new value in), and the
 * cursor advances by one slot, wrapping back to s->window once it reaches
 * s->window_end.
 *
 * s       filter state; sum, the current slot and window_current are written
 * sample  input sample being consumed
 */
static void update_rms(SilenceRemoveContext *s, double sample)
{
    double *slot = s->window_current;

    /* Swap the stored square and keep the running sum in step with it. */
    s->sum -= *slot;
    *slot   = sample * sample;
    s->sum += *slot;

    /* Advance the circular cursor. */
    slot++;
    if (slot >= s->window_end) {
        slot = s->window;
    }
    s->window_current = slot;
}
static av_cold int init(AVFilterContext *ctx)
{
SilenceRemoveContext *s = ctx->priv;
@ -96,16 +147,27 @@ static av_cold int init(AVFilterContext *ctx)
s->restart = 1;
}
switch (s->detection) {
case 0:
s->update = update_peak;
s->compute = compute_peak;
break;
case 1:
s->update = update_rms;
s->compute = compute_rms;
break;
};
return 0;
}
/*
 * Reset the detection window to its initial, empty state:
 *  - zero all s->window_size entries of s->window,
 *  - rewind the cursor (window_current) to the first entry,
 *  - set window_end to one past the last entry,
 *  - zero the running-sum accumulator.
 *
 * NOTE(review): this span is a diff hunk that shows two signature lines
 * (clear_rms / clear_window) and two accumulator resets (rms_sum / sum);
 * presumably the first of each pair is the pre-change spelling and the
 * second the post-change one - confirm against the real file.
 */
static void clear_rms(SilenceRemoveContext *s)
static void clear_window(SilenceRemoveContext *s)
{
memset(s->window, 0, s->window_size * sizeof(*s->window));
s->window_current = s->window;
s->window_end = s->window + s->window_size;
s->rms_sum = 0;
s->sum = 0;
}
static int config_input(AVFilterLink *inlink)
@ -118,7 +180,7 @@ static int config_input(AVFilterLink *inlink)
if (!s->window)
return AVERROR(ENOMEM);
clear_rms(s);
clear_window(s);
s->start_duration = av_rescale(s->start_duration, inlink->sample_rate,
AV_TIME_BASE);
@ -153,28 +215,6 @@ static int config_input(AVFilterLink *inlink)
return 0;
}
/*
 * Preview the RMS level that would result from feeding `sample` into the
 * window, without modifying any state.
 *
 * The value at s->window_current is removed from the running sum
 * (s->rms_sum), the square of `sample` is added in its place, and the
 * square root of the resulting mean is returned.
 *
 * s       filter state; only read here (rms_sum, window_current, window_size)
 * sample  candidate input sample
 * returns the projected RMS level, never NaN for finite inputs
 *         (NOTE(review): assumes s->window_size is non-zero; the code that
 *         sets it is not visible here)
 */
static double compute_rms(SilenceRemoveContext *s, double sample)
{
    double new_sum;

    new_sum  = s->rms_sum;
    new_sum -= *s->window_current;
    new_sum += sample * sample;

    /*
     * s->rms_sum is maintained incrementally by repeated subtract/add, so
     * floating-point rounding can leave it a hair below zero once the
     * window holds (near-)silence. sqrt() of a negative value is a domain
     * error that yields NaN, so clamp to zero first.
     */
    if (new_sum < 0.0)
        new_sum = 0.0;

    return sqrt(new_sum / s->window_size);
}
/*
 * Commit `sample` to the RMS window.
 *
 * The slot at s->window_current is replaced by the square of `sample`,
 * s->rms_sum is adjusted accordingly (old value out, new value in), and
 * the cursor advances by one slot, wrapping back to s->window once it
 * reaches s->window_end.
 *
 * s       filter state; rms_sum, the current slot and window_current are
 *         written
 * sample  input sample being consumed
 */
static void update_rms(SilenceRemoveContext *s, double sample)
{
    double *slot = s->window_current;

    /* Swap the stored square and keep the running sum in step with it. */
    s->rms_sum -= *slot;
    *slot       = sample * sample;
    s->rms_sum += *slot;

    /* Advance the circular cursor. */
    slot++;
    if (slot >= s->window_end) {
        slot = s->window;
    }
    s->window_current = slot;
}
static void flush(AVFrame *out, AVFilterLink *outlink,
int *nb_samples_written, int *ret)
{
@ -209,12 +249,12 @@ silence_trim:
for (i = 0; i < nbs; i++) {
threshold = 0;
for (j = 0; j < inlink->channels; j++) {
threshold |= compute_rms(s, ibuf[j]) > s->start_threshold;
threshold |= s->compute(s, ibuf[j]) > s->start_threshold;
}
if (threshold) {
for (j = 0; j < inlink->channels; j++) {
update_rms(s, *ibuf);
s->update(s, *ibuf);
s->start_holdoff[s->start_holdoff_end++] = *ibuf++;
nb_samples_read++;
}
@ -232,7 +272,7 @@ silence_trim:
s->start_holdoff_end = 0;
for (j = 0; j < inlink->channels; j++)
update_rms(s, ibuf[j]);
s->update(s, ibuf[j]);
ibuf += inlink->channels;
nb_samples_read += inlink->channels;
@ -284,7 +324,7 @@ silence_copy:
for (i = 0; i < nbs; i++) {
threshold = 1;
for (j = 0; j < inlink->channels; j++)
threshold &= compute_rms(s, ibuf[j]) > s->stop_threshold;
threshold &= s->compute(s, ibuf[j]) > s->stop_threshold;
if (threshold && s->stop_holdoff_end && !s->leave_silence) {
s->mode = SILENCE_COPY_FLUSH;
@ -292,14 +332,14 @@ silence_copy:
goto silence_copy_flush;
} else if (threshold) {
for (j = 0; j < inlink->channels; j++) {
update_rms(s, *ibuf);
s->update(s, *ibuf);
*obuf++ = *ibuf++;
nb_samples_read++;
nb_samples_written++;
}
} else if (!threshold) {
for (j = 0; j < inlink->channels; j++) {
update_rms(s, *ibuf);
s->update(s, *ibuf);
if (s->leave_silence) {
*obuf++ = *ibuf;
nb_samples_written++;
@ -323,7 +363,7 @@ silence_copy:
s->start_found_periods = 0;
s->start_holdoff_offset = 0;
s->start_holdoff_end = 0;
clear_rms(s);
clear_window(s);
s->mode = SILENCE_TRIM;
flush(out, outlink, &nb_samples_written, &ret);
goto silence_trim;