vf_bilateral: slice-threaded processing with per-plane scratch buffers

Splits the filter into three slice-threaded passes (horizontal over rows,
vertical over columns, output conversion over rows) and gives every plane
its own set of scratch buffers so that planes no longer share state.

Review fix folded in: the allocation check in config_input() now tests
slice_factor_b and line_factor_b instead of testing the _a buffers twice.

Note: this patch was recovered from a whitespace-collapsed copy, so the
indentation of context and removed lines is reconstructed and may not match
the target file byte for byte.

diff --git a/libavfilter/vf_bilateral.c b/libavfilter/vf_bilateral.c
index b236efe6c3..41dc38d6f2 100644
--- a/libavfilter/vf_bilateral.c
+++ b/libavfilter/vf_bilateral.c
@@ -36,6 +36,7 @@ typedef struct BilateralContext {
     float sigmaR;
     int planes;
 
+    int nb_threads;
     int nb_planes;
     int depth;
     int planewidth[4];
@@ -44,14 +45,14 @@ typedef struct BilateralContext {
     float alpha;
     float range_table[65536];
 
-    float *img_out_f;
-    float *img_temp;
-    float *map_factor_a;
-    float *map_factor_b;
-    float *slice_factor_a;
-    float *slice_factor_b;
-    float *line_factor_a;
-    float *line_factor_b;
+    float *img_out_f[4];
+    float *img_temp[4];
+    float *map_factor_a[4];
+    float *map_factor_b[4];
+    float *slice_factor_a[4];
+    float *slice_factor_b[4];
+    float *line_factor_a[4];
+    float *line_factor_b[4];
 } BilateralContext;
 
 #define OFFSET(x) offsetof(BilateralContext, x)
@@ -102,6 +103,11 @@ static int config_params(AVFilterContext *ctx)
     return 0;
 }
 
+/* Input/output frame pair handed to every slice job. */
+typedef struct ThreadData {
+    AVFrame *in, *out;
+} ThreadData;
+
 static int config_input(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
@@ -109,7 +115,6 @@ static int config_input(AVFilterLink *inlink)
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 
     s->depth = desc->comp[0].depth;
-
     config_params(ctx);
 
     s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
@@ -118,133 +123,175 @@ static int config_input(AVFilterLink *inlink)
     s->planeheight[0] = s->planeheight[3] = inlink->h;
 
     s->nb_planes = av_pix_fmt_count_planes(inlink->format);
+    s->nb_threads = ff_filter_get_nb_threads(ctx);
 
-    s->img_out_f = av_calloc(inlink->w * inlink->h, sizeof(float));
-    s->img_temp = av_calloc(inlink->w * inlink->h, sizeof(float));
-    s->map_factor_a = av_calloc(inlink->w * inlink->h, sizeof(float));
-    s->map_factor_b = av_calloc(inlink->w * inlink->h, sizeof(float));
-    s->slice_factor_a = av_calloc(inlink->w, sizeof(float));
-    s->slice_factor_b = av_calloc(inlink->w, sizeof(float));
-    s->line_factor_a = av_calloc(inlink->w, sizeof(float));
-    s->line_factor_b = av_calloc(inlink->w, sizeof(float));
+    /* Each plane owns a private set of scratch buffers sized to that plane. */
+    for (int p = 0; p < s->nb_planes; p++) {
+        const int w = s->planewidth[p];
+        const int h = s->planeheight[p];
 
-    if (!s->img_out_f ||
-        !s->img_temp ||
-        !s->map_factor_a ||
-        !s->map_factor_b ||
-        !s->slice_factor_a ||
-        !s->slice_factor_a ||
-        !s->line_factor_a ||
-        !s->line_factor_a)
-        return AVERROR(ENOMEM);
+        s->img_out_f[p] = av_calloc(w * h, sizeof(float));
+        s->img_temp[p] = av_calloc(w * h, sizeof(float));
+        s->map_factor_a[p] = av_calloc(w * h, sizeof(float));
+        s->map_factor_b[p] = av_calloc(w * h, sizeof(float));
+        s->slice_factor_a[p] = av_calloc(w, sizeof(float));
+        s->slice_factor_b[p] = av_calloc(w, sizeof(float));
+        s->line_factor_a[p] = av_calloc(w, sizeof(float));
+        s->line_factor_b[p] = av_calloc(w, sizeof(float));
+
+        /* Check all eight buffers, including the _b half of each pair. */
+        if (!s->img_out_f[p] ||
+            !s->img_temp[p] ||
+            !s->map_factor_a[p] ||
+            !s->map_factor_b[p] ||
+            !s->slice_factor_a[p] ||
+            !s->slice_factor_b[p] ||
+            !s->line_factor_a[p] ||
+            !s->line_factor_b[p])
+            return AVERROR(ENOMEM);
+    }
 
     return 0;
 }
 
-#define BILATERAL(type, name) \
-static void bilateral_##name(BilateralContext *s, const uint8_t *ssrc, uint8_t *ddst, \
-                             int width, int height, int src_linesize, int dst_linesize) \
-{ \
-    type *dst = (type *)ddst; \
-    const type *src = (const type *)ssrc; \
-    float *img_out_f = s->img_out_f, *img_temp = s->img_temp; \
-    float *map_factor_a = s->map_factor_a, *map_factor_b = s->map_factor_b; \
-    float *slice_factor_a = s->slice_factor_a, *slice_factor_b = s->slice_factor_b; \
-    float *line_factor_a = s->line_factor_a, *line_factor_b = s->line_factor_b; \
-    const float *range_table = s->range_table; \
-    const float alpha = s->alpha; \
-    float ypr, ycr, *ycy, *ypy, *xcy, fp, fc; \
-    const float inv_alpha_ = 1.f - alpha; \
-    float *ycf, *ypf, *xcf, *in_factor; \
-    const type *tcy, *tpy; \
-    int h1; \
- \
-    for (int y = 0; y < height; y++) { \
-        float *temp_factor_x, *temp_x = &img_temp[y * width]; \
-        const type *in_x = &src[y * src_linesize]; \
-        const type *texture_x = &src[y * src_linesize]; \
-        type tpr; \
- \
-        *temp_x++ = ypr = *in_x++; \
-        tpr = *texture_x++; \
- \
-        temp_factor_x = &map_factor_a[y * width]; \
-        *temp_factor_x++ = fp = 1; \
- \
-        for (int x = 1; x < width; x++) { \
-            float alpha_; \
-            int range_dist; \
-            type tcr = *texture_x++; \
-            type dr = abs(tcr - tpr); \
- \
-            range_dist = dr; \
-            alpha_ = range_table[range_dist]; \
-            *temp_x++ = ycr = inv_alpha_*(*in_x++) + alpha_*ypr; \
-            tpr = tcr; \
-            ypr = ycr; \
-            *temp_factor_x++ = fc = inv_alpha_ + alpha_ * fp; \
-            fp = fc; \
-        } \
-        --temp_x; *temp_x = ((*temp_x) + (*--in_x)); \
-        tpr = *--texture_x; \
-        ypr = *in_x; \
- \
-        --temp_factor_x; *temp_factor_x = ((*temp_factor_x) + 1); \
-        fp = 1; \
- \
-        for (int x = width - 2; x >= 0; x--) { \
-            type tcr = *--texture_x; \
-            type dr = abs(tcr - tpr); \
-            int range_dist = dr; \
-            float alpha_ = range_table[range_dist]; \
- \
-            ycr = inv_alpha_ * (*--in_x) + alpha_ * ypr; \
-            --temp_x; *temp_x = ((*temp_x) + ycr); \
-            tpr = tcr; \
-            ypr = ycr; \
- \
-            fc = inv_alpha_ + alpha_*fp; \
-            --temp_factor_x; \
-            *temp_factor_x = ((*temp_factor_x) + fc); \
-            fp = fc; \
-        } \
-    } \
-    memcpy(img_out_f, img_temp, sizeof(float) * width); \
- \
-    in_factor = map_factor_a; \
-    memcpy(map_factor_b, in_factor, sizeof(float) * width); \
-    for (int y = 1; y < height; y++) { \
-        tpy = &src[(y - 1) * src_linesize]; \
-        tcy = &src[y * src_linesize]; \
-        xcy = &img_temp[y * width]; \
-        ypy = &img_out_f[(y - 1) * width]; \
-        ycy = &img_out_f[y * width]; \
- \
-        xcf = &in_factor[y * width]; \
-        ypf = &map_factor_b[(y - 1) * width]; \
-        ycf = &map_factor_b[y * width]; \
-        for (int x = 0; x < width; x++) { \
-            type dr = abs((*tcy++) - (*tpy++)); \
-            int range_dist = dr; \
-            float alpha_ = range_table[range_dist]; \
- \
-            *ycy++ = inv_alpha_*(*xcy++) + alpha_*(*ypy++); \
-            *ycf++ = inv_alpha_*(*xcf++) + alpha_*(*ypf++); \
-        } \
-    } \
-    h1 = height - 1; \
-    ycf = line_factor_a; \
-    ypf = line_factor_b; \
-    memcpy(ypf, &in_factor[h1 * width], sizeof(float) * width); \
-    for (int x = 0; x < width; x++) \
-        map_factor_b[h1 * width + x] = (map_factor_b[h1 * width + x] + ypf[x]); \
- \
-    ycy = slice_factor_a; \
-    ypy = slice_factor_b; \
-    memcpy(ypy, &img_temp[h1 * width], sizeof(float) * width); \
-    for (int x = 0, k = 0; x < width; x++) { \
-        int idx = h1 * width + x; \
-        img_out_f[idx] = (img_out_f[idx] + ypy[k++]) / map_factor_b[h1 * width + x]; \
+/* Horizontal pass: each job filters rows [slice_start, slice_end) of one plane,
+ * writing the filtered rows to img_temp and the matching factors to map_factor_a. */
+#define BILATERAL_H(type, name) \
+static void bilateralh_##name(BilateralContext *s, AVFrame *out, AVFrame *in, \
+                              int jobnr, int nb_jobs, int plane) \
+{ \
+    const int width = s->planewidth[plane]; \
+    const int height = s->planeheight[plane]; \
+    const int slice_start = (height * jobnr) / nb_jobs; \
+    const int slice_end = (height * (jobnr+1)) / nb_jobs; \
+    const int src_linesize = in->linesize[plane] / sizeof(type); \
+    const type *src = (const type *)in->data[plane]; \
+    float *img_temp = s->img_temp[plane]; \
+    float *map_factor_a = s->map_factor_a[plane]; \
+    const float *const range_table = s->range_table; \
+    const float alpha = s->alpha; \
+    float ypr, ycr, fp, fc; \
+    const float inv_alpha_ = 1.f - alpha; \
+ \
+    for (int y = slice_start; y < slice_end; y++) { \
+        float *temp_factor_x, *temp_x = &img_temp[y * width]; \
+        const type *in_x = &src[y * src_linesize]; \
+        const type *texture_x = &src[y * src_linesize]; \
+        type tpr; \
+ \
+        *temp_x++ = ypr = *in_x++; \
+        tpr = *texture_x++; \
+ \
+        temp_factor_x = &map_factor_a[y * width]; \
+        *temp_factor_x++ = fp = 1; \
+ \
+        for (int x = 1; x < width; x++) { \
+            float alpha_; \
+            int range_dist; \
+            type tcr = *texture_x++; \
+            type dr = abs(tcr - tpr); \
+ \
+            range_dist = dr; \
+            alpha_ = range_table[range_dist]; \
+            *temp_x++ = ycr = inv_alpha_*(*in_x++) + alpha_*ypr; \
+            tpr = tcr; \
+            ypr = ycr; \
+            *temp_factor_x++ = fc = inv_alpha_ + alpha_ * fp; \
+            fp = fc; \
+        } \
+        --temp_x; *temp_x = ((*temp_x) + (*--in_x)); \
+        tpr = *--texture_x; \
+        ypr = *in_x; \
+ \
+        --temp_factor_x; *temp_factor_x = ((*temp_factor_x) + 1); \
+        fp = 1; \
+ \
+        for (int x = width - 2; x >= 0; x--) { \
+            type tcr = *--texture_x; \
+            type dr = abs(tcr - tpr); \
+            int range_dist = dr; \
+            float alpha_ = range_table[range_dist]; \
+ \
+            ycr = inv_alpha_ * (*--in_x) + alpha_ * ypr; \
+            --temp_x; *temp_x = ((*temp_x) + ycr); \
+            tpr = tcr; \
+            ypr = ycr; \
+ \
+            fc = inv_alpha_ + alpha_*fp; \
+            --temp_factor_x; \
+            *temp_factor_x = ((*temp_factor_x) + fc); \
+            fp = fc; \
+        } \
+    } \
+}
+
+BILATERAL_H(uint8_t, byte)
+BILATERAL_H(uint16_t, word)
+
+/* Vertical pass: each job handles columns [slice_start, slice_end) of one plane,
+ * reading img_temp and map_factor_a and accumulating the result into img_out_f. */
+#define BILATERAL_V(type, name) \
+static void bilateralv_##name(BilateralContext *s, AVFrame *out, AVFrame *in, \
+                              int jobnr, int nb_jobs, int plane) \
+{ \
+    const int width = s->planewidth[plane]; \
+    const int height = s->planeheight[plane]; \
+    const int slice_start = (width * jobnr) / nb_jobs; \
+    const int slice_end = (width * (jobnr+1)) / nb_jobs; \
+    const int src_linesize = in->linesize[plane] / sizeof(type); \
+    const type *src = (const type *)in->data[plane] + slice_start; \
+    float *img_out_f = s->img_out_f[plane] + slice_start; \
+    float *img_temp = s->img_temp[plane] + slice_start; \
+    float *map_factor_a = s->map_factor_a[plane] + slice_start; \
+    float *map_factor_b = s->map_factor_b[plane] + slice_start; \
+    float *slice_factor_a = s->slice_factor_a[plane] + slice_start; \
+    float *slice_factor_b = s->slice_factor_b[plane] + slice_start; \
+    float *line_factor_a = s->line_factor_a[plane] + slice_start; \
+    float *line_factor_b = s->line_factor_b[plane] + slice_start; \
+    const float *const range_table = s->range_table; \
+    const float alpha = s->alpha; \
+    float *ycy, *ypy, *xcy; \
+    const float inv_alpha_ = 1.f - alpha; \
+    float *ycf, *ypf, *xcf, *in_factor; \
+    const type *tcy, *tpy; \
+    int h1; \
+ \
+    memcpy(img_out_f, img_temp, sizeof(float) * (slice_end - slice_start)); \
+ \
+    in_factor = map_factor_a; \
+    memcpy(map_factor_b, in_factor, sizeof(float) * (slice_end - slice_start)); \
+    for (int y = 1; y < height; y++) { \
+        tpy = &src[(y - 1) * src_linesize]; \
+        tcy = &src[y * src_linesize]; \
+        xcy = &img_temp[y * width]; \
+        ypy = &img_out_f[(y - 1) * width]; \
+        ycy = &img_out_f[y * width]; \
+ \
+        xcf = &in_factor[y * width]; \
+        ypf = &map_factor_b[(y - 1) * width]; \
+        ycf = &map_factor_b[y * width]; \
+        for (int x = 0; x < slice_end - slice_start; x++) { \
+            type dr = abs((*tcy++) - (*tpy++)); \
+            int range_dist = dr; \
+            float alpha_ = range_table[range_dist]; \
+ \
+            *ycy++ = inv_alpha_*(*xcy++) + alpha_*(*ypy++); \
+            *ycf++ = inv_alpha_*(*xcf++) + alpha_*(*ypf++); \
+        } \
+    } \
+    h1 = height - 1; \
+    ycf = line_factor_a; \
+    ypf = line_factor_b; \
+    memcpy(ypf, &in_factor[h1 * width], sizeof(float) * (slice_end - slice_start)); \
+    for (int x = 0, k = 0; x < slice_end - slice_start; x++) \
+        map_factor_b[h1 * width + x] = (map_factor_b[h1 * width + x] + ypf[k++]); \
+ \
+    ycy = slice_factor_a; \
+    ypy = slice_factor_b; \
+    memcpy(ypy, &img_temp[h1 * width], sizeof(float) * (slice_end - slice_start)); \
+    for (int x = 0, k = 0; x < slice_end - slice_start; x++) { \
+        int idx = h1 * width + x; \
+        img_out_f[idx] = (img_out_f[idx] + ypy[k++]) / map_factor_b[h1 * width + x]; \
     } \
  \
     for (int y = h1 - 1; y >= 0; y--) { \
@@ -262,7 +309,7 @@ static void bilateral_##name(BilateralContext *s, const uint8_t *ssrc, uint8_t *
         ycf_ = ycf; \
         ypf_ = ypf; \
         factor_ = &map_factor_b[y * width]; \
-        for (int x = 0; x < width; x++) { \
+        for (int x = 0; x < slice_end - slice_start; x++) { \
             type dr = abs((*tcy++) - (*tpy++)); \
             int range_dist = dr; \
             float alpha_ = range_table[range_dist]; \
@@ -281,20 +328,123 @@ static void bilateral_##name(BilateralContext *s, const uint8_t *ssrc, uint8_t *
         ypy = ycy; \
         ypf = ycf; \
     } \
- \
-    for (int i = 0; i < height; i++) \
-        for (int j = 0; j < width; j++) \
-            dst[j + i * dst_linesize] = lrintf(img_out_f[i * width + j]); \
 }
 
-BILATERAL(uint8_t, byte)
-BILATERAL(uint16_t, word)
+BILATERAL_V(uint8_t, byte)
+BILATERAL_V(uint16_t, word)
+
+/* Output pass: round rows [slice_start, slice_end) of img_out_f into the output plane. */
+#define BILATERAL_O(type, name) \
+static void bilateralo_##name(BilateralContext *s, AVFrame *out, AVFrame *in, \
+                              int jobnr, int nb_jobs, int plane) \
+{ \
+    const int width = s->planewidth[plane]; \
+    const int height = s->planeheight[plane]; \
+    const int slice_start = (height * jobnr) / nb_jobs; \
+    const int slice_end = (height * (jobnr+1)) / nb_jobs; \
+    const int dst_linesize = out->linesize[plane] / sizeof(type); \
+ \
+    for (int i = slice_start; i < slice_end; i++) { \
+        type *dst = (type *)out->data[plane] + i * dst_linesize; \
+        const float *const img_out_f = s->img_out_f[plane] + i * width; \
+        for (int j = 0; j < width; j++) \
+            dst[j] = lrintf(img_out_f[j]); \
+    } \
+}
+
+BILATERAL_O(uint8_t, byte)
+BILATERAL_O(uint16_t, word)
+
+/* Slice job: run the horizontal pass on every selected plane. */
+static int bilateralh_planes(AVFilterContext *ctx, void *arg,
+                             int jobnr, int nb_jobs)
+{
+    BilateralContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *out = td->out;
+    AVFrame *in = td->in;
+
+    for (int plane = 0; plane < s->nb_planes; plane++) {
+        if (!(s->planes & (1 << plane)))
+            continue;
+
+        if (s->depth <= 8)
+            bilateralh_byte(s, out, in, jobnr, nb_jobs, plane);
+        else
+            bilateralh_word(s, out, in, jobnr, nb_jobs, plane);
+    }
+
+    return 0;
+}
+
+/* Slice job: run the vertical pass on every selected plane. */
+static int bilateralv_planes(AVFilterContext *ctx, void *arg,
+                             int jobnr, int nb_jobs)
+{
+    BilateralContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *out = td->out;
+    AVFrame *in = td->in;
+
+    for (int plane = 0; plane < s->nb_planes; plane++) {
+        if (!(s->planes & (1 << plane)))
+            continue;
+
+        if (s->depth <= 8)
+            bilateralv_byte(s, out, in, jobnr, nb_jobs, plane);
+        else
+            bilateralv_word(s, out, in, jobnr, nb_jobs, plane);
+    }
+
+    return 0;
+}
+
+/* Slice job: write selected planes to the output; copy the others when out != in. */
+static int bilateralo_planes(AVFilterContext *ctx, void *arg,
+                             int jobnr, int nb_jobs)
+{
+    BilateralContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *out = td->out;
+    AVFrame *in = td->in;
+
+    for (int plane = 0; plane < s->nb_planes; plane++) {
+        if (!(s->planes & (1 << plane))) {
+            if (out != in) {
+                const int height = s->planeheight[plane];
+                const int slice_start = (height * jobnr) / nb_jobs;
+                const int slice_end = (height * (jobnr+1)) / nb_jobs;
+                const int width = s->planewidth[plane];
+                const int linesize = in->linesize[plane];
+                const int dst_linesize = out->linesize[plane];
+                const uint8_t *src = in->data[plane];
+                uint8_t *dst = out->data[plane];
+
+                av_image_copy_plane(dst + slice_start * dst_linesize,
+                                    dst_linesize,
+                                    src + slice_start * linesize,
+                                    linesize,
+                                    width * ((s->depth + 7) / 8),
+                                    slice_end - slice_start);
+            }
+            continue;
+        }
+
+        if (s->depth <= 8)
+            bilateralo_byte(s, out, in, jobnr, nb_jobs, plane);
+        else
+            bilateralo_word(s, out, in, jobnr, nb_jobs, plane);
+    }
+
+    return 0;
+}
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *ctx = inlink->dst;
     BilateralContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
+    ThreadData td;
     AVFrame *out;
 
     if (av_frame_is_writable(in)) {
@@ -308,24 +458,12 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         av_frame_copy_props(out, in);
     }
 
-    for (int plane = 0; plane < s->nb_planes; plane++) {
-        if (!(s->planes & (1 << plane))) {
-            if (out != in)
-                av_image_copy_plane(out->data[plane], out->linesize[plane],
-                                    in->data[plane], in->linesize[plane],
-                                    s->planewidth[plane] * ((s->depth + 7) / 8), s->planeheight[plane]);
-            continue;
-        }
-
-        if (s->depth <= 8)
-            bilateral_byte(s, in->data[plane], out->data[plane],
-                           s->planewidth[plane], s->planeheight[plane],
-                           in->linesize[plane], out->linesize[plane]);
-        else
-            bilateral_word(s, in->data[plane], out->data[plane],
-                           s->planewidth[plane], s->planeheight[plane],
-                           in->linesize[plane] / 2, out->linesize[plane] / 2);
-    }
+    /* Order matters: the column pass reads the row pass, the output pass reads both. */
+    td.in = in;
+    td.out = out;
+    ff_filter_execute(ctx, bilateralh_planes, &td, NULL, s->nb_threads);
+    ff_filter_execute(ctx, bilateralv_planes, &td, NULL, s->nb_threads);
+    ff_filter_execute(ctx, bilateralo_planes, &td, NULL, s->nb_threads);
 
     if (out != in)
         av_frame_free(&in);
@@ -336,14 +474,16 @@ static av_cold void uninit(AVFilterContext *ctx)
 {
     BilateralContext *s = ctx->priv;
 
-    av_freep(&s->img_out_f);
-    av_freep(&s->img_temp);
-    av_freep(&s->map_factor_a);
-    av_freep(&s->map_factor_b);
-    av_freep(&s->slice_factor_a);
-    av_freep(&s->slice_factor_b);
-    av_freep(&s->line_factor_a);
-    av_freep(&s->line_factor_b);
+    for (int p = 0; p < s->nb_planes; p++) {
+        av_freep(&s->img_out_f[p]);
+        av_freep(&s->img_temp[p]);
+        av_freep(&s->map_factor_a[p]);
+        av_freep(&s->map_factor_b[p]);
+        av_freep(&s->slice_factor_a[p]);
+        av_freep(&s->slice_factor_b[p]);
+        av_freep(&s->line_factor_a[p]);
+        av_freep(&s->line_factor_b[p]);
+    }
 }
 
 static int process_command(AVFilterContext *ctx,
@@ -386,6 +526,7 @@ const AVFilter ff_vf_bilateral = {
     FILTER_INPUTS(bilateral_inputs),
     FILTER_OUTPUTS(bilateral_outputs),
     FILTER_PIXFMTS_ARRAY(pix_fmts),
-    .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+    .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC |
+             AVFILTER_FLAG_SLICE_THREADS,
     .process_command = process_command,
 };