avfilter/vf_colorchannelmixer: add slice threading

Signed-off-by: Paul B Mahol <onemda@gmail.com>
This commit is contained in:
Paul B Mahol 2018-05-05 16:09:29 +02:00
parent 15a2e35e9e
commit d1e1872418

View File

@ -30,6 +30,10 @@
#define B 2
#define A 3
/**
 * Per-frame job description handed to the slice workers through the
 * opaque argument of the filter_slice callbacks.
 */
typedef struct ThreadData {
    AVFrame *in;   /* frame the workers read samples from */
    AVFrame *out;  /* frame the workers write to; may be the same frame as in */
} ThreadData;
typedef struct ColorChannelMixerContext {
const AVClass *class;
double rr, rg, rb, ra;
@ -42,6 +46,8 @@ typedef struct ColorChannelMixerContext {
int *buffer;
uint8_t rgba_map[4];
int (*filter_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
} ColorChannelMixerContext;
#define OFFSET(x) offsetof(ColorChannelMixerContext, x)
@ -87,6 +93,237 @@ static int query_formats(AVFilterContext *ctx)
return ff_set_common_formats(ctx, fmts_list);
}
/**
 * Slice worker for packed RGBA frames with 16-bit components.
 *
 * Handles the rows [height * jobnr / nb_jobs, height * (jobnr + 1) / nb_jobs)
 * of the output frame. Every output component is the sum of four lookup
 * table entries, indexed by the input R, G, B and A samples, clipped with
 * av_clip_uint16().
 *
 * @param ctx     filter context whose priv data is a ColorChannelMixerContext
 * @param arg     pointer to the ThreadData holding the in/out frames
 * @param jobnr   index of this job
 * @param nb_jobs total number of jobs the frame is split into
 * @return 0
 */
static int filter_slice_rgba64(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    ColorChannelMixerContext *s = ctx->priv;
    ThreadData *td = arg;
    AVFrame *in  = td->in;
    AVFrame *out = td->out;
    const int first_row = (out->height * jobnr) / nb_jobs;
    const int end_row   = (out->height * (jobnr+1)) / nb_jobs;
    const uint8_t ri = s->rgba_map[R];
    const uint8_t gi = s->rgba_map[G];
    const uint8_t bi = s->rgba_map[B];
    const uint8_t ai = s->rgba_map[A];
    const uint8_t *src_line = in->data[0] + first_row * in->linesize[0];
    uint8_t *dst_line = out->data[0] + first_row * out->linesize[0];
    int x, y;

    for (y = first_row; y < end_row; y++) {
        const uint16_t *src_px = (const uint16_t *)src_line;
        uint16_t *dst_px = (uint16_t *)dst_line;

        for (x = 0; x < out->width; x++) {
            /* Fetch every input sample before storing anything: in and out
             * may be the same frame. */
            const uint16_t r = src_px[ri];
            const uint16_t g = src_px[gi];
            const uint16_t b = src_px[bi];
            const uint16_t a = src_px[ai];

            dst_px[ri] = av_clip_uint16(s->lut[R][R][r] + s->lut[R][G][g] +
                                        s->lut[R][B][b] + s->lut[R][A][a]);
            dst_px[gi] = av_clip_uint16(s->lut[G][R][r] + s->lut[G][G][g] +
                                        s->lut[G][B][b] + s->lut[G][A][a]);
            dst_px[bi] = av_clip_uint16(s->lut[B][R][r] + s->lut[B][G][g] +
                                        s->lut[B][B][b] + s->lut[B][A][a]);
            dst_px[ai] = av_clip_uint16(s->lut[A][R][r] + s->lut[A][G][g] +
                                        s->lut[A][B][b] + s->lut[A][A][a]);

            src_px += 4;
            dst_px += 4;
        }

        src_line += in->linesize[0];
        dst_line += out->linesize[0];
    }

    return 0;
}
/**
 * Slice worker for packed RGB frames with 16-bit components (no alpha).
 *
 * Handles the rows [height * jobnr / nb_jobs, height * (jobnr + 1) / nb_jobs)
 * of the output frame. Every output component is the sum of three lookup
 * table entries, indexed by the input R, G and B samples, clipped with
 * av_clip_uint16().
 *
 * @param ctx     filter context whose priv data is a ColorChannelMixerContext
 * @param arg     pointer to the ThreadData holding the in/out frames
 * @param jobnr   index of this job
 * @param nb_jobs total number of jobs the frame is split into
 * @return 0
 */
static int filter_slice_rgb48(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    ColorChannelMixerContext *s = ctx->priv;
    ThreadData *td = arg;
    AVFrame *in  = td->in;
    AVFrame *out = td->out;
    const int first_row = (out->height * jobnr) / nb_jobs;
    const int end_row   = (out->height * (jobnr+1)) / nb_jobs;
    const uint8_t ri = s->rgba_map[R];
    const uint8_t gi = s->rgba_map[G];
    const uint8_t bi = s->rgba_map[B];
    const uint8_t *src_line = in->data[0] + first_row * in->linesize[0];
    uint8_t *dst_line = out->data[0] + first_row * out->linesize[0];
    int x, y;

    for (y = first_row; y < end_row; y++) {
        const uint16_t *src_px = (const uint16_t *)src_line;
        uint16_t *dst_px = (uint16_t *)dst_line;

        for (x = 0; x < out->width; x++) {
            /* Fetch every input sample before storing anything: in and out
             * may be the same frame. */
            const uint16_t r = src_px[ri];
            const uint16_t g = src_px[gi];
            const uint16_t b = src_px[bi];

            dst_px[ri] = av_clip_uint16(s->lut[R][R][r] +
                                        s->lut[R][G][g] +
                                        s->lut[R][B][b]);
            dst_px[gi] = av_clip_uint16(s->lut[G][R][r] +
                                        s->lut[G][G][g] +
                                        s->lut[G][B][b]);
            dst_px[bi] = av_clip_uint16(s->lut[B][R][r] +
                                        s->lut[B][G][g] +
                                        s->lut[B][B][b]);

            src_px += 3;
            dst_px += 3;
        }

        src_line += in->linesize[0];
        dst_line += out->linesize[0];
    }

    return 0;
}
/**
 * Slice worker for packed RGBA frames with 8-bit components.
 *
 * Handles the rows [height * jobnr / nb_jobs, height * (jobnr + 1) / nb_jobs)
 * of the output frame. Every output component is the sum of four lookup
 * table entries, indexed by the input R, G, B and A samples, clipped with
 * av_clip_uint8().
 *
 * @param ctx     filter context whose priv data is a ColorChannelMixerContext
 * @param arg     pointer to the ThreadData holding the in/out frames
 * @param jobnr   index of this job
 * @param nb_jobs total number of jobs the frame is split into
 * @return 0
 */
static int filter_slice_rgba(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    ColorChannelMixerContext *s = ctx->priv;
    ThreadData *td = arg;
    AVFrame *in  = td->in;
    AVFrame *out = td->out;
    const int first_row = (out->height * jobnr) / nb_jobs;
    const int end_row   = (out->height * (jobnr+1)) / nb_jobs;
    const uint8_t ri = s->rgba_map[R];
    const uint8_t gi = s->rgba_map[G];
    const uint8_t bi = s->rgba_map[B];
    const uint8_t ai = s->rgba_map[A];
    const uint8_t *src_line = in->data[0] + first_row * in->linesize[0];
    uint8_t *dst_line = out->data[0] + first_row * out->linesize[0];
    int x, y;

    for (y = first_row; y < end_row; y++) {
        const uint8_t *src_px = src_line;
        uint8_t *dst_px = dst_line;

        for (x = 0; x < out->width; x++) {
            /* Fetch every input sample before storing anything: in and out
             * may be the same frame. */
            const uint8_t r = src_px[ri];
            const uint8_t g = src_px[gi];
            const uint8_t b = src_px[bi];
            const uint8_t a = src_px[ai];

            dst_px[ri] = av_clip_uint8(s->lut[R][R][r] + s->lut[R][G][g] +
                                       s->lut[R][B][b] + s->lut[R][A][a]);
            dst_px[gi] = av_clip_uint8(s->lut[G][R][r] + s->lut[G][G][g] +
                                       s->lut[G][B][b] + s->lut[G][A][a]);
            dst_px[bi] = av_clip_uint8(s->lut[B][R][r] + s->lut[B][G][g] +
                                       s->lut[B][B][b] + s->lut[B][A][a]);
            dst_px[ai] = av_clip_uint8(s->lut[A][R][r] + s->lut[A][G][g] +
                                       s->lut[A][B][b] + s->lut[A][A][a]);

            src_px += 4;
            dst_px += 4;
        }

        src_line += in->linesize[0];
        dst_line += out->linesize[0];
    }

    return 0;
}
/**
 * Slice worker for packed RGB frames with 8-bit components (no alpha).
 *
 * Handles the rows [height * jobnr / nb_jobs, height * (jobnr + 1) / nb_jobs)
 * of the output frame. Every output component is the sum of three lookup
 * table entries, indexed by the input R, G and B samples, clipped with
 * av_clip_uint8().
 *
 * @param ctx     filter context whose priv data is a ColorChannelMixerContext
 * @param arg     pointer to the ThreadData holding the in/out frames
 * @param jobnr   index of this job
 * @param nb_jobs total number of jobs the frame is split into
 * @return 0
 */
static int filter_slice_rgb24(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    ColorChannelMixerContext *s = ctx->priv;
    ThreadData *td = arg;
    AVFrame *in  = td->in;
    AVFrame *out = td->out;
    const int first_row = (out->height * jobnr) / nb_jobs;
    const int end_row   = (out->height * (jobnr+1)) / nb_jobs;
    const uint8_t ri = s->rgba_map[R];
    const uint8_t gi = s->rgba_map[G];
    const uint8_t bi = s->rgba_map[B];
    const uint8_t *src_line = in->data[0] + first_row * in->linesize[0];
    uint8_t *dst_line = out->data[0] + first_row * out->linesize[0];
    int x, y;

    for (y = first_row; y < end_row; y++) {
        const uint8_t *src_px = src_line;
        uint8_t *dst_px = dst_line;

        for (x = 0; x < out->width; x++) {
            /* Fetch every input sample before storing anything: in and out
             * may be the same frame. */
            const uint8_t r = src_px[ri];
            const uint8_t g = src_px[gi];
            const uint8_t b = src_px[bi];

            dst_px[ri] = av_clip_uint8(s->lut[R][R][r] +
                                       s->lut[R][G][g] +
                                       s->lut[R][B][b]);
            dst_px[gi] = av_clip_uint8(s->lut[G][R][r] +
                                       s->lut[G][G][g] +
                                       s->lut[G][B][b]);
            dst_px[bi] = av_clip_uint8(s->lut[B][R][r] +
                                       s->lut[B][G][g] +
                                       s->lut[B][B][b]);

            src_px += 3;
            dst_px += 3;
        }

        src_line += in->linesize[0];
        dst_line += out->linesize[0];
    }

    return 0;
}
/**
 * Slice worker for packed 4-byte pixels with 8-bit R, G and B components
 * whose fourth byte does not take part in the mix.
 *
 * Handles the rows [height * jobnr / nb_jobs, height * (jobnr + 1) / nb_jobs)
 * of the output frame. R, G and B are each the sum of three lookup table
 * entries, indexed by the input R, G and B samples, clipped with
 * av_clip_uint8(). The byte at the A offset of rgba_map is set to 0 when
 * the output is a different frame than the input, and is not written when
 * both are the same frame.
 *
 * @param ctx     filter context whose priv data is a ColorChannelMixerContext
 * @param arg     pointer to the ThreadData holding the in/out frames
 * @param jobnr   index of this job
 * @param nb_jobs total number of jobs the frame is split into
 * @return 0
 */
static int filter_slice_rgb0(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    ColorChannelMixerContext *s = ctx->priv;
    ThreadData *td = arg;
    AVFrame *in  = td->in;
    AVFrame *out = td->out;
    const int first_row = (out->height * jobnr) / nb_jobs;
    const int end_row   = (out->height * (jobnr+1)) / nb_jobs;
    const uint8_t ri = s->rgba_map[R];
    const uint8_t gi = s->rgba_map[G];
    const uint8_t bi = s->rgba_map[B];
    const uint8_t ai = s->rgba_map[A];
    const uint8_t *src_line = in->data[0] + first_row * in->linesize[0];
    uint8_t *dst_line = out->data[0] + first_row * out->linesize[0];
    int x, y;

    for (y = first_row; y < end_row; y++) {
        const uint8_t *src_px = src_line;
        uint8_t *dst_px = dst_line;

        for (x = 0; x < out->width; x++) {
            /* Fetch every input sample before storing anything: in and out
             * may be the same frame. */
            const uint8_t r = src_px[ri];
            const uint8_t g = src_px[gi];
            const uint8_t b = src_px[bi];

            dst_px[ri] = av_clip_uint8(s->lut[R][R][r] +
                                       s->lut[R][G][g] +
                                       s->lut[R][B][b]);
            dst_px[gi] = av_clip_uint8(s->lut[G][R][r] +
                                       s->lut[G][G][g] +
                                       s->lut[G][B][b]);
            dst_px[bi] = av_clip_uint8(s->lut[B][R][r] +
                                       s->lut[B][G][g] +
                                       s->lut[B][B][b]);
            /* Only a separate output frame gets its fourth byte cleared. */
            if (in != out)
                dst_px[ai] = 0;

            src_px += 4;
            dst_px += 4;
        }

        src_line += in->linesize[0];
        dst_line += out->linesize[0];
    }

    return 0;
}
static int config_output(AVFilterLink *outlink)
{
AVFilterContext *ctx = outlink->src;
@ -136,6 +373,33 @@ static int config_output(AVFilterLink *outlink)
s->lut[A][A][i] = lrint(i * s->aa);
}
switch (outlink->format) {
case AV_PIX_FMT_BGR24:
case AV_PIX_FMT_RGB24:
s->filter_slice = filter_slice_rgb24;
break;
case AV_PIX_FMT_0BGR:
case AV_PIX_FMT_0RGB:
case AV_PIX_FMT_BGR0:
case AV_PIX_FMT_RGB0:
s->filter_slice = filter_slice_rgb0;
break;
case AV_PIX_FMT_ABGR:
case AV_PIX_FMT_ARGB:
case AV_PIX_FMT_BGRA:
case AV_PIX_FMT_RGBA:
s->filter_slice = filter_slice_rgba;
break;
case AV_PIX_FMT_BGR48:
case AV_PIX_FMT_RGB48:
s->filter_slice = filter_slice_rgb48;
break;
case AV_PIX_FMT_BGRA64:
case AV_PIX_FMT_RGBA64:
s->filter_slice = filter_slice_rgba64;
break;
}
return 0;
}
@ -144,14 +408,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
AVFilterContext *ctx = inlink->dst;
ColorChannelMixerContext *s = ctx->priv;
AVFilterLink *outlink = ctx->outputs[0];
const uint8_t roffset = s->rgba_map[R];
const uint8_t goffset = s->rgba_map[G];
const uint8_t boffset = s->rgba_map[B];
const uint8_t aoffset = s->rgba_map[A];
const uint8_t *srcrow = in->data[0];
uint8_t *dstrow;
ThreadData td;
AVFrame *out;
int i, j;
if (av_frame_is_writable(in)) {
out = in;
@ -164,164 +422,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
av_frame_copy_props(out, in);
}
dstrow = out->data[0];
switch (outlink->format) {
case AV_PIX_FMT_BGR24:
case AV_PIX_FMT_RGB24:
for (i = 0; i < outlink->h; i++) {
const uint8_t *src = srcrow;
uint8_t *dst = dstrow;
for (j = 0; j < outlink->w * 3; j += 3) {
const uint8_t rin = src[j + roffset];
const uint8_t gin = src[j + goffset];
const uint8_t bin = src[j + boffset];
dst[j + roffset] = av_clip_uint8(s->lut[R][R][rin] +
s->lut[R][G][gin] +
s->lut[R][B][bin]);
dst[j + goffset] = av_clip_uint8(s->lut[G][R][rin] +
s->lut[G][G][gin] +
s->lut[G][B][bin]);
dst[j + boffset] = av_clip_uint8(s->lut[B][R][rin] +
s->lut[B][G][gin] +
s->lut[B][B][bin]);
}
srcrow += in->linesize[0];
dstrow += out->linesize[0];
}
break;
case AV_PIX_FMT_0BGR:
case AV_PIX_FMT_0RGB:
case AV_PIX_FMT_BGR0:
case AV_PIX_FMT_RGB0:
for (i = 0; i < outlink->h; i++) {
const uint8_t *src = srcrow;
uint8_t *dst = dstrow;
for (j = 0; j < outlink->w * 4; j += 4) {
const uint8_t rin = src[j + roffset];
const uint8_t gin = src[j + goffset];
const uint8_t bin = src[j + boffset];
dst[j + roffset] = av_clip_uint8(s->lut[R][R][rin] +
s->lut[R][G][gin] +
s->lut[R][B][bin]);
dst[j + goffset] = av_clip_uint8(s->lut[G][R][rin] +
s->lut[G][G][gin] +
s->lut[G][B][bin]);
dst[j + boffset] = av_clip_uint8(s->lut[B][R][rin] +
s->lut[B][G][gin] +
s->lut[B][B][bin]);
if (in != out)
dst[j + aoffset] = 0;
}
srcrow += in->linesize[0];
dstrow += out->linesize[0];
}
break;
case AV_PIX_FMT_ABGR:
case AV_PIX_FMT_ARGB:
case AV_PIX_FMT_BGRA:
case AV_PIX_FMT_RGBA:
for (i = 0; i < outlink->h; i++) {
const uint8_t *src = srcrow;
uint8_t *dst = dstrow;
for (j = 0; j < outlink->w * 4; j += 4) {
const uint8_t rin = src[j + roffset];
const uint8_t gin = src[j + goffset];
const uint8_t bin = src[j + boffset];
const uint8_t ain = src[j + aoffset];
dst[j + roffset] = av_clip_uint8(s->lut[R][R][rin] +
s->lut[R][G][gin] +
s->lut[R][B][bin] +
s->lut[R][A][ain]);
dst[j + goffset] = av_clip_uint8(s->lut[G][R][rin] +
s->lut[G][G][gin] +
s->lut[G][B][bin] +
s->lut[G][A][ain]);
dst[j + boffset] = av_clip_uint8(s->lut[B][R][rin] +
s->lut[B][G][gin] +
s->lut[B][B][bin] +
s->lut[B][A][ain]);
dst[j + aoffset] = av_clip_uint8(s->lut[A][R][rin] +
s->lut[A][G][gin] +
s->lut[A][B][bin] +
s->lut[A][A][ain]);
}
srcrow += in->linesize[0];
dstrow += out->linesize[0];
}
break;
case AV_PIX_FMT_BGR48:
case AV_PIX_FMT_RGB48:
for (i = 0; i < outlink->h; i++) {
const uint16_t *src = (const uint16_t *)srcrow;
uint16_t *dst = (uint16_t *)dstrow;
for (j = 0; j < outlink->w * 3; j += 3) {
const uint16_t rin = src[j + roffset];
const uint16_t gin = src[j + goffset];
const uint16_t bin = src[j + boffset];
dst[j + roffset] = av_clip_uint16(s->lut[R][R][rin] +
s->lut[R][G][gin] +
s->lut[R][B][bin]);
dst[j + goffset] = av_clip_uint16(s->lut[G][R][rin] +
s->lut[G][G][gin] +
s->lut[G][B][bin]);
dst[j + boffset] = av_clip_uint16(s->lut[B][R][rin] +
s->lut[B][G][gin] +
s->lut[B][B][bin]);
}
srcrow += in->linesize[0];
dstrow += out->linesize[0];
}
break;
case AV_PIX_FMT_BGRA64:
case AV_PIX_FMT_RGBA64:
for (i = 0; i < outlink->h; i++) {
const uint16_t *src = (const uint16_t *)srcrow;
uint16_t *dst = (uint16_t *)dstrow;
for (j = 0; j < outlink->w * 4; j += 4) {
const uint16_t rin = src[j + roffset];
const uint16_t gin = src[j + goffset];
const uint16_t bin = src[j + boffset];
const uint16_t ain = src[j + aoffset];
dst[j + roffset] = av_clip_uint16(s->lut[R][R][rin] +
s->lut[R][G][gin] +
s->lut[R][B][bin] +
s->lut[R][A][ain]);
dst[j + goffset] = av_clip_uint16(s->lut[G][R][rin] +
s->lut[G][G][gin] +
s->lut[G][B][bin] +
s->lut[G][A][ain]);
dst[j + boffset] = av_clip_uint16(s->lut[B][R][rin] +
s->lut[B][G][gin] +
s->lut[B][B][bin] +
s->lut[B][A][ain]);
dst[j + aoffset] = av_clip_uint16(s->lut[A][R][rin] +
s->lut[A][G][gin] +
s->lut[A][B][bin] +
s->lut[A][A][ain]);
}
srcrow += in->linesize[0];
dstrow += out->linesize[0];
}
}
td.in = in;
td.out = out;
ctx->internal->execute(ctx, s->filter_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
if (in != out)
av_frame_free(&in);
return ff_filter_frame(ctx->outputs[0], out);
return ff_filter_frame(outlink, out);
}
static av_cold void uninit(AVFilterContext *ctx)
@ -358,5 +465,5 @@ AVFilter ff_vf_colorchannelmixer = {
.query_formats = query_formats,
.inputs = colorchannelmixer_inputs,
.outputs = colorchannelmixer_outputs,
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
};