avfilter/scale_cuda: add support for pixel format conversion

Timo Rothenpieler 2021-06-24 01:53:10 +02:00
parent b0e2e938c3
commit 62dc5df941
5 changed files with 1381 additions and 337 deletions

configure
@@ -6283,6 +6283,8 @@ if [ -z "$nvccflags" ]; then
     nvccflags=$nvccflags_default
 fi
 
+nvccflags="$nvccflags -std=c++11"
+
 if enabled x86_64 || enabled ppc64 || enabled aarch64; then
     nvccflags="$nvccflags -m64"
 else

doc/filters.texi

@@ -17832,6 +17832,90 @@ If the specified expression is not valid, it is kept at its current
 value.
 @end table
 
+@section scale_cuda
+
+Scale (resize) and convert (pixel format) the input video, using accelerated CUDA kernels.
+
+Setting the output width and height works in the same way as for the
+@ref{scale} filter.
+
+The filter accepts the following options:
+@table @option
+@item w
+@item h
+Set the output video dimension expression. Default value is the input dimension.
+
+Allows for the same expressions as the @ref{scale} filter.
+
+@item interp_algo
+Sets the algorithm used for scaling:
+
+@table @var
+@item nearest
+Nearest neighbour
+
+Used by default if input parameters match the desired output.
+
+@item bilinear
+Bilinear
+
+@item bicubic
+Bicubic
+
+This is the default.
+
+@item lanczos
+Lanczos
+
+@end table
+
+@item format
+Controls the output pixel format. By default, or if none is specified, the input
+pixel format is used.
+
+The filter does not support converting between YUV and RGB pixel formats.
+
+@item passthrough
+If set to 0, every frame is processed, even if no conversion is necessary.
+This mode can be useful to use the filter as a buffer for a downstream
+frame-consumer that exhausts the limited decoder frame pool.
+
+If set to 1, frames are passed through as-is if they match the desired output
+parameters. This is the default behaviour.
+
+@item param
+Algorithm-Specific parameter.
+
+Affects the curves of the bicubic algorithm.
+
+@item force_original_aspect_ratio
+@item force_divisible_by
+Work the same as the identical @ref{scale} filter options.
+
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Scale input to 720p, keeping aspect ratio and ensuring the output is yuv420p.
+@example
+scale_cuda=-2:720:format=yuv420p
+@end example
+
+@item
+Upscale to 4K using nearest neighbour algorithm.
+@example
+scale_cuda=4096:2160:interp_algo=nearest
+@end example
+
+@item
+Don't do any conversion or scaling, but copy all input frames into newly allocated ones.
+This can be useful to deal with a filter and encode chain that otherwise exhausts the
+decoder's frame pool.
+@example
+scale_cuda=passthrough=0
+@end example
+@end itemize
+
 @section scale_npp
 
 Use the NVIDIA Performance Primitives (libnpp) to perform scaling and/or pixel
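Note: as a quick worked example of the first scale_cuda example above (assuming a typical 1920x1080 source, which is not part of the commit), w=-2 with h=720 derives the width from the input aspect ratio and rounds it to a multiple of 2:

    w = 1920 * 720 / 1080 = 1280    (already divisible by 2)

so the output is a 1280x720 yuv420p frame.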

libavfilter/version.h

@@ -31,7 +31,7 @@
 
 #define LIBAVFILTER_VERSION_MAJOR   8
 #define LIBAVFILTER_VERSION_MINOR   0
-#define LIBAVFILTER_VERSION_MICRO 102
+#define LIBAVFILTER_VERSION_MICRO 103
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \

libavfilter/vf_scale_cuda.c

@@ -75,8 +75,11 @@ typedef struct CUDAScaleContext {
     AVCUDADeviceContext *hwctx;
 
-    enum AVPixelFormat in_fmt;
-    enum AVPixelFormat out_fmt;
+    enum AVPixelFormat in_fmt, out_fmt;
+    const AVPixFmtDescriptor *in_desc, *out_desc;
+    int in_planes, out_planes;
+    int in_plane_depths[4];
+    int in_plane_channels[4];
 
     AVBufferRef *frames_ctx;
     AVFrame     *frame;
@@ -97,18 +100,10 @@ typedef struct CUDAScaleContext {
     CUcontext   cu_ctx;
     CUmodule    cu_module;
-    CUfunction  cu_func_uchar;
-    CUfunction  cu_func_uchar2;
-    CUfunction  cu_func_uchar4;
-    CUfunction  cu_func_ushort;
-    CUfunction  cu_func_ushort2;
-    CUfunction  cu_func_ushort4;
+    CUfunction  cu_func;
+    CUfunction  cu_func_uv;
     CUstream    cu_stream;
 
-    CUdeviceptr srcBuffer;
-    CUdeviceptr dstBuffer;
-    int         tex_alignment;
-
     int interp_algo;
     int interp_use_linear;
     int interp_as_integer;
@@ -120,7 +115,6 @@ static av_cold int cudascale_init(AVFilterContext *ctx)
 {
     CUDAScaleContext *s = ctx->priv;
 
-    s->format = AV_PIX_FMT_NONE;
     s->frame = av_frame_alloc();
     if (!s->frame)
         return AVERROR(ENOMEM);
@@ -210,6 +204,32 @@ static int format_is_supported(enum AVPixelFormat fmt)
     return 0;
 }
 
+static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat in_format, enum AVPixelFormat out_format)
+{
+    CUDAScaleContext *s = ctx->priv;
+    int i, p, d;
+
+    s->in_fmt = in_format;
+    s->out_fmt = out_format;
+
+    s->in_desc  = av_pix_fmt_desc_get(s->in_fmt);
+    s->out_desc = av_pix_fmt_desc_get(s->out_fmt);
+    s->in_planes  = av_pix_fmt_count_planes(s->in_fmt);
+    s->out_planes = av_pix_fmt_count_planes(s->out_fmt);
+
+    // find maximum step of each component of each plane
+    // For our subset of formats, this should accurately tell us how many channels CUDA needs
+    // i.e. 1 for Y plane, 2 for UV plane of NV12, 4 for single plane of RGB0 formats
+    for (i = 0; i < s->in_desc->nb_components; i++) {
+        d = (s->in_desc->comp[i].depth + 7) / 8;
+        p = s->in_desc->comp[i].plane;
+        s->in_plane_channels[p] = FFMAX(s->in_plane_channels[p], s->in_desc->comp[i].step / d);
+
+        s->in_plane_depths[p] = s->in_desc->comp[i].depth;
+    }
+}
+
 static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height,
                                          int out_width, int out_height)
 {
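To make the channel computation above concrete, here is a minimal standalone sketch (an illustration built on libavutil's public pixdesc API, not code from this commit) that runs the same loop over a few pixel format descriptors and prints the per-plane channel count and depth:

    // probe_planes.c -- illustrative only; build with: gcc probe_planes.c -lavutil
    #include <stdio.h>
    #include <libavutil/macros.h>
    #include <libavutil/pixdesc.h>

    static void probe(enum AVPixelFormat fmt)
    {
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
        int channels[4] = { 0 }, depths[4] = { 0 };

        // same computation as set_format_info(): the maximum of
        // step-in-bytes / bytes-per-component over each plane's components
        for (int i = 0; i < desc->nb_components; i++) {
            int d = (desc->comp[i].depth + 7) / 8;
            int p = desc->comp[i].plane;
            channels[p] = FFMAX(channels[p], desc->comp[i].step / d);
            depths[p]   = desc->comp[i].depth;
        }

        printf("%s:", av_get_pix_fmt_name(fmt));
        for (int p = 0; p < av_pix_fmt_count_planes(fmt); p++)
            printf("  plane %d: %d ch, %d bit", p, channels[p], depths[p]);
        printf("\n");
    }

    int main(void)
    {
        probe(AV_PIX_FMT_NV12);   // plane 0: 1 ch (Y), plane 1: 2 ch (interleaved UV)
        probe(AV_PIX_FMT_P016);   // same layout, 16 bit
        probe(AV_PIX_FMT_0RGB32); // plane 0: 4 ch (packed, one plane)
        return 0;
    }

These per-plane channel counts feed directly into the CUDA texture descriptors created in scalecuda_resize() below.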
@@ -241,8 +261,7 @@ static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int
         return AVERROR(ENOSYS);
     }
 
-    s->in_fmt = in_format;
-    s->out_fmt = out_format;
+    set_format_info(ctx, in_format, out_format);
 
     if (s->passthrough && in_width == out_width && in_height == out_height && in_format == out_format) {
         s->frames_ctx = av_buffer_ref(ctx->inputs[0]->hw_frames_ctx);
@@ -254,6 +273,10 @@ static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int
         ret = init_hwframe_ctx(s, in_frames_ctx->device_ref, out_width, out_height);
         if (ret < 0)
             return ret;
+
+        if (in_width == out_width && in_height == out_height &&
+            in_format == out_format && s->interp_algo == INTERP_ALGO_DEFAULT)
+            s->interp_algo = INTERP_ALGO_NEAREST;
     }
 
     ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx);
@@ -263,19 +286,17 @@ static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int
     return 0;
 }
 
-static av_cold int cudascale_config_props(AVFilterLink *outlink)
+static av_cold int cudascale_load_functions(AVFilterContext *ctx)
 {
-    AVFilterContext *ctx = outlink->src;
-    AVFilterLink *inlink = outlink->src->inputs[0];
     CUDAScaleContext *s = ctx->priv;
-    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
-    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
-    CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
-    CudaFunctions *cu = device_hwctx->internal->cuda_dl;
-    char buf[64];
-    int w, h;
+    CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
+    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
+    char buf[128];
     int ret;
 
+    const char *in_fmt_name  = av_get_pix_fmt_name(s->in_fmt);
+    const char *out_fmt_name = av_get_pix_fmt_name(s->out_fmt);
+
     const char *function_infix = "";
 
     extern const unsigned char ff_vf_scale_cuda_ptx_data[];
@@ -283,23 +304,23 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink)
     switch(s->interp_algo) {
     case INTERP_ALGO_NEAREST:
-        function_infix = "_Nearest";
+        function_infix = "Nearest";
         s->interp_use_linear = 0;
         s->interp_as_integer = 1;
         break;
     case INTERP_ALGO_BILINEAR:
-        function_infix = "_Bilinear";
+        function_infix = "Bilinear";
         s->interp_use_linear = 1;
         s->interp_as_integer = 1;
         break;
     case INTERP_ALGO_DEFAULT:
     case INTERP_ALGO_BICUBIC:
-        function_infix = "_Bicubic";
+        function_infix = "Bicubic";
         s->interp_use_linear = 0;
         s->interp_as_integer = 0;
         break;
     case INTERP_ALGO_LANCZOS:
-        function_infix = "_Lanczos";
+        function_infix = "Lanczos";
         s->interp_use_linear = 0;
         s->interp_as_integer = 0;
         break;
@@ -308,51 +329,47 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink)
         return AVERROR_BUG;
     }
 
-    s->hwctx = device_hwctx;
-    s->cu_stream = s->hwctx->stream;
-
     ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
     if (ret < 0)
-        goto fail;
+        return ret;
 
-    ret = ff_cuda_load_module(ctx, device_hwctx, &s->cu_module,
+    ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module,
                               ff_vf_scale_cuda_ptx_data, ff_vf_scale_cuda_ptx_len);
     if (ret < 0)
         goto fail;
 
-    snprintf(buf, sizeof(buf), "Subsample%s_uchar", function_infix);
-    CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, buf));
+    snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s", function_infix, in_fmt_name, out_fmt_name);
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func, s->cu_module, buf));
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_FATAL, "Unsupported conversion: %s -> %s\n", in_fmt_name, out_fmt_name);
+        ret = AVERROR(ENOSYS);
+        goto fail;
+    }
+
+    snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name);
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uv, s->cu_module, buf));
     if (ret < 0)
         goto fail;
 
-    snprintf(buf, sizeof(buf), "Subsample%s_uchar2", function_infix);
-    CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, buf));
-    if (ret < 0)
-        goto fail;
-
-    snprintf(buf, sizeof(buf), "Subsample%s_uchar4", function_infix);
-    CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, buf));
-    if (ret < 0)
-        goto fail;
-
-    snprintf(buf, sizeof(buf), "Subsample%s_ushort", function_infix);
-    CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, buf));
-    if (ret < 0)
-        goto fail;
-
-    snprintf(buf, sizeof(buf), "Subsample%s_ushort2", function_infix);
-    CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, buf));
-    if (ret < 0)
-        goto fail;
-
-    snprintf(buf, sizeof(buf), "Subsample%s_ushort4", function_infix);
-    CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, buf));
-    if (ret < 0)
-        goto fail;
-
+fail:
     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+    return ret;
+}
+
+static av_cold int cudascale_config_props(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = outlink->src->inputs[0];
+    CUDAScaleContext *s = ctx->priv;
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
+    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
+    int w, h;
+    int ret;
+
+    s->hwctx = device_hwctx;
+    s->cu_stream = s->hwctx->stream;
 
     if ((ret = ff_scale_eval_dimensions(s,
                                         s->w_expr, s->h_expr,
                                         inlink, outlink,
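For illustration (assumed example values, not code from the commit), this is what the two kernel-name lookups above produce for a bicubic yuv420p-to-nv12 conversion:

    #include <stdio.h>

    int main(void)
    {
        char buf[128];
        const char *function_infix = "Bicubic";   // assumed interp_algo for this example
        const char *in_fmt_name = "yuv420p", *out_fmt_name = "nv12";

        // primary-plane kernel name, as built in cudascale_load_functions()
        snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s", function_infix, in_fmt_name, out_fmt_name);
        printf("%s\n", buf); // Subsample_Bicubic_yuv420p_nv12

        // chroma-plane kernel name
        snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name);
        printf("%s\n", buf); // Subsample_Bicubic_yuv420p_nv12_uv
        return 0;
    }

If a given combination was not compiled into the PTX, the first cuModuleGetFunction() lookup fails and the filter reports the conversion as unsupported (ENOSYS) instead of failing at launch time.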
@@ -373,9 +390,6 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink)
     if (ret < 0)
         return ret;
 
-    av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d%s\n",
-           inlink->w, inlink->h, outlink->w, outlink->h, s->passthrough ? " (passthrough)" : "");
-
     if (inlink->sample_aspect_ratio.num) {
         outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
                                                              outlink->w*inlink->h},
@@ -384,154 +398,118 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink)
         outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
     }
 
+    av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d fmt:%s -> w:%d h:%d fmt:%s%s\n",
+           inlink->w, inlink->h, av_get_pix_fmt_name(s->in_fmt),
+           outlink->w, outlink->h, av_get_pix_fmt_name(s->out_fmt),
+           s->passthrough ? " (passthrough)" : "");
+
+    ret = cudascale_load_functions(ctx);
+    if (ret < 0)
+        return ret;
+
     return 0;
 
 fail:
     return ret;
 }
 
-static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels,
-                              uint8_t *src_dptr, int src_width, int src_height, int src_pitch,
-                              uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch,
-                              int pixel_size, int bit_depth)
+static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
+                              CUtexObject src_tex[4], int src_width, int src_height,
+                              AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch)
 {
     CUDAScaleContext *s = ctx->priv;
     CudaFunctions *cu = s->hwctx->internal->cuda_dl;
-    CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr;
-    CUtexObject tex = 0;
-    void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch,
-                           &src_width, &src_height, &bit_depth, &s->param };
-    int ret;
-
-    CUDA_TEXTURE_DESC tex_desc = {
-        .filterMode = s->interp_use_linear ?
-                      CU_TR_FILTER_MODE_LINEAR :
-                      CU_TR_FILTER_MODE_POINT,
-        .flags = s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0,
-    };
-
-    CUDA_RESOURCE_DESC res_desc = {
-        .resType = CU_RESOURCE_TYPE_PITCH2D,
-        .res.pitch2D.format = pixel_size == 1 ?
-                              CU_AD_FORMAT_UNSIGNED_INT8 :
-                              CU_AD_FORMAT_UNSIGNED_INT16,
-        .res.pitch2D.numChannels = channels,
-        .res.pitch2D.width = src_width,
-        .res.pitch2D.height = src_height,
-        .res.pitch2D.pitchInBytes = src_pitch,
-        .res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
-    };
-
-    // Handling of channels is done via vector-types in cuda, so their size is implicitly part of the pitch
-    // Same for pixel_size, which is represented via datatypes on the cuda side of things.
-    dst_pitch /= channels * pixel_size;
-
-    ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL));
-    if (ret < 0)
-        goto exit;
-
-    ret = CHECK_CU(cu->cuLaunchKernel(func,
-                                      DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
-                                      BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL));
-
-exit:
-    if (tex)
-        CHECK_CU(cu->cuTexObjectDestroy(tex));
-
-    return ret;
+    CUdeviceptr dst_devptr[4] = {
+        (CUdeviceptr)out_frame->data[0], (CUdeviceptr)out_frame->data[1],
+        (CUdeviceptr)out_frame->data[2], (CUdeviceptr)out_frame->data[3]
+    };
+
+    void *args_uchar[] = {
+        &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
+        &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
+        &dst_width, &dst_height, &dst_pitch,
+        &src_width, &src_height, &s->param
+    };
+
+    return CHECK_CU(cu->cuLaunchKernel(func,
+                                       DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
+                                       BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL));
 }
 
 static int scalecuda_resize(AVFilterContext *ctx,
                             AVFrame *out, AVFrame *in)
 {
-    AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data;
     CUDAScaleContext *s = ctx->priv;
+    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
+    CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
+    int i, ret;
 
-    switch (in_frames_ctx->sw_format) {
-    case AV_PIX_FMT_YUV420P:
-        call_resize_kernel(ctx, s->cu_func_uchar, 1,
-                           in->data[0], in->width, in->height, in->linesize[0],
-                           out->data[0], out->width, out->height, out->linesize[0],
-                           1, 8);
-        call_resize_kernel(ctx, s->cu_func_uchar, 1,
-                           in->data[1], in->width / 2, in->height / 2, in->linesize[1],
-                           out->data[1], out->width / 2, out->height / 2, out->linesize[1],
-                           1, 8);
-        call_resize_kernel(ctx, s->cu_func_uchar, 1,
-                           in->data[2], in->width / 2, in->height / 2, in->linesize[2],
-                           out->data[2], out->width / 2, out->height / 2, out->linesize[2],
-                           1, 8);
-        break;
-    case AV_PIX_FMT_YUV444P:
-        call_resize_kernel(ctx, s->cu_func_uchar, 1,
-                           in->data[0], in->width, in->height, in->linesize[0],
-                           out->data[0], out->width, out->height, out->linesize[0],
-                           1, 8);
-        call_resize_kernel(ctx, s->cu_func_uchar, 1,
-                           in->data[1], in->width, in->height, in->linesize[1],
-                           out->data[1], out->width, out->height, out->linesize[1],
-                           1, 8);
-        call_resize_kernel(ctx, s->cu_func_uchar, 1,
-                           in->data[2], in->width, in->height, in->linesize[2],
-                           out->data[2], out->width, out->height, out->linesize[2],
-                           1, 8);
-        break;
-    case AV_PIX_FMT_YUV444P16:
-        call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[0], in->width, in->height, in->linesize[0],
-                           out->data[0], out->width, out->height, out->linesize[0],
-                           2, 16);
-        call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[1], in->width, in->height, in->linesize[1],
-                           out->data[1], out->width, out->height, out->linesize[1],
-                           2, 16);
-        call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[2], in->width, in->height, in->linesize[2],
-                           out->data[2], out->width, out->height, out->linesize[2],
-                           2, 16);
-        break;
-    case AV_PIX_FMT_NV12:
-        call_resize_kernel(ctx, s->cu_func_uchar, 1,
-                           in->data[0], in->width, in->height, in->linesize[0],
-                           out->data[0], out->width, out->height, out->linesize[0],
-                           1, 8);
-        call_resize_kernel(ctx, s->cu_func_uchar2, 2,
-                           in->data[1], in->width / 2, in->height / 2, in->linesize[1],
-                           out->data[1], out->width / 2, out->height / 2, out->linesize[1],
-                           1, 8);
-        break;
-    case AV_PIX_FMT_P010LE:
-        call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[0], in->width, in->height, in->linesize[0],
-                           out->data[0], out->width, out->height, out->linesize[0],
-                           2, 10);
-        call_resize_kernel(ctx, s->cu_func_ushort2, 2,
-                           in->data[1], in->width / 2, in->height / 2, in->linesize[1],
-                           out->data[1], out->width / 2, out->height / 2, out->linesize[1],
-                           2, 10);
-        break;
-    case AV_PIX_FMT_P016LE:
-        call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[0], in->width, in->height, in->linesize[0],
-                           out->data[0], out->width, out->height, out->linesize[0],
-                           2, 16);
-        call_resize_kernel(ctx, s->cu_func_ushort2, 2,
-                           in->data[1], in->width / 2, in->height / 2, in->linesize[1],
-                           out->data[1], out->width / 2, out->height / 2, out->linesize[1],
-                           2, 16);
-        break;
-    case AV_PIX_FMT_0RGB32:
-    case AV_PIX_FMT_0BGR32:
-        call_resize_kernel(ctx, s->cu_func_uchar4, 4,
-                           in->data[0], in->width, in->height, in->linesize[0],
-                           out->data[0], out->width, out->height, out->linesize[0],
-                           1, 8);
-        break;
-    default:
-        return AVERROR_BUG;
+    CUtexObject tex[4] = { 0, 0, 0, 0 };
+
+    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
+    if (ret < 0)
+        return ret;
+
+    for (i = 0; i < s->in_planes; i++) {
+        CUDA_TEXTURE_DESC tex_desc = {
+            .filterMode = s->interp_use_linear ?
+                          CU_TR_FILTER_MODE_LINEAR :
+                          CU_TR_FILTER_MODE_POINT,
+            .flags = s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0,
+        };
+
+        CUDA_RESOURCE_DESC res_desc = {
+            .resType = CU_RESOURCE_TYPE_PITCH2D,
+            .res.pitch2D.format = s->in_plane_depths[i] <= 8 ?
+                                  CU_AD_FORMAT_UNSIGNED_INT8 :
+                                  CU_AD_FORMAT_UNSIGNED_INT16,
+            .res.pitch2D.numChannels = s->in_plane_channels[i],
+            .res.pitch2D.pitchInBytes = in->linesize[i],
+            .res.pitch2D.devPtr = (CUdeviceptr)in->data[i],
+        };
+
+        if (i == 1 || i == 2) {
+            res_desc.res.pitch2D.width = AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w);
+            res_desc.res.pitch2D.height = AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h);
+        } else {
+            res_desc.res.pitch2D.width = in->width;
+            res_desc.res.pitch2D.height = in->height;
+        }
+
+        ret = CHECK_CU(cu->cuTexObjectCreate(&tex[i], &res_desc, &tex_desc, NULL));
+        if (ret < 0)
+            goto exit;
     }
 
-    return 0;
+    // scale primary plane(s). Usually Y (and A), or single plane of RGB frames.
+    ret = call_resize_kernel(ctx, s->cu_func,
+                             tex, in->width, in->height,
+                             out, out->width, out->height, out->linesize[0]);
+    if (ret < 0)
+        goto exit;
+
+    if (s->out_planes > 1) {
+        // scale UV plane. Scale function sets both U and V plane, or singular interleaved plane.
+        ret = call_resize_kernel(ctx, s->cu_func_uv, tex,
+                                 AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w),
+                                 AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h),
+                                 out,
+                                 AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w),
+                                 AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h),
+                                 out->linesize[1]);
+        if (ret < 0)
+            goto exit;
+    }
+
+exit:
+    for (i = 0; i < s->in_planes; i++)
+        if (tex[i])
+            CHECK_CU(cu->cuTexObjectDestroy(tex[i]));
+
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+
+    return ret;
 }
 
 static int cudascale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
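The .cu side of this commit is suppressed below, so the following is a hedged sketch only: judging from the args_uchar[] array in call_resize_kernel(), every generated kernel now has to accept four source textures and four destination plane pointers (unused slots simply stay 0/NULL), roughly along these lines, with the real parameter types varying per output format:

    // Hypothetical signature inferred from the launch arguments above;
    // the actual kernels live in the suppressed vf_scale_cuda.cu diff.
    __global__ void Subsample_Bicubic_yuv420p_nv12(
        cudaTextureObject_t src_tex_0, cudaTextureObject_t src_tex_1,
        cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3,
        unsigned char *dst_0, unsigned char *dst_1,  // element types differ
        unsigned char *dst_2, unsigned char *dst_3,  // per pixel format
        int dst_width, int dst_height, int dst_pitch,
        int src_width, int src_height, float param)
    {
        int x = blockIdx.x * blockDim.x + threadIdx.x;
        int y = blockIdx.y * blockDim.y + threadIdx.y;
        if (x >= dst_width || y >= dst_height)
            return;

        // sample the source textures at the matching position and write the
        // converted pixel(s); the _uv variant writes both chroma values
    }

Keeping one uniform parameter list is what lets the host side launch every format combination through the same two CUfunction handles.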
@@ -625,6 +603,7 @@ static const AVOption options[] = {
     { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" },
     { "bicubic",  "bicubic",  0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC  }, 0, 0, FLAGS, "interp_algo" },
     { "lanczos",  "lanczos",  0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS  }, 0, 0, FLAGS, "interp_algo" },
+    { "format", "Output video pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, INT_MIN, INT_MAX, .flags=FLAGS },
     { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
     { "param", "Algorithm-Specific parameter", OFFSET(param), AV_OPT_TYPE_FLOAT, { .dbl = SCALE_CUDA_PARAM_DEFAULT }, -FLT_MAX, FLT_MAX, FLAGS },
     { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" },

libavfilter/vf_scale_cuda.cu

File diff suppressed because it is too large.