2017-05-10 19:18:16 +02:00
/*
* Copyright ( c ) 2017 , NVIDIA CORPORATION . All rights reserved .
*
* Permission is hereby granted , free of charge , to any person obtaining a
* copy of this software and associated documentation files ( the " Software " ) ,
* to deal in the Software without restriction , including without limitation
* the rights to use , copy , modify , merge , publish , distribute , sublicense ,
* and / or sell copies of the Software , and to permit persons to whom the
* Software is furnished to do so , subject to the following conditions :
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software .
*
* THE SOFTWARE IS PROVIDED " AS IS " , WITHOUT WARRANTY OF ANY KIND , EXPRESS OR
* IMPLIED , INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY ,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT . IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM , DAMAGES OR OTHER
* LIABILITY , WHETHER IN AN ACTION OF CONTRACT , TORT OR OTHERWISE , ARISING
* FROM , OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE .
*/
2020-11-04 18:10:19 +01:00
# include <float.h>
2017-05-10 19:18:16 +02:00
# include <stdio.h>
# include <string.h>
# include "libavutil/avstring.h"
# include "libavutil/common.h"
# include "libavutil/hwcontext.h"
2019-02-21 04:57:51 +01:00
# include "libavutil/hwcontext_cuda_internal.h"
2018-11-11 07:47:28 +01:00
# include "libavutil/cuda_check.h"
2017-05-10 19:18:16 +02:00
# include "libavutil/internal.h"
# include "libavutil/opt.h"
# include "libavutil/pixdesc.h"
# include "avfilter.h"
# include "formats.h"
# include "internal.h"
2019-12-08 12:12:36 +01:00
# include "scale_eval.h"
2017-05-10 19:18:16 +02:00
# include "video.h"
2020-11-04 18:10:19 +01:00
# include "vf_scale_cuda.h"
2017-05-10 19:18:16 +02:00
static const enum AVPixelFormat supported_formats [ ] = {
AV_PIX_FMT_YUV420P ,
AV_PIX_FMT_NV12 ,
AV_PIX_FMT_YUV444P ,
AV_PIX_FMT_P010 ,
2019-05-14 05:00:12 +02:00
AV_PIX_FMT_P016 ,
AV_PIX_FMT_YUV444P16 ,
2017-05-10 19:18:16 +02:00
} ;
/* Quotient of a / b rounded towards +infinity (for non-negative a, positive b). */
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )

/* Thread-block dimensions used when launching the resize kernels. */
#define BLOCKX 32
#define BLOCKY 16

/*
 * Wraps a CUDA driver call in FF_CUDA_CHECK_DL. The expansion refers to
 * variables named `ctx` and `s`, which must exist in the calling scope.
 */
#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
2018-11-11 07:47:28 +01:00
2020-10-31 20:22:33 +01:00
/**
 * Values of the "interp_algo" option. INTERP_ALGO_COUNT is not an
 * algorithm; it only marks the number of entries.
 */
enum {
    INTERP_ALGO_DEFAULT,

    INTERP_ALGO_NEAREST,
    INTERP_ALGO_BILINEAR,
    INTERP_ALGO_BICUBIC,
    INTERP_ALGO_LANCZOS,

    INTERP_ALGO_COUNT
};
2017-05-10 19:18:16 +02:00
typedef struct CUDAScaleContext {
const AVClass * class ;
2019-02-21 04:57:51 +01:00
AVCUDADeviceContext * hwctx ;
2017-05-10 19:18:16 +02:00
enum AVPixelFormat in_fmt ;
enum AVPixelFormat out_fmt ;
AVBufferRef * frames_ctx ;
AVFrame * frame ;
AVFrame * tmp_frame ;
int passthrough ;
/**
* Output sw format . AV_PIX_FMT_NONE for no conversion .
*/
enum AVPixelFormat format ;
char * w_expr ; ///< width expression string
char * h_expr ; ///< height expression string
2019-12-02 16:41:21 +01:00
int force_original_aspect_ratio ;
int force_divisible_by ;
2017-05-10 19:18:16 +02:00
CUcontext cu_ctx ;
CUmodule cu_module ;
CUfunction cu_func_uchar ;
CUfunction cu_func_uchar2 ;
CUfunction cu_func_uchar4 ;
CUfunction cu_func_ushort ;
CUfunction cu_func_ushort2 ;
CUfunction cu_func_ushort4 ;
2019-02-21 04:57:51 +01:00
CUstream cu_stream ;
2017-05-10 19:18:16 +02:00
CUdeviceptr srcBuffer ;
CUdeviceptr dstBuffer ;
int tex_alignment ;
2020-10-31 20:22:33 +01:00
int interp_algo ;
int interp_use_linear ;
2020-11-03 18:33:55 +01:00
int interp_as_integer ;
2020-11-04 18:10:19 +01:00
float param ;
2017-05-10 19:18:16 +02:00
} CUDAScaleContext ;
/**
 * Filter init callback: reset the output format to "no conversion" and
 * allocate the two work frames.
 *
 * @return 0 on success, AVERROR(ENOMEM) if a frame cannot be allocated
 */
static av_cold int cudascale_init(AVFilterContext *ctx)
{
    CUDAScaleContext *priv = ctx->priv;

    priv->format = AV_PIX_FMT_NONE;

    priv->frame = av_frame_alloc();
    if (priv->frame == NULL)
        return AVERROR(ENOMEM);

    priv->tmp_frame = av_frame_alloc();
    if (priv->tmp_frame == NULL)
        return AVERROR(ENOMEM);

    return 0;
}
/**
 * Filter uninit callback: unload the CUDA module (if one was loaded) and
 * release the work frames and the frames context reference.
 */
static av_cold void cudascale_uninit(AVFilterContext *ctx)
{
    /* CHECK_CU expects the names `ctx` and `s`. */
    CUDAScaleContext *s = ctx->priv;
    const int have_module = s->hwctx && s->cu_module;

    if (have_module) {
        CudaFunctions *cuda = s->hwctx->internal->cuda_dl;
        CUcontext popped;

        /* The module can only be unloaded with its context current. */
        CHECK_CU(cuda->cuCtxPushCurrent(s->hwctx->cuda_ctx));
        CHECK_CU(cuda->cuModuleUnload(s->cu_module));
        s->cu_module = NULL;
        CHECK_CU(cuda->cuCtxPopCurrent(&popped));
    }

    av_frame_free(&s->frame);
    av_buffer_unref(&s->frames_ctx);
    av_frame_free(&s->tmp_frame);
}
/**
 * Advertise AV_PIX_FMT_CUDA as the only pixel format on all links.
 *
 * @return result of ff_set_common_formats(), or AVERROR(ENOMEM) if the
 *         format list cannot be created
 */
static int cudascale_query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat pixel_formats[] = {
        AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE,
    };
    AVFilterFormats *list = ff_make_format_list(pixel_formats);

    if (list == NULL)
        return AVERROR(ENOMEM);

    return ff_set_common_formats(ctx, list);
}
2020-11-03 18:33:55 +01:00
/**
 * Create the output hw frames context for the given dimensions and fetch
 * the first output buffer into s->frame.
 *
 * The pool is sized with width and height rounded up to a multiple of 32,
 * while s->frame itself is stamped with the exact requested size. On
 * success the new context replaces s->frames_ctx; on failure the new
 * context is released and s->frames_ctx is left untouched.
 *
 * @param s          filter private context
 * @param device_ctx device the frames context is allocated on
 * @param width      output width
 * @param height     output height
 * @return 0 on success, a negative AVERROR code otherwise
 */
static av_cold int init_hwframe_ctx(CUDAScaleContext *s, AVBufferRef *device_ctx, int width, int height)
{
    AVBufferRef *new_ref;
    AVHWFramesContext *new_frames;
    int err;

    new_ref = av_hwframe_ctx_alloc(device_ctx);
    if (new_ref == NULL)
        return AVERROR(ENOMEM);

    new_frames            = (AVHWFramesContext*)new_ref->data;
    new_frames->format    = AV_PIX_FMT_CUDA;
    new_frames->sw_format = s->out_fmt;
    new_frames->width     = FFALIGN(width,  32);
    new_frames->height    = FFALIGN(height, 32);

    err = av_hwframe_ctx_init(new_ref);
    if (err >= 0) {
        av_frame_unref(s->frame);
        err = av_hwframe_get_buffer(new_ref, s->frame, 0);
    }
    if (err < 0) {
        av_buffer_unref(&new_ref);
        return err;
    }

    s->frame->width  = width;
    s->frame->height = height;

    av_buffer_unref(&s->frames_ctx);
    s->frames_ctx = new_ref;

    return 0;
}
static int format_is_supported ( enum AVPixelFormat fmt )
{
int i ;
for ( i = 0 ; i < FF_ARRAY_ELEMS ( supported_formats ) ; i + + )
if ( supported_formats [ i ] = = fmt )
return 1 ;
return 0 ;
}
/**
 * Validate the input/output sw formats and set up the hw frames context
 * used on the output link.
 *
 * When passthrough is enabled and neither the size nor the format changes,
 * the input frames context is reused. Otherwise passthrough is switched
 * off and a new frames context is created for the output size.
 *
 * @return 0 on success, a negative AVERROR code otherwise
 */
static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height,
                                         int out_width, int out_height)
{
    CUDAScaleContext *s = ctx->priv;
    AVFilterLink *inlink = ctx->inputs[0];
    AVHWFramesContext *src_frames;
    enum AVPixelFormat src_fmt;
    enum AVPixelFormat dst_fmt;

    /* check that we have a hw context */
    if (!inlink->hw_frames_ctx) {
        av_log(ctx, AV_LOG_ERROR, " No hw context provided on input \n ");
        return AVERROR(EINVAL);
    }

    src_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    src_fmt    = src_frames->sw_format;

    /* AV_PIX_FMT_NONE means "keep the input format". */
    dst_fmt = s->format;
    if (dst_fmt == AV_PIX_FMT_NONE)
        dst_fmt = src_fmt;

    if (!format_is_supported(src_fmt)) {
        av_log(ctx, AV_LOG_ERROR, " Unsupported input format: %s \n ",
               av_get_pix_fmt_name(src_fmt));
        return AVERROR(ENOSYS);
    }
    if (!format_is_supported(dst_fmt)) {
        av_log(ctx, AV_LOG_ERROR, " Unsupported output format: %s \n ",
               av_get_pix_fmt_name(dst_fmt));
        return AVERROR(ENOSYS);
    }

    s->in_fmt  = src_fmt;
    s->out_fmt = dst_fmt;

    if (!s->passthrough || in_width != out_width ||
        in_height != out_height || src_fmt != dst_fmt) {
        int err;

        /* Real work is needed: allocate a dedicated output pool. */
        s->passthrough = 0;

        err = init_hwframe_ctx(s, src_frames->device_ref, out_width, out_height);
        if (err < 0)
            return err;
    } else {
        s->frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);
        if (!s->frames_ctx)
            return AVERROR(ENOMEM);
    }

    ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx);
    if (!ctx->outputs[0]->hw_frames_ctx)
        return AVERROR(ENOMEM);

    return 0;
}
/**
 * Output link configuration callback.
 *
 * Selects the PTX module and kernel name infix for the chosen interpolation
 * algorithm, loads the module and resolves the six resize kernels, evaluates
 * the output dimensions and finally sets up the output frames context.
 *
 * @param outlink output link being configured
 * @return 0 on success, a negative AVERROR code otherwise
 */
static av_cold int cudascale_config_props(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = outlink->src->inputs[0];
    CUDAScaleContext *s  = ctx->priv;
    AVHWFramesContext *frames_ctx;
    AVCUDADeviceContext *device_hwctx;
    CUcontext dummy, cuda_ctx;
    CudaFunctions *cu;
    char buf[64];
    int w, h;
    int ret;
    int ctx_pushed = 0; /* non-zero while cuda_ctx is pushed and must be popped */

    char *scaler_ptx;
    const char *function_infix = " ";

    extern char vf_scale_cuda_ptx[];
    extern char vf_scale_cuda_bicubic_ptx[];

    /* The CUDA device is reached through the input frames context, so a
     * missing one has to be rejected before it is dereferenced. */
    if (!inlink->hw_frames_ctx) {
        av_log(ctx, AV_LOG_ERROR, " No hw context provided on input \n ");
        return AVERROR(EINVAL);
    }

    frames_ctx   = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    device_hwctx = frames_ctx->device_ctx->hwctx;
    cuda_ctx     = device_hwctx->cuda_ctx;
    cu           = device_hwctx->internal->cuda_dl;

    /* NOTE(review): the infixes and the kernel name templates below are kept
     * exactly as they appear in this file, including the surrounding spaces;
     * confirm they match the entry points exported by the PTX modules. */
    switch (s->interp_algo) {
    case INTERP_ALGO_NEAREST:
        scaler_ptx = vf_scale_cuda_ptx;
        function_infix = " _Nearest ";
        s->interp_use_linear = 0;
        s->interp_as_integer = 1;
        break;
    case INTERP_ALGO_BILINEAR:
        scaler_ptx = vf_scale_cuda_ptx;
        function_infix = " _Bilinear ";
        s->interp_use_linear = 1;
        s->interp_as_integer = 1;
        break;
    case INTERP_ALGO_DEFAULT:
    case INTERP_ALGO_BICUBIC:
        scaler_ptx = vf_scale_cuda_bicubic_ptx;
        function_infix = " _Bicubic ";
        s->interp_use_linear = 0;
        s->interp_as_integer = 0;
        break;
    case INTERP_ALGO_LANCZOS:
        scaler_ptx = vf_scale_cuda_bicubic_ptx;
        function_infix = " _Lanczos ";
        s->interp_use_linear = 0;
        s->interp_as_integer = 0;
        break;
    default:
        av_log(ctx, AV_LOG_ERROR, " Unknown interpolation algorithm \n ");
        return AVERROR_BUG;
    }

    s->hwctx     = device_hwctx;
    s->cu_stream = s->hwctx->stream;

    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
    if (ret < 0)
        goto fail;
    ctx_pushed = 1;

    ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, scaler_ptx));
    if (ret < 0)
        goto fail;

    /* Every lookup result is stored in ret so that a missing kernel is
     * actually detected by the check that follows it. */
    snprintf(buf, sizeof(buf), " Subsample%s_uchar ", function_infix);
    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, buf));
    if (ret < 0)
        goto fail;

    snprintf(buf, sizeof(buf), " Subsample%s_uchar2 ", function_infix);
    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, buf));
    if (ret < 0)
        goto fail;

    snprintf(buf, sizeof(buf), " Subsample%s_uchar4 ", function_infix);
    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, buf));
    if (ret < 0)
        goto fail;

    snprintf(buf, sizeof(buf), " Subsample%s_ushort ", function_infix);
    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, buf));
    if (ret < 0)
        goto fail;

    snprintf(buf, sizeof(buf), " Subsample%s_ushort2 ", function_infix);
    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, buf));
    if (ret < 0)
        goto fail;

    snprintf(buf, sizeof(buf), " Subsample%s_ushort4 ", function_infix);
    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, buf));
    if (ret < 0)
        goto fail;

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    ctx_pushed = 0;

    if ((ret = ff_scale_eval_dimensions(s,
                                        s->w_expr, s->h_expr,
                                        inlink, outlink,
                                        &w, &h)) < 0)
        goto fail;

    ff_scale_adjust_dimensions(inlink, &w, &h,
                               s->force_original_aspect_ratio, s->force_divisible_by);

    /* This condition is only reported; configuration continues afterwards. */
    if (((int64_t)h * inlink->w) > INT_MAX ||
        ((int64_t)w * inlink->h) > INT_MAX)
        av_log(ctx, AV_LOG_ERROR, " Rescaled value for width or height is too big. \n ");

    outlink->w = w;
    outlink->h = h;

    ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h);
    if (ret < 0)
        return ret;

    av_log(ctx, AV_LOG_VERBOSE, " w:%d h:%d -> w:%d h:%d%s \n ",
           inlink->w, inlink->h, outlink->w, outlink->h, s->passthrough ? " (passthrough) " : " ");

    if (inlink->sample_aspect_ratio.num) {
        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w,
                                                             outlink->w * inlink->h},
                                                inlink->sample_aspect_ratio);
    } else {
        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
    }

    return 0;

fail:
    /* Keep the CUDA context stack balanced on every error path. */
    if (ctx_pushed)
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    return ret;
}
2019-02-21 04:57:51 +01:00
/**
 * Resample one plane: wrap the source in a pitch-2D texture object, launch
 * the kernel over the destination plane, then destroy the texture object.
 *
 * @param ctx        filter context (also used for logging by CHECK_CU)
 * @param func       kernel to launch
 * @param channels   number of channels per texel of the source texture
 * @param src_dptr   device pointer to the source plane
 * @param src_width  source plane width
 * @param src_height source plane height
 * @param src_pitch  source pitch; multiplied by pixel_size to obtain bytes
 * @param dst_dptr   device pointer to the destination plane
 * @param dst_width  destination plane width
 * @param dst_height destination plane height
 * @param dst_pitch  destination pitch, passed to the kernel as is
 * @param pixel_size 1 selects an 8-bit texture format, anything else 16-bit
 * @param bit_depth  passed to the kernel as is
 * @return result of the last checked CUDA call: negative AVERROR on failure
 */
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels,
                              uint8_t *src_dptr, int src_width, int src_height, int src_pitch,
                              uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch,
                              int pixel_size, int bit_depth)
{
    /* CHECK_CU expects the names `ctx` and `s`. */
    CUDAScaleContext *s = ctx->priv;
    CudaFunctions *cuda = s->hwctx->internal->cuda_dl;
    CUdeviceptr dst_ptr = (CUdeviceptr)dst_dptr;
    CUtexObject texture = 0;
    /* Order must match the kernel signature. */
    void *kernel_args[] = {
        &texture, &dst_ptr,
        &dst_width, &dst_height, &dst_pitch,
        &src_width, &src_height,
        &bit_depth, &s->param,
    };
    CUDA_TEXTURE_DESC sampling = { 0 };
    CUDA_RESOURCE_DESC source  = { 0 };
    int err;

    if (s->interp_use_linear)
        sampling.filterMode = CU_TR_FILTER_MODE_LINEAR;
    else
        sampling.filterMode = CU_TR_FILTER_MODE_POINT;
    sampling.flags = s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0;

    source.resType = CU_RESOURCE_TYPE_PITCH2D;
    if (pixel_size == 1)
        source.res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT8;
    else
        source.res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT16;
    source.res.pitch2D.numChannels  = channels;
    source.res.pitch2D.width        = src_width;
    source.res.pitch2D.height       = src_height;
    source.res.pitch2D.pitchInBytes = src_pitch * pixel_size;
    source.res.pitch2D.devPtr       = (CUdeviceptr)src_dptr;

    err = CHECK_CU(cuda->cuTexObjectCreate(&texture, &source, &sampling, NULL));
    if (err >= 0) {
        /* One thread per destination pixel, in BLOCKX x BLOCKY blocks. */
        err = CHECK_CU(cuda->cuLaunchKernel(func,
                                            DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
                                            BLOCKX, BLOCKY, 1, 0, s->cu_stream, kernel_args, NULL));
    }

    if (texture)
        CHECK_CU(cuda->cuTexObjectDestroy(texture));

    return err;
}
static int scalecuda_resize ( AVFilterContext * ctx ,
AVFrame * out , AVFrame * in )
{
AVHWFramesContext * in_frames_ctx = ( AVHWFramesContext * ) in - > hw_frames_ctx - > data ;
CUDAScaleContext * s = ctx - > priv ;
switch ( in_frames_ctx - > sw_format ) {
case AV_PIX_FMT_YUV420P :
2019-02-21 04:57:51 +01:00
call_resize_kernel ( ctx , s - > cu_func_uchar , 1 ,
2017-05-10 19:18:16 +02:00
in - > data [ 0 ] , in - > width , in - > height , in - > linesize [ 0 ] ,
out - > data [ 0 ] , out - > width , out - > height , out - > linesize [ 0 ] ,
2020-10-31 20:22:33 +01:00
1 , 8 ) ;
2019-02-21 04:57:51 +01:00
call_resize_kernel ( ctx , s - > cu_func_uchar , 1 ,
2020-11-03 18:33:55 +01:00
in - > data [ 1 ] , in - > width / 2 , in - > height / 2 , in - > linesize [ 1 ] ,
out - > data [ 1 ] , out - > width / 2 , out - > height / 2 , out - > linesize [ 1 ] ,
2020-10-31 20:22:33 +01:00
1 , 8 ) ;
2019-02-21 04:57:51 +01:00
call_resize_kernel ( ctx , s - > cu_func_uchar , 1 ,
2020-11-03 18:33:55 +01:00
in - > data [ 2 ] , in - > width / 2 , in - > height / 2 , in - > linesize [ 2 ] ,
out - > data [ 2 ] , out - > width / 2 , out - > height / 2 , out - > linesize [ 2 ] ,
2020-10-31 20:22:33 +01:00
1 , 8 ) ;
2017-05-10 19:18:16 +02:00
break ;
case AV_PIX_FMT_YUV444P :
2019-02-21 04:57:51 +01:00
call_resize_kernel ( ctx , s - > cu_func_uchar , 1 ,
2017-05-10 19:18:16 +02:00
in - > data [ 0 ] , in - > width , in - > height , in - > linesize [ 0 ] ,
out - > data [ 0 ] , out - > width , out - > height , out - > linesize [ 0 ] ,
2020-10-31 20:22:33 +01:00
1 , 8 ) ;
2019-02-21 04:57:51 +01:00
call_resize_kernel ( ctx , s - > cu_func_uchar , 1 ,
2020-11-03 18:33:55 +01:00
in - > data [ 1 ] , in - > width , in - > height , in - > linesize [ 1 ] ,
out - > data [ 1 ] , out - > width , out - > height , out - > linesize [ 1 ] ,
2020-10-31 20:22:33 +01:00
1 , 8 ) ;
2019-02-21 04:57:51 +01:00
call_resize_kernel ( ctx , s - > cu_func_uchar , 1 ,
2020-11-03 18:33:55 +01:00
in - > data [ 2 ] , in - > width , in - > height , in - > linesize [ 2 ] ,
out - > data [ 2 ] , out - > width , out - > height , out - > linesize [ 2 ] ,
2020-10-31 20:22:33 +01:00
1 , 8 ) ;
2017-05-10 19:18:16 +02:00
break ;
2019-05-14 05:00:12 +02:00
case AV_PIX_FMT_YUV444P16 :
call_resize_kernel ( ctx , s - > cu_func_ushort , 1 ,
in - > data [ 0 ] , in - > width , in - > height , in - > linesize [ 0 ] / 2 ,
out - > data [ 0 ] , out - > width , out - > height , out - > linesize [ 0 ] / 2 ,
2020-10-31 20:22:33 +01:00
2 , 16 ) ;
2019-05-14 05:00:12 +02:00
call_resize_kernel ( ctx , s - > cu_func_ushort , 1 ,
in - > data [ 1 ] , in - > width , in - > height , in - > linesize [ 1 ] / 2 ,
out - > data [ 1 ] , out - > width , out - > height , out - > linesize [ 1 ] / 2 ,
2020-10-31 20:22:33 +01:00
2 , 16 ) ;
2019-05-14 05:00:12 +02:00
call_resize_kernel ( ctx , s - > cu_func_ushort , 1 ,
in - > data [ 2 ] , in - > width , in - > height , in - > linesize [ 2 ] / 2 ,
out - > data [ 2 ] , out - > width , out - > height , out - > linesize [ 2 ] / 2 ,
2020-10-31 20:22:33 +01:00
2 , 16 ) ;
2019-05-14 05:00:12 +02:00
break ;
2017-05-10 19:18:16 +02:00
case AV_PIX_FMT_NV12 :
2019-02-21 04:57:51 +01:00
call_resize_kernel ( ctx , s - > cu_func_uchar , 1 ,
2017-05-10 19:18:16 +02:00
in - > data [ 0 ] , in - > width , in - > height , in - > linesize [ 0 ] ,
out - > data [ 0 ] , out - > width , out - > height , out - > linesize [ 0 ] ,
2020-10-31 20:22:33 +01:00
1 , 8 ) ;
2019-02-21 04:57:51 +01:00
call_resize_kernel ( ctx , s - > cu_func_uchar2 , 2 ,
2020-10-31 20:22:33 +01:00
in - > data [ 1 ] , in - > width / 2 , in - > height / 2 , in - > linesize [ 1 ] ,
out - > data [ 1 ] , out - > width / 2 , out - > height / 2 , out - > linesize [ 1 ] / 2 ,
1 , 8 ) ;
2017-05-10 19:18:16 +02:00
break ;
case AV_PIX_FMT_P010LE :
2019-02-21 04:57:51 +01:00
call_resize_kernel ( ctx , s - > cu_func_ushort , 1 ,
2020-10-31 20:22:33 +01:00
in - > data [ 0 ] , in - > width , in - > height , in - > linesize [ 0 ] / 2 ,
out - > data [ 0 ] , out - > width , out - > height , out - > linesize [ 0 ] / 2 ,
2 , 10 ) ;
2019-02-21 04:57:51 +01:00
call_resize_kernel ( ctx , s - > cu_func_ushort2 , 2 ,
2020-10-31 20:22:33 +01:00
in - > data [ 1 ] , in - > width / 2 , in - > height / 2 , in - > linesize [ 1 ] / 2 ,
2019-05-14 05:07:42 +02:00
out - > data [ 1 ] , out - > width / 2 , out - > height / 2 , out - > linesize [ 1 ] / 4 ,
2020-10-31 20:22:33 +01:00
2 , 10 ) ;
2017-05-10 19:18:16 +02:00
break ;
case AV_PIX_FMT_P016LE :
2019-02-21 04:57:51 +01:00
call_resize_kernel ( ctx , s - > cu_func_ushort , 1 ,
2017-05-10 19:18:16 +02:00
in - > data [ 0 ] , in - > width , in - > height , in - > linesize [ 0 ] / 2 ,
out - > data [ 0 ] , out - > width , out - > height , out - > linesize [ 0 ] / 2 ,
2020-10-31 20:22:33 +01:00
2 , 16 ) ;
2019-02-21 04:57:51 +01:00
call_resize_kernel ( ctx , s - > cu_func_ushort2 , 2 ,
2017-05-10 19:18:16 +02:00
in - > data [ 1 ] , in - > width / 2 , in - > height / 2 , in - > linesize [ 1 ] / 2 ,
2019-05-14 05:07:42 +02:00
out - > data [ 1 ] , out - > width / 2 , out - > height / 2 , out - > linesize [ 1 ] / 4 ,
2020-10-31 20:22:33 +01:00
2 , 16 ) ;
2017-05-10 19:18:16 +02:00
break ;
default :
return AVERROR_BUG ;
}
return 0 ;
}
/**
 * Scale in into the pre-allocated work frame, hand that frame over to out
 * and refill the work frame with a fresh buffer from the same pool.
 *
 * @param ctx filter context
 * @param out empty frame that receives the scaled picture and in's properties
 * @param in  source frame
 * @return 0 on success, a negative AVERROR code otherwise
 */
static int cudascale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
{
    CUDAScaleContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    int err;

    err = scalecuda_resize(ctx, s->frame, in);
    if (err < 0)
        return err;

    /* Get the replacement buffer first, so s->frame is only given away
     * once a successor is available. */
    err = av_hwframe_get_buffer(s->frame->hw_frames_ctx, s->tmp_frame, 0);
    if (err < 0)
        return err;

    av_frame_move_ref(out, s->frame);
    av_frame_move_ref(s->frame, s->tmp_frame);

    s->frame->width  = outlink->w;
    s->frame->height = outlink->h;

    err = av_frame_copy_props(out, in);
    if (err < 0)
        return err;

    return 0;
}
/**
 * Input pad callback: scale one frame and send the result downstream.
 *
 * In passthrough mode the input frame is forwarded unchanged. Otherwise the
 * input frame is always released, whether scaling succeeds or fails.
 *
 * @return result of ff_filter_frame() on success, a negative AVERROR code
 *         otherwise
 */
static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
{
    /* CHECK_CU expects the names `ctx` and `s`. */
    AVFilterContext *ctx = link->dst;
    CUDAScaleContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    CudaFunctions *cuda = s->hwctx->internal->cuda_dl;
    AVFrame *out;
    CUcontext popped;
    int err;

    if (s->passthrough)
        return ff_filter_frame(outlink, in);

    out = av_frame_alloc();
    if (out)
        err = CHECK_CU(cuda->cuCtxPushCurrent(s->hwctx->cuda_ctx));
    else
        err = AVERROR(ENOMEM);

    if (err >= 0) {
        err = cudascale_scale(ctx, out, in);
        CHECK_CU(cuda->cuCtxPopCurrent(&popped));
    }

    if (err < 0) {
        av_frame_free(&in);
        av_frame_free(&out);
        return err;
    }

    /* Adjust the sample aspect ratio for the change in picture size. */
    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
              INT_MAX);

    av_frame_free(&in);
    return ff_filter_frame(outlink, out);
}
2020-11-03 18:33:55 +01:00
/**
 * Input pad buffer allocator: use the null allocator in passthrough mode
 * and the default allocator otherwise.
 */
static AVFrame *cudascale_get_video_buffer(AVFilterLink *inlink, int w, int h)
{
    CUDAScaleContext *priv = inlink->dst->priv;

    if (priv->passthrough)
        return ff_null_get_video_buffer(inlink, w, h);

    return ff_default_get_video_buffer(inlink, w, h);
}
2017-05-10 19:18:16 +02:00
# define OFFSET(x) offsetof(CUDAScaleContext, x)
# define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
static const AVOption options [ ] = {
2020-11-04 18:10:19 +01:00
{ " w " , " Output video width " , OFFSET ( w_expr ) , AV_OPT_TYPE_STRING , { . str = " iw " } , . flags = FLAGS } ,
{ " h " , " Output video height " , OFFSET ( h_expr ) , AV_OPT_TYPE_STRING , { . str = " ih " } , . flags = FLAGS } ,
2020-10-31 20:22:33 +01:00
{ " interp_algo " , " Interpolation algorithm used for resizing " , OFFSET ( interp_algo ) , AV_OPT_TYPE_INT , { . i64 = INTERP_ALGO_DEFAULT } , 0 , INTERP_ALGO_COUNT - 1 , FLAGS , " interp_algo " } ,
2020-11-03 19:28:06 +01:00
{ " nearest " , " nearest neighbour " , 0 , AV_OPT_TYPE_CONST , { . i64 = INTERP_ALGO_NEAREST } , 0 , 0 , FLAGS , " interp_algo " } ,
2020-11-03 18:33:55 +01:00
{ " bilinear " , " bilinear " , 0 , AV_OPT_TYPE_CONST , { . i64 = INTERP_ALGO_BILINEAR } , 0 , 0 , FLAGS , " interp_algo " } ,
{ " bicubic " , " bicubic " , 0 , AV_OPT_TYPE_CONST , { . i64 = INTERP_ALGO_BICUBIC } , 0 , 0 , FLAGS , " interp_algo " } ,
2020-11-04 01:43:00 +01:00
{ " lanczos " , " lanczos " , 0 , AV_OPT_TYPE_CONST , { . i64 = INTERP_ALGO_LANCZOS } , 0 , 0 , FLAGS , " interp_algo " } ,
2020-11-03 18:33:55 +01:00
{ " passthrough " , " Do not process frames at all if parameters match " , OFFSET ( passthrough ) , AV_OPT_TYPE_BOOL , { . i64 = 1 } , 0 , 1 , FLAGS } ,
2020-11-04 18:10:19 +01:00
{ " param " , " Algorithm-Specific parameter " , OFFSET ( param ) , AV_OPT_TYPE_FLOAT , { . dbl = SCALE_CUDA_PARAM_DEFAULT } , - FLT_MAX , FLT_MAX , FLAGS } ,
2020-11-03 18:33:55 +01:00
{ " force_original_aspect_ratio " , " decrease or increase w/h if necessary to keep the original AR " , OFFSET ( force_original_aspect_ratio ) , AV_OPT_TYPE_INT , { . i64 = 0 } , 0 , 2 , FLAGS , " force_oar " } ,
{ " disable " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 0 } , 0 , 0 , FLAGS , " force_oar " } ,
{ " decrease " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 1 } , 0 , 0 , FLAGS , " force_oar " } ,
{ " increase " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 2 } , 0 , 0 , FLAGS , " force_oar " } ,
2020-11-04 18:10:19 +01:00
{ " force_divisible_by " , " enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used " , OFFSET ( force_divisible_by ) , AV_OPT_TYPE_INT , { . i64 = 1 } , 1 , 256 , FLAGS } ,
2017-05-10 19:18:16 +02:00
{ NULL } ,
} ;
/** AVClass tying the option table to the filter's private context. */
static const AVClass cudascale_class = {
    .class_name = " cudascale ",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};
static const AVFilterPad cudascale_inputs [ ] = {
{
. name = " default " ,
. type = AVMEDIA_TYPE_VIDEO ,
. filter_frame = cudascale_filter_frame ,
2020-11-03 18:33:55 +01:00
. get_video_buffer = cudascale_get_video_buffer ,
2017-05-10 19:18:16 +02:00
} ,
{ NULL }
} ;
/** Single video output pad; the list is terminated by an empty entry. */
static const AVFilterPad cudascale_outputs[] = {
    {
        .name         = " default ",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = cudascale_config_props,
    },
    { NULL }
};
/** Filter definition for scale_cuda. */
AVFilter ff_vf_scale_cuda = {
    .name           = " scale_cuda ",
    .description    = NULL_IF_CONFIG_SMALL(" GPU accelerated video resizer "),

    .init           = cudascale_init,
    .uninit         = cudascale_uninit,
    .query_formats  = cudascale_query_formats,

    .priv_size      = sizeof(CUDAScaleContext),
    .priv_class     = &cudascale_class,

    .inputs         = cudascale_inputs,
    .outputs        = cudascale_outputs,

    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
};