lavfi/dnn: refine dnn interface to add DNNExecBaseParams

Different function type of model requires different parameters, for example, object detection detects lots of objects (cat/dog/...) in the frame, and classifcation needs to know which object (cat or dog) it is going to classify. The current interface needs to add a new function with more parameters to support new requirement, with this change, we can just add a new struct (for example DNNExecClassifyParams) based on DNNExecBaseParams, and so we can continue to use the current interface execute_model just with params changed.
2021-04-01 10:06:06 +08:00 · 2021-04-01 10:06:06 +08:00 · a3b74651a0
commit a3b74651a0
parent 7eb9accc37
11 changed files with 139 additions and 73 deletions
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@ -2,6 +2,7 @@ OBJS-$(CONFIG_DNN)                           += dnn/dnn_interface.o
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_io_proc.o
 OBJS-$(CONFIG_DNN)                           += dnn/queue.o
 OBJS-$(CONFIG_DNN)                           += dnn/safe_queue.o
+OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_common.o
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native.o
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layers.o
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_avgpool.o
--- a/libavfilter/dnn/dnn_backend_common.c
+++ b/libavfilter/dnn/dnn_backend_common.c
@ -0,0 +1,51 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN common functions different backends.
+ */
+
+#include "dnn_backend_common.h"
+
+int ff_check_exec_params(void *ctx, DNNBackendType backend, DNNFunctionType func_type, DNNExecBaseParams *exec_params)
+{
+    if (!exec_params) {
+        av_log(ctx, AV_LOG_ERROR, "exec_params is null when execute model.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (!exec_params->in_frame) {
+        av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (!exec_params->out_frame) {
+        av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (exec_params->nb_output != 1 && backend != DNN_TF) {
+        // currently, the filter does not need multiple outputs,
+        // so we just pending the support until we really need it.
+        avpriv_report_missing_feature(ctx, "multiple outputs");
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
--- a/libavfilter/dnn/dnn_backend_common.h
+++ b/libavfilter/dnn/dnn_backend_common.h
@ -0,0 +1,31 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN common functions different backends.
+ */
+
+#ifndef AVFILTER_DNN_DNN_BACKEND_COMMON_H
+#define AVFILTER_DNN_DNN_BACKEND_COMMON_H
+
+#include "../dnn_interface.h"
+
+int ff_check_exec_params(void *ctx, DNNBackendType backend, DNNFunctionType func_type, DNNExecBaseParams *exec_params);
+
+#endif
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@ -28,6 +28,7 @@
 #include "dnn_backend_native_layer_conv2d.h"
 #include "dnn_backend_native_layers.h"
 #include "dnn_io_proc.h"
+#include "dnn_backend_common.h"

 #define OFFSET(x) offsetof(NativeContext, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM
@ -372,23 +373,17 @@ static DNNReturnType execute_model_native(const DNNModel *model, const char *inp
    return DNN_SUCCESS;
 }

-DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                          const char **output_names, uint32_t nb_output, AVFrame *out_frame)
+DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNExecBaseParams *exec_params)
 {
    NativeModel *native_model = model->model;
    NativeContext *ctx = &native_model->ctx;

-    if (!in_frame) {
-        av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n");
+    if (ff_check_exec_params(ctx, DNN_NATIVE, model->func_type, exec_params) != 0) {
        return DNN_ERROR;
    }

-    if (!out_frame) {
-        av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n");
-        return DNN_ERROR;
-    }
-
-    return execute_model_native(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
+    return execute_model_native(model, exec_params->input_name, exec_params->in_frame,
+                                exec_params->output_names, exec_params->nb_output, exec_params->out_frame, 1);
 }

 int32_t ff_calculate_operand_dims_count(const DnnOperand *oprd)
--- a/libavfilter/dnn/dnn_backend_native.h
+++ b/libavfilter/dnn/dnn_backend_native.h
@ -130,8 +130,7 @@ typedef struct NativeModel{

 DNNModel *ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);

-DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                          const char **output_names, uint32_t nb_output, AVFrame *out_frame);
+DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNExecBaseParams *exec_params);

 void ff_dnn_free_model_native(DNNModel **model);

--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@ -33,6 +33,7 @@
 #include "queue.h"
 #include "safe_queue.h"
 #include <c_api/ie_c_api.h>
+#include "dnn_backend_common.h"

 typedef struct OVOptions{
    char *device_type;
@ -678,28 +679,14 @@ err:
    return NULL;
 }

-DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame)
+DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_params)
 {
    OVModel *ov_model = model->model;
    OVContext *ctx = &ov_model->ctx;
    TaskItem task;
    RequestItem *request;

-    if (!in_frame) {
-        av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n");
-        return DNN_ERROR;
-    }
-
-    if (!out_frame && model->func_type == DFT_PROCESS_FRAME) {
-        av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n");
-        return DNN_ERROR;
-    }
-
-    if (nb_output != 1) {
-        // currently, the filter does not need multiple outputs,
-        // so we just pending the support until we really need it.
-        avpriv_report_missing_feature(ctx, "multiple outputs");
+    if (ff_check_exec_params(ctx, DNN_OV, model->func_type, exec_params) != 0) {
        return DNN_ERROR;
    }

@ -709,7 +696,7 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n
    }

    if (!ov_model->exe_network) {
-        if (init_model_ov(ov_model, input_name, output_names[0]) != DNN_SUCCESS) {
+        if (init_model_ov(ov_model, exec_params->input_name, exec_params->output_names[0]) != DNN_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network or inference request\n");
            return DNN_ERROR;
        }
@ -717,10 +704,10 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n

    task.do_ioproc = 1;
    task.async = 0;
-    task.input_name = input_name;
-    task.in_frame = in_frame;
-    task.output_name = output_names[0];
-    task.out_frame = out_frame;
+    task.input_name = exec_params->input_name;
+    task.in_frame = exec_params->in_frame;
+    task.output_name = exec_params->output_names[0];
+    task.out_frame = exec_params->out_frame ? exec_params->out_frame : exec_params->in_frame;
    task.ov_model = ov_model;

    if (extract_inference_from_task(ov_model->model->func_type, &task, ov_model->inference_queue) != DNN_SUCCESS) {
@ -737,26 +724,19 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n
    return execute_model_ov(request, ov_model->inference_queue);
 }

-DNNReturnType ff_dnn_execute_model_async_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                            const char **output_names, uint32_t nb_output, AVFrame *out_frame)
+DNNReturnType ff_dnn_execute_model_async_ov(const DNNModel *model, DNNExecBaseParams *exec_params)
 {
    OVModel *ov_model = model->model;
    OVContext *ctx = &ov_model->ctx;
    RequestItem *request;
    TaskItem *task;

-    if (!in_frame) {
-        av_log(ctx, AV_LOG_ERROR, "in frame is NULL when async execute model.\n");
-        return DNN_ERROR;
-    }
-
-    if (!out_frame && model->func_type == DFT_PROCESS_FRAME) {
-        av_log(ctx, AV_LOG_ERROR, "out frame is NULL when async execute model.\n");
+    if (ff_check_exec_params(ctx, DNN_OV, model->func_type, exec_params) != 0) {
        return DNN_ERROR;
    }

    if (!ov_model->exe_network) {
-        if (init_model_ov(ov_model, input_name, output_names[0]) != DNN_SUCCESS) {
+        if (init_model_ov(ov_model, exec_params->input_name, exec_params->output_names[0]) != DNN_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network or inference request\n");
            return DNN_ERROR;
        }
@ -770,10 +750,10 @@ DNNReturnType ff_dnn_execute_model_async_ov(const DNNModel *model, const char *i

    task->do_ioproc = 1;
    task->async = 1;
-    task->input_name = input_name;
-    task->in_frame = in_frame;
-    task->output_name = output_names[0];
-    task->out_frame = out_frame;
+    task->input_name = exec_params->input_name;
+    task->in_frame = exec_params->in_frame;
+    task->output_name = exec_params->output_names[0];
+    task->out_frame = exec_params->out_frame ? exec_params->out_frame : exec_params->in_frame;
    task->ov_model = ov_model;
    if (ff_queue_push_back(ov_model->task_queue, task) < 0) {
        av_freep(&task);
--- a/libavfilter/dnn/dnn_backend_openvino.h
+++ b/libavfilter/dnn/dnn_backend_openvino.h
@ -31,10 +31,8 @@

 DNNModel *ff_dnn_load_model_ov(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);

-DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame);
-DNNReturnType ff_dnn_execute_model_async_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                            const char **output_names, uint32_t nb_output, AVFrame *out_frame);
+DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_params);
+DNNReturnType ff_dnn_execute_model_async_ov(const DNNModel *model, DNNExecBaseParams *exec_params);
 DNNAsyncStatusType ff_dnn_get_async_result_ov(const DNNModel *model, AVFrame **in, AVFrame **out);
 DNNReturnType ff_dnn_flush_ov(const DNNModel *model);

--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@ -34,7 +34,7 @@
 #include "dnn_backend_native_layer_pad.h"
 #include "dnn_backend_native_layer_maximum.h"
 #include "dnn_io_proc.h"
-
+#include "dnn_backend_common.h"
 #include <tensorflow/c/c_api.h>

 typedef struct TFOptions{
@ -814,23 +814,17 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
    return DNN_SUCCESS;
 }

-DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame)
+DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNExecBaseParams *exec_params)
 {
    TFModel *tf_model = model->model;
    TFContext *ctx = &tf_model->ctx;

-    if (!in_frame) {
-        av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n");
-        return DNN_ERROR;
+    if (ff_check_exec_params(ctx, DNN_TF, model->func_type, exec_params) != 0) {
+         return DNN_ERROR;
    }

-    if (!out_frame) {
-        av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n");
-        return DNN_ERROR;
-    }
-
-    return execute_model_tf(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
+    return execute_model_tf(model, exec_params->input_name, exec_params->in_frame,
+                            exec_params->output_names, exec_params->nb_output, exec_params->out_frame, 1);
 }

 void ff_dnn_free_model_tf(DNNModel **model)
--- a/libavfilter/dnn/dnn_backend_tf.h
+++ b/libavfilter/dnn/dnn_backend_tf.h
@ -31,8 +31,7 @@

 DNNModel *ff_dnn_load_model_tf(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);

-DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame);
+DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNExecBaseParams *exec_params);

 void ff_dnn_free_model_tf(DNNModel **model);

--- a/libavfilter/dnn_filter_common.c
+++ b/libavfilter/dnn_filter_common.c
@ -90,14 +90,26 @@ DNNReturnType ff_dnn_get_output(DnnContext *ctx, int input_width, int input_heig

 DNNReturnType ff_dnn_execute_model(DnnContext *ctx, AVFrame *in_frame, AVFrame *out_frame)
 {
-    return (ctx->dnn_module->execute_model)(ctx->model, ctx->model_inputname, in_frame,
-                                            (const char **)&ctx->model_outputname, 1, out_frame);
+    DNNExecBaseParams exec_params = {
+        .input_name     = ctx->model_inputname,
+        .output_names   = (const char **)&ctx->model_outputname,
+        .nb_output      = 1,
+        .in_frame       = in_frame,
+        .out_frame      = out_frame,
+    };
+    return (ctx->dnn_module->execute_model)(ctx->model, &exec_params);
 }

 DNNReturnType ff_dnn_execute_model_async(DnnContext *ctx, AVFrame *in_frame, AVFrame *out_frame)
 {
-    return (ctx->dnn_module->execute_model_async)(ctx->model, ctx->model_inputname, in_frame,
-                                                  (const char **)&ctx->model_outputname, 1, out_frame);
+    DNNExecBaseParams exec_params = {
+        .input_name     = ctx->model_inputname,
+        .output_names   = (const char **)&ctx->model_outputname,
+        .nb_output      = 1,
+        .in_frame       = in_frame,
+        .out_frame      = out_frame,
+    };
+    return (ctx->dnn_module->execute_model_async)(ctx->model, &exec_params);
 }

 DNNAsyncStatusType ff_dnn_get_async_result(DnnContext *ctx, AVFrame **in_frame, AVFrame **out_frame)
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@ -63,6 +63,14 @@ typedef struct DNNData{
    DNNColorOrder order;
 } DNNData;

+typedef struct DNNExecBaseParams {
+    const char *input_name;
+    const char **output_names;
+    uint32_t nb_output;
+    AVFrame *in_frame;
+    AVFrame *out_frame;
+} DNNExecBaseParams;
+
 typedef int (*FramePrePostProc)(AVFrame *frame, DNNData *model, AVFilterContext *filter_ctx);
 typedef int (*DetectPostProc)(AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx);

@ -96,11 +104,9 @@ typedef struct DNNModule{
    // Loads model and parameters from given file. Returns NULL if it is not possible.
    DNNModel *(*load_model)(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);
    // Executes model with specified input and output. Returns DNN_ERROR otherwise.
-    DNNReturnType (*execute_model)(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                   const char **output_names, uint32_t nb_output, AVFrame *out_frame);
+    DNNReturnType (*execute_model)(const DNNModel *model, DNNExecBaseParams *exec_params);
    // Executes model with specified input and output asynchronously. Returns DNN_ERROR otherwise.
-    DNNReturnType (*execute_model_async)(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                         const char **output_names, uint32_t nb_output, AVFrame *out_frame);
+    DNNReturnType (*execute_model_async)(const DNNModel *model, DNNExecBaseParams *exec_params);
    // Retrieve inference result.
    DNNAsyncStatusType (*get_async_result)(const DNNModel *model, AVFrame **in, AVFrame **out);
    // Flush all the pending tasks.