/*
 * Copyright (c) Lynne
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <string.h>

#include "vulkan_filter.h"
#include "libavutil/vulkan_loader.h"

/**
 * Set up the Vulkan context of a filter, reusing an existing frames context
 * when it is compatible and creating a new one otherwise.
 *
 * A supplied frames context is reused only if its dimensions and software
 * format match the requested ones, its tiling is optimal, its usage flags
 * include both SAMPLED and STORAGE, and every sub-format reports storage
 * image support. Otherwise a new frames context is allocated on the device
 * of the supplied frames context (or on avctx->hw_device_ctx when no frames
 * context was given).
 *
 * @param avctx      filter context; used for logging, its hw_device_ctx and
 *                   its filter name
 * @param s          Vulkan context to fill in
 * @param frames_ref candidate frames context, may be NULL; it is not consumed,
 *                   a new reference is taken when it gets reused
 * @param width      required frame width
 * @param height     required frame height
 * @param sw_format  required software pixel format
 * @return the result of ff_vk_load_props() on success, a negative AVERROR
 *         code on failure. On success s->frames_ref holds its own reference.
 */
int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s,
                              AVBufferRef *frames_ref,
                              int width, int height, enum AVPixelFormat sw_format)
{
    int err;
    AVHWFramesContext *frames_ctx;
    AVHWDeviceContext *device_ctx;
    AVVulkanFramesContext *vk_frames;
    AVVulkanDeviceContext *vk_dev;
    AVBufferRef *device_ref = avctx->hw_device_ctx;

    /* Check if context is reusable as-is */
    if (frames_ref) {
        int no_storage = 0;
        FFVulkanFunctions *vk;
        const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format);

        frames_ctx = (AVHWFramesContext *)frames_ref->data;
        device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
        vk_frames  = frames_ctx->hwctx;
        vk_dev     = device_ctx->hwctx;

        /* Width and height mismatch */
        if (width  != frames_ctx->width ||
            height != frames_ctx->height)
            goto skip;

        /* Format mismatch */
        if (sw_format != frames_ctx->sw_format)
            goto skip;

        /* Unusual tiling mismatch. Don't let linear through either. */
        if (vk_frames->tiling != VK_IMAGE_TILING_OPTIMAL)
            goto skip;

        /* Usage mismatch */
        if ((vk_frames->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)) !=
                                (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT))
            goto skip;

        /* The function table is needed right away to query format features */
        s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
                                                 vk_dev->nb_enabled_dev_extensions);
        err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
        if (err < 0)
            return err;
        vk = &s->vkfn;

        /* Check if the subformats can do storage */
        for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) {
            VkFormatProperties2 prop = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
            };
            vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i],
                                                   &prop);

            /* Only optimal tiling gets this far (see the check above); the
             * linear branch is kept so the query stays correct if that
             * restriction is ever relaxed. */
            if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR) {
                no_storage |= !(prop.formatProperties.linearTilingFeatures &
                                VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
            } else {
                no_storage |= !(prop.formatProperties.optimalTilingFeatures &
                                VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
            }
        }

        /* Check if it's usable */
        if (no_storage) {
skip:
            /* Not reusable: create a new frames context on the same device */
            device_ref = frames_ctx->device_ref;
            frames_ref = NULL;
        } else {
            frames_ref = av_buffer_ref(frames_ref);
            if (!frames_ref)
                return AVERROR(ENOMEM);
        }
    }

    if (!frames_ref) {
        if (!device_ref) {
            av_log(avctx, AV_LOG_ERROR,
                   "Vulkan filtering requires a device context!\n");
            return AVERROR(EINVAL);
        }

        frames_ref = av_hwframe_ctx_alloc(device_ref);
        /* The allocation result is dereferenced right below */
        if (!frames_ref)
            return AVERROR(ENOMEM);

        frames_ctx            = (AVHWFramesContext *)frames_ref->data;
        frames_ctx->format    = AV_PIX_FMT_VULKAN;
        frames_ctx->sw_format = sw_format;
        frames_ctx->width     = width;
        frames_ctx->height    = height;

        vk_frames         = frames_ctx->hwctx;
        vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
        vk_frames->usage  = VK_IMAGE_USAGE_SAMPLED_BIT |
                            VK_IMAGE_USAGE_STORAGE_BIT |
                            VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                            VK_IMAGE_USAGE_TRANSFER_DST_BIT;

        err = av_hwframe_ctx_init(frames_ref);
        if (err < 0) {
            av_buffer_unref(&frames_ref);
            return err;
        }

        device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
        vk_dev     = device_ctx->hwctx;
    }

    s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
                                             vk_dev->nb_enabled_dev_extensions);

    /**
     * libplacebo does not use descriptor buffers.
     */
    if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) &&
        strcmp(avctx->filter->name, "libplacebo")) {
        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires that "
               "the %s extension is supported!\n",
               VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME);
        av_buffer_unref(&frames_ref);
        return AVERROR(EINVAL);
    }

    err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
    if (err < 0) {
        av_buffer_unref(&frames_ref);
        return err;
    }

    s->frames_ref = frames_ref;
    s->frames     = frames_ctx;
    s->hwfc       = vk_frames;

    s->device = device_ctx;
    s->hwctx  = device_ctx->hwctx;

    err = ff_vk_load_props(s);
    if (err < 0)
        av_buffer_unref(&s->frames_ref);

    return err;
}

/**
 * Input link configuration shared by Vulkan filters.
 *
 * Requires a hardware frames context with the AV_PIX_FMT_VULKAN format on
 * the link. Only the first input of the filter populates the defaults
 * (input/output format and output dimensions) in the filter's private
 * FFVulkanContext; other inputs are merely validated.
 *
 * @param inlink link being configured
 * @return 0 on success, AVERROR(EINVAL) if the link carries no Vulkan
 *         hardware frames context
 */
int ff_vk_filter_config_input(AVFilterLink *inlink)
{
    AVHWFramesContext *input_frames;
    AVFilterContext *avctx = inlink->dst;
    FFVulkanContext *s = inlink->dst->priv;

    if (!inlink->hw_frames_ctx) {
        av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a "
               "hardware frames context on the input.\n");
        return AVERROR(EINVAL);
    }

    input_frames = (AVHWFramesContext *)inlink->hw_frames_ctx->data;
    if (input_frames->format != AV_PIX_FMT_VULKAN)
        return AVERROR(EINVAL);

    /* Extract the device and default output format from the first input. */
    if (avctx->inputs[0] != inlink)
        return 0;

    /* Save the ref, without reffing it */
    s->input_frames_ref = inlink->hw_frames_ctx;

    /* Defaults */
    s->input_format  = input_frames->sw_format;
    s->output_format = input_frames->sw_format;
    s->output_width  = inlink->w;
    s->output_height = inlink->h;

    return 0;
}

/**
 * Output link configuration shared by Vulkan filters.
 *
 * Drops any frames context already set on the link, initializes the filter's
 * Vulkan context from the saved input frames reference and the configured
 * output dimensions/format, then attaches a new reference to the resulting
 * frames context to the link and copies the output dimensions to it.
 *
 * @param outlink link being configured
 * @return a non-negative value on success, a negative AVERROR code on failure
 */
int ff_vk_filter_config_output(AVFilterLink *outlink)
{
    int err;
    FFVulkanContext *s = outlink->src->priv;

    av_buffer_unref(&outlink->hw_frames_ctx);

    err = ff_vk_filter_init_context(outlink->src, s, s->input_frames_ref,
                                    s->output_width, s->output_height,
                                    s->output_format);
    if (err < 0)
        return err;

    outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref);
    if (!outlink->hw_frames_ctx)
        return AVERROR(ENOMEM);

    outlink->w = s->output_width;
    outlink->h = s->output_height;

    return err;
}

/**
 * Generic init callback for Vulkan filters: marks the output format as unset.
 *
 * @param avctx filter context whose priv is an FFVulkanContext
 * @return always 0
 */
int ff_vk_filter_init(AVFilterContext *avctx)
{
    FFVulkanContext *s = avctx->priv;

    s->output_format = AV_PIX_FMT_NONE;

    return 0;
}

/**
 * Record and submit a single compute dispatch that writes out_f and,
 * optionally, reads in_f.
 *
 * The input (when present) is transitioned to SHADER_READ_ONLY_OPTIMAL and
 * the output to GENERAL before the dispatch. The dispatch covers
 * vkctx->output_width x vkctx->output_height, rounded up to whole workgroups.
 *
 * @param vkctx     Vulkan context
 * @param e         execution pool to take an exec context from
 * @param pl        compute pipeline to run
 * @param out_f     destination frame
 * @param in_f      source frame, may be NULL
 * @param sampler   sampler used for the input image descriptors
 * @param push_src  push constant data, may be NULL to skip the update
 * @param push_size size of the push constant data in bytes
 * @return the result of ff_vk_exec_submit(), or the error code of the step
 *         that failed (after discarding the exec context's dependencies)
 */
int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
                                FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f,
                                VkSampler sampler, void *push_src, size_t push_size)
{
    int err = 0;
    FFVulkanFunctions *vk = &vkctx->vkfn;
    VkImageView in_views[AV_NUM_DATA_POINTERS];
    VkImageView out_views[AV_NUM_DATA_POINTERS];
    VkImageMemoryBarrier2 img_bar[37];
    int nb_img_bar = 0;

    /* Update descriptors and init the exec context */
    FFVkExecContext *exec = ff_vk_exec_get(e);
    ff_vk_exec_start(vkctx, exec);

    ff_vk_exec_bind_pipeline(vkctx, exec, pl);

    if (push_src)
        ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
                               0, push_size, push_src);

    /* NOTE(review): RET() is defined outside this file; it is expected to
     * store the result in err and jump to the fail label on error. */
    if (in_f) {
        RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f,
                                     VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                     VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
        RET(ff_vk_create_imageviews(vkctx, exec, in_views, in_f));
        ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, in_views, 0, 0,
                                          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                          sampler);
        ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar,
                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                            VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                            VK_ACCESS_SHADER_READ_BIT,
                            VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                            VK_QUEUE_FAMILY_IGNORED);
    }

    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f));
    /* The output descriptor index is 1 when an input is bound, 0 otherwise */
    ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, !!in_f,
                                      VK_IMAGE_LAYOUT_GENERAL,
                                      VK_NULL_HANDLE);
    ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
        });

    vk->CmdDispatch(exec->buf,
                    FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
                    FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
                    pl->wg_size[2]);

    return ff_vk_exec_submit(vkctx, exec);

fail:
    ff_vk_exec_discard_deps(vkctx, exec);
    return err;
}

/**
 * Record and submit two chained compute dispatches: pls[0] reads in and
 * writes tmp, pls[1] reads tmp and writes out.
 *
 * All three frames are added as dependencies and transitioned with a single
 * pipeline barrier before the first dispatch; in goes to
 * SHADER_READ_ONLY_OPTIMAL, tmp and out go to GENERAL. The same push
 * constants are applied to both pipelines.
 *
 * @param vkctx     Vulkan context
 * @param e         execution pool to take an exec context from
 * @param pls       the two pipelines, run in order
 * @param out       final destination frame
 * @param tmp       intermediate frame, written by pass 0 and read by pass 1
 * @param in        source frame
 * @param sampler   sampler used for the source image descriptors of each pass
 * @param push_src  push constant data, may be NULL to skip the update
 * @param push_size size of the push constant data in bytes
 * @return the result of ff_vk_exec_submit(), or the error code of the step
 *         that failed (after discarding the exec context's dependencies)
 */
int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
                               FFVulkanPipeline *pls[2],
                               AVFrame *out, AVFrame *tmp, AVFrame *in,
                               VkSampler sampler, void *push_src, size_t push_size)
{
    int err = 0;
    FFVulkanFunctions *vk = &vkctx->vkfn;
    VkImageView in_views[AV_NUM_DATA_POINTERS];
    VkImageView tmp_views[AV_NUM_DATA_POINTERS];
    VkImageView out_views[AV_NUM_DATA_POINTERS];
    VkImageMemoryBarrier2 img_bar[37];
    int nb_img_bar = 0;

    /* Update descriptors and init the exec context */
    FFVkExecContext *exec = ff_vk_exec_get(e);
    ff_vk_exec_start(vkctx, exec);

    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
    RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));

    RET(ff_vk_create_imageviews(vkctx, exec, in_views,  in));
    RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp));
    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));

    ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_READ_BIT,
                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                        VK_QUEUE_FAMILY_IGNORED);
    /* tmp is both written (pass 0) and read (pass 1) */
    ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);
    ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
        });

    for (int i = 0; i < 2; i++) {
        FFVulkanPipeline *pl = pls[i];
        /* Pass 0: in -> tmp, pass 1: tmp -> out */
        AVFrame *src_f = !i ? in : tmp;
        AVFrame *dst_f = !i ? tmp : out;
        VkImageView *src_views = !i ? in_views : tmp_views;
        VkImageView *dst_views = !i ? tmp_views : out_views;

        ff_vk_exec_bind_pipeline(vkctx, exec, pl);

        if (push_src)
            ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
                                   0, push_size, push_src);

        /* The source layout must match the barrier issued for that frame */
        ff_vk_update_descriptor_img_array(vkctx, pl, exec, src_f, src_views, 0, 0,
                                          !i ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL :
                                               VK_IMAGE_LAYOUT_GENERAL,
                                          sampler);
        ff_vk_update_descriptor_img_array(vkctx, pl, exec, dst_f, dst_views, 0, 1,
                                          VK_IMAGE_LAYOUT_GENERAL,
                                          VK_NULL_HANDLE);

        vk->CmdDispatch(exec->buf,
                        FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
                        FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
                        pl->wg_size[2]);
    }

    return ff_vk_exec_submit(vkctx, exec);

fail:
    ff_vk_exec_discard_deps(vkctx, exec);
    return err;
}

/**
 * Record and submit a single compute dispatch that reads nb_in input frames
 * and writes one output frame.
 *
 * Inputs are transitioned to SHADER_READ_ONLY_OPTIMAL and bound at descriptor
 * indices 0..nb_in-1; the output is transitioned to GENERAL and bound at
 * index nb_in.
 *
 * @param vkctx     Vulkan context
 * @param e         execution pool to take an exec context from
 * @param pl        compute pipeline to run
 * @param out       destination frame
 * @param in        array of nb_in source frames
 * @param nb_in     number of source frames; must be between 0 and 16, the
 *                  capacity of the local image view storage
 * @param sampler   sampler used for the input image descriptors
 * @param push_src  push constant data, may be NULL to skip the update
 * @param push_size size of the push constant data in bytes
 * @return the result of ff_vk_exec_submit(), AVERROR(EINVAL) if nb_in is out
 *         of range, or the error code of the step that failed (after
 *         discarding the exec context's dependencies)
 */
int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e,
                             FFVulkanPipeline *pl,
                             AVFrame *out, AVFrame *in[], int nb_in,
                             VkSampler sampler, void *push_src, size_t push_size)
{
    int err = 0;
    FFVulkanFunctions *vk = &vkctx->vkfn;
    VkImageView in_views[16][AV_NUM_DATA_POINTERS];
    VkImageView out_views[AV_NUM_DATA_POINTERS];
    VkImageMemoryBarrier2 img_bar[128];
    int nb_img_bar = 0;
    FFVkExecContext *exec;

    /* in_views is a fixed-size stack array indexed by the input number;
     * refuse anything that would not fit before touching the exec pool. */
    if (nb_in < 0 || nb_in > (int)(sizeof(in_views) / sizeof(in_views[0])))
        return AVERROR(EINVAL);

    /* Update descriptors and init the exec context */
    exec = ff_vk_exec_get(e);
    ff_vk_exec_start(vkctx, exec);

    /* Inputs */
    for (int i = 0; i < nb_in; i++) {
        RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i],
                                     VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                     VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
        RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i]));

        ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar,
                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                            VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                            VK_ACCESS_SHADER_READ_BIT,
                            VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                            VK_QUEUE_FAMILY_IGNORED);
    }

    /* Output */
    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
    ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
        });

    ff_vk_exec_bind_pipeline(vkctx, exec, pl);

    if (push_src)
        ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
                               0, push_size, push_src);

    for (int i = 0; i < nb_in; i++)
        ff_vk_update_descriptor_img_array(vkctx, pl, exec, in[i], in_views[i], 0, i,
                                          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                          sampler);

    ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, nb_in,
                                      VK_IMAGE_LAYOUT_GENERAL,
                                      VK_NULL_HANDLE);

    vk->CmdDispatch(exec->buf,
                    FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
                    FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
                    pl->wg_size[2]);

    return ff_vk_exec_submit(vkctx, exec);

fail:
    ff_vk_exec_discard_deps(vkctx, exec);
    return err;
}