diff --git a/doc/APIchanges b/doc/APIchanges index 6e187670c0..436fb71acb 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -15,6 +15,10 @@ libavutil: 2015-08-28 API changes, most recent first: +2016-03-20 - xxxxxxx - lavu 55.50.100 / 55.21.0 - imgutils.h + Add av_image_copy_uc_from(), a version of av_image_copy() for copying + from GPU mapped memory. + 2017-03-20 - 9c2436e - lavu 55.49.100 - pixdesc.h Add AV_PIX_FMT_FLAG_BAYER pixel format flag. diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c index cc410abad1..f8f2244b89 100644 --- a/libavutil/imgutils.c +++ b/libavutil/imgutils.c @@ -24,6 +24,7 @@ #include "avassert.h" #include "common.h" #include "imgutils.h" +#include "imgutils_internal.h" #include "internal.h" #include "intreadwrite.h" #include "log.h" @@ -303,9 +304,9 @@ int av_image_check_sar(unsigned int w, unsigned int h, AVRational sar) return AVERROR(EINVAL); } -void av_image_copy_plane(uint8_t *dst, int dst_linesize, - const uint8_t *src, int src_linesize, - int bytewidth, int height) +static void image_copy_plane(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height) { if (!dst || !src) return; @@ -318,9 +319,33 @@ void av_image_copy_plane(uint8_t *dst, int dst_linesize, } } -void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], - const uint8_t *src_data[4], const int src_linesizes[4], - enum AVPixelFormat pix_fmt, int width, int height) +static void image_copy_plane_uc_from(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height) +{ + int ret = -1; + +#if ARCH_X86 + ret = ff_image_copy_plane_uc_from_x86(dst, dst_linesize, src, src_linesize, + bytewidth, height); +#endif + + if (ret < 0) + image_copy_plane(dst, dst_linesize, src, src_linesize, bytewidth, height); +} + +void av_image_copy_plane(uint8_t *dst, int dst_linesize, + const uint8_t *src, int src_linesize, + int bytewidth, int height) +{ + image_copy_plane(dst, dst_linesize, src, src_linesize, bytewidth, height); +} + +static void image_copy(uint8_t *dst_data[4], const ptrdiff_t dst_linesizes[4], + const uint8_t *src_data[4], const ptrdiff_t src_linesizes[4], + enum AVPixelFormat pix_fmt, int width, int height, + void (*copy_plane)(uint8_t *, ptrdiff_t, const uint8_t *, + ptrdiff_t, ptrdiff_t, int)) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); @@ -329,9 +354,9 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], if (desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) { - av_image_copy_plane(dst_data[0], dst_linesizes[0], - src_data[0], src_linesizes[0], - width, height); + copy_plane(dst_data[0], dst_linesizes[0], + src_data[0], src_linesizes[0], + width, height); /* copy the palette */ memcpy(dst_data[1], src_data[1], 4*256); } else { @@ -342,7 +367,7 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], for (i = 0; i < planes_nb; i++) { int h = height; - int bwidth = av_image_get_linesize(pix_fmt, width, i); + ptrdiff_t bwidth = av_image_get_linesize(pix_fmt, width, i); if (bwidth < 0) { av_log(NULL, AV_LOG_ERROR, "av_image_get_linesize failed\n"); return; @@ -350,13 +375,37 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], if (i == 1 || i == 2) { h = AV_CEIL_RSHIFT(height, desc->log2_chroma_h); } - av_image_copy_plane(dst_data[i], dst_linesizes[i], - src_data[i], src_linesizes[i], - bwidth, h); + copy_plane(dst_data[i], dst_linesizes[i], + src_data[i], src_linesizes[i], + bwidth, h); } } } +void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], + const uint8_t *src_data[4], const int src_linesizes[4], + enum AVPixelFormat pix_fmt, int width, int height) +{ + ptrdiff_t dst_linesizes1[4], src_linesizes1[4]; + int i; + + for (i = 0; i < 4; i++) { + dst_linesizes1[i] = dst_linesizes[i]; + src_linesizes1[i] = src_linesizes[i]; + } + + image_copy(dst_data, dst_linesizes1, src_data, src_linesizes1, pix_fmt, + width, height, image_copy_plane); +} + +void av_image_copy_uc_from(uint8_t *dst_data[4], const ptrdiff_t dst_linesizes[4], + const uint8_t *src_data[4], const ptrdiff_t src_linesizes[4], + enum AVPixelFormat pix_fmt, int width, int height) +{ + image_copy(dst_data, dst_linesizes, src_data, src_linesizes, pix_fmt, + width, height, image_copy_plane_uc_from); +} + int av_image_fill_arrays(uint8_t *dst_data[4], int dst_linesize[4], const uint8_t *src, enum AVPixelFormat pix_fmt, int width, int height, int align) diff --git a/libavutil/imgutils.h b/libavutil/imgutils.h index 40aee8b98e..a4a5efcc66 100644 --- a/libavutil/imgutils.h +++ b/libavutil/imgutils.h @@ -120,6 +120,24 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], const uint8_t *src_data[4], const int src_linesizes[4], enum AVPixelFormat pix_fmt, int width, int height); +/** + * Copy image data located in uncacheable (e.g. GPU mapped) memory. Where + * available, this function will use special functionality for reading from such + * memory, which may result in greatly improved performance compared to plain + * av_image_copy(). + * + * The data pointers and the linesizes must be aligned to the maximum required + * by the CPU architecture. + * + * @note The linesize parameters have the type ptrdiff_t here, while they are + * int for av_image_copy(). + * @note On x86, the linesizes currently need to be aligned to the cacheline + * size (i.e. 64) to get improved performance. + */ +void av_image_copy_uc_from(uint8_t *dst_data[4], const ptrdiff_t dst_linesizes[4], + const uint8_t *src_data[4], const ptrdiff_t src_linesizes[4], + enum AVPixelFormat pix_fmt, int width, int height); + /** * Setup the data pointers and linesizes based on the specified image * parameters and the provided array. diff --git a/libavutil/imgutils_internal.h b/libavutil/imgutils_internal.h new file mode 100644 index 0000000000..d515858413 --- /dev/null +++ b/libavutil/imgutils_internal.h @@ -0,0 +1,30 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_IMGUTILS_INTERNAL_H +#define AVUTIL_IMGUTILS_INTERNAL_H + +#include +#include + +int ff_image_copy_plane_uc_from_x86(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height); + + +#endif /* AVUTIL_IMGUTILS_INTERNAL_H */ diff --git a/libavutil/version.h b/libavutil/version.h index 25925b996d..d50f92ce12 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -79,7 +79,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 55 -#define LIBAVUTIL_VERSION_MINOR 49 +#define LIBAVUTIL_VERSION_MINOR 50 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile index 94d8832062..2b0ba16c94 100644 --- a/libavutil/x86/Makefile +++ b/libavutil/x86/Makefile @@ -1,6 +1,7 @@ OBJS += x86/cpu.o \ x86/fixed_dsp_init.o \ x86/float_dsp_init.o \ + x86/imgutils_init.o \ x86/lls_init.o \ OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils_init.o \ @@ -11,6 +12,7 @@ YASM-OBJS += x86/cpuid.o \ $(EMMS_OBJS__yes_) \ x86/fixed_dsp.o \ x86/float_dsp.o \ + x86/imgutils.o \ x86/lls.o \ YASM-OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils.o \ diff --git a/libavutil/x86/imgutils.asm b/libavutil/x86/imgutils.asm new file mode 100644 index 0000000000..f5ebc0f351 --- /dev/null +++ b/libavutil/x86/imgutils.asm @@ -0,0 +1,53 @@ +;***************************************************************************** +;* Copyright 2016 Anton Khirnov +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +INIT_XMM sse4 +cglobal image_copy_plane_uc_from, 6, 7, 4, dst, dst_linesize, src, src_linesize, bw, height, rowpos + add dstq, bwq + add srcq, bwq + neg bwq + +.row_start + mov rowposq, bwq + +.loop + movntdqa m0, [srcq + rowposq + 0 * mmsize] + movntdqa m1, [srcq + rowposq + 1 * mmsize] + movntdqa m2, [srcq + rowposq + 2 * mmsize] + movntdqa m3, [srcq + rowposq + 3 * mmsize] + + mova [dstq + rowposq + 0 * mmsize], m0 + mova [dstq + rowposq + 1 * mmsize], m1 + mova [dstq + rowposq + 2 * mmsize], m2 + mova [dstq + rowposq + 3 * mmsize], m3 + + add rowposq, 4 * mmsize + jnz .loop + + add srcq, src_linesizeq + add dstq, dst_linesizeq + dec heightd + jnz .row_start + + RET diff --git a/libavutil/x86/imgutils_init.c b/libavutil/x86/imgutils_init.c new file mode 100644 index 0000000000..4ea398205e --- /dev/null +++ b/libavutil/x86/imgutils_init.c @@ -0,0 +1,49 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include + +#include "libavutil/cpu.h" +#include "libavutil/error.h" +#include "libavutil/imgutils.h" +#include "libavutil/imgutils_internal.h" +#include "libavutil/internal.h" + +#include "cpu.h" + +void ff_image_copy_plane_uc_from_sse4(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height); + +int ff_image_copy_plane_uc_from_x86(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height) +{ + int cpu_flags = av_get_cpu_flags(); + ptrdiff_t bw_aligned = FFALIGN(bytewidth, 64); + + if (EXTERNAL_SSE4(cpu_flags) && + bw_aligned <= dst_linesize && bw_aligned <= src_linesize) + ff_image_copy_plane_uc_from_sse4(dst, dst_linesize, src, src_linesize, + bw_aligned, height); + else + return AVERROR(ENOSYS); + + return 0; +}