avcodec: export motion vectors in frame side data on demand

The reasoning behind this addition is that various third party
applications are interested in getting some motion information out of a
video "for free" when it is available.

It was considered to export other information as well (such as the intra
information about the block, or the quantization), but the structure
might have ended up half generic, half full of codec-specific
cruft. If more information is necessary, it should either be
added in the "flags" field of the AVMotionVector structure, or in
another side data type.

This commit also includes an example exporting them in a CSV stream.
This commit is contained in:
Clément Bœsch 2014-07-16 16:42:42 +02:00
parent c5f43c8888
commit b0352b1997
15 changed files with 360 additions and 2 deletions

1
.gitignore vendored
View File

@ -39,6 +39,7 @@
/doc/examples/avio_reading
/doc/examples/decoding_encoding
/doc/examples/demuxing_decoding
/doc/examples/extract_mvs
/doc/examples/filter_audio
/doc/examples/filtering_audio
/doc/examples/filtering_video

2
configure vendored
View File

@ -1305,6 +1305,7 @@ EXAMPLE_LIST="
avio_reading_example
decoding_encoding_example
demuxing_decoding_example
extract_mvs_example
filter_audio_example
filtering_audio_example
filtering_video_example
@ -2586,6 +2587,7 @@ zoompan_filter_deps="swscale"
avio_reading="avformat avcodec avutil"
avcodec_example_deps="avcodec avutil"
demuxing_decoding_example_deps="avcodec avformat avutil"
extract_mvs_example_deps="avcodec avformat avutil"
filter_audio_example_deps="avfilter avutil"
filtering_audio_example_deps="avfilter avcodec avformat avutil"
filtering_video_example_deps="avfilter avcodec avformat avutil"

View File

@ -15,6 +15,9 @@ libavutil: 2014-08-09
API changes, most recent first:
2014-08-xx - xxxxxxx - lavu 54.5.100 - frame.h motion_vector.h
Add AV_FRAME_DATA_MOTION_VECTORS side data and AVMotionVector structure
2014-08-16 - xxxxxxx - lswr 1.1.100 - swresample.h
Add AVFrame based API

View File

@ -39,6 +39,7 @@ DOCS = $(DOCS-yes)
DOC_EXAMPLES-$(CONFIG_AVIO_READING_EXAMPLE) += avio_reading
DOC_EXAMPLES-$(CONFIG_AVCODEC_EXAMPLE) += avcodec
DOC_EXAMPLES-$(CONFIG_DEMUXING_DECODING_EXAMPLE) += demuxing_decoding
DOC_EXAMPLES-$(CONFIG_EXTRACT_MVS_EXAMPLE) += extract_mvs
DOC_EXAMPLES-$(CONFIG_FILTER_AUDIO_EXAMPLE) += filter_audio
DOC_EXAMPLES-$(CONFIG_FILTERING_AUDIO_EXAMPLE) += filtering_audio
DOC_EXAMPLES-$(CONFIG_FILTERING_VIDEO_EXAMPLE) += filtering_video

View File

@ -797,6 +797,9 @@ Frame data might be split into multiple chunks.
Show all frames before the first keyframe.
@item skiprd
Deprecated, use mpegvideo private options instead.
@item export_mvs
Export motion vectors into frame side-data (see @code{AV_FRAME_DATA_MOTION_VECTORS})
for codecs that support it. See also @file{doc/examples/extract_mvs.c}.
@end table
@item error @var{integer} (@emph{encoding,video})

View File

@ -14,6 +14,7 @@ LDLIBS := $(shell pkg-config --libs $(FFMPEG_LIBS)) $(LDLIBS)
EXAMPLES= avio_reading \
decoding_encoding \
demuxing_decoding \
extract_mvs \
filtering_video \
filtering_audio \
metadata \

185
doc/examples/extract_mvs.c Normal file
View File

@ -0,0 +1,185 @@
/*
* Copyright (c) 2012 Stefano Sabatini
* Copyright (c) 2014 Clément Bœsch
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <libavutil/motion_vector.h>
#include <libavformat/avformat.h>
static AVFormatContext *fmt_ctx = NULL;
static AVCodecContext *video_dec_ctx = NULL;
static AVStream *video_stream = NULL;
static const char *src_filename = NULL;
static int video_stream_idx = -1;
static AVFrame *frame = NULL;
static AVPacket pkt;
static int video_frame_count = 0;
static int decode_packet(int *got_frame, int cached)
{
int decoded = pkt.size;
*got_frame = 0;
if (pkt.stream_index == video_stream_idx) {
int ret = avcodec_decode_video2(video_dec_ctx, frame, got_frame, &pkt);
if (ret < 0) {
fprintf(stderr, "Error decoding video frame (%s)\n", av_err2str(ret));
return ret;
}
if (*got_frame) {
int i;
AVFrameSideData *sd;
video_frame_count++;
sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MOTION_VECTORS);
if (sd) {
const AVMotionVector *mvs = (const AVMotionVector *)sd->data;
for (i = 0; i < sd->size / sizeof(*mvs); i++) {
const AVMotionVector *mv = &mvs[i];
printf("%d,%2d,%2d,%2d,%4d,%4d,%4d,%4d,0x%"PRIx64"\n",
video_frame_count, mv->source,
mv->w, mv->h, mv->src_x, mv->src_y,
mv->dst_x, mv->dst_y, mv->flags);
}
}
}
}
return decoded;
}
/**
 * Select the best stream of the given media type and open its decoder with
 * motion vector export enabled ("flags2" = "+export_mvs").
 *
 * @param stream_idx receives the index of the selected stream (written as
 *                   soon as a stream is found, even if opening the decoder
 *                   fails afterwards)
 * @param fmt_ctx    opened input context to search
 * @param type       media type of the stream to look for
 * @return 0 on success, a negative AVERROR code on failure
 */
static int open_codec_context(int *stream_idx,
                              AVFormatContext *fmt_ctx, enum AVMediaType type)
{
    int ret;
    AVStream *st;
    AVCodecContext *dec_ctx = NULL;
    AVCodec *dec = NULL;
    AVDictionary *opts = NULL;

    ret = av_find_best_stream(fmt_ctx, type, -1, -1, NULL, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not find %s stream in input file '%s'\n",
                av_get_media_type_string(type), src_filename);
        return ret;
    }

    *stream_idx = ret;
    st = fmt_ctx->streams[*stream_idx];

    /* find decoder for the stream */
    dec_ctx = st->codec;
    dec = avcodec_find_decoder(dec_ctx->codec_id);
    if (!dec) {
        fprintf(stderr, "Failed to find %s codec\n",
                av_get_media_type_string(type));
        return AVERROR(EINVAL);
    }

    /* Init the video decoder */
    av_dict_set(&opts, "flags2", "+export_mvs", 0);
    ret = avcodec_open2(dec_ctx, dec, &opts);
    /* The dictionary stays owned by the caller whether or not the open
     * succeeded, so release it on both paths to avoid leaking it. */
    av_dict_free(&opts);
    if (ret < 0) {
        fprintf(stderr, "Failed to open %s codec\n",
                av_get_media_type_string(type));
        return ret;
    }

    return 0;
}
/**
 * Dump the motion vectors of the video stream of the file given as the only
 * command line argument as CSV on stdout (one header line, then one line per
 * motion vector).
 *
 * @return 0 on success, 1 on failure
 */
int main(int argc, char **argv)
{
    int ret = 0, got_frame;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <video>\n", argv[0]);
        exit(1);
    }
    src_filename = argv[1];

    av_register_all();

    if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0) {
        fprintf(stderr, "Could not open source file %s\n", src_filename);
        exit(1);
    }

    if (avformat_find_stream_info(fmt_ctx, NULL) < 0) {
        fprintf(stderr, "Could not find stream information\n");
        exit(1);
    }

    if (open_codec_context(&video_stream_idx, fmt_ctx, AVMEDIA_TYPE_VIDEO) >= 0) {
        video_stream = fmt_ctx->streams[video_stream_idx];
        video_dec_ctx = video_stream->codec;
    }

    av_dump_format(fmt_ctx, 0, src_filename, 0);

    if (!video_stream) {
        fprintf(stderr, "Could not find video stream in the input, aborting\n");
        /* Must be negative: the exit status below is derived from ret < 0,
         * so a positive value here would be reported as success. */
        ret = AVERROR(EINVAL);
        goto end;
    }

    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate frame\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    printf("framenum,source,blockw,blockh,srcx,srcy,dstx,dsty,flags\n");

    /* initialize packet, set data to NULL, let the demuxer fill it */
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    /* read frames from the file */
    while (av_read_frame(fmt_ctx, &pkt) >= 0) {
        /* decode_packet() advances pkt.data/pkt.size, so keep a copy of the
         * untouched packet to release it afterwards. */
        AVPacket orig_pkt = pkt;
        do {
            ret = decode_packet(&got_frame, 0);
            if (ret < 0)
                break;
            pkt.data += ret;
            pkt.size -= ret;
        } while (pkt.size > 0);
        av_free_packet(&orig_pkt);
    }

    /* flush cached frames */
    pkt.data = NULL;
    pkt.size = 0;
    /* decode_packet() ignores packets that are not tagged with the video
     * stream index; do not rely on what the failed read left in the packet. */
    pkt.stream_index = video_stream_idx;
    do {
        decode_packet(&got_frame, 1);
    } while (got_frame);

end:
    avcodec_close(video_dec_ctx);
    avformat_close_input(&fmt_ctx);
    av_frame_free(&frame);
    return ret < 0;
}

View File

@ -767,6 +767,7 @@ typedef struct RcOverride{
#define CODEC_FLAG2_CHUNKS 0x00008000 ///< Input bitstream might be truncated at a packet boundaries instead of only at frame boundaries.
#define CODEC_FLAG2_SHOW_ALL 0x00400000 ///< Show all frames before the first keyframe
#define CODEC_FLAG2_EXPORT_MVS 0x10000000 ///< Export motion vectors through frame side data
/* Unsupported options :
* Syntax Arithmetic coding (SAC)

View File

@ -31,6 +31,7 @@
#include "libavutil/avassert.h"
#include "libavutil/imgutils.h"
#include "libavutil/internal.h"
#include "libavutil/motion_vector.h"
#include "libavutil/timer.h"
#include "avcodec.h"
#include "blockdsp.h"
@ -600,7 +601,8 @@ static int alloc_picture_tables(MpegEncContext *s, Picture *pic)
return AVERROR(ENOMEM);
}
if (s->out_format == FMT_H263 || s->encoding || s->avctx->debug_mv) {
if (s->out_format == FMT_H263 || s->encoding || s->avctx->debug_mv ||
(s->avctx->flags2 & CODEC_FLAG2_EXPORT_MVS)) {
int mv_size = 2 * (b8_array_size + 4) * sizeof(int16_t);
int ref_index_size = 4 * mb_array_size;
@ -2106,6 +2108,24 @@ static void draw_arrow(uint8_t *buf, int sx, int sy, int ex,
draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
/**
 * Fill one AVMotionVector entry for a block, unless the vector is null.
 *
 * @param mb        entry to fill
 * @param mb_type   macroblock type, used to derive the block width and height
 * @param dst_x     destination position (x)
 * @param dst_y     destination position (y)
 * @param src_x     source position (x)
 * @param src_y     source position (y)
 * @param direction zero stores -1 in the "source" field, non-zero stores 1
 * @return 1 if the entry was written, 0 if source and destination are equal
 *         and nothing was stored
 */
static int add_mb(AVMotionVector *mb, uint32_t mb_type,
                  int dst_x, int dst_y,
                  int src_x, int src_y,
                  int direction)
{
    /* A vector that does not move anything is not worth exporting. */
    if (src_x == dst_x && src_y == dst_y)
        return 0;

    if (direction)
        mb->source = 1;
    else
        mb->source = -1;
    mb->flags = 0; // XXX: does mb_type contain extra information that could be exported here?

    mb->w = (IS_8X8(mb_type) || IS_8X16(mb_type)) ? 8 : 16;
    mb->h = (IS_8X8(mb_type) || IS_16X8(mb_type)) ? 8 : 16;

    mb->dst_x = dst_x;
    mb->dst_y = dst_y;
    mb->src_x = src_x;
    mb->src_y = src_y;

    return 1;
}
/**
* Print debugging info for the given picture.
*/
@ -2114,6 +2134,87 @@ void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_
int *low_delay,
int mb_width, int mb_height, int mb_stride, int quarter_sample)
{
/* Export the motion vectors of the picture as AV_FRAME_DATA_MOTION_VECTORS
 * side data when the user requested it and the needed tables are available. */
if ((avctx->flags2 & CODEC_FLAG2_EXPORT_MVS) && mbtype_table && motion_val[0]) {
    /* NOTE(review): presumably converts stored vector units (half or
     * quarter sample) to full pixels -- confirm against the decoders. */
    const int shift = 1 + quarter_sample;
    const int mv_sample_log2 = avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_SVQ3 ? 2 : 1;
    const int mv_stride = (mb_width << mv_sample_log2) +
                          (avctx->codec->id == AV_CODEC_ID_H264 ? 0 : 1);
    int mb_x, mb_y, mbcount = 0;

    /* Worst-case allocation: width * height * 2 * 4 entries, where 2 is for
     * the directions and 4 is the maximum number of vectors per macroblock
     * (the IS_8X8 case). */
    AVMotionVector *mvs = av_malloc_array(mb_width * mb_height, 2 * 4 * sizeof(AVMotionVector));
    if (!mvs)
        return;

    for (mb_y = 0; mb_y < mb_height; mb_y++) {
        for (mb_x = 0; mb_x < mb_width; mb_x++) {
            int i, direction, mb_type = mbtype_table[mb_x + mb_y * mb_stride];
            for (direction = 0; direction < 2; direction++) {
                if (!USES_LIST(mb_type, direction))
                    continue;
                if (IS_8X8(mb_type)) {
                    /* four 8x8 blocks, each with its own vector */
                    for (i = 0; i < 4; i++) {
                        int sx = mb_x * 16 + 4 + 8 * (i & 1);
                        int sy = mb_y * 16 + 4 + 8 * (i >> 1);
                        int xy = (mb_x * 2 + (i & 1) +
                                  (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
                        int mx = (motion_val[direction][xy][0] >> shift) + sx;
                        int my = (motion_val[direction][xy][1] >> shift) + sy;
                        mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, direction);
                    }
                } else if (IS_16X8(mb_type)) {
                    /* two 16x8 halves stacked vertically */
                    for (i = 0; i < 2; i++) {
                        int sx = mb_x * 16 + 8;
                        int sy = mb_y * 16 + 4 + 8 * i;
                        int xy = (mb_x * 2 + (mb_y * 2 + i) * mv_stride) << (mv_sample_log2 - 1);
                        int mx = (motion_val[direction][xy][0] >> shift);
                        int my = (motion_val[direction][xy][1] >> shift);

                        if (IS_INTERLACED(mb_type))
                            my *= 2;

                        mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx + sx, my + sy, direction);
                    }
                } else if (IS_8X16(mb_type)) {
                    /* two 8x16 halves side by side */
                    for (i = 0; i < 2; i++) {
                        int sx = mb_x * 16 + 4 + 8 * i;
                        int sy = mb_y * 16 + 8;
                        int xy = (mb_x * 2 + i + mb_y * 2 * mv_stride) << (mv_sample_log2 - 1);
                        int mx = motion_val[direction][xy][0] >> shift;
                        int my = motion_val[direction][xy][1] >> shift;

                        if (IS_INTERLACED(mb_type))
                            my *= 2;

                        mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx + sx, my + sy, direction);
                    }
                } else {
                    /* a single vector for the whole 16x16 macroblock */
                    int sx = mb_x * 16 + 8;
                    int sy = mb_y * 16 + 8;
                    int xy = (mb_x + mb_y * mv_stride) << mv_sample_log2;
                    int mx = (motion_val[direction][xy][0]>>shift) + sx;
                    int my = (motion_val[direction][xy][1]>>shift) + sy;
                    mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, direction);
                }
            }
        }
    }

    if (mbcount) {
        AVFrameSideData *sd;

        av_log(avctx, AV_LOG_DEBUG, "Adding %d MVs info to frame %d\n", mbcount, avctx->frame_number);
        sd = av_frame_new_side_data(pict, AV_FRAME_DATA_MOTION_VECTORS, mbcount * sizeof(AVMotionVector));
        if (!sd) {
            /* Release the temporary array before bailing out; returning
             * directly would leak it. */
            av_freep(&mvs);
            return;
        }
        memcpy(sd->data, mvs, mbcount * sizeof(AVMotionVector));
    }

    av_freep(&mvs);
}
/* TODO: export all the following to make them accessible for users (and filters) */
if (avctx->hwaccel || !mbtype_table
|| (avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU))
return;

View File

@ -88,6 +88,7 @@ static const AVOption avcodec_options[] = {
{"local_header", "place global headers at every keyframe instead of in extradata", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_LOCAL_HEADER }, INT_MIN, INT_MAX, V|E, "flags2"},
{"chunks", "Frame data might be split into multiple chunks", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_CHUNKS }, INT_MIN, INT_MAX, V|D, "flags2"},
{"showall", "Show all frames before the first keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_SHOW_ALL }, INT_MIN, INT_MAX, V|D, "flags2"},
{"export_mvs", "export motion vectors through frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_EXPORT_MVS}, INT_MIN, INT_MAX, V|D, "flags2"},
{"me_method", "set motion estimation method", OFFSET(me_method), AV_OPT_TYPE_INT, {.i64 = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method"},
{"zero", "zero motion estimation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_ZERO }, INT_MIN, INT_MAX, V|E, "me_method" },
{"full", "full motion estimation (slowest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_FULL }, INT_MIN, INT_MAX, V|E, "me_method" },

View File

@ -38,6 +38,7 @@ HEADERS = adler32.h \
mathematics.h \
md5.h \
mem.h \
motion_vector.h \
murmur3.h \
dict.h \
old_pix_fmts.h \

View File

@ -683,6 +683,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type)
case AV_FRAME_DATA_DOWNMIX_INFO: return "Metadata relevant to a downmix procedure";
case AV_FRAME_DATA_REPLAYGAIN: return "AVReplayGain";
case AV_FRAME_DATA_DISPLAYMATRIX: return "3x3 displaymatrix";
case AV_FRAME_DATA_MOTION_VECTORS: return "Motion vectors";
}
return NULL;
}

View File

@ -87,6 +87,13 @@ enum AVFrameSideDataType {
* in ETSI TS 101 154 using AVActiveFormatDescription enum.
*/
AV_FRAME_DATA_AFD,
/**
* Motion vectors exported by some codecs (on demand through the export_mvs
* flag set in the libavcodec AVCodecContext flags2 option).
* The data is an array of AVMotionVector structs, as defined in
* libavutil/motion_vector.h.
*/
AV_FRAME_DATA_MOTION_VECTORS,
};
enum AVActiveFormatDescription {

50
libavutil/motion_vector.h Normal file
View File

@ -0,0 +1,50 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVUTIL_MOTION_VECTOR_H
#define AVUTIL_MOTION_VECTOR_H
#include <stdint.h>
/**
 * One exported motion vector. The AV_FRAME_DATA_MOTION_VECTORS side data
 * payload is a packed array of these.
 */
typedef struct AVMotionVector {
    /**
     * Where the current macroblock comes from; negative value when it comes
     * from the past, positive value when it comes from the future.
     * XXX: set exact relative ref frame reference instead of a +/- 1 "direction".
     */
    int32_t source;
    /**
     * Width and height of the block.
     */
    uint8_t w, h;
    /**
     * Absolute source position. Can be outside the frame area (and therefore
     * negative), which is why the coordinates are signed.
     */
    int16_t src_x, src_y;
    /**
     * Absolute destination position. Signed for consistency with the source
     * position, so that differences can be computed without surprises.
     */
    int16_t dst_x, dst_y;
    /**
     * Extra flag information.
     * Currently unused.
     */
    uint64_t flags;
} AVMotionVector;
#endif /* AVUTIL_MOTION_VECTOR_H */

View File

@ -56,7 +56,7 @@
*/
#define LIBAVUTIL_VERSION_MAJOR 54
#define LIBAVUTIL_VERSION_MINOR 4
#define LIBAVUTIL_VERSION_MINOR 5
#define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \