ffmpeg/libavcodec/mfenc.c
James Almer 827d6fe73d avcodec/encode: restructure the core encoding code
This commit follows the same logic as 061a0c14bb, but for the encode API: The
new public encoding API will no longer be a wrapper around the old deprecated
one, and the internal API used by the encoders now consists of a single
receive_packet() callback that pulls frames as required.

amf encoders adapted by James Almer
librav1e encoder adapted by James Almer
nvidia encoders adapted by James Almer
MediaFoundation encoders adapted by James Almer
vaapi encoders adapted by Linjie Fu
v4l2_m2m encoders adapted by Andriy Gelman

Signed-off-by: James Almer <jamrial@gmail.com>
2020-06-18 17:11:37 -03:00

1213 lines
40 KiB
C

/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define COBJMACROS
#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0602
#undef _WIN32_WINNT
#define _WIN32_WINNT 0x0602
#endif
#include "encode.h"
#include "mf_utils.h"
#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
#include "libavutil/time.h"
#include "internal.h"
// Private state of one MediaFoundation encoder instance; lives in
// AVCodecContext.priv_data (see priv_data_size in MF_ENCODER()).
typedef struct MFContext {
AVClass *av_class;
// Input frame obtained through ff_encode_get_frame(); it stays referenced
// while the MFT refuses input (EAGAIN) so it can be resubmitted later.
AVFrame *frame;
int is_video, is_audio;
// MF subtype GUID for avctx->codec_id, from ff_codec_to_mf_subtype().
GUID main_subtype;
IMFTransform *mft;
// Event source of an async MFT; tested for NULL to tell sync from async mode.
IMFMediaEventGenerator *async_events;
DWORD in_stream_id, out_stream_id;
MFT_INPUT_STREAM_INFO in_info;
MFT_OUTPUT_STREAM_INFO out_info;
// Non-zero when the MFT hands out its own output samples, so we must not
// allocate one before calling ProcessOutput().
int out_stream_provides_samples;
// draining: the drain command has been sent; draining_done: drain finished.
int draining, draining_done;
// Set once the first input sample has been handed to mf_send_sample().
int sample_sent;
// Flags latched by mf_wait_events() from the async event queue.
int async_need_input, async_have_output, async_marker;
// pts - dts of the first packet carrying a decode timestamp;
// AV_NOPTS_VALUE until it has been determined.
int64_t reorder_delay;
// Optional ICodecAPI interface of the MFT; checked for NULL before use.
ICodecAPI *codec_api;
// set by AVOption
int opt_enc_rc;
int opt_enc_quality;
int opt_enc_scenario;
int opt_enc_hw;
} MFContext;
// Forward declarations: mf_receive_sample() calls both of these when the MFT
// reports a stream format change, before their definitions appear.
static int mf_choose_output_type(AVCodecContext *avctx);
static int mf_setup_context(AVCodecContext *avctx);
// Time base used for every timestamp exchanged with MediaFoundation:
// 1/10000000 s, i.e. 100 ns units.
#define MF_TIMEBASE (AVRational){1, 10000000}
// Sentinel value only used by us.
#define MF_INVALID_TIME AV_NOPTS_VALUE
/**
 * Pump the async MFT event queue until at least one state flag is set.
 *
 * Does nothing for MFTs without an event generator. Otherwise events are
 * fetched one by one and latched into the async_need_input,
 * async_have_output, draining_done and async_marker flags of MFContext;
 * the loop ends as soon as any of them is non-zero.
 *
 * @return 0 on success, AVERROR_EXTERNAL if fetching an event failed
 */
static int mf_wait_events(AVCodecContext *avctx)
{
    MFContext *ctx = avctx->priv_data;

    if (!ctx->async_events)
        return 0;

    for (;;) {
        IMFMediaEvent *event = NULL;
        MediaEventType type = 0;
        HRESULT hr;

        // Stop once there is something for the caller to act on.
        if (ctx->async_need_input || ctx->async_have_output ||
            ctx->draining_done || ctx->async_marker)
            break;

        hr = IMFMediaEventGenerator_GetEvent(ctx->async_events, 0, &event);
        if (FAILED(hr)) {
            av_log(avctx, AV_LOG_ERROR, "IMFMediaEventGenerator_GetEvent() failed: %s\n",
                   ff_hr_str(hr));
            return AVERROR_EXTERNAL;
        }

        IMFMediaEvent_GetType(event, &type);
        if (type == ff_METransformNeedInput) {
            // Input requests are ignored once draining has started.
            if (!ctx->draining)
                ctx->async_need_input = 1;
        } else if (type == ff_METransformHaveOutput) {
            ctx->async_have_output = 1;
        } else if (type == ff_METransformDrainComplete) {
            ctx->draining_done = 1;
        } else if (type == ff_METransformMarker) {
            ctx->async_marker = 1;
        }
        // Any other event type is dropped.

        IMFMediaEvent_Release(event);
    }

    return 0;
}
/**
 * Pick the time base used for frame/packet timestamps.
 *
 * Preference order: avctx->pkt_timebase, then avctx->time_base, each only
 * if both numerator and denominator are positive; MF_TIMEBASE otherwise.
 */
static AVRational mf_get_tb(AVCodecContext *avctx)
{
    const AVRational pkt_tb = avctx->pkt_timebase;
    const AVRational enc_tb = avctx->time_base;

    if (pkt_tb.num > 0 && pkt_tb.den > 0)
        return pkt_tb;

    if (enc_tb.num > 0 && enc_tb.den > 0)
        return enc_tb;

    return MF_TIMEBASE;
}
/**
 * Convert a timestamp from the codec time base (see mf_get_tb()) to
 * MF_TIMEBASE units. AV_NOPTS_VALUE maps to MF_INVALID_TIME.
 */
static LONGLONG mf_to_mf_time(AVCodecContext *avctx, int64_t av_pts)
{
    return av_pts == AV_NOPTS_VALUE
           ? MF_INVALID_TIME
           : av_rescale_q(av_pts, mf_get_tb(avctx), MF_TIMEBASE);
}
/**
 * Store av_pts (converted to MF time) as the sample time of an MF sample.
 * Nothing is set when the timestamp is unknown.
 */
static void mf_sample_set_pts(AVCodecContext *avctx, IMFSample *sample, int64_t av_pts)
{
    const LONGLONG mf_time = mf_to_mf_time(avctx, av_pts);

    if (mf_time == MF_INVALID_TIME)
        return;

    IMFSample_SetSampleTime(sample, mf_time);
}
/**
 * Convert a timestamp in MF_TIMEBASE units to the codec time base
 * returned by mf_get_tb().
 */
static int64_t mf_from_mf_time(AVCodecContext *avctx, LONGLONG stime)
{
    const AVRational dst_tb = mf_get_tb(avctx);

    return av_rescale_q(stime, MF_TIMEBASE, dst_tb);
}
/**
 * Read the sample time of an MF sample, converted to the codec time base.
 *
 * @return the converted timestamp, or AV_NOPTS_VALUE if the sample time
 *         could not be queried
 */
static int64_t mf_sample_get_pts(AVCodecContext *avctx, IMFSample *sample)
{
    LONGLONG mf_time;

    if (FAILED(IMFSample_GetSampleTime(sample, &mf_time)))
        return AV_NOPTS_VALUE;

    return mf_from_mf_time(avctx, mf_time);
}
/**
 * Import audio-specific parameters from the negotiated output media type.
 *
 * For codecs other than MP3 and AC3 the MF_MT_USER_DATA blob (if present)
 * becomes avctx->extradata. The blob is fetched into a temporary buffer and
 * only installed on success, and any previous extradata is freed first, so
 * the function can be called repeatedly (e.g. after a stream change)
 * without leaking and without leaving half-initialized extradata behind.
 *
 * @param avctx encoder context whose extradata and MFContext are updated
 * @param type  current output media type of the MFT
 * @return 0 on success, AVERROR(ENOMEM) or AVERROR_EXTERNAL on failure
 */
static int mf_enca_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
{
    MFContext *c = avctx->priv_data;
    HRESULT hr;
    UINT32 sz;

    if (avctx->codec_id != AV_CODEC_ID_MP3 && avctx->codec_id != AV_CODEC_ID_AC3) {
        hr = IMFAttributes_GetBlobSize(type, &MF_MT_USER_DATA, &sz);
        if (!FAILED(hr) && sz > 0) {
            int extradata_size = sz;
            uint8_t *extradata = av_mallocz(sz + AV_INPUT_BUFFER_PADDING_SIZE);
            if (!extradata)
                return AVERROR(ENOMEM);
            hr = IMFAttributes_GetBlob(type, &MF_MT_USER_DATA, extradata, sz, NULL);
            if (FAILED(hr)) {
                av_free(extradata);
                return AVERROR_EXTERNAL;
            }

            if (avctx->codec_id == AV_CODEC_ID_AAC && extradata_size >= 12) {
                // Get rid of HEAACWAVEINFO (after wfx field, 12 bytes).
                extradata_size -= 12;
                memmove(extradata, extradata + 12, extradata_size);
                // The 12 bytes vacated at the tail now belong to the
                // padding area; keep that area zeroed.
                memset(extradata + extradata_size, 0, 12);
            }

            av_freep(&avctx->extradata);
            avctx->extradata      = extradata;
            avctx->extradata_size = extradata_size;
        }
    }

    // I don't know where it's documented that we need this. It happens with the
    // MS mp3 encoder MFT. The idea for the workaround is taken from NAudio.
    // (Certainly any lossy codec will have frames much smaller than 1 second.)
    if (!c->out_info.cbSize && !c->out_stream_provides_samples) {
        hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &sz);
        if (!FAILED(hr)) {
            av_log(avctx, AV_LOG_VERBOSE, "MFT_OUTPUT_STREAM_INFO.cbSize set to 0, "
                   "assuming %d bytes instead.\n", (int)sz);
            c->out_info.cbSize = sz;
        }
    }

    return 0;
}
/**
 * Import video-specific parameters from the negotiated output media type.
 *
 * If the type carries a non-empty MF_MT_MPEG_SEQUENCE_HEADER blob, it
 * replaces avctx->extradata. The blob is read into a temporary buffer first,
 * so existing extradata is left alone when reading fails.
 *
 * @return 0 on success (also when no blob is present), AVERROR(ENOMEM) or
 *         AVERROR_EXTERNAL on failure
 */
static int mf_encv_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
{
    UINT32 hdr_size;
    uint8_t *hdr;
    HRESULT hr;

    hr = IMFAttributes_GetBlobSize(type, &MF_MT_MPEG_SEQUENCE_HEADER, &hdr_size);
    if (FAILED(hr) || hdr_size == 0)
        return 0;

    hdr = av_mallocz(hdr_size + AV_INPUT_BUFFER_PADDING_SIZE);
    if (!hdr)
        return AVERROR(ENOMEM);

    hr = IMFAttributes_GetBlob(type, &MF_MT_MPEG_SEQUENCE_HEADER, hdr, hdr_size, NULL);
    if (FAILED(hr)) {
        av_free(hdr);
        return AVERROR_EXTERNAL;
    }

    // Swap in the new header only after it was read successfully.
    av_freep(&avctx->extradata);
    avctx->extradata      = hdr;
    avctx->extradata_size = hdr_size;

    return 0;
}
/**
 * Query the MFT's current output type, log it, and let the audio or video
 * helper extract codec parameters (extradata etc.) from it.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int mf_output_type_get(AVCodecContext *avctx)
{
    MFContext *ctx = avctx->priv_data;
    IMFMediaType *cur_type;
    int err = 0;
    HRESULT hr;

    hr = IMFTransform_GetOutputCurrentType(ctx->mft, ctx->out_stream_id, &cur_type);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "could not get output type\n");
        return AVERROR_EXTERNAL;
    }

    av_log(avctx, AV_LOG_VERBOSE, "final output type:\n");
    ff_media_type_dump(avctx, cur_type);

    if (ctx->is_video)
        err = mf_encv_output_type_get(avctx, cur_type);
    else if (ctx->is_audio)
        err = mf_enca_output_type_get(avctx, cur_type);

    if (err < 0)
        av_log(avctx, AV_LOG_ERROR, "output type not supported\n");

    IMFMediaType_Release(cur_type);
    return err;
}
/**
 * Copy the payload, timestamps and key-frame flag of an encoded MF sample
 * into an AVPacket.
 *
 * @param avctx  encoder context (time base and MFContext state are used)
 * @param sample encoded sample produced by the MFT; not released here
 * @param avpkt  packet to fill; its data buffer is allocated by this function
 * @return 0 on success, a negative AVERROR code on failure, in which case
 *         the packet is left unreferenced
 */
static int mf_sample_to_avpacket(AVCodecContext *avctx, IMFSample *sample, AVPacket *avpkt)
{
    MFContext *c = avctx->priv_data;
    HRESULT hr;
    int ret;
    DWORD len;
    IMFMediaBuffer *buffer = NULL;
    BYTE *data;
    UINT64 t;
    UINT32 t32;

    hr = IMFSample_GetTotalLength(sample, &len);
    if (FAILED(hr))
        return AVERROR_EXTERNAL;

    if ((ret = av_new_packet(avpkt, len)) < 0)
        return ret;

    // Capture the result so that a failed conversion is detected before
    // the buffer pointer is used.
    hr = IMFSample_ConvertToContiguousBuffer(sample, &buffer);
    if (FAILED(hr)) {
        av_packet_unref(avpkt);
        return AVERROR_EXTERNAL;
    }

    hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
    if (FAILED(hr)) {
        IMFMediaBuffer_Release(buffer);
        av_packet_unref(avpkt);
        return AVERROR_EXTERNAL;
    }

    memcpy(avpkt->data, data, len);

    IMFMediaBuffer_Unlock(buffer);
    IMFMediaBuffer_Release(buffer);

    // Default: dts equals pts unless a decode timestamp is attached below.
    avpkt->pts = avpkt->dts = mf_sample_get_pts(avctx, sample);

    // Audio packets are always flagged as key frames; video packets only
    // when the sample is marked as a clean point.
    hr = IMFAttributes_GetUINT32(sample, &MFSampleExtension_CleanPoint, &t32);
    if (c->is_audio || (!FAILED(hr) && t32 != 0))
        avpkt->flags |= AV_PKT_FLAG_KEY;

    hr = IMFAttributes_GetUINT64(sample, &MFSampleExtension_DecodeTimestamp, &t);
    if (!FAILED(hr)) {
        avpkt->dts = mf_from_mf_time(avctx, t);
        // At least on Qualcomm's HEVC encoder on SD 835, the output dts
        // starts from the input pts of the first frame, while the output pts
        // is shifted forward. Therefore, shift the output values back so that
        // the output pts matches the input.
        if (c->reorder_delay == AV_NOPTS_VALUE)
            c->reorder_delay = avpkt->pts - avpkt->dts;
        avpkt->dts -= c->reorder_delay;
        avpkt->pts -= c->reorder_delay;
    }

    return 0;
}
/**
 * Wrap the samples of an audio frame (frame->data[0]) in a new MF sample.
 *
 * The payload length is nb_samples * bytes-per-sample * channels; the sample
 * duration is derived from frame->nb_samples via mf_to_mf_time().
 *
 * @return the new sample, or NULL if it could not be created
 */
static IMFSample *mf_a_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
{
    MFContext *ctx = avctx->priv_data;
    size_t bytes_per_frame = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels;
    size_t payload_size    = frame->nb_samples * bytes_per_frame;
    IMFSample *out;

    out = ff_create_memory_sample(frame->data[0], payload_size, ctx->in_info.cbAlignment);
    if (!out)
        return NULL;

    IMFSample_SetSampleDuration(out, mf_to_mf_time(avctx, frame->nb_samples));
    return out;
}
/**
 * Copy a video frame into a newly allocated MF sample.
 *
 * The image is packed with alignment 1 (av_image_copy_to_buffer()) using
 * the pixel format and dimensions from avctx; the sample duration is taken
 * from frame->pkt_duration.
 *
 * @return the new sample, or NULL on any failure
 */
static IMFSample *mf_v_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
{
    MFContext *ctx = avctx->priv_data;
    IMFMediaBuffer *media_buf;
    IMFSample *out;
    BYTE *dst;
    HRESULT hr;
    int copied;
    int image_size;

    image_size = av_image_get_buffer_size(avctx->pix_fmt, avctx->width, avctx->height, 1);
    if (image_size < 0)
        return NULL;

    out = ff_create_memory_sample(NULL, image_size, ctx->in_info.cbAlignment);
    if (!out)
        return NULL;

    hr = IMFSample_GetBufferByIndex(out, 0, &media_buf);
    if (FAILED(hr))
        goto fail;

    hr = IMFMediaBuffer_Lock(media_buf, &dst, NULL, NULL);
    if (FAILED(hr)) {
        IMFMediaBuffer_Release(media_buf);
        goto fail;
    }

    copied = av_image_copy_to_buffer((uint8_t *)dst, image_size, (void *)frame->data, frame->linesize,
                                     avctx->pix_fmt, avctx->width, avctx->height, 1);
    // The buffer is finalized and released before the copy result is checked.
    IMFMediaBuffer_SetCurrentLength(media_buf, image_size);
    IMFMediaBuffer_Unlock(media_buf);
    IMFMediaBuffer_Release(media_buf);
    if (copied < 0)
        goto fail;

    IMFSample_SetSampleDuration(out, mf_to_mf_time(avctx, frame->pkt_duration));
    return out;

fail:
    IMFSample_Release(out);
    return NULL;
}
/**
 * Convert an input frame to an MF sample (audio or video path depending on
 * the encoder type) and stamp it with the frame's pts.
 *
 * @return the new sample, or NULL if conversion failed
 */
static IMFSample *mf_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
{
    MFContext *ctx = avctx->priv_data;
    IMFSample *out = ctx->is_audio ? mf_a_avframe_to_sample(avctx, frame)
                                   : mf_v_avframe_to_sample(avctx, frame);

    if (!out)
        return NULL;

    mf_sample_set_pts(avctx, out, frame->pts);
    return out;
}
/**
 * Feed one input sample to the MFT, or start draining when sample is NULL.
 *
 * @param sample input sample, or NULL to signal end of input
 * @return 0 on success; AVERROR(EAGAIN) if the MFT does not accept input
 *         right now; AVERROR_EOF if sample is NULL and draining was already
 *         started; another negative AVERROR code on failure
 */
static int mf_send_sample(AVCodecContext *avctx, IMFSample *sample)
{
    MFContext *ctx = avctx->priv_data;
    HRESULT hr;

    if (!sample) {
        // End of input: the drain command is issued exactly once.
        if (ctx->draining)
            return AVERROR_EOF;

        hr = IMFTransform_ProcessMessage(ctx->mft, MFT_MESSAGE_COMMAND_DRAIN, 0);
        if (FAILED(hr))
            av_log(avctx, AV_LOG_ERROR, "failed draining: %s\n", ff_hr_str(hr));
        // Some MFTs (AC3) will send a frame after each drain command (???), so
        // this is required to make draining actually terminate.
        ctx->draining = 1;
        ctx->async_need_input = 0;
        return 0;
    }

    if (ctx->async_events) {
        // Async MFTs must have requested input before we may provide any.
        int ret = mf_wait_events(avctx);
        if (ret < 0)
            return ret;
        if (!ctx->async_need_input)
            return AVERROR(EAGAIN);
    }

    // The very first sample is flagged as a discontinuity.
    if (!ctx->sample_sent)
        IMFSample_SetUINT32(sample, &MFSampleExtension_Discontinuity, TRUE);
    ctx->sample_sent = 1;

    hr = IMFTransform_ProcessInput(ctx->mft, ctx->in_stream_id, sample, 0);
    if (hr == MF_E_NOTACCEPTING)
        return AVERROR(EAGAIN);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "failed processing input: %s\n", ff_hr_str(hr));
        return AVERROR_EXTERNAL;
    }

    ctx->async_need_input = 0;
    return 0;
}
// Try to fetch one encoded sample from the MFT.
//
// On success returns 0 and stores the sample in *out_sample; the caller
// releases it. Returns AVERROR(EAGAIN) when no output is available yet,
// AVERROR_EOF once draining has completed, or another negative AVERROR code
// on failure. *out_sample is NULL in all non-success cases.
static int mf_receive_sample(AVCodecContext *avctx, IMFSample **out_sample)
{
MFContext *c = avctx->priv_data;
HRESULT hr;
DWORD st;
MFT_OUTPUT_DATA_BUFFER out_buffers;
IMFSample *sample;
int ret = 0;
// The loop only repeats after a successfully handled stream format change.
while (1) {
*out_sample = NULL;
sample = NULL;
if (c->async_events) {
if ((ret = mf_wait_events(avctx)) < 0)
return ret;
// Async mode: only call ProcessOutput() after a "have output" event,
// and not at all once the drain has completed.
if (!c->async_have_output || c->draining_done) {
ret = 0;
break;
}
}
// Unless the MFT supplies its own output samples, allocate one for it.
if (!c->out_stream_provides_samples) {
sample = ff_create_memory_sample(NULL, c->out_info.cbSize, c->out_info.cbAlignment);
if (!sample)
return AVERROR(ENOMEM);
}
out_buffers = (MFT_OUTPUT_DATA_BUFFER) {
.dwStreamID = c->out_stream_id,
.pSample = sample,
};
st = 0;
hr = IMFTransform_ProcessOutput(c->mft, 0, 1, &out_buffers, &st);
if (out_buffers.pEvents)
IMFCollection_Release(out_buffers.pEvents);
if (!FAILED(hr)) {
*out_sample = out_buffers.pSample;
ret = 0;
break;
}
// Failure: drop whatever sample is attached (ours or the MFT's).
if (out_buffers.pSample)
IMFSample_Release(out_buffers.pSample);
if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
// While draining, "need more input" means there is nothing left.
if (c->draining)
c->draining_done = 1;
ret = 0;
} else if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
av_log(avctx, AV_LOG_WARNING, "stream format change\n");
// Renegotiate the output type; 1 means it was set, 0 means the MFT
// wants the input type set first.
ret = mf_choose_output_type(avctx);
if (ret == 0) // we don't expect renegotiating the input type
ret = AVERROR_EXTERNAL;
if (ret > 0) {
ret = mf_setup_context(avctx);
if (ret >= 0) {
c->async_have_output = 0;
continue;
}
}
} else {
av_log(avctx, AV_LOG_ERROR, "failed processing output: %s\n", ff_hr_str(hr));
ret = AVERROR_EXTERNAL;
}
break;
}
// The "have output" event is consumed by this call regardless of outcome.
c->async_have_output = 0;
// No error but also no sample: report EOF after the drain, EAGAIN otherwise.
if (ret >= 0 && !*out_sample)
ret = c->draining_done ? AVERROR_EOF : AVERROR(EAGAIN);
return ret;
}
// receive_packet() callback of the encoder: pulls an input frame if needed,
// feeds it to the MFT, then tries to return one encoded packet.
//
// Returns 0 with avpkt filled on success, or a negative AVERROR code
// (including AVERROR(EAGAIN) and AVERROR_EOF propagated from the helpers).
static int mf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
{
MFContext *c = avctx->priv_data;
IMFSample *sample = NULL;
int ret;
// Only request a new frame if none is left over from a previous call.
if (!c->frame->buf[0]) {
ret = ff_encode_get_frame(avctx, c->frame);
// On AVERROR_EOF we continue without a frame so that a NULL sample
// is sent below, which starts draining.
if (ret < 0 && ret != AVERROR_EOF)
return ret;
}
if (c->frame->buf[0]) {
sample = mf_avframe_to_sample(avctx, c->frame);
if (!sample) {
av_frame_unref(c->frame);
return AVERROR(ENOMEM);
}
// Request a key frame for frames marked as I pictures and for the first
// sample sent to the encoder.
if (c->is_video && c->codec_api) {
if (c->frame->pict_type == AV_PICTURE_TYPE_I || !c->sample_sent)
ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncVideoForceKeyFrame, FF_VAL_VT_UI4(1));
}
}
ret = mf_send_sample(avctx, sample);
if (sample)
IMFSample_Release(sample);
// Keep the frame only if the MFT refused it, so it is resent next time.
if (ret != AVERROR(EAGAIN))
av_frame_unref(c->frame);
// EAGAIN and EOF from sending are not fatal: output may still be pending.
if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
return ret;
ret = mf_receive_sample(avctx, &sample);
if (ret < 0)
return ret;
ret = mf_sample_to_avpacket(avctx, sample, avpkt);
IMFSample_Release(sample);
return ret;
}
// Most encoders seem to enumerate supported audio formats on the output types,
// at least as far as channel configuration and sample rate is concerned. Pick
// the one which seems to match best.
//
// Returns -1 for types with a non-zero MF_MT_AAC_PAYLOAD_TYPE; otherwise a
// score whose upper bits (32 and above) encode sample rate, channel count and
// subtype matches, and whose lower bits rank the bitrate distance.
static int64_t mf_enca_output_score(AVCodecContext *avctx, IMFMediaType *type)
{
    MFContext *ctx = avctx->priv_data;
    int64_t result = 0;
    UINT32 val;
    GUID subtype;
    HRESULT hr;

    // Types with a non-zero AAC payload type are never usable, whatever
    // their other properties are.
    hr = IMFAttributes_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE, &val);
    if (!FAILED(hr) && val != 0)
        return -1;

    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &val);
    if (!FAILED(hr) && val == avctx->sample_rate)
        result |= 1LL << 32;

    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &val);
    if (!FAILED(hr) && val == avctx->channels)
        result |= 2LL << 32;

    hr = IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &subtype);
    if (!FAILED(hr) && IsEqualGUID(&ctx->main_subtype, &subtype))
        result |= 4LL << 32;

    // Select the bitrate (lowest priority).
    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &val);
    if (!FAILED(hr)) {
        int delta = (int)val - avctx->bit_rate / 8;
        if (delta < 0)
            result |= (1LL << 30) + delta; // prefer higher bitrate
        else
            result |= (1LL << 31) - delta; // prefer lower bitrate
    }

    return result;
}
// Hook for adjusting the chosen audio output type before it is set on the
// MFT. Currently a no-op that always returns 0.
static int mf_enca_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
{
// (some MFTs allow adjusting this freely, but it can also cause failure
// to set the output type - so it's commented for being too fragile)
//IMFAttributes_SetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, avctx->bit_rate / 8);
//IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);
return 0;
}
/**
 * Rate an audio input media type against the encoder configuration.
 *
 * @return -1 if the type's sample format is unknown (type unusable);
 *         otherwise a bitmask: 1 = sample format matches, 2 = sample rate
 *         matches, 4 = channel count matches
 */
static int64_t mf_enca_input_score(AVCodecContext *avctx, IMFMediaType *type)
{
    enum AVSampleFormat fmt = ff_media_type_to_sample_fmt((IMFAttributes *)type);
    int64_t result;
    UINT32 val;
    HRESULT hr;

    if (fmt == AV_SAMPLE_FMT_NONE)
        return -1; // can not use

    result = fmt == avctx->sample_fmt ? 1 : 0;

    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &val);
    if (!FAILED(hr) && val == avctx->sample_rate)
        result |= 2;

    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &val);
    if (!FAILED(hr) && val == avctx->channels)
        result |= 4;

    return result;
}
/**
 * Verify that the chosen audio input type matches the encoder configuration
 * exactly (sample format, sample rate, channel count). Nothing is modified.
 *
 * @return 0 if everything matches, AVERROR(EINVAL) otherwise
 */
static int mf_enca_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
{
    UINT32 rate, nb_channels;
    HRESULT hr;

    if (ff_media_type_to_sample_fmt((IMFAttributes *)type) != avctx->sample_fmt) {
        av_log(avctx, AV_LOG_ERROR, "unsupported input sample format set\n");
        return AVERROR(EINVAL);
    }

    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &rate);
    if (FAILED(hr) || rate != avctx->sample_rate) {
        av_log(avctx, AV_LOG_ERROR, "unsupported input sample rate set\n");
        return AVERROR(EINVAL);
    }

    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &nb_channels);
    if (FAILED(hr) || nb_channels != avctx->channels) {
        av_log(avctx, AV_LOG_ERROR, "unsupported input channel number set\n");
        return AVERROR(EINVAL);
    }

    return 0;
}
/**
 * Rate a video output media type: 1 if its subtype equals the subtype of
 * the codec being encoded, -1 otherwise (including when the subtype cannot
 * be read).
 */
static int64_t mf_encv_output_score(AVCodecContext *avctx, IMFMediaType *type)
{
    MFContext *ctx = avctx->priv_data;
    GUID subtype;

    if (FAILED(IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &subtype)))
        return -1;

    return IsEqualGUID(&ctx->main_subtype, &subtype) ? 1 : -1;
}
// Fill the chosen video output type with the encoder configuration (frame
// size, progressive interlace mode, frame rate, H.264 profile, bitrate) and
// push the ICodecAPI settings that must precede SetOutputType.
// Return values of the individual setters are not checked; always returns 0.
static int mf_encv_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
{
MFContext *c = avctx->priv_data;
AVRational framerate;
ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
IMFAttributes_SetUINT32(type, &MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
// Use the explicit frame rate if set, otherwise derive it from the time
// base and ticks_per_frame.
if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
framerate = avctx->framerate;
} else {
framerate = av_inv_q(avctx->time_base);
framerate.den *= avctx->ticks_per_frame;
}
ff_MFSetAttributeRatio((IMFAttributes *)type, &MF_MT_FRAME_RATE, framerate.num, framerate.den);
// (MS HEVC supports eAVEncH265VProfile_Main_420_8 only.)
if (avctx->codec_id == AV_CODEC_ID_H264) {
// Any profile other than Main or High falls back to Base.
UINT32 profile = ff_eAVEncH264VProfile_Base;
switch (avctx->profile) {
case FF_PROFILE_H264_MAIN:
profile = ff_eAVEncH264VProfile_Main;
break;
case FF_PROFILE_H264_HIGH:
profile = ff_eAVEncH264VProfile_High;
break;
}
IMFAttributes_SetUINT32(type, &MF_MT_MPEG2_PROFILE, profile);
}
IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);
// Note that some of the ICodecAPI options must be set before SetOutputType.
if (c->codec_api) {
if (avctx->bit_rate)
ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonMeanBitRate, FF_VAL_VT_UI4(avctx->bit_rate));
// Option values below 0 mean "leave the MFT default untouched".
if (c->opt_enc_rc >= 0)
ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonRateControlMode, FF_VAL_VT_UI4(c->opt_enc_rc));
if (c->opt_enc_quality >= 0)
ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonQuality, FF_VAL_VT_UI4(c->opt_enc_quality));
// Always set the number of b-frames. Qualcomm's HEVC encoder on SD835
// defaults this to 1, and that setting is buggy with many of the
// rate control modes. (0 or 2 b-frames works fine with most rate
// control modes, but 2 seems buggy with the u_vbr mode.) Setting
// "scenario" to "camera_record" sets it in CFR mode (where the default
// is VFR), which makes the encoder avoid dropping frames.
ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncMPVDefaultBPictureCount, FF_VAL_VT_UI4(avctx->max_b_frames));
avctx->has_b_frames = avctx->max_b_frames > 0;
ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncH264CABACEnable, FF_VAL_VT_BOOL(1));
if (c->opt_enc_scenario >= 0)
ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVScenarioInfo, FF_VAL_VT_UI4(c->opt_enc_scenario));
}
return 0;
}
/**
 * Rate a video input media type: 0 if its pixel format equals
 * avctx->pix_fmt, -1 (unusable) otherwise.
 */
static int64_t mf_encv_input_score(AVCodecContext *avctx, IMFMediaType *type)
{
    if (ff_media_type_to_pix_fmt((IMFAttributes *)type) == avctx->pix_fmt)
        return 0;

    return -1; // can not use
}
/**
 * Verify that the chosen video input type uses avctx->pix_fmt. The type
 * itself is not modified.
 *
 * @return 0 if the pixel format matches, AVERROR(EINVAL) otherwise
 */
static int mf_encv_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
{
    if (ff_media_type_to_pix_fmt((IMFAttributes *)type) == avctx->pix_fmt) {
        //ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
        return 0;
    }

    av_log(avctx, AV_LOG_ERROR, "unsupported input pixel format set\n");
    return AVERROR(EINVAL);
}
// Enumerate the MFT's available output types, pick the best-scoring one,
// adjust it to the encoder configuration and set it on the MFT.
//
// Returns 1 if the output type was set, 0 if the MFT requires the input
// type to be set first, or a negative AVERROR code on failure.
static int mf_choose_output_type(AVCodecContext *avctx)
{
MFContext *c = avctx->priv_data;
HRESULT hr;
int ret;
IMFMediaType *out_type = NULL;
int64_t out_type_score = -1;
int out_type_index = -1;
int n;
av_log(avctx, AV_LOG_VERBOSE, "output types:\n");
for (n = 0; ; n++) {
IMFMediaType *type;
int64_t score = -1;
hr = IMFTransform_GetOutputAvailableType(c->mft, c->out_stream_id, n, &type);
// End of enumeration (or enumeration not implemented by the MFT).
if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
break;
if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
av_log(avctx, AV_LOG_VERBOSE, "(need to set input type)\n");
ret = 0;
goto done;
}
if (FAILED(hr)) {
av_log(avctx, AV_LOG_ERROR, "error getting output type: %s\n", ff_hr_str(hr));
ret = AVERROR_EXTERNAL;
goto done;
}
av_log(avctx, AV_LOG_VERBOSE, "output type %d:\n", n);
ff_media_type_dump(avctx, type);
if (c->is_video) {
score = mf_encv_output_score(avctx, type);
} else if (c->is_audio) {
score = mf_enca_output_score(avctx, type);
}
// Strictly greater: the first of several equally good types wins, and
// types scored -1 are never selected.
if (score > out_type_score) {
if (out_type)
IMFMediaType_Release(out_type);
out_type = type;
out_type_score = score;
out_type_index = n;
// out_type holds its own reference; the enumeration reference is
// dropped just below.
IMFMediaType_AddRef(out_type);
}
IMFMediaType_Release(type);
}
if (out_type) {
av_log(avctx, AV_LOG_VERBOSE, "picking output type %d.\n", out_type_index);
} else {
// Nothing usable was enumerated: start from an empty media type and
// rely on the adjust step to fill it in.
hr = MFCreateMediaType(&out_type);
if (FAILED(hr)) {
ret = AVERROR(ENOMEM);
goto done;
}
}
ret = 0;
if (c->is_video) {
ret = mf_encv_output_adjust(avctx, out_type);
} else if (c->is_audio) {
ret = mf_enca_output_adjust(avctx, out_type);
}
if (ret >= 0) {
av_log(avctx, AV_LOG_VERBOSE, "setting output type:\n");
ff_media_type_dump(avctx, out_type);
hr = IMFTransform_SetOutputType(c->mft, c->out_stream_id, out_type, 0);
if (!FAILED(hr)) {
ret = 1;
} else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set input type\n");
ret = 0;
} else {
av_log(avctx, AV_LOG_ERROR, "could not set output type (%s)\n", ff_hr_str(hr));
ret = AVERROR_EXTERNAL;
}
}
done:
if (out_type)
IMFMediaType_Release(out_type);
return ret;
}
// Enumerate the MFT's available input types, pick the best-scoring one,
// verify it against the encoder configuration and set it on the MFT.
//
// Returns 1 if the input type was set, 0 if the output type has to be set
// first (also assumed when no usable type is enumerated), or a negative
// AVERROR code on failure.
static int mf_choose_input_type(AVCodecContext *avctx)
{
MFContext *c = avctx->priv_data;
HRESULT hr;
int ret;
IMFMediaType *in_type = NULL;
int64_t in_type_score = -1;
int in_type_index = -1;
int n;
av_log(avctx, AV_LOG_VERBOSE, "input types:\n");
for (n = 0; ; n++) {
IMFMediaType *type = NULL;
int64_t score = -1;
hr = IMFTransform_GetInputAvailableType(c->mft, c->in_stream_id, n, &type);
// End of enumeration (or enumeration not implemented by the MFT).
if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
break;
if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 1)\n");
ret = 0;
goto done;
}
if (FAILED(hr)) {
av_log(avctx, AV_LOG_ERROR, "error getting input type: %s\n", ff_hr_str(hr));
ret = AVERROR_EXTERNAL;
goto done;
}
av_log(avctx, AV_LOG_VERBOSE, "input type %d:\n", n);
ff_media_type_dump(avctx, type);
if (c->is_video) {
score = mf_encv_input_score(avctx, type);
} else if (c->is_audio) {
score = mf_enca_input_score(avctx, type);
}
// Strictly greater: the first of several equally good types wins, and
// types scored -1 are never selected.
if (score > in_type_score) {
if (in_type)
IMFMediaType_Release(in_type);
in_type = type;
in_type_score = score;
in_type_index = n;
// in_type holds its own reference; the enumeration reference is
// dropped just below.
IMFMediaType_AddRef(in_type);
}
IMFMediaType_Release(type);
}
if (in_type) {
av_log(avctx, AV_LOG_VERBOSE, "picking input type %d.\n", in_type_index);
} else {
// Some buggy MFTs (WMA encoder) fail to return MF_E_TRANSFORM_TYPE_NOT_SET.
av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 2)\n");
ret = 0;
goto done;
}
ret = 0;
if (c->is_video) {
ret = mf_encv_input_adjust(avctx, in_type);
} else if (c->is_audio) {
ret = mf_enca_input_adjust(avctx, in_type);
}
if (ret >= 0) {
av_log(avctx, AV_LOG_VERBOSE, "setting input type:\n");
ff_media_type_dump(avctx, in_type);
hr = IMFTransform_SetInputType(c->mft, c->in_stream_id, in_type, 0);
if (!FAILED(hr)) {
ret = 1;
} else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set output type\n");
ret = 0;
} else {
av_log(avctx, AV_LOG_ERROR, "could not set input type (%s)\n", ff_hr_str(hr));
ret = AVERROR_EXTERNAL;
}
}
done:
if (in_type)
IMFMediaType_Release(in_type);
return ret;
}
/**
 * Negotiate input and output media types with the MFT.
 *
 * Input and output selection are attempted alternately for at most two
 * rounds, since an MFT may require either side to be configured first.
 *
 * @return 0 once both types are set, a negative AVERROR code otherwise
 */
static int mf_negotiate_types(AVCodecContext *avctx)
{
    // This follows steps 1-5 on:
    //  https://msdn.microsoft.com/en-us/library/windows/desktop/aa965264(v=vs.85).aspx
    // If every MFT implementer does this correctly, this loop should at worst
    // be repeated once.
    int input_pending = 1, output_pending = 1;
    int round = 0;

    while (round < 2 && (input_pending || output_pending)) {
        int ret = mf_choose_input_type(avctx);
        if (ret < 0)
            return ret;
        input_pending = ret < 1;

        ret = mf_choose_output_type(avctx);
        if (ret < 0)
            return ret;
        output_pending = ret < 1;

        round++;
    }

    if (!input_pending && !output_pending)
        return 0;

    av_log(avctx, AV_LOG_ERROR, "format negotiation failed (%d/%d)\n",
           input_pending, output_pending);
    return AVERROR_EXTERNAL;
}
/**
 * Cache the MFT's stream information after type negotiation and import the
 * parameters of the final output type.
 *
 * Fills in_info, out_info and out_stream_provides_samples in MFContext.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int mf_setup_context(AVCodecContext *avctx)
{
    MFContext *ctx = avctx->priv_data;
    int ret;

    if (FAILED(IMFTransform_GetInputStreamInfo(ctx->mft, ctx->in_stream_id, &ctx->in_info)))
        return AVERROR_EXTERNAL;
    av_log(avctx, AV_LOG_VERBOSE, "in_info: size=%d, align=%d\n",
           (int)ctx->in_info.cbSize, (int)ctx->in_info.cbAlignment);

    if (FAILED(IMFTransform_GetOutputStreamInfo(ctx->mft, ctx->out_stream_id, &ctx->out_info)))
        return AVERROR_EXTERNAL;

    // Either flag means we do not allocate output samples ourselves.
    ctx->out_stream_provides_samples =
        !!(ctx->out_info.dwFlags & (MFT_OUTPUT_STREAM_PROVIDES_SAMPLES |
                                    MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES));
    av_log(avctx, AV_LOG_VERBOSE, "out_info: size=%d, align=%d%s\n",
           (int)ctx->out_info.cbSize, (int)ctx->out_info.cbAlignment,
           ctx->out_stream_provides_samples ? " (provides samples)" : "");

    ret = mf_output_type_get(avctx);
    return ret < 0 ? ret : 0;
}
/**
 * Switch a hardware video MFT into (unlocked) async mode and obtain its
 * event generator interface.
 *
 * Does nothing unless this is a video encoder with hardware encoding
 * requested. On success c->async_events is set.
 *
 * @return 0 on success or when async mode is not needed,
 *         AVERROR_EXTERNAL on failure
 */
static int mf_unlock_async(AVCodecContext *avctx)
{
    MFContext *c = avctx->priv_data;
    HRESULT hr;
    // Must start out NULL: the cleanup path below runs even when
    // GetAttributes() itself fails and never fills it in.
    IMFAttributes *attrs = NULL;
    UINT32 v;
    int res = AVERROR_EXTERNAL;

    // For hw encoding we unfortunately need to use async mode, otherwise
    // play it safe and avoid it.
    if (!(c->is_video && c->opt_enc_hw))
        return 0;

    hr = IMFTransform_GetAttributes(c->mft, &attrs);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "error retrieving MFT attributes: %s\n", ff_hr_str(hr));
        goto err;
    }

    hr = IMFAttributes_GetUINT32(attrs, &MF_TRANSFORM_ASYNC, &v);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "error querying async: %s\n", ff_hr_str(hr));
        goto err;
    }

    if (!v) {
        av_log(avctx, AV_LOG_ERROR, "hardware MFT is not async\n");
        goto err;
    }

    hr = IMFAttributes_SetUINT32(attrs, &MF_TRANSFORM_ASYNC_UNLOCK, TRUE);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "could not set async unlock: %s\n", ff_hr_str(hr));
        goto err;
    }

    hr = IMFTransform_QueryInterface(c->mft, &IID_IMFMediaEventGenerator, (void **)&c->async_events);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "could not get async interface\n");
        goto err;
    }

    res = 0;

err:
    if (attrs)
        IMFAttributes_Release(attrs);
    return res;
}
/**
 * Instantiate an encoder MFT for the given codec.
 *
 * @param log    logging context passed through to ff_instantiate_mf()
 * @param mft    receives the created transform; set to NULL on entry
 * @param codec  codec to encode to; selects subtype, major type and category
 * @param use_hw passed through to ff_instantiate_mf()
 * @return 0 on success, AVERROR(ENOSYS) if the codec has no MF subtype,
 *         or the negative error from ff_instantiate_mf()
 */
static int mf_create(void *log, IMFTransform **mft, const AVCodec *codec, int use_hw)
{
    const int audio = codec->type == AVMEDIA_TYPE_AUDIO;
    const CLSID *subtype = ff_codec_to_mf_subtype(codec->id);
    const GUID *major_type = audio ? &MFMediaType_Audio : &MFMediaType_Video;
    const GUID *mft_category = audio ? &MFT_CATEGORY_AUDIO_ENCODER
                                     : &MFT_CATEGORY_VIDEO_ENCODER;
    MFT_REGISTER_TYPE_INFO out_info = {0};
    GUID category;
    int ret;

    *mft = NULL;

    if (!subtype)
        return AVERROR(ENOSYS);

    // Describe the desired output: the codec subtype within its major type.
    out_info.guidSubtype   = *subtype;
    out_info.guidMajorType = *major_type;
    category               = *mft_category;

    ret = ff_instantiate_mf(log, category, NULL, &out_info, use_hw, mft);
    return ret < 0 ? ret : 0;
}
// init() callback of the encoder: creates the MFT, optionally enables async
// mode, negotiates media types and starts streaming.
//
// Returns 0 on success or a negative AVERROR code. Early returns do not
// release anything here; the codec declares FF_CODEC_CAP_INIT_CLEANUP and
// mf_close() frees everything this function acquires.
static int mf_init(AVCodecContext *avctx)
{
MFContext *c = avctx->priv_data;
HRESULT hr;
int ret;
const CLSID *subtype = ff_codec_to_mf_subtype(avctx->codec_id);
int use_hw = 0;
c->frame = av_frame_alloc();
if (!c->frame)
return AVERROR(ENOMEM);
c->is_audio = avctx->codec_type == AVMEDIA_TYPE_AUDIO;
c->is_video = !c->is_audio;
c->reorder_delay = AV_NOPTS_VALUE;
// Hardware encoding is only considered for video.
if (c->is_video && c->opt_enc_hw)
use_hw = 1;
if (!subtype)
return AVERROR(ENOSYS);
c->main_subtype = *subtype;
if ((ret = mf_create(avctx, &c->mft, avctx->codec, use_hw)) < 0)
return ret;
if ((ret = mf_unlock_async(avctx)) < 0)
return ret;
// ICodecAPI is optional; failure to obtain it is not an error.
hr = IMFTransform_QueryInterface(c->mft, &IID_ICodecAPI, (void **)&c->codec_api);
if (!FAILED(hr))
av_log(avctx, AV_LOG_VERBOSE, "MFT supports ICodecAPI.\n");
hr = IMFTransform_GetStreamIDs(c->mft, 1, &c->in_stream_id, 1, &c->out_stream_id);
// E_NOTIMPL: fall back to stream ID 0 for both input and output.
if (hr == E_NOTIMPL) {
c->in_stream_id = c->out_stream_id = 0;
} else if (FAILED(hr)) {
av_log(avctx, AV_LOG_ERROR, "could not get stream IDs (%s)\n", ff_hr_str(hr));
return AVERROR_EXTERNAL;
}
if ((ret = mf_negotiate_types(avctx)) < 0)
return ret;
if ((ret = mf_setup_context(avctx)) < 0)
return ret;
hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
if (FAILED(hr)) {
av_log(avctx, AV_LOG_ERROR, "could not start streaming (%s)\n", ff_hr_str(hr));
return AVERROR_EXTERNAL;
}
hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0);
if (FAILED(hr)) {
av_log(avctx, AV_LOG_ERROR, "could not start stream (%s)\n", ff_hr_str(hr));
return AVERROR_EXTERNAL;
}
// Global headers were requested but an async video MFT has not delivered
// extradata yet: poll for it with a doubling sleep (10, 20, 40 ms).
if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER && c->async_events &&
c->is_video && !avctx->extradata) {
int sleep = 10000, total = 0;
av_log(avctx, AV_LOG_VERBOSE, "Awaiting extradata\n");
while (total < 70*1000) {
// The Qualcomm H264 encoder on SD835 doesn't provide extradata
// immediately, but it becomes available soon after init (without
// any waitable event). In practice, it's available after less
// than 10 ms, but wait for up to 70 ms before giving up.
// Some encoders (Qualcomm's HEVC encoder on SD835, some versions
// of the QSV H264 encoder at least) don't provide extradata this
// way at all, not even after encoding a frame - it's only
// available prepended to frames.
av_usleep(sleep);
total += sleep;
// The return value is ignored on purpose: only the appearance of
// extradata matters here.
mf_output_type_get(avctx);
if (avctx->extradata)
break;
sleep *= 2;
}
av_log(avctx, AV_LOG_VERBOSE, "%s extradata in %d ms\n",
avctx->extradata ? "Got" : "Didn't get", total / 1000);
}
return 0;
}
/**
 * close() callback of the encoder: release the COM interfaces, the MFT,
 * the cached input frame and the extradata. Always returns 0.
 */
static int mf_close(AVCodecContext *avctx)
{
    MFContext *ctx = avctx->priv_data;

    // Auxiliary interfaces are released before the transform itself.
    if (ctx->codec_api) {
        ICodecAPI_Release(ctx->codec_api);
    }
    if (ctx->async_events) {
        IMFMediaEventGenerator_Release(ctx->async_events);
    }
    ff_free_mf(&ctx->mft);

    av_frame_free(&ctx->frame);

    av_freep(&avctx->extradata);
    avctx->extradata_size = 0;

    return 0;
}
// Byte offset of an option field inside MFContext, for the AVOption tables.
#define OFFSET(x) offsetof(MFContext, x)
// Defines the AVClass and the AVCodec for one MediaFoundation encoder.
//   MEDIATYPE: AUDIO or VIDEO (suffix of AVMEDIA_TYPE_*)
//   NAME:      lower-case codec name; the encoder is named NAME "_mf"
//   ID:        suffix of AV_CODEC_ID_*
//   OPTS:      AVOption table, or NULL
//   EXTRA:     additional designated initializers (e.g. supported formats)
#define MF_ENCODER(MEDIATYPE, NAME, ID, OPTS, EXTRA) \
static const AVClass ff_ ## NAME ## _mf_encoder_class = { \
.class_name = #NAME "_mf", \
.item_name = av_default_item_name, \
.option = OPTS, \
.version = LIBAVUTIL_VERSION_INT, \
}; \
AVCodec ff_ ## NAME ## _mf_encoder = { \
.priv_class = &ff_ ## NAME ## _mf_encoder_class, \
.name = #NAME "_mf", \
.long_name = NULL_IF_CONFIG_SMALL(#ID " via MediaFoundation"), \
.type = AVMEDIA_TYPE_ ## MEDIATYPE, \
.id = AV_CODEC_ID_ ## ID, \
.priv_data_size = sizeof(MFContext), \
.init = mf_init, \
.close = mf_close, \
.receive_packet = mf_receive_packet, \
EXTRA \
.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID, \
.caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | \
FF_CODEC_CAP_INIT_CLEANUP, \
};
// Sample formats advertised by the audio encoders: signed 16-bit only.
#define AFMTS \
.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, \
AV_SAMPLE_FMT_NONE },
// Audio encoders; none of them exposes private options.
MF_ENCODER(AUDIO, aac, AAC, NULL, AFMTS);
MF_ENCODER(AUDIO, ac3, AC3, NULL, AFMTS);
MF_ENCODER(AUDIO, mp3, MP3, NULL, AFMTS);
// Flags shared by all options below: video encoding parameters.
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
// Private options of the video encoders. A value of -1 for rate_control,
// scenario and quality means the setting is not passed to the MFT
// (see mf_encv_output_adjust()).
static const AVOption venc_opts[] = {
{"rate_control", "Select rate control mode", OFFSET(opt_enc_rc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, "rate_control"},
{ "default", "Default mode", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, "rate_control"},
{ "cbr", "CBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_CBR}, 0, 0, VE, "rate_control"},
{ "pc_vbr", "Peak constrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_PeakConstrainedVBR}, 0, 0, VE, "rate_control"},
{ "u_vbr", "Unconstrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_UnconstrainedVBR}, 0, 0, VE, "rate_control"},
{ "quality", "Quality mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_Quality}, 0, 0, VE, "rate_control" },
// The following rate_control modes require Windows 8.
{ "ld_vbr", "Low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_LowDelayVBR}, 0, 0, VE, "rate_control"},
{ "g_vbr", "Global VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalVBR}, 0, 0, VE, "rate_control" },
{ "gld_vbr", "Global low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalLowDelayVBR}, 0, 0, VE, "rate_control"},
{"scenario", "Select usage scenario", OFFSET(opt_enc_scenario), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, "scenario"},
{ "default", "Default scenario", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, "scenario"},
{ "display_remoting", "Display remoting", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemoting}, 0, 0, VE, "scenario"},
{ "video_conference", "Video conference", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_VideoConference}, 0, 0, VE, "scenario"},
{ "archive", "Archive", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_Archive}, 0, 0, VE, "scenario"},
{ "live_streaming", "Live streaming", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_LiveStreaming}, 0, 0, VE, "scenario"},
{ "camera_record", "Camera record", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_CameraRecord}, 0, 0, VE, "scenario"},
{ "display_remoting_with_feature_map", "Display remoting with feature map", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemotingWithFeatureMap}, 0, 0, VE, "scenario"},
{"quality", "Quality", OFFSET(opt_enc_quality), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 100, VE},
{"hw_encoding", "Force hardware encoding", OFFSET(opt_enc_hw), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, VE},
{NULL}
};
// Pixel formats advertised by the video encoders.
#define VFMTS \
.pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12, \
AV_PIX_FMT_YUV420P, \
AV_PIX_FMT_NONE },
// Video encoders; both share the venc_opts option table.
MF_ENCODER(VIDEO, h264, H264, venc_opts, VFMTS);
MF_ENCODER(VIDEO, hevc, HEVC, venc_opts, VFMTS);