webmdashenc: Support for live stream manifests

This patch adds support for creating DASH manifests for WebM Live
Streams. It also updates the documentation and adds a fate test to
verify the behavior of the new muxer flag.

Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Author:    Vignesh Venkatasubramanian
Date:      2015-03-31 19:40:01 -07:00
Committer: Michael Niedermayer
Parent:    bc0e65e7d0
Commit:    26f2e2f3f7
4 changed files with 224 additions and 28 deletions

@@ -1210,7 +1210,17 @@ is the @option{global_header} flag.
WebM DASH Manifest muxer.
-This muxer implements the WebM DASH Manifest specification to generate the DASH manifest XML.
+This muxer implements the WebM DASH Manifest specification to generate the DASH
+manifest XML. It also supports manifest generation for DASH live streams.
For more information see:
@itemize @bullet
@item
WebM DASH Specification: @url{https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification}
@item
ISO DASH Specification: @url{http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip}
@end itemize
@subsection Options
@@ -1221,6 +1231,28 @@ This muxer supports the following options:
This option has the following syntax: "id=x,streams=a,b,c id=y,streams=d,e" where x and y are the
unique identifiers of the adaptation sets and a,b,c,d and e are the indices of the corresponding
audio and video streams. Any number of adaptation sets can be added using this option.
@item live
Set this to 1 to create a live stream DASH Manifest. Default: 0.
@item chunk_start_index
Start index of the first chunk. This will go in the @samp{startNumber} attribute
of the @samp{SegmentTemplate} element in the manifest. Default: 0.
@item chunk_duration_ms
Duration of each chunk in milliseconds. This will go in the @samp{duration}
attribute of the @samp{SegmentTemplate} element in the manifest. Default: 1000.
@item utc_timing_url
URL of the page that will return the UTC timestamp in ISO format. This will go
in the @samp{value} attribute of the @samp{UTCTiming} element in the manifest.
Default: None.
@item time_shift_buffer_depth
Smallest duration (in seconds) of the time-shift buffer for which any
Representation is guaranteed to be available. This will go in the @samp{timeShiftBufferDepth}
attribute of the @samp{MPD} element. Default: 60.
@end table
@subsection Example
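The command below is only a sketch (input and output file names are placeholders); it mirrors the FATE test added by this patch and assumes the @file{.hdr} header files and @file{.chk} chunks have already been produced following the live naming convention:

@example
ffmpeg -f webm_dash_manifest -live 1 -i video_360.hdr \
       -f webm_dash_manifest -live 1 -i audio_171.hdr \
       -c copy -map 0 -map 1 \
       -f webm_dash_manifest -live 1 \
       -adaptation_sets "id=0,streams=0 id=1,streams=1" \
       -chunk_start_index 1 -chunk_duration_ms 5000 \
       -time_shift_buffer_depth 7200 \
       manifest.xml
@end example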

@@ -22,8 +22,11 @@
/*
* WebM DASH Specification:
* https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification
* ISO DASH Specification:
* http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
*/
#include <float.h>
#include <stdint.h>
#include <string.h>
@@ -34,6 +37,7 @@
#include "libavutil/avstring.h"
#include "libavutil/dict.h"
#include "libavutil/opt.h"
#include "libavutil/time_internal.h"
typedef struct AdaptationSet {
char id[10];
@@ -47,6 +51,12 @@ typedef struct WebMDashMuxContext {
AdaptationSet *as;
int nb_as;
int representation_id;
int is_live;
int chunk_start_index;
int chunk_duration;
char *utc_timing_url;
double time_shift_buffer_depth;
int debug_mode;
} WebMDashMuxContext;
static const char *get_codec_name(int codec_id)
@@ -79,19 +89,42 @@ static double get_duration(AVFormatContext *s)
static void write_header(AVFormatContext *s)
{
WebMDashMuxContext *w = s->priv_data;
double min_buffer_time = 1.0;
time_t local_time;
struct tm *gmt, gmt_buffer;
char *gmt_iso = av_malloc(21);
avio_printf(s->pb, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
avio_printf(s->pb, "<MPD\n");
avio_printf(s->pb, " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n");
avio_printf(s->pb, " xmlns=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
avio_printf(s->pb, " xsi:schemaLocation=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
-avio_printf(s->pb, " type=\"static\"\n");
-avio_printf(s->pb, " mediaPresentationDuration=\"PT%gS\"\n",
-get_duration(s));
-avio_printf(s->pb, " minBufferTime=\"PT%gS\"\n",
-min_buffer_time);
-avio_printf(s->pb, " profiles=\"urn:webm:dash:profile:webm-on-demand:2012\"");
-avio_printf(s->pb, ">\n");
+avio_printf(s->pb, " type=\"%s\"\n", w->is_live ? "dynamic" : "static");
+if (!w->is_live) {
+avio_printf(s->pb, " mediaPresentationDuration=\"PT%gS\"\n",
+get_duration(s));
+}
+avio_printf(s->pb, " minBufferTime=\"PT%gS\"\n", min_buffer_time);
+avio_printf(s->pb, " profiles=\"%s\"%s",
+w->is_live ? "urn:mpeg:dash:profile:isoff-live:2011" : "urn:webm:dash:profile:webm-on-demand:2012",
+w->is_live ? "\n" : ">\n");
+time(&local_time);
+gmt = gmtime_r(&local_time, &gmt_buffer);
+strftime(gmt_iso, 21, "%FT%TZ", gmt);
+if (w->debug_mode) {
+av_strlcpy(gmt_iso, "", 1);
+}
+if (w->is_live) {
+avio_printf(s->pb, " availabilityStartTime=\"%s\"\n", gmt_iso);
+avio_printf(s->pb, " timeShiftBufferDepth=\"PT%gS\"", w->time_shift_buffer_depth);
+avio_printf(s->pb, ">\n");
+avio_printf(s->pb, "<UTCTiming\n");
+avio_printf(s->pb, " schemeIdUri=\"%s\"\n",
+w->utc_timing_url ? "urn:mpeg:dash:utc:http-iso:2014" : "urn:mpeg:dash:utc:direct:2012");
+avio_printf(s->pb, " value=\"%s\"/>\n",
+w->utc_timing_url ? w->utc_timing_url : gmt_iso);
+}
+av_free(gmt_iso);
}
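As a standalone illustration only (not part of the patch, and assuming a POSIX gmtime_r()), the availabilityStartTime value written above is a plain ISO 8601 UTC timestamp: "%FT%TZ" expands to "YYYY-MM-DDTHH:MM:SSZ", which is why a 21-byte buffer (20 characters plus the terminating NUL) is sufficient:

#include <stdio.h>
#include <time.h>

int main(void)
{
    time_t now;
    struct tm gmt_buffer;
    struct tm *gmt;
    char gmt_iso[21]; /* "YYYY-MM-DDTHH:MM:SSZ" + '\0' */

    time(&now);
    gmt = gmtime_r(&now, &gmt_buffer);                 /* broken-down time in UTC */
    strftime(gmt_iso, sizeof(gmt_iso), "%FT%TZ", gmt); /* e.g. 2015-04-01T02:40:01Z */
    printf("availabilityStartTime=\"%s\"\n", gmt_iso);
    return 0;
}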
static void write_footer(AVFormatContext *s)
@@ -137,33 +170,47 @@ static int bitstream_switching(AVFormatContext *s, AdaptationSet *as) {
* Writes a Representation within an Adaptation Set. Returns 0 on success and
* < 0 on failure.
*/
-static int write_representation(AVFormatContext *s, AVStream *stream, int id,
+static int write_representation(AVFormatContext *s, AVStream *stream, char *id,
int output_width, int output_height,
int output_sample_rate) {
WebMDashMuxContext *w = s->priv_data;
AVDictionaryEntry *irange = av_dict_get(stream->metadata, INITIALIZATION_RANGE, NULL, 0);
AVDictionaryEntry *cues_start = av_dict_get(stream->metadata, CUES_START, NULL, 0);
AVDictionaryEntry *cues_end = av_dict_get(stream->metadata, CUES_END, NULL, 0);
AVDictionaryEntry *filename = av_dict_get(stream->metadata, FILENAME, NULL, 0);
AVDictionaryEntry *bandwidth = av_dict_get(stream->metadata, BANDWIDTH, NULL, 0);
-if (!irange || cues_start == NULL || cues_end == NULL || filename == NULL ||
-!bandwidth) {
+if ((w->is_live && (!filename)) ||
+(!w->is_live && (!irange || !cues_start || !cues_end || !filename || !bandwidth))) {
return -1;
}
-avio_printf(s->pb, "<Representation id=\"%d\"", id);
-avio_printf(s->pb, " bandwidth=\"%s\"", bandwidth->value);
+avio_printf(s->pb, "<Representation id=\"%s\"", id);
+// FIXME: For live, this should be obtained from the input file or as an AVOption.
+avio_printf(s->pb, " bandwidth=\"%s\"",
+w->is_live ? (stream->codec->codec_type == AVMEDIA_TYPE_AUDIO ? "128000" : "1000000") : bandwidth->value);
if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_width)
avio_printf(s->pb, " width=\"%d\"", stream->codec->width);
if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_height)
avio_printf(s->pb, " height=\"%d\"", stream->codec->height);
if (stream->codec->codec_type == AVMEDIA_TYPE_AUDIO && output_sample_rate)
avio_printf(s->pb, " audioSamplingRate=\"%d\"", stream->codec->sample_rate);
-avio_printf(s->pb, ">\n");
-avio_printf(s->pb, "<BaseURL>%s</BaseURL>\n", filename->value);
-avio_printf(s->pb, "<SegmentBase\n");
-avio_printf(s->pb, " indexRange=\"%s-%s\">\n", cues_start->value, cues_end->value);
-avio_printf(s->pb, "<Initialization\n");
-avio_printf(s->pb, " range=\"0-%s\" />\n", irange->value);
-avio_printf(s->pb, "</SegmentBase>\n");
+if (w->is_live) {
+// For live streams, Codec and Mime Type always go in the Representation tag.
+avio_printf(s->pb, " codecs=\"%s\"", get_codec_name(stream->codec->codec_id));
+avio_printf(s->pb, " mimeType=\"%s/webm\"",
+stream->codec->codec_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
+// For live streams, subsegments always start with key frames. So this
+// is always 1.
+avio_printf(s->pb, " startsWithSAP=\"1\"");
+avio_printf(s->pb, ">");
+} else {
+avio_printf(s->pb, ">\n");
+avio_printf(s->pb, "<BaseURL>%s</BaseURL>\n", filename->value);
+avio_printf(s->pb, "<SegmentBase\n");
+avio_printf(s->pb, " indexRange=\"%s-%s\">\n", cues_start->value, cues_end->value);
+avio_printf(s->pb, "<Initialization\n");
+avio_printf(s->pb, " range=\"0-%s\" />\n", irange->value);
+avio_printf(s->pb, "</SegmentBase>\n");
+}
avio_printf(s->pb, "</Representation>\n");
return 0;
}
@@ -207,6 +254,51 @@ static int check_matching_sample_rate(AVFormatContext *s, AdaptationSet *as) {
return 1;
}
/*
* Parses a live header filename and computes the representation id,
* initialization pattern and the media pattern. Pass NULL if you don't want to
* compute any of those 3. Returns 0 on success and non-zero on failure.
*
* Name of the header file should conform to the following pattern:
* <file_description>_<representation_id>.hdr where <file_description> can be
* anything. The chunks should be named according to the following pattern:
* <file_description>_<representation_id>_<chunk_number>.chk
*/
static int parse_filename(char *filename, char **representation_id,
char **initialization_pattern, char **media_pattern) {
char *underscore_pos = NULL;
char *period_pos = NULL;
char *temp_pos = NULL;
char *filename_str = av_strdup(filename);
if (!filename_str) return AVERROR(ENOMEM);
temp_pos = av_stristr(filename_str, "_");
while (temp_pos) {
underscore_pos = temp_pos + 1;
temp_pos = av_stristr(temp_pos + 1, "_");
}
if (!underscore_pos) return -1;
period_pos = av_stristr(underscore_pos, ".");
if (!period_pos) return -1;
*(underscore_pos - 1) = 0;
if (representation_id) {
*representation_id = av_malloc(period_pos - underscore_pos + 1);
if (!(*representation_id)) return AVERROR(ENOMEM);
av_strlcpy(*representation_id, underscore_pos, period_pos - underscore_pos + 1);
}
if (initialization_pattern) {
*initialization_pattern = av_asprintf("%s_$RepresentationID$.hdr",
filename_str);
if (!(*initialization_pattern)) return AVERROR(ENOMEM);
}
if (media_pattern) {
*media_pattern = av_asprintf("%s_$RepresentationID$_$Number$.chk",
filename_str);
if (!(*media_pattern)) return AVERROR(ENOMEM);
}
av_free(filename_str);
return 0;
}
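As a rough standalone sketch of the naming convention described above (this is not the muxer's internal helper, and the file name is just an example), the three values can be derived with plain string handling; the derived patterns match the SegmentTemplate attributes in the reference manifest below:

#include <stdio.h>
#include <string.h>

int main(void)
{
    const char *filename = "dash_live_video_360.hdr"; /* example header name */
    const char *underscore = strrchr(filename, '_');  /* last '_' precedes the representation id */
    const char *period = strrchr(filename, '.');
    char desc[256], rep_id[64];

    if (!underscore || !period || period < underscore)
        return 1;

    /* "dash_live_video" and "360" for the example name above */
    snprintf(desc, sizeof(desc), "%.*s", (int)(underscore - filename), filename);
    snprintf(rep_id, sizeof(rep_id), "%.*s", (int)(period - underscore - 1), underscore + 1);

    printf("representation id: %s\n", rep_id);
    printf("initialization:    %s_$RepresentationID$.hdr\n", desc);
    printf("media:             %s_$RepresentationID$_$Number$.chk\n", desc);
    return 0;
}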
/*
* Writes an Adaptation Set. Returns 0 on success and < 0 on failure.
*/
@@ -222,13 +314,14 @@ static int write_adaptation_set(AVFormatContext *s, int as_index)
// Width, Height and Sample Rate will go in the AdaptationSet tag if they
// are the same for all contained Representations. otherwise, they will go
-// on their respective Representation tag.
+// on their respective Representation tag. For live streams, they always go
+// in the Representation tag.
int width_in_as = 1, height_in_as = 1, sample_rate_in_as = 1;
if (codec->codec_type == AVMEDIA_TYPE_VIDEO) {
-width_in_as = check_matching_width(s, as);
-height_in_as = check_matching_height(s, as);
+width_in_as = !w->is_live && check_matching_width(s, as);
+height_in_as = !w->is_live && check_matching_height(s, as);
} else {
-sample_rate_in_as = check_matching_sample_rate(s, as);
+sample_rate_in_as = !w->is_live && check_matching_sample_rate(s, as);
}
avio_printf(s->pb, "<AdaptationSet id=\"%s\"", as->id);
@@ -249,19 +342,53 @@ static int write_adaptation_set(AVFormatContext *s, int as_index)
avio_printf(s->pb, " bitstreamSwitching=\"%s\"",
boolean[bitstream_switching(s, as)]);
avio_printf(s->pb, " subsegmentAlignment=\"%s\"",
-boolean[subsegment_alignment(s, as)]);
+boolean[w->is_live || subsegment_alignment(s, as)]);
for (i = 0; i < as->nb_streams; i++) {
AVDictionaryEntry *kf = av_dict_get(s->streams[as->streams[i]]->metadata,
CLUSTER_KEYFRAME, NULL, 0);
-if (!kf || !strncmp(kf->value, "0", 1)) subsegmentStartsWithSAP = 0;
+if (!w->is_live && (!kf || !strncmp(kf->value, "0", 1))) subsegmentStartsWithSAP = 0;
}
avio_printf(s->pb, " subsegmentStartsWithSAP=\"%d\"", subsegmentStartsWithSAP);
avio_printf(s->pb, ">\n");
if (w->is_live) {
AVDictionaryEntry *filename =
av_dict_get(s->streams[as->streams[0]]->metadata, FILENAME, NULL, 0);
char *initialization_pattern = NULL;
char *media_pattern = NULL;
int ret = parse_filename(filename->value, NULL, &initialization_pattern,
&media_pattern);
if (ret) return ret;
avio_printf(s->pb, "<ContentComponent id=\"1\" type=\"%s\"/>\n",
codec->codec_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
avio_printf(s->pb, "<SegmentTemplate");
avio_printf(s->pb, " timescale=\"1000\"");
avio_printf(s->pb, " duration=\"%d\"", w->chunk_duration);
avio_printf(s->pb, " media=\"%s\"", media_pattern);
avio_printf(s->pb, " startNumber=\"%d\"", w->chunk_start_index);
avio_printf(s->pb, " initialization=\"%s\"", initialization_pattern);
avio_printf(s->pb, "/>\n");
av_free(initialization_pattern);
av_free(media_pattern);
}
for (i = 0; i < as->nb_streams; i++) {
-write_representation(s, s->streams[as->streams[i]], w->representation_id++,
+char *representation_id = NULL;
+if (w->is_live) {
+AVDictionaryEntry *filename =
+av_dict_get(s->streams[as->streams[i]]->metadata, FILENAME, NULL, 0);
+if (!filename ||
+parse_filename(filename->value, &representation_id, NULL, NULL)) {
+return -1;
+}
+} else {
+representation_id = av_asprintf("%d", w->representation_id++);
+if (!representation_id) return -1;
+}
+write_representation(s, s->streams[as->streams[i]], representation_id,
!width_in_as, !height_in_as, !sample_rate_in_as);
+av_free(representation_id);
}
avio_printf(s->pb, "</AdaptationSet>\n");
return 0;
@@ -333,7 +460,9 @@ static int webm_dash_manifest_write_header(AVFormatContext *s)
write_header(s);
avio_printf(s->pb, "<Period id=\"0\"");
avio_printf(s->pb, " start=\"PT%gS\"", start);
-avio_printf(s->pb, " duration=\"PT%gS\"", get_duration(s));
+if (!w->is_live) {
+avio_printf(s->pb, " duration=\"PT%gS\"", get_duration(s));
+}
avio_printf(s->pb, " >\n");
for (i = 0; i < w->nb_as; i++) {
@@ -364,6 +493,12 @@ static int webm_dash_manifest_write_trailer(AVFormatContext *s)
#define OFFSET(x) offsetof(WebMDashMuxContext, x)
static const AVOption options[] = {
{ "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
{ "debug_mode", "[private option - users should never set this]. set this to 1 to create deterministic output", OFFSET(debug_mode), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
{ "live", "set this to 1 to create a live stream manifest", OFFSET(is_live), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
{ "chunk_start_index", "start index of the first chunk", OFFSET(chunk_start_index), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
{ "chunk_duration_ms", "duration of each chunk (in milliseconds)", OFFSET(chunk_duration), AV_OPT_TYPE_INT, {.i64 = 1000}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
{ "utc_timing_url", "URL of the page that will return the UTC timestamp in ISO format", OFFSET(utc_timing_url), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
{ "time_shift_buffer_depth", "Smallest duration (in seconds) of the time-shift buffer for which any Representation is guaranteed to be available.", OFFSET(time_shift_buffer_depth), AV_OPT_TYPE_DOUBLE, { .dbl = 60.0 }, 1.0, DBL_MAX, AV_OPT_FLAG_ENCODING_PARAM },
{ NULL },
};

@@ -43,6 +43,9 @@ fate-webm-dash-manifest-unaligned-audio-streams: CMD = run ffmpeg -f webm_dash_m
FATE_VP8-$(call DEMDEC, WEBM_DASH_MANIFEST, VP8) += fate-webm-dash-manifest-representations
fate-webm-dash-manifest-representations: CMD = run ffmpeg -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video1.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video4.webm -c copy -map 0 -map 1 -f webm_dash_manifest -adaptation_sets "id=0,streams=0,1" -
FATE_VP8-$(call DEMDEC, WEBM_DASH_MANIFEST, VP8) += fate-webm-dash-manifest-live
fate-webm-dash-manifest-live: CMD = run ffmpeg -f webm_dash_manifest -live 1 -i $(TARGET_SAMPLES)/vp8/dash_live_video_360.hdr -f webm_dash_manifest -live 1 -i $(TARGET_SAMPLES)/vp8/dash_live_audio_171.hdr -c copy -map 0 -map 1 -f webm_dash_manifest -live 1 -adaptation_sets "id=0,streams=0 id=1,streams=1" -chunk_start_index 1 -chunk_duration_ms 5000 -time_shift_buffer_depth 7200 -debug_mode 1 -
FATE_SAMPLES_AVCONV += $(FATE_VP6-yes)
fate-vp6: $(FATE_VP6-yes)
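Assuming the referenced dash_live samples are available in the FATE suite, the new test can be run on its own with something like:

make fate-webm-dash-manifest-live SAMPLES=/path/to/fate-suite

The -debug_mode 1 flag keeps the output deterministic by writing an empty availabilityStartTime, so the result can be compared against the reference manifest below.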

@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<MPD
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="urn:mpeg:DASH:schema:MPD:2011"
xsi:schemaLocation="urn:mpeg:DASH:schema:MPD:2011"
type="dynamic"
minBufferTime="PT1S"
profiles="urn:mpeg:dash:profile:isoff-live:2011"
availabilityStartTime=""
timeShiftBufferDepth="PT7200S">
<UTCTiming
schemeIdUri="urn:mpeg:dash:utc:direct:2012"
value=""/>
<Period id="0" start="PT0S" >
<AdaptationSet id="0" mimeType="video/webm" codecs="vp9" bitstreamSwitching="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1">
<ContentComponent id="1" type="video"/>
<SegmentTemplate timescale="1000" duration="5000" media="dash_live_video_$RepresentationID$_$Number$.chk" startNumber="1" initialization="dash_live_video_$RepresentationID$.hdr"/>
<Representation id="360" bandwidth="1000000" width="640" height="360" codecs="vp9" mimeType="video/webm" startsWithSAP="1"></Representation>
</AdaptationSet>
<AdaptationSet id="1" mimeType="audio/webm" codecs="vorbis" bitstreamSwitching="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1">
<ContentComponent id="1" type="audio"/>
<SegmentTemplate timescale="1000" duration="5000" media="dash_live_audio_$RepresentationID$_$Number$.chk" startNumber="1" initialization="dash_live_audio_$RepresentationID$.hdr"/>
<Representation id="171" bandwidth="128000" audioSamplingRate="32000" codecs="vorbis" mimeType="audio/webm" startsWithSAP="1"></Representation>
</AdaptationSet>
</Period>
</MPD>