minor VMD system update; still not perfect, but should not crash either

Originally committed as revision 2887 to svn://svn.ffmpeg.org/ffmpeg/trunk
2004-03-14 04:08:11 +00:00 · 2004-03-14 04:08:11 +00:00 · 23fe14bb20
commit 23fe14bb20
parent 3a278992bd
2 changed files with 144 additions and 45 deletions
--- a/libavcodec/vmdav.c
+++ b/libavcodec/vmdav.c
@ -22,6 +22,8 @@
 * @file vmdvideo.c
 * Sierra VMD audio & video decoders
 * by Vladimir "VAG" Gneushev (vagsoft at mail.ru)
+ * for more information on the Sierra VMD format, visit:
+ *   http://www.pcisys.net/~melanson/codecs/
 *
 * The video decoder outputs PAL8 colorspace data. The decoder expects
 * a 0x330-byte VMD file header to be transmitted via extradata during
@ -30,7 +32,7 @@
 * information record from the VMD file.
 *
 * The audio decoder, like the video decoder, expects each encoded data
- * chunk to be prepended with the approriate 16-byte frame information
+ * chunk to be prepended with the appropriate 16-byte frame information
 * record from the VMD file. It does not require the 0x330-byte VMD file
 * header, but it does need the audio setup parameters passed in through
 * normal libavcodec API means.
@ -51,12 +53,6 @@
 #define VMD_HEADER_SIZE 0x330
 #define PALETTE_COUNT 256

-#define LE_16(x)  ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0])
-#define LE_32(x)  ((((uint8_t*)(x))[3] << 24) | \
-                   (((uint8_t*)(x))[2] << 16) | \
-                   (((uint8_t*)(x))[1] << 8) | \
-                    ((uint8_t*)(x))[0])
-
 /*
 * Video Decoder
 */
@ -275,7 +271,7 @@ static void vmd_decode(VmdVideoContext *s)
                    if (len & 0x80) {
                        len = (len & 0x7F) + 1;
                        if (*pb++ == 0xFF)
-                            len = rle_unpack(pb, dp, len);
+                            len = rle_unpack(pb, &dp[ofs], len);
                        else
                            memcpy(&dp[ofs], pb, len);
                        pb += len;
@ -349,6 +345,9 @@ static int vmdvideo_decode_frame(AVCodecContext *avctx,
    s->buf = buf;
    s->size = buf_size;

+    if (buf_size < 16)
+        return buf_size;
+
    s->frame.reference = 1;
    if (avctx->get_buffer(avctx, &s->frame)) {
        printf ("  VMD Video: get_buffer() failed\n");
@ -408,8 +407,8 @@ static int vmdaudio_decode_init(AVCodecContext *avctx)
    s->bits = avctx->bits_per_sample;
    s->block_align = avctx->block_align;

-printf ("  %d channels, %d bits/sample, block align = %d\n",
-  s->channels, s->bits, s->block_align);
+printf ("  %d channels, %d bits/sample, block align = %d, sample rate = %d\n",
+  s->channels, s->bits, s->block_align, avctx->sample_rate);

    /* set up the steps8 and steps16 tables */
    for (i = 0; i < 8; i++) {
@ -460,10 +459,17 @@ static void vmdaudio_decode_audio(VmdAudioContext *s, unsigned char *data,

 }

-static void vmdaudio_loadsound(VmdAudioContext *s, unsigned char *data,
+static int vmdaudio_loadsound(VmdAudioContext *s, unsigned char *data,
    uint8_t *buf, int silence)
 {
+    int bytes_decoded = 0;
+    int i;
+
+if (silence)
+  printf (" silent block!\n");
    if (s->channels == 2) {
+
+        /* stereo handling */
        if ((s->block_align & 0x01) == 0) {
            if (silence)
                memset(data, 0, s->block_align * 2);
@ -472,11 +478,34 @@ static void vmdaudio_loadsound(VmdAudioContext *s, unsigned char *data,
        } else {
            if (silence)
                memset(data, 0, s->block_align * 2);
-//            else
-//                vmdaudio_decode_audio(s, data, buf, 1);
+            else
+                vmdaudio_decode_audio(s, data, buf, 1);
        }
    } else {
+
+        /* mono handling */
+        if (silence) {
+            if (s->bits == 16) {
+                memset(data, 0, s->block_align * 2);
+                bytes_decoded = s->block_align * 2;
+            } else {
+//                memset(data, 0x00, s->block_align);
+//                bytes_decoded = s->block_align;
+memset(data, 0x00, s->block_align * 2);
+bytes_decoded = s->block_align * 2;
+            }
+        } else {
+            if (s->bits == 16) {
+            } else {
+                /* copy the data but convert it to signed */
+                for (i = 0; i < s->block_align; i++)
+                    data[i * 2 + 1] = buf[i] + 0x80;
+                bytes_decoded = s->block_align * 2;
+            }
+        }
    }
+
+    return bytes_decoded;
 }

 static int vmdaudio_decode_frame(AVCodecContext *avctx,
@ -491,10 +520,16 @@ static int vmdaudio_decode_frame(AVCodecContext *avctx,
    unsigned char *p = buf + 16;
    unsigned char *p_end = buf + buf_size;

+printf ("    processing audio frame with %d bytes\n", buf_size);
+    if (buf_size < 16)
+        return buf_size;
+
+    *data_size = 0;
    if (buf[6] == 1) {
        /* the chunk contains audio */
-        vmdaudio_loadsound(s, output_samples, p, 0);
+        *data_size = vmdaudio_loadsound(s, output_samples, p, 0);
    } else if (buf[6] == 2) {
+printf ("  hey! audio case #2\n");
        /* the chunk contains audio and silence mixed together */
        sound_flags = LE_32(p);
        p += 4;
@ -503,22 +538,24 @@ static int vmdaudio_decode_frame(AVCodecContext *avctx,

        while (p < p_end) {
            if (sound_flags & 0x01)
-                /* audio */
-                vmdaudio_loadsound(s, output_samples, p, 1);
-            else
                /* silence */
-                vmdaudio_loadsound(s, output_samples, p, 0);
-            p += s->block_align;
+                *data_size += vmdaudio_loadsound(s, output_samples, p, 1);
+            else {
+                /* audio */
+                *data_size += vmdaudio_loadsound(s, output_samples, p, 0);
+                p += s->block_align;
+            }
            output_samples += (s->block_align * s->bits / 8);
            sound_flags >>= 1;
        }
    } else if (buf[6] == 3) {
+printf ("  hey! audio case #3\n");
        /* silent chunk */
-        vmdaudio_loadsound(s, output_samples, p, 1);
+        *data_size = vmdaudio_loadsound(s, output_samples, p, 1);
    }

-
-//    *datasize = ;
+printf ("      final sample count = %d, byte count = %d\n", (*data_size) / 2,
+  *data_size);
    return buf_size;
 }

--- a/libavformat/sierravmd.c
+++ b/libavformat/sierravmd.c
@ -21,16 +21,12 @@
 * @file sierravmd.c
 * Sierra VMD file demuxer
 * by Vladimir "VAG" Gneushev (vagsoft at mail.ru)
+ * for more information on the Sierra VMD file format, visit:
+ *   http://www.pcisys.net/~melanson/codecs/
 */

 #include "avformat.h"

-#define LE_16(x)  ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0])
-#define LE_32(x)  ((((uint8_t*)(x))[3] << 24) | \
-                   (((uint8_t*)(x))[2] << 16) | \
-                   (((uint8_t*)(x))[1] << 8) | \
-                    ((uint8_t*)(x))[0])
-
 #define VMD_HEADER_SIZE 0x0330
 #define BYTES_PER_FRAME_RECORD 16

@ -56,6 +52,11 @@ typedef struct VmdDemuxContext {
    vmd_frame_t *frame_table;
    unsigned int current_frame;

+    int sample_rate;
+    int64_t audio_sample_counter;
+    int audio_frame_divisor;
+    int audio_block_align;
+
    unsigned char vmd_header[VMD_HEADER_SIZE];
 } VmdDemuxContext;

@ -73,6 +74,32 @@ static int vmd_probe(AVProbeData *p)
    return AVPROBE_SCORE_MAX / 2;
 }

+/* This is a support function to determine the duration, in sample
+ * frames, of a particular audio chunk, taking into account silent
+ * encodings.
+ *
+ * audio_chunk points at the 16-byte frame record, which is followed by a
+ * 32-bit little-endian flags word and then the block payload. One flag
+ * bit is consumed per block, starting from bit 0: a set bit marks a block
+ * that occupies no payload bytes, a clear bit marks a block that occupies
+ * block_align payload bytes. Every block adds block_align to the result.
+ *
+ * Returns 0 when the chunk is too short to hold the frame record plus the
+ * flags word, or when block_align is not positive. */
+static int vmd_calculate_audio_duration(unsigned char *audio_chunk,
+    int audio_chunk_size, int block_align)
+{
+    int header_size = 16 + 4;
+    int bytes_left;
+    int total_samples = 0;
+    unsigned int sound_flags;
+
+    /* the flags word must be fully inside the chunk before it is read */
+    if (audio_chunk_size < header_size)
+        return 0;
+
+    /* a non-positive block size would keep the loop from ever finishing */
+    if (block_align <= 0)
+        return 0;
+
+    sound_flags = LE_32(audio_chunk + 16);
+
+    /* track the remaining payload as a byte count so that no pointer is
+     * ever moved beyond the end of the chunk */
+    bytes_left = audio_chunk_size - header_size;
+    while (bytes_left > 0) {
+        total_samples += block_align;
+        if ((sound_flags & 0x01) == 0)
+            bytes_left -= block_align;
+        sound_flags >>= 1;
+    }
+
+    return total_samples;
+}
+
 static int vmd_read_header(AVFormatContext *s,
                           AVFormatParameters *ap)
 {
@ -85,15 +112,19 @@ static int vmd_read_header(AVFormatContext *s,
    unsigned char *current_frame_record;
    offset_t current_offset;
    int i;
-    int sample_rate;
    unsigned int total_frames;
-int video_frame_count = 0;
+    int64_t video_pts_inc;
+    int64_t current_video_pts = 0;

    /* fetch the main header, including the 2 header length bytes */
    url_fseek(pb, 0, SEEK_SET);
    if (get_buffer(pb, vmd->vmd_header, VMD_HEADER_SIZE) != VMD_HEADER_SIZE)
        return -EIO;

+    vmd->audio_sample_counter = 0;
+    vmd->audio_frame_divisor = 1;
+    vmd->audio_block_align = 1;
+
    /* start up the decoders */
    st = av_new_stream(s, 0);
    if (!st)
@ -109,8 +140,8 @@ int video_frame_count = 0;
    memcpy(st->codec.extradata, vmd->vmd_header, VMD_HEADER_SIZE);

    /* if sample rate is 0, assume no audio */
-    sample_rate = LE_16(&vmd->vmd_header[804]);
-    if (sample_rate) {
+    vmd->sample_rate = LE_16(&vmd->vmd_header[804]);
+    if (vmd->sample_rate) {
        st = av_new_stream(s, 0);
        if (!st)
            return AVERROR_NOMEM;
@ -119,15 +150,28 @@ int video_frame_count = 0;
        st->codec.codec_id = CODEC_ID_VMDAUDIO;
        st->codec.codec_tag = 0;  /* no codec tag */
        st->codec.channels = (vmd->vmd_header[811] & 0x80) ? 2 : 1;
-        st->codec.sample_rate = sample_rate;
-        st->codec.bit_rate = st->codec.sample_rate * 
-            st->codec.bits_per_sample * st->codec.channels;
-        st->codec.block_align = LE_16(&vmd->vmd_header[806]);
+        st->codec.sample_rate = vmd->sample_rate;
+        st->codec.block_align = vmd->audio_block_align = 
+            LE_16(&vmd->vmd_header[806]);
        if (st->codec.block_align & 0x8000) {
            st->codec.bits_per_sample = 16;
            st->codec.block_align = -(st->codec.block_align - 0x10000);
        } else
-            st->codec.bits_per_sample = 8;
+            st->codec.bits_per_sample = 16;
+//            st->codec.bits_per_sample = 8;
+        st->codec.bit_rate = st->codec.sample_rate * 
+            st->codec.bits_per_sample * st->codec.channels;
+
+        /* for calculating pts */
+        vmd->audio_frame_divisor = st->codec.bits_per_sample / 8 / 
+            st->codec.channels;
+
+        video_pts_inc = 90000;
+        video_pts_inc *= st->codec.block_align;
+        video_pts_inc /= st->codec.sample_rate;
+    } else {
+        /* if no audio, assume 10 frames/second */
+        video_pts_inc = 90000 / 10;
    }

    /* skip over the offset table and load the table of contents; don't 
@ -184,12 +228,14 @@ int video_frame_count = 0;
        memcpy(vmd->frame_table[i].frame_record, current_frame_record,
            BYTES_PER_FRAME_RECORD);

-if (current_frame_record[0] == 0x02) {
-  /* assume 15 fps for now */
-  vmd->frame_table[i].pts = video_frame_count++;
-  vmd->frame_table[i].pts *= 90000;
-  vmd->frame_table[i].pts /= 15;
-}
+        /* figure out the pts for this frame */
+        if (current_frame_record[0] == 0x02) {
+            vmd->frame_table[i].pts = current_video_pts;
+            current_video_pts += video_pts_inc;
+        } else if (current_frame_record[0] == 0x01) {
+            /* figure out the pts during the dispatch phase */
+            vmd->frame_table[i].pts = 0;
+        }

        current_frame_record += BYTES_PER_FRAME_RECORD;
        i++;
@ -227,10 +273,26 @@ static int vmd_read_packet(AVFormatContext *s,
    ret = get_buffer(pb, pkt->data + BYTES_PER_FRAME_RECORD, 
        frame->frame_size);

-    if (ret != frame->frame_size)
+    if (ret != frame->frame_size) {
+        av_free_packet(pkt);
        ret = -EIO;
+    }
    pkt->stream_index = frame->stream_index;
-    pkt->pts = frame->pts;
+    if (frame->frame_record[0] == 0x02)
+        pkt->pts = frame->pts;
+    else {
+        pkt->pts = vmd->audio_sample_counter;
+        pkt->pts *= 90000;
+        pkt->pts /= vmd->sample_rate;
+//        pkt->pts /= vmd->audio_frame_divisor;
+        vmd->audio_sample_counter += vmd_calculate_audio_duration(
+            pkt->data, pkt->size, vmd->audio_block_align);
+
+    }
+printf (" dispatching %s frame with %d bytes and pts %lld (%0.1f sec)\n",
+  (frame->frame_record[0] == 0x02) ? "video" : "audio",
+  frame->frame_size + BYTES_PER_FRAME_RECORD,
+  pkt->pts, (float)(pkt->pts / 90000.0));

    vmd->current_frame++;