From 61df6e9d75e99f85192abe6bfd9c0da93dbbf971 Mon Sep 17 00:00:00 2001
From: Isaac Connor <iconnor@pseudo.connortechnology.com>
Date: Fri, 16 Sep 2016 15:54:13 -0400
Subject: [PATCH] Work on aac encoding

---
 src/zm_ffmpeg.cpp        |  52 ++++++----
 src/zm_ffmpeg.h          |   2 +
 src/zm_ffmpeg_camera.cpp |  70 +++++++------
 src/zm_packetqueue.h     |   6 +-
 src/zm_videostore.cpp    | 213 ++++++++++++++++++++++++++++++++++-----
 src/zm_videostore.h      |   5 +
 6 files changed, 268 insertions(+), 80 deletions(-)

diff --git a/src/zm_ffmpeg.cpp b/src/zm_ffmpeg.cpp
index 85f5ab059..adc0047bf 100644
--- a/src/zm_ffmpeg.cpp
+++ b/src/zm_ffmpeg.cpp
@@ -412,13 +412,13 @@ int hacked_up_context2_for_older_ffmpeg(AVFormatContext **avctx, AVOutputFormat
 static void zm_log_fps(double d, const char *postfix) {
   uint64_t v = lrintf(d * 100);
   if (!v) {
-    Debug(3, "%1.4f %s", d, postfix);
+    Debug(1, "%1.4f %s", d, postfix);
   } else if (v % 100) {
-    Debug(3, "%3.2f %s", d, postfix);
+    Debug(1, "%3.2f %s", d, postfix);
   } else if (v % (100 * 1000)) {
-    Debug(3, "%1.0f %s", d, postfix);
+    Debug(1, "%1.0f %s", d, postfix);
   } else
-    Debug(3, "%1.0fk %s", d / 1000, postfix);
+    Debug(1, "%1.0fk %s", d / 1000, postfix);
 }
 
 /* "user interface" functions */
@@ -429,17 +429,17 @@ void zm_dump_stream_format(AVFormatContext *ic, int i, int index, int is_output)
   AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL, 0);
 
   avcodec_string(buf, sizeof(buf), st->codec, is_output);
-  Debug(3, "    Stream #%d:%d", index, i);
+  Debug(1, "    Stream #%d:%d", index, i);
 
   /* the pid is an important information, so we display it */
   /* XXX: add a generic system */
   if (flags & AVFMT_SHOW_IDS)
-    Debug(3, "[0x%x]", st->id);
+    Debug(1, "[0x%x]", st->id);
   if (lang)
-    Debug(3, "(%s)", lang->value);
+    Debug(1, "(%s)", lang->value);
   av_log(NULL, AV_LOG_DEBUG, ", %d, %d/%d", st->codec_info_nb_frames,
       st->time_base.num, st->time_base.den);
-  Debug(3, ": %s", buf);
+  Debug(1, ": %s", buf);
 
   if (st->sample_aspect_ratio.num && // default
       av_cmp_q(st->sample_aspect_ratio, st->codec->sample_aspect_ratio)) {
@@ -448,7 +448,7 @@ void zm_dump_stream_format(AVFormatContext *ic, int i, int index, int is_output)
         st->codec->width  * (int64_t)st->sample_aspect_ratio.num,
         st->codec->height * (int64_t)st->sample_aspect_ratio.den,
         1024 * 1024);
-    Debug(3, ", SAR %d:%d DAR %d:%d",
+    Debug(1, ", SAR %d:%d DAR %d:%d",
         st->sample_aspect_ratio.num, st->sample_aspect_ratio.den,
         display_aspect_ratio.num, display_aspect_ratio.den);
   }
@@ -470,28 +470,40 @@ void zm_dump_stream_format(AVFormatContext *ic, int i, int index, int is_output)
   }
 
   if (st->disposition & AV_DISPOSITION_DEFAULT)
-    Debug(3, " (default)");
+    Debug(1, " (default)");
   if (st->disposition & AV_DISPOSITION_DUB)
-    Debug(3, " (dub)");
+    Debug(1, " (dub)");
   if (st->disposition & AV_DISPOSITION_ORIGINAL)
-    Debug(3, " (original)");
+    Debug(1, " (original)");
   if (st->disposition & AV_DISPOSITION_COMMENT)
-    Debug(3, " (comment)");
+    Debug(1, " (comment)");
   if (st->disposition & AV_DISPOSITION_LYRICS)
-    Debug(3, " (lyrics)");
+    Debug(1, " (lyrics)");
   if (st->disposition & AV_DISPOSITION_KARAOKE)
-    Debug(3, " (karaoke)");
+    Debug(1, " (karaoke)");
   if (st->disposition & AV_DISPOSITION_FORCED)
-    Debug(3, " (forced)");
+    Debug(1, " (forced)");
   if (st->disposition & AV_DISPOSITION_HEARING_IMPAIRED)
-    Debug(3, " (hearing impaired)");
+    Debug(1, " (hearing impaired)");
   if (st->disposition & AV_DISPOSITION_VISUAL_IMPAIRED)
-    Debug(3, " (visual impaired)");
+    Debug(1, " (visual impaired)");
   if (st->disposition & AV_DISPOSITION_CLEAN_EFFECTS)
-    Debug(3, " (clean effects)");
-  Debug(3, "\n");
+    Debug(1, " (clean effects)");
+  Debug(1, "\n");
 
   //dump_metadata(NULL, st->metadata, "    ");
 
   //dump_sidedata(NULL, st, "    ");
 }
+
+// Returns 1 if codec lists sample_fmt among its supported formats, else 0.
+int check_sample_fmt(AVCodec *codec, enum AVSampleFormat sample_fmt) {
+  // sample_fmts is NULL when the codec does not declare its formats.
+  const enum AVSampleFormat *p = codec->sample_fmts;
+  for ( ; p && *p != AV_SAMPLE_FMT_NONE; p++ ) {
+    if (*p == sample_fmt)
+      return 1;
+    Debug(2, "Not %s", av_get_sample_fmt_name( *p ) );
+  }
+  return 0;
+}
diff --git a/src/zm_ffmpeg.h b/src/zm_ffmpeg.h
index 372a84907..ecfd6ba2c 100644
--- a/src/zm_ffmpeg.h
+++ b/src/zm_ffmpeg.h
@@ -334,4 +334,6 @@ void zm_dump_stream_format(AVFormatContext *ic, int i, int index, int is_output)
       #define zm_avcodec_decode_video(context, rawFrame, frameComplete, packet ) avcodec_decode_video( context, rawFrame, frameComplete, packet->data, packet->size)
 #endif
 
+int check_sample_fmt(AVCodec *codec, enum AVSampleFormat sample_fmt);
+
 #endif // ZM_FFMPEG_H
diff --git a/src/zm_ffmpeg_camera.cpp b/src/zm_ffmpeg_camera.cpp
index bc3a666a2..dfaa0ce07 100644
--- a/src/zm_ffmpeg_camera.cpp
+++ b/src/zm_ffmpeg_camera.cpp
@@ -290,7 +290,6 @@ int FfmpegCamera::OpenFfmpeg() {
   Debug ( 1, "Opened input" );
 
   Info( "Stream open %s", mPath.c_str() );
-  startTime=av_gettime();//FIXME here or after find_Stream_info
 
   //FIXME can speed up initial analysis but need sensible parameters...
   //mFormatContext->probesize = 32;
@@ -356,20 +355,6 @@ int FfmpegCamera::OpenFfmpeg() {
   } else {
     Debug(1, "Video Found decoder");
     zm_dump_stream_format(mFormatContext, mVideoStreamId, 0, 0);
-  }
-
-  if (mAudioStreamId >= 0) {
-    mAudioCodecContext = mFormatContext->streams[mAudioStreamId]->codec;
-    if ((mAudioCodec = avcodec_find_decoder(mAudioCodecContext->codec_id)) == NULL) {
-      Debug(1, "Can't find codec for audio stream from %s", mPath.c_str());
-    } else {
-      Debug(1, "Audio Found decoder");
-      zm_dump_stream_format(mFormatContext, mAudioStreamId, 0, 0);
-    }
-  }
-
-  //
-
   // Open the codec
 #if !LIBAVFORMAT_VERSION_CHECK(53, 8, 0, 8, 0)
   Debug ( 1, "Calling avcodec_open" );
@@ -379,6 +364,29 @@ int FfmpegCamera::OpenFfmpeg() {
   if (avcodec_open2(mVideoCodecContext, mVideoCodec, 0) < 0)
 #endif
     Fatal( "Unable to open codec for video stream from %s", mPath.c_str() );
+  }
+
+  if (mAudioStreamId >= 0) {
+    mAudioCodecContext = mFormatContext->streams[mAudioStreamId]->codec;
+    if ((mAudioCodec = avcodec_find_decoder(mAudioCodecContext->codec_id)) == NULL) {
+      Debug(1, "Can't find codec for audio stream from %s", mPath.c_str());
+    } else {
+      Debug(1, "Audio Found decoder");
+      zm_dump_stream_format(mFormatContext, mAudioStreamId, 0, 0);
+      // Open the audio decoder (API name depends on the libavformat version)
+#if !LIBAVFORMAT_VERSION_CHECK(53, 8, 0, 8, 0)
+      Debug ( 1, "Calling avcodec_open" );
+      if (avcodec_open(mAudioCodecContext, mAudioCodec) < 0)
+#else
+      Debug ( 1, "Calling avcodec_open2" );
+      if (avcodec_open2(mAudioCodecContext, mAudioCodec, 0) < 0)
+#endif
+        Fatal( "Unable to open codec for audio stream from %s", mPath.c_str() );
+    }
+  }
+
+  //
+
 
   Debug ( 1, "Opened codec" );
 
@@ -556,7 +564,7 @@ int FfmpegCamera::CaptureAndRecord( Image &image, bool recording, char* event_fi
   while ( !frameComplete ) {
     // We are now allocating dynamically because we need to queue these and may go out of scope.
     AVPacket *packet = (AVPacket *)av_malloc(sizeof(AVPacket));
-    av_init_packet( packet);
+    av_init_packet( packet );
 Debug(5, "Before av_read_frame");
     ret = av_read_frame( mFormatContext, packet );
 Debug(5, "After av_read_frame (%d)", ret );
@@ -593,6 +601,7 @@ Debug(5, "After av_read_frame (%d)", ret );
 
       if ( ! videoStore ) {
         //Instantiate the video storage module
+  startTime=av_gettime();//FIXME here or after find_Stream_info
 
         if (record_audio) {
           if (mAudioStreamId == -1) {
@@ -627,10 +636,11 @@ Debug(5, "After av_read_frame (%d)", ret );
         while ( ( queued_packet = packetqueue.popPacket() ) ) {
           packet_count += 1;
           //Write the packet to our video store
+          Debug(2, "Writing queued packet stream: %d  KEY %d", queued_packet->stream_index, queued_packet->flags & AV_PKT_FLAG_KEY );
           if ( queued_packet->stream_index == mVideoStreamId ) {
             ret = videoStore->writeVideoFramePacket( queued_packet, mFormatContext->streams[mVideoStreamId]);
           } else if ( queued_packet->stream_index == mAudioStreamId ) {
-            //ret = videoStore->writeAudioFramePacket(&queued_packet, mFormatContext->streams[mAudioStreamId]);
+            ret = videoStore->writeAudioFramePacket( queued_packet, mFormatContext->streams[mAudioStreamId]);
           } else {
             Warning("Unknown stream id in queued packet (%d)", queued_packet->stream_index );
             ret = -1;
@@ -643,12 +653,6 @@ Debug(5, "After av_read_frame (%d)", ret );
         Debug(2, "Wrote %d queued packets", packet_count );
       } // end if ! wasRecording
 
-      //Write the packet to our video store
-      int ret = videoStore->writeVideoFramePacket( packet, mFormatContext->streams[mVideoStreamId] );
-      if ( ret < 0 ) { //Less than zero and we skipped a frame
-        zm_av_unref_packet( packet );
-        return 0;
-      }
     } else {
       // Not recording
       if ( videoStore ) {
@@ -656,15 +660,23 @@ Debug(5, "After av_read_frame (%d)", ret );
         delete videoStore;
         videoStore = NULL;
       }
+
+      //Buffer video packets, since we are not recording
+      if ( (packet->stream_index == mVideoStreamId) && ( packet->flags & AV_PKT_FLAG_KEY ) ) {
+        packetqueue.clearQueue();
+      }
+      packetqueue.queuePacket(packet);
     } // end if
 
-    //Buffer video packets
-    if ( packet->flags & AV_PKT_FLAG_KEY ) {
-      packetqueue.clearQueue();
-    }
-    packetqueue.queuePacket(packet);
-
     if ( packet->stream_index == mVideoStreamId ) {
+       if ( videoStore ) {
+        //Write the packet to our video store
+        int ret = videoStore->writeVideoFramePacket( packet, mFormatContext->streams[mVideoStreamId] );
+        if ( ret < 0 ) { //Less than zero and we skipped a frame
+          zm_av_unref_packet( packet );
+          return 0;
+        }
+      }
       ret = zm_avcodec_decode_video( mVideoCodecContext, mRawFrame, &frameComplete, packet );
       if ( ret < 0 ) {
         av_strerror( ret, errbuf, AV_ERROR_MAX_STRING_SIZE );
diff --git a/src/zm_packetqueue.h b/src/zm_packetqueue.h
index 18f3410c2..f79502996 100644
--- a/src/zm_packetqueue.h
+++ b/src/zm_packetqueue.h
@@ -20,9 +20,9 @@
 #ifndef ZM_PACKETQUEUE_H
 #define ZM_PACKETQUEUE_H
 
-#include <boost/interprocess/managed_shared_memory.hpp>
-#include <boost/interprocess/containers/map.hpp>
-#include <boost/interprocess/allocators/allocator.hpp>
+//#include <boost/interprocess/managed_shared_memory.hpp>
+//#include <boost/interprocess/containers/map.hpp>
+//#include <boost/interprocess/allocators/allocator.hpp>
 #include <queue>
 
 extern "C" {
diff --git a/src/zm_videostore.cpp b/src/zm_videostore.cpp
index 6146dec1b..311318a90 100644
--- a/src/zm_videostore.cpp
+++ b/src/zm_videostore.cpp
@@ -49,8 +49,9 @@ VideoStore::VideoStore(const char *filename_in, const char *format_in,
   Info("Opening video storage stream %s format: %s\n", filename, format);
 
   int ret;
-  //Init everything we need
-  av_register_all();
+  static char error_buffer[255];
+  //Init everything we need, shouldn't have to do this, ffmpeg_camera or something else will call it.
+  //av_register_all();
 
   ret = avformat_alloc_output_context2(&oc, NULL, NULL, filename);
   if ( ret < 0 ) {
@@ -153,12 +154,68 @@ VideoStore::VideoStore(const char *filename_in, const char *format_in,
       Warning( "Unsupported Orientation(%d)", orientation );
     }
   }
+audio_output_codec = NULL;
 
   if (input_audio_stream) {
 
     if ( input_audio_stream->codec->codec_id != AV_CODEC_ID_AAC ) {
       Warning("Can't transcode to AAC at this time");
       audio_stream = NULL;
+
+      audio_output_codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
+      if ( audio_output_codec ) {
+        audio_stream = avformat_new_stream(oc, audio_output_codec );
+        // avformat_new_stream can fail, so do not dereference it blindly.
+        audio_output_context = audio_stream ? audio_stream->codec : NULL;
+
+        //audio_output_context = avcodec_alloc_context3( audio_output_codec );
+        if ( audio_output_context ) {
+
+          Debug(2, "Have audio_output_context");
+          AVDictionary *opts = NULL;
+          av_dict_set(&opts, "strict", "experimental", 0);
+
+          /* copy the sample parameters from the input audio stream */
+          audio_output_context->bit_rate = input_audio_stream->codec->bit_rate;
+          audio_output_context->sample_rate = input_audio_stream->codec->sample_rate;
+          audio_output_context->channels = input_audio_stream->codec->channels;
+          audio_output_context->channel_layout = input_audio_stream->codec->channel_layout;
+          audio_output_context->sample_fmt = input_audio_stream->codec->sample_fmt;
+
+          /* check that the encoder supports the input's sample format */
+          if (!check_sample_fmt( audio_output_codec, audio_output_context->sample_fmt)) {
+            Error( "Encoder does not support sample format %s, setting to FLTP",
+                av_get_sample_fmt_name( audio_output_context->sample_fmt));
+            audio_output_context->sample_fmt = AV_SAMPLE_FMT_FLTP;
+          }
+
+          Debug(1, "Audio output bit_rate (%lld) sample_rate(%d) channels(%d) fmt(%d) layout(%llu)",
+              static_cast<long long>(audio_output_context->bit_rate),
+              audio_output_context->sample_rate,
+              audio_output_context->channels,
+              audio_output_context->sample_fmt,
+              static_cast<unsigned long long>(audio_output_context->channel_layout)
+              );
+
+          /** Set the sample rate for the container. */
+          audio_stream->time_base.den = input_audio_stream->codec->sample_rate;
+          audio_stream->time_base.num = 1;
+
+          ret = avcodec_open2(audio_output_context, audio_output_codec, &opts );
+          if ( ret < 0 ) {
+            av_strerror(ret, error_buffer, sizeof(error_buffer));
+            Fatal( "could not open codec (%d) (%s)\n", ret, error_buffer );
+          } else {
+            Debug(2, "Success opening AAC codec");
+          }
+          av_dict_free(&opts);
+        } else {
+          Error( "could not allocate codec context for AAC\n");
+        }
+      } else {
+        Error( "could not find codec for AAC\n");
+      }
+
     } else {
       Debug(3, "Got something other than AAC (%d)", input_audio_stream->codec->codec_id );
 
@@ -205,7 +262,9 @@ VideoStore::VideoStore(const char *filename_in, const char *format_in,
   ret = avformat_write_header(oc, NULL);
   if (ret < 0) {
     zm_dump_stream_format( oc, 0, 0, 1 );
-    Fatal("Error occurred when writing output file header to %s: %s\n",
+    if ( audio_stream ) 
+    zm_dump_stream_format( oc, 1, 0, 1 );
+    Error("Error occurred when writing output file header to %s: %s\n",
         filename,
         av_make_error_string(ret).c_str());
   }
@@ -271,13 +330,11 @@ void VideoStore::dumpPacket( AVPacket *pkt ){
 
 int VideoStore::writeVideoFramePacket(AVPacket *ipkt, AVStream *input_video_stream){
 
-  Debug(2, "writeVideoFrame");
+  Debug(4, "writeVideoFrame");
   Debug(3, "before ost_tbcket starttime %d, timebase%d", startTime, video_stream->time_base );
   //zm_dump_stream_format( oc, ipkt->stream_index, 0, 1 );
-  Debug(2, "writeVideoFrame %x", video_stream);
   int64_t ost_tb_start_time = av_rescale_q(startTime, AV_TIME_BASE_Q, video_stream->time_base);
-  Debug(3, "before ost_tbcket starttime %d, ost_tbcket %d", startTime, ost_tb_start_time );
-  Debug(2, "writeVideoFrame");
+  Debug(2, "before ost_tbcket starttime %d, ost_tbcket %d", startTime, ost_tb_start_time );
 
   AVPacket opkt;
   AVPicture pict;
@@ -314,17 +371,21 @@ int VideoStore::writeVideoFramePacket(AVPacket *ipkt, AVStream *input_video_stre
   /*opkt.flags |= AV_PKT_FLAG_KEY;*/
 
   if (video_stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && (output_format->flags & AVFMT_RAWPICTURE)) {
+Debug(3, "video and RAWPICTURE");
     /* store AVPicture in AVPacket, as expected by the output format */
     avpicture_fill(&pict, opkt.data, video_stream->codec->pix_fmt, video_stream->codec->width, video_stream->codec->height);
     opkt.data = (uint8_t *)&pict;
     opkt.size = sizeof(AVPicture);
     opkt.flags |= AV_PKT_FLAG_KEY;
+   } else {
+Debug(3, "Not video and RAWPICTURE");
   }
 
   //memcpy(&safepkt, &opkt, sizeof(AVPacket));
 
   if ((opkt.data == NULL)||(opkt.size < 1)) {
     Warning("%s:%d: Mangled AVPacket: discarding frame", __FILE__, __LINE__ ); 
+    dumpPacket( ipkt);
     dumpPacket(&opkt);
 
   } else if ((prevDts > 0) && (prevDts > opkt.dts)) {
@@ -350,7 +411,7 @@ int VideoStore::writeVideoFramePacket(AVPacket *ipkt, AVStream *input_video_stre
 
 }
 
-int VideoStore::writeAudioFramePacket(AVPacket *ipkt, AVStream *input_video_stream){
+int VideoStore::writeAudioFramePacket(AVPacket *ipkt, AVStream *input_audio_stream){
   Debug(2, "writeAudioFrame");
 
   if(!audio_stream) {
@@ -361,6 +422,7 @@ int VideoStore::writeAudioFramePacket(AVPacket *ipkt, AVStream *input_video_stre
     return -1;*/
   //zm_dump_stream_format( oc, ipkt->stream_index, 0, 1 );
 
+  int ret;
   // What is this doing?  Getting the time of the start of this video chunk? Does that actually make sense?
   int64_t ost_tb_start_time = av_rescale_q(startTime, AV_TIME_BASE_Q, audio_stream->time_base);
 
@@ -371,51 +433,146 @@ int VideoStore::writeAudioFramePacket(AVPacket *ipkt, AVStream *input_video_stre
 
   //Scale the PTS of the outgoing packet to be the correct time base
   if (ipkt->pts != AV_NOPTS_VALUE) {
-    Debug(3, "Rescaling output pts");
-    opkt.pts = av_rescale_q(ipkt->pts-startPts, input_video_stream->time_base, audio_stream->time_base) - ost_tb_start_time;
+    Debug(2, "Rescaling output pts");
+    opkt.pts = av_rescale_q(ipkt->pts-startPts, input_audio_stream->time_base, audio_stream->time_base) - ost_tb_start_time;
   } else {
-    Debug(3, "Setting output pts to AV_NOPTS_VALUE");
+    Debug(2, "Setting output pts to AV_NOPTS_VALUE");
     opkt.pts = AV_NOPTS_VALUE;
   }
 
   //Scale the DTS of the outgoing packet to be the correct time base
   if(ipkt->dts == AV_NOPTS_VALUE) {
-    Debug(4, "ipkt->dts == AV_NOPTS_VALUE %d to %d",  AV_NOPTS_VALUE, opkt.dts );
-    opkt.dts = av_rescale_q(input_video_stream->cur_dts-startDts, AV_TIME_BASE_Q, audio_stream->time_base);
-    Debug(4, "ipkt->dts == AV_NOPTS_VALUE %d to %d",  AV_NOPTS_VALUE, opkt.dts );
+    Debug(2, "ipkt->dts == AV_NOPTS_VALUE %d to %d",  AV_NOPTS_VALUE, opkt.dts );
+    opkt.dts = av_rescale_q(input_audio_stream->cur_dts-startDts, AV_TIME_BASE_Q, audio_stream->time_base);
+    Debug(2, "ipkt->dts == AV_NOPTS_VALUE %d to %d",  AV_NOPTS_VALUE, opkt.dts );
   } else {
-    Debug(4, "ipkt->dts != AV_NOPTS_VALUE %d to %d",  AV_NOPTS_VALUE, opkt.dts );
-    opkt.dts = av_rescale_q(ipkt->dts-startDts, input_video_stream->time_base, audio_stream->time_base);
-    Debug(4, "ipkt->dts != AV_NOPTS_VALUE %d to %d",  AV_NOPTS_VALUE, opkt.dts );
+    Debug(2, "ipkt->dts != AV_NOPTS_VALUE %d to %d",  AV_NOPTS_VALUE, opkt.dts );
+    opkt.dts = av_rescale_q(ipkt->dts-startDts, input_audio_stream->time_base, audio_stream->time_base);
+    Debug(2, "ipkt->dts != AV_NOPTS_VALUE %d to %d",  AV_NOPTS_VALUE, opkt.dts );
   }
+  Debug(2, "Not sure what ost_tb_start_time is (%d) - (%d)", opkt.dts, ost_tb_start_time );
   opkt.dts -= ost_tb_start_time;
 
   // Seems like it would be really weird for the codec type to NOT be audiu
   if (audio_stream->codec->codec_type == AVMEDIA_TYPE_AUDIO && ipkt->dts != AV_NOPTS_VALUE) {
-    int duration = av_get_audio_frame_duration(input_video_stream->codec, ipkt->size);
-    Debug( 4, "code is audio, dts != AV_NOPTS_VALUE got duration(%d)", duration );
+    int duration = av_get_audio_frame_duration(input_audio_stream->codec, ipkt->size);
+    Debug( 1, "code is audio, dts != AV_NOPTS_VALUE got duration(%d)", duration );
     if ( ! duration ) {
-      duration = input_video_stream->codec->frame_size;
+      duration = input_audio_stream->codec->frame_size;
       Warning( "got no duration from av_get_audio_frame_duration.  Using frame size(%d)", duration );
     }
 
     //FIXME where to get filter_in_rescale_delta_last
     //FIXME av_rescale_delta doesn't exist in ubuntu vivid libavtools
-    opkt.dts = opkt.pts = av_rescale_delta(input_video_stream->time_base, ipkt->dts,
-        (AVRational){1, input_video_stream->codec->sample_rate}, duration, &filter_in_rescale_delta_last,
+    opkt.dts = opkt.pts = av_rescale_delta(input_audio_stream->time_base, ipkt->dts,
+        (AVRational){1, input_audio_stream->codec->sample_rate}, duration, &filter_in_rescale_delta_last,
         audio_stream->time_base) - ost_tb_start_time;
-    Debug(4, "rescaled dts is: (%d)", opkt.dts );
+    Debug(2, "rescaled dts is: (%d)", opkt.dts );
   }
 
-  opkt.duration = av_rescale_q(ipkt->duration, input_video_stream->time_base, audio_stream->time_base);
+  opkt.duration = av_rescale_q(ipkt->duration, input_audio_stream->time_base, audio_stream->time_base);
   opkt.pos=-1;
   opkt.flags = ipkt->flags;
-
-  opkt.data = ipkt->data;
-  opkt.size = ipkt->size;
   opkt.stream_index = ipkt->stream_index;
 
-  int ret;
+  if ( audio_output_codec ) {
+    // The input is not AAC: decode the packet with the input stream's
+    // decoder, then re-encode the decoded frame with the AAC encoder.
+    // Both frames are owned here and are released on every exit path.
+    AVFrame *input_frame = NULL;
+    AVFrame *output_frame = NULL;
+
+    // Start from an empty payload so the encoder allocates the output
+    // buffer itself rather than treating stale values as a caller buffer.
+    opkt.data = NULL;
+    opkt.size = 0;
+
+    /** Create a new frame to store the decoded audio samples. */
+    if (!(input_frame = av_frame_alloc())) {
+      Error("Could not allocate input frame");
+      zm_av_unref_packet(&opkt);
+      return 0;
+    }
+    Debug(2, "Got input frame alloc");
+
+    /** Decode the packet using the input audio stream's decoder. */
+    if ((ret = avcodec_decode_audio4(input_audio_stream->codec, input_frame,
+                                     &data_present, ipkt)) < 0) {
+      Error( "Could not decode frame (error '%s')\n",
+          av_make_error_string(ret).c_str());
+      dumpPacket( ipkt);
+      av_frame_free(&input_frame);
+      zm_av_unref_packet(&opkt);
+      return 0;
+    }
+    if ( ! data_present ) {
+      // The decoder consumed the packet without producing a frame, so
+      // there is nothing to encode for this packet.
+      Debug(2, "Decoder produced no audio frame for this packet");
+      av_frame_free(&input_frame);
+      zm_av_unref_packet(&opkt);
+      return 0;
+    }
+
+    /** Create a new frame to store the audio samples. */
+    if (!(output_frame = av_frame_alloc())) {
+      Error("Could not allocate output frame");
+      av_frame_free(&input_frame);
+      zm_av_unref_packet(&opkt);
+      return 0;
+    }
+    Debug(2, "Got output frame alloc");
+
+    // Set the frame's size and format from the encoder context;
+    // av_frame_get_buffer needs these to allocate the sample storage.
+    output_frame->nb_samples     = audio_stream->codec->frame_size;
+    output_frame->channel_layout = audio_output_context->channel_layout;
+    output_frame->channels       = audio_output_context->channels;
+    output_frame->format         = audio_output_context->sample_fmt;
+    output_frame->sample_rate    = audio_output_context->sample_rate;
+
+    /** Allocate sample storage for the number of samples set above. */
+    Debug(2, "getting buffer");
+    if (( ret = av_frame_get_buffer( output_frame, 0)) < 0) {
+      Error( "Couldnt allocate output frame buffer samples (error '%s')",
+          av_make_error_string(ret).c_str() );
+      Error("Frame: samples(%d) layout (%llu) format(%d) rate(%d)",
+          output_frame->nb_samples,
+          static_cast<unsigned long long>(output_frame->channel_layout),
+          output_frame->format, output_frame->sample_rate );
+      av_frame_free(&input_frame);
+      av_frame_free(&output_frame);
+      zm_av_unref_packet(&opkt);
+      return 0;
+    }
+    output_frame->pts = opkt.pts;
+
+    /**
+     * Encode the decoded frame into opkt using the AAC encoder.
+     * NOTE(review): input_frame is what gets encoded; output_frame is
+     * prepared above but nothing converts samples into it yet - confirm
+     * whether a resampling step is still to be added.
+     */
+    ret = avcodec_encode_audio2( audio_output_context, &opkt,
+                                 input_frame, &data_present );
+    av_frame_free(&input_frame);
+    av_frame_free(&output_frame);
+    if ( ret < 0 ) {
+      Error( "Could not encode frame (error '%s')",
+          av_make_error_string(ret).c_str());
+      zm_av_unref_packet(&opkt);
+      return 0;
+    }
+    if ( ! data_present ) {
+      // The encoder buffered the samples without emitting a packet yet,
+      // so there is nothing to write for this call.
+      zm_av_unref_packet(&opkt);
+      return 0;
+    }
+  } else {
+    opkt.data = ipkt->data;
+    opkt.size = ipkt->size;
+  }
   ret = av_interleaved_write_frame(oc, &opkt);
   if(ret!=0){
     Fatal("Error encoding audio frame packet: %s\n", av_make_error_string(ret).c_str());
diff --git a/src/zm_videostore.h b/src/zm_videostore.h
index 0ef51cd1c..8478285d7 100644
--- a/src/zm_videostore.h
+++ b/src/zm_videostore.h
@@ -14,6 +14,11 @@ private:
 	AVFormatContext *oc;
 	AVStream *video_stream;
 	AVStream *audio_stream;
+
+  // The following are used when encoding the audio stream to AAC
+  AVCodec *audio_output_codec;
+  AVCodecContext *audio_output_context;
+  int data_present;
     
 	const char *filename;
 	const char *format;