From 3645ccea1b62360069c14e520408b1da74f1ae7f Mon Sep 17 00:00:00 2001
From: Isaac Connor <isaac@zoneminder.com>
Date: Tue, 4 May 2021 20:20:33 -0400
Subject: [PATCH] Make vaapi accelerated encoding work

---
 src/zm_packet.cpp     |  45 ++++++++------
 src/zm_packet.h       |   3 +-
 src/zm_videostore.cpp | 138 ++++++++++++++++++++++++++++++------------
 src/zm_videostore.h   | 134 ++++++++++++++++++++--------------------
 4 files changed, 197 insertions(+), 123 deletions(-)

diff --git a/src/zm_packet.cpp b/src/zm_packet.cpp
index 573ee3b8c..7aef2ddca 100644
--- a/src/zm_packet.cpp
+++ b/src/zm_packet.cpp
@@ -232,45 +232,50 @@ AVPacket *ZMPacket::set_packet(AVPacket *p) {
   return &packet;
 }
 
-AVFrame *ZMPacket::get_out_frame(const AVCodecContext *ctx) {
-  if ( !out_frame ) {
+AVFrame *ZMPacket::get_out_frame(int width, int height, AVPixelFormat format) {
+  if (!out_frame) {
     out_frame = zm_av_frame_alloc();
-    if ( !out_frame ) {
+    if (!out_frame) {
       Error("Unable to allocate a frame");
       return nullptr;
     }
 
 #if LIBAVUTIL_VERSION_CHECK(54, 6, 0, 6, 0)
+    
     codec_imgsize = av_image_get_buffer_size(
-        ctx->pix_fmt,
-        ctx->width,
-        ctx->height, 32);
+        format, width, height, 32);
+    Debug(1, "buffer size %u from %s %dx%d", codec_imgsize, av_get_pix_fmt_name(format), width, height);
     buffer = (uint8_t *)av_malloc(codec_imgsize);
-    av_image_fill_arrays(
+    int ret;
+    if ((ret=av_image_fill_arrays(
         out_frame->data,
         out_frame->linesize,
         buffer,
-        ctx->pix_fmt,
-        ctx->width,
-        ctx->height,
-        32);
+        format,
+        width,
+        height,
+        32))<0) {
+      Error("Failed to fill_arrays %s", av_make_error_string(ret).c_str());
+      av_frame_free(&out_frame);
+      return nullptr;
+    }
 #else
     codec_imgsize = avpicture_get_size(
-        ctx->pix_fmt,
-        ctx->width,
-        ctx->height);
+        format,
+        width,
+        height);  // unlike av_image_get_buffer_size above, no alignment argument here
     buffer = (uint8_t *)av_malloc(codec_imgsize);
     avpicture_fill(
         (AVPicture *)out_frame,
         buffer,
-        ctx->pix_fmt,
-        ctx->width,
-        ctx->height
+        format,
+        width,
+        height
         );
 #endif
-    out_frame->width = ctx->width;
-    out_frame->height = ctx->height;
-    out_frame->format = ctx->pix_fmt;
+    out_frame->width = width;
+    out_frame->height = height;
+    out_frame->format = format;
   }
   return out_frame;
 } // end AVFrame *ZMPacket::get_out_frame( AVCodecContext *ctx );
diff --git a/src/zm_packet.h b/src/zm_packet.h
index 0c146c5cd..a7deca325 100644
--- a/src/zm_packet.h
+++ b/src/zm_packet.h
@@ -74,7 +74,8 @@ class ZMPacket {
     ZMPacket();
     ~ZMPacket();
 
-    AVFrame *get_out_frame(const AVCodecContext *ctx);
+    // Returns out_frame, allocating it for width x height in format on first use; nullptr on failure.
+    AVFrame *get_out_frame(int width, int height, AVPixelFormat format);
     int get_codec_imgsize() { return codec_imgsize; };
 };
 
diff --git a/src/zm_videostore.cpp b/src/zm_videostore.cpp
index 4e33f234e..9a4a3d569 100644
--- a/src/zm_videostore.cpp
+++ b/src/zm_videostore.cpp
@@ -28,11 +28,11 @@ extern "C" {
 }
 
 VideoStore::CodecData VideoStore::codec_data[] = {
-  { AV_CODEC_ID_H264, "h264", "h264_vaapi", AV_PIX_FMT_NV12 },
-  { AV_CODEC_ID_H264, "h264", "h264_omx", AV_PIX_FMT_YUV420P },
-  { AV_CODEC_ID_H264, "h264", "h264", AV_PIX_FMT_YUV420P },
-  { AV_CODEC_ID_H264, "h264", "libx264", AV_PIX_FMT_YUV420P },
-  { AV_CODEC_ID_MJPEG, "mjpeg", "mjpeg", AV_PIX_FMT_YUVJ422P },
+  { AV_CODEC_ID_H264, "h264", "h264_vaapi", AV_PIX_FMT_NV12, AV_PIX_FMT_VAAPI, AV_HWDEVICE_TYPE_VAAPI },
+  { AV_CODEC_ID_H264, "h264", "h264_omx", AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P,  AV_HWDEVICE_TYPE_NONE },
+  { AV_CODEC_ID_H264, "h264", "h264", AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P,  AV_HWDEVICE_TYPE_NONE },
+  { AV_CODEC_ID_H264, "h264", "libx264", AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P, AV_HWDEVICE_TYPE_NONE  },
+  { AV_CODEC_ID_MJPEG, "mjpeg", "mjpeg", AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ422P, AV_HWDEVICE_TYPE_NONE },
 };
 
 VideoStore::VideoStore(
@@ -44,6 +44,7 @@ VideoStore::VideoStore(
     AVCodecContext *p_audio_in_ctx,
     Monitor *p_monitor
     ) :
+  chosen_codec_data(nullptr),
   monitor(p_monitor),
   out_format(nullptr),
   oc(nullptr),
@@ -61,8 +62,10 @@ VideoStore::VideoStore(
   video_in_frame(nullptr),
   in_frame(nullptr),
   out_frame(nullptr),
+  hw_frame(nullptr),
   packets_written(0),
   frame_count(0),
+  hw_device_ctx(nullptr),
 #if defined(HAVE_LIBSWRESAMPLE) || defined(HAVE_LIBAVRESAMPLE)
   resample_ctx(nullptr),
 #if defined(HAVE_LIBSWRESAMPLE)
@@ -163,14 +166,15 @@ bool VideoStore::open() {
       }
       std::string wanted_encoder = monitor->Encoder();
 
-      for ( unsigned int i = 0; i < sizeof(codec_data) / sizeof(*codec_data); i++ ) {
-        if ( wanted_encoder != "" and wanted_encoder != "auto" ) {
-          if ( wanted_encoder != codec_data[i].codec_name ) {
+      for (unsigned int i = 0; i < sizeof(codec_data) / sizeof(*codec_data); i++) {
+        chosen_codec_data = &codec_data[i];
+        if (wanted_encoder != "" and wanted_encoder != "auto") {
+          if (wanted_encoder != codec_data[i].codec_name) {
             Debug(1, "Not the right codec name %s != %s", codec_data[i].codec_name, wanted_encoder.c_str());
             continue;
           }
         }
-        if ( codec_data[i].codec_id != wanted_codec ) {
+        if (codec_data[i].codec_id != wanted_codec) {
           Debug(1, "Not the right codec %d %s != %d %s",
 							codec_data[i].codec_id,
 							avcodec_get_name(codec_data[i].codec_id),
@@ -181,13 +185,13 @@ bool VideoStore::open() {
         }
 
         video_out_codec = avcodec_find_encoder_by_name(codec_data[i].codec_name);
-        if ( !video_out_codec ) {
+        if (!video_out_codec) {
           Debug(1, "Didn't find encoder for %s", codec_data[i].codec_name);
           continue;
         }
         Debug(1, "Found video codec for %s", codec_data[i].codec_name);
         video_out_ctx = avcodec_alloc_context3(video_out_codec);
-        if ( oc->oformat->flags & AVFMT_GLOBALHEADER ) {
+        if (oc->oformat->flags & AVFMT_GLOBALHEADER) {
 #if LIBAVCODEC_VERSION_CHECK(56, 35, 0, 64, 0)
           video_out_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 #else
@@ -198,7 +202,7 @@ bool VideoStore::open() {
         // When encoding, we are going to use the timestamp values instead of packet pts/dts
         video_out_ctx->time_base = AV_TIME_BASE_Q;
         video_out_ctx->codec_id = codec_data[i].codec_id;
-        video_out_ctx->pix_fmt = codec_data[i].pix_fmt;
+        video_out_ctx->pix_fmt = codec_data[i].hw_pix_fmt;
         video_out_ctx->level = 32;
 
         // Don't have an input stream, so need to tell it what we are sending it, or are transcoding
@@ -220,21 +224,53 @@ bool VideoStore::open() {
           video_out_ctx->mb_decision = 2;
         }
 
+        if (codec_data[i].hwdevice_type != AV_HWDEVICE_TYPE_NONE) {
+          // Hardware encoders need a device context plus a pool of device
+          // frames attached to the codec context before avcodec_open2().
+          // Each step is checked; on failure hw_frames_ctx is left unset so
+          // that avcodec_open2() below is expected to reject this encoder and
+          // the loop can fall through to the next candidate instead of
+          // dereferencing a null device context or reporting success.
+          AVBufferRef *hw_frames_ref = nullptr;
+          ret = av_hwdevice_ctx_create(&hw_device_ctx,
+              codec_data[i].hwdevice_type, nullptr, nullptr, 0);
+          if (ret < 0) {
+            Error("Failed to create hwaccel device. Error code: %s", av_err2str(ret));
+          } else if (!(hw_frames_ref = av_hwframe_ctx_alloc(hw_device_ctx))) {
+            Error("Failed to create hwaccel frame context.");
+          } else {
+            AVHWFramesContext *frames_ctx = (AVHWFramesContext *)(hw_frames_ref->data);
+            frames_ctx->format    = codec_data[i].hw_pix_fmt;
+            frames_ctx->sw_format = codec_data[i].sw_pix_fmt;
+            frames_ctx->width     = monitor->Width();
+            frames_ctx->height    = monitor->Height();
+            frames_ctx->initial_pool_size = 20;
+            if ((ret = av_hwframe_ctx_init(hw_frames_ref)) < 0) {
+              Error("Failed to initialize hwaccel frame context. "
+                  "Error code: %s", av_err2str(ret));
+            } else if (!(video_out_ctx->hw_frames_ctx = av_buffer_ref(hw_frames_ref))) {
+              Error("Failed to allocate hw_frames_ctx");
+            }
+          }
+          // Drop the local reference; this is a no-op if it was never set.
+          av_buffer_unref(&hw_frames_ref);
+        }
+
         AVDictionary *opts = 0;
         std::string Options = monitor->GetEncoderOptions();
         Debug(2, "Options? %s", Options.c_str());
         ret = av_dict_parse_string(&opts, Options.c_str(), "=", ",#\n", 0);
-        if ( ret < 0 ) {
+        if (ret < 0) {
           Warning("Could not parse ffmpeg encoder options list '%s'\n", Options.c_str());
         } else {
           AVDictionaryEntry *e = nullptr;
-          while ( (e = av_dict_get(opts, "", e, AV_DICT_IGNORE_SUFFIX)) != NULL ) {
+          while ((e = av_dict_get(opts, "", e, AV_DICT_IGNORE_SUFFIX)) != NULL) {
             Debug(3, "Encoder Option %s=%s", e->key, e->value);
           }
         }
 
-        if ( (ret = avcodec_open2(video_out_ctx, video_out_codec, &opts)) < 0 ) {
-          if ( wanted_encoder != "" and wanted_encoder != "auto" ) {
+        if ((ret = avcodec_open2(video_out_ctx, video_out_codec, &opts)) < 0) {
+          if (wanted_encoder != "" and wanted_encoder != "auto") {
             Warning("Can't open video codec (%s) %s",
                 video_out_codec->name,
                 av_make_error_string(ret).c_str()
@@ -248,27 +284,26 @@ bool VideoStore::open() {
           video_out_codec = nullptr;
         }
 
+        Debug(1, "Success");
         AVDictionaryEntry *e = nullptr;
-        while ( (e = av_dict_get(opts, "", e, AV_DICT_IGNORE_SUFFIX)) != nullptr ) {
+        while ((e = av_dict_get(opts, "", e, AV_DICT_IGNORE_SUFFIX)) != nullptr) {
           Warning("Encoder Option %s not recognized by ffmpeg codec", e->key);
         }
-        //av_dict_free(&opts);
-        if ( video_out_codec ) break;
+        if (video_out_codec) break;
         avcodec_free_context(&video_out_ctx);
-      } // end foreach codec
+        if (hw_device_ctx) av_buffer_unref(&hw_device_ctx);
+      }  // end foreach codec
 
-      if ( !video_out_codec ) {
+      if (!video_out_codec) {
         Error("Can't open video codec!");
 #if LIBAVCODEC_VERSION_CHECK(57, 64, 0, 64, 0)
         // We allocate and copy in newer ffmpeg, so need to free it
         avcodec_free_context(&video_out_ctx);
 #endif
-        //video_out_ctx = nullptr;
-
         return false;
-      } // end if can't open codec
+      }  // end if can't open codec
       Debug(2, "Success opening codec");
-    } // end if copying or transcoding
+    }  // end if copying or transcoding
     zm_dump_codec(video_out_ctx);
   }  // end if video_in_stream
 
@@ -958,18 +993,18 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {
   frame_count += 1;
 
   // if we have to transcode
-  if ( monitor->GetOptVideoWriter() == Monitor::ENCODE ) {
+  if (monitor->GetOptVideoWriter() == Monitor::ENCODE) {
     Debug(3, "Have encoding video frame count (%d)", frame_count);
 
-    if ( !zm_packet->out_frame ) {
+    if (!zm_packet->out_frame) {
       Debug(3, "Have no out frame");
-      AVFrame *out_frame = zm_packet->get_out_frame(video_out_ctx);
-      if ( !out_frame ) {
+      AVFrame *out_frame = zm_packet->get_out_frame(video_out_ctx->width, video_out_ctx->height, chosen_codec_data->sw_pix_fmt);
+      if (!out_frame) {
         Error("Unable to allocate a frame");
         return 0;
       }
 
-      if ( zm_packet->image ) {
+      if (zm_packet->image) {
         Debug(2, "Have an image, convert it");
         //Go straight to out frame
         swscale.Convert(
@@ -977,7 +1012,7 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {
             zm_packet->buffer,
             zm_packet->codec_imgsize,
             zm_packet->image->AVPixFormat(),
-            video_out_ctx->pix_fmt,
+            chosen_codec_data->sw_pix_fmt,
             video_out_ctx->width,
             video_out_ctx->height
             );
@@ -1003,6 +1038,32 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {
       } // end if no in_frame
     } // end if no out_frame
 
+    AVFrame *frame = zm_packet->out_frame;
+
+    if (video_out_ctx->hw_frames_ctx) {
+      // hw_frame is only freed after a packet is written; an earlier call that
+      // returned before that would leave one behind, so release any stale
+      // frame before overwriting the pointer (no-op when hw_frame is null).
+      av_frame_free(&hw_frame);
+      if (!(hw_frame = av_frame_alloc())) return AVERROR(ENOMEM);
+      if ((ret = av_hwframe_get_buffer(video_out_ctx->hw_frames_ctx, hw_frame, 0)) < 0) {
+        Error("Error code: %s", av_err2str(ret));
+        av_frame_free(&hw_frame);
+        return ret;
+      }
+      if (!hw_frame->hw_frames_ctx) {
+        Error("Outof ram!");
+        av_frame_free(&hw_frame);
+        return 0;
+      }
+      if ((ret = av_hwframe_transfer_data(hw_frame, zm_packet->out_frame, 0)) < 0) {
+        Error("Error while transferring frame data to surface: %s.", av_err2str(ret));
+        av_frame_free(&hw_frame);
+        return ret;
+      }
+      frame = hw_frame;
+    }  // end if hwaccel
+
     //zm_packet->out_frame->coded_picture_number = frame_count;
     //zm_packet->out_frame->display_picture_number = frame_count;
     //zm_packet->out_frame->sample_aspect_ratio = (AVRational){ 0, 1 };
@@ -1010,7 +1071,7 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {
     //zm_packet->out_frame->pict_type = AV_PICTURE_TYPE_NONE;
     //zm_packet->out_frame->key_frame = zm_packet->keyframe;
 #if LIBAVCODEC_VERSION_CHECK(57, 64, 0, 64, 0)
-    zm_packet->out_frame->pkt_duration = 0;
+    frame->pkt_duration = 0;
 #endif
 
     int64_t in_pts = zm_packet->timestamp->tv_sec * (uint64_t)1000000 + zm_packet->timestamp->tv_usec;
@@ -1020,14 +1081,14 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {
             video_first_pts,
             static_cast<int64>(zm_packet->timestamp->tv_sec),
             static_cast<int64>(zm_packet->timestamp->tv_usec));
-      zm_packet->out_frame->pts = 0;
+      frame->pts = 0;
     } else {
       uint64_t useconds = in_pts - video_first_pts;
-      zm_packet->out_frame->pts = av_rescale_q(useconds, AV_TIME_BASE_Q, video_out_ctx->time_base);
+      frame->pts = av_rescale_q(useconds, AV_TIME_BASE_Q, video_out_ctx->time_base);
       Debug(2,
             "Setting pts for frame(%d) to (%" PRId64 ") from (start %" PRIu64 " - %" PRIu64 " - secs(%" PRIi64 ") usecs(%" PRIi64 ") @ %d/%d",
             frame_count,
-            zm_packet->out_frame->pts,
+            frame->pts,
             video_first_pts,
             useconds,
             static_cast<int64>(zm_packet->timestamp->tv_sec),
@@ -1040,9 +1101,9 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {
     opkt.data = nullptr;
     opkt.size = 0;
 
-    ret = zm_send_frame_receive_packet(video_out_ctx, zm_packet->out_frame, opkt);
-    if ( ret <= 0 ) {
-      if ( ret < 0 ) {
+    ret = zm_send_frame_receive_packet(video_out_ctx, frame, opkt);
+    if (ret <= 0) {
+      if (ret < 0) {
         Error("Could not send frame (error '%s')", av_make_error_string(ret).c_str());
       }
       return ret;
@@ -1132,6 +1193,7 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {
 
   write_packet(&opkt, video_out_stream);
   zm_av_packet_unref(&opkt);
+  if (hw_frame) av_frame_free(&hw_frame);
 
   return 1;
 }  // end int VideoStore::writeVideoFramePacket( AVPacket *ipkt )
diff --git a/src/zm_videostore.h b/src/zm_videostore.h
index 0afadf915..55c71a9aa 100644
--- a/src/zm_videostore.h
+++ b/src/zm_videostore.h
@@ -30,89 +30,95 @@ class VideoStore {
       const AVCodecID codec_id;
       const char *codec_codec;
       const char *codec_name;
-      const enum AVPixelFormat pix_fmt;
+      const enum AVPixelFormat sw_pix_fmt;  // format of the software frame filled by swscale; sw_format of the hw frame pool
+      const enum AVPixelFormat hw_pix_fmt;  // pix_fmt set on the encoder context; format of the hw frame pool
+      const AVHWDeviceType hwdevice_type;  // AV_HWDEVICE_TYPE_NONE when the encoder needs no hardware device
     };
 
     static struct CodecData codec_data[];
+    CodecData *chosen_codec_data;
 
-  Monitor *monitor;
-	AVOutputFormat *out_format;
-	AVFormatContext *oc;
-	AVStream *video_out_stream;
-	AVStream *audio_out_stream;
+    Monitor *monitor;
+    AVOutputFormat *out_format;
+    AVFormatContext *oc;
+    AVStream *video_out_stream;
+    AVStream *audio_out_stream;
 
-  AVCodec *video_out_codec;
-  AVCodecContext *video_in_ctx;
-  AVCodecContext *video_out_ctx;
+    AVCodec *video_out_codec;
+    AVCodecContext *video_in_ctx;
+    AVCodecContext *video_out_ctx;
 
-  AVStream *video_in_stream;
-  AVStream *audio_in_stream;
+    AVStream *video_in_stream;
+    AVStream *audio_in_stream;
 
-  const AVCodec *audio_in_codec;
-  AVCodecContext *audio_in_ctx;
-  // The following are used when encoding the audio stream to AAC
-  AVCodec *audio_out_codec;
-  AVCodecContext *audio_out_ctx;
-  // Move this into the object so that we aren't constantly allocating/deallocating it on the stack
-  AVPacket opkt;
-  // we are transcoding
-  AVFrame *video_in_frame;
-  AVFrame *in_frame;
-  AVFrame *out_frame;
+    const AVCodec *audio_in_codec;
+    AVCodecContext *audio_in_ctx;
+    // The following are used when encoding the audio stream to AAC
+    AVCodec *audio_out_codec;
+    AVCodecContext *audio_out_ctx;
+    // Move this into the object so that we aren't constantly allocating/deallocating it on the stack
+    AVPacket opkt;
+    // we are transcoding
+    AVFrame *video_in_frame;
+    AVFrame *in_frame;
+    AVFrame *out_frame;
+    AVFrame *hw_frame;
 
-  SWScale swscale;
-  unsigned int packets_written;
-  unsigned int frame_count;
+    SWScale swscale;
+    unsigned int packets_written;
+    unsigned int frame_count;
+
+    AVBufferRef *hw_device_ctx;
 
 #ifdef HAVE_LIBSWRESAMPLE
-  SwrContext *resample_ctx;
-  AVAudioFifo *fifo;
+    SwrContext *resample_ctx;
+    AVAudioFifo *fifo;
 #else
 #ifdef HAVE_LIBAVRESAMPLE
-  AVAudioResampleContext* resample_ctx;
+    AVAudioResampleContext* resample_ctx;
 #endif
 #endif
-  uint8_t *converted_in_samples;
-    
-	const char *filename;
-	const char *format;
-    
-  // These are for in
-  int64_t video_first_pts;
-  int64_t video_first_dts;
-  int64_t audio_first_pts;
-  int64_t audio_first_dts;
-  int64_t video_last_pts;
-  int64_t audio_last_pts;
+    uint8_t *converted_in_samples;
 
-  // These are for out, should start at zero.  We assume they do not wrap because we just aren't going to save files that big.
-  int64_t *next_dts;
-  int64_t audio_next_pts;
+    const char *filename;
+    const char *format;
 
-  int max_stream_index;
+    // These are for in
+    int64_t video_first_pts;
+    int64_t video_first_dts;
+    int64_t audio_first_pts;
+    int64_t audio_first_dts;
+    int64_t video_last_pts;
+    int64_t audio_last_pts;
 
-  bool setup_resampler();
-  int write_packet(AVPacket *pkt, AVStream *stream);
+    // These are for out, should start at zero.  We assume they do not wrap because we just aren't going to save files that big.
+    int64_t *next_dts;
+    int64_t audio_next_pts;
 
-public:
-	VideoStore(
-      const char *filename_in,
-      const char *format_in,
-      AVStream *video_in_stream,
-      AVCodecContext  *video_in_ctx,
-      AVStream *audio_in_stream,
-      AVCodecContext  *audio_in_ctx,
-      Monitor * p_monitor);
-	~VideoStore();
-  bool  open();
+    int max_stream_index;
 
-  void write_video_packet(AVPacket &pkt);
-  void write_audio_packet(AVPacket &pkt);
-  int writeVideoFramePacket(ZMPacket *pkt);
-  int writeAudioFramePacket(ZMPacket *pkt);
-  int writePacket(ZMPacket *pkt);
-  int write_packets(PacketQueue &queue);
-  void flush_codecs();
+    bool setup_resampler();
+    int write_packet(AVPacket *pkt, AVStream *stream);
+
+  public:
+    VideoStore(
+        const char *filename_in,
+        const char *format_in,
+        AVStream *video_in_stream,
+        AVCodecContext  *video_in_ctx,
+        AVStream *audio_in_stream,
+        AVCodecContext  *audio_in_ctx,
+        Monitor * p_monitor);
+    ~VideoStore();
+    bool  open();
+
+    void write_video_packet(AVPacket &pkt);
+    void write_audio_packet(AVPacket &pkt);
+    int writeVideoFramePacket(ZMPacket *pkt);
+    int writeAudioFramePacket(ZMPacket *pkt);
+    int writePacket(ZMPacket *pkt);
+    int write_packets(PacketQueue &queue);
+    void flush_codecs();
 };
 
 #endif //havelibav