Make vaapi accelerated encoding work

2021-05-04 20:20:33 -04:00 · 2021-05-04 20:20:33 -04:00 · 3645ccea1b
parent 6d07c7ce9f
commit 3645ccea1b
4 changed files with 197 additions and 123 deletions
--- a/src/zm_packet.cpp
+++ b/src/zm_packet.cpp
@ -232,45 +232,50 @@ AVPacket *ZMPacket::set_packet(AVPacket *p) {
  return &packet;
 }

-AVFrame *ZMPacket::get_out_frame(const AVCodecContext *ctx) {
-  if ( !out_frame ) {
+// Returns this packet's out_frame. On first use the frame and its backing
+// buffer are allocated for the given width/height/format; once out_frame
+// exists the arguments are ignored and the existing frame is returned.
+// Returns nullptr when an allocation or av_image_fill_arrays() fails.
+AVFrame *ZMPacket::get_out_frame(int width, int height, AVPixelFormat format) {
+  if (!out_frame) {
    out_frame = zm_av_frame_alloc();
-    if ( !out_frame ) {
+    if (!out_frame) {
      Error("Unable to allocate a frame");
      return nullptr;
    }

 #if LIBAVUTIL_VERSION_CHECK(54, 6, 0, 6, 0)
    codec_imgsize = av_image_get_buffer_size(
-        ctx->pix_fmt,
-        ctx->width,
-        ctx->height, 32);
+        format, width, height, 32);
+    Debug(1, "buffer size %u from %s %dx%d", codec_imgsize, av_get_pix_fmt_name(format), width, height);
    buffer = (uint8_t *)av_malloc(codec_imgsize);
-    av_image_fill_arrays(
+    if (!buffer) {
+      // Without a backing buffer the frame's data pointers would be unusable.
+      Error("Unable to allocate a frame buffer");
+      av_frame_free(&out_frame);
+      return nullptr;
+    }
+    int ret;
+    if ((ret=av_image_fill_arrays(
        out_frame->data,
        out_frame->linesize,
        buffer,
-        ctx->pix_fmt,
-        ctx->width,
-        ctx->height,
-        32);
+        format,
+        width,
+        height,
+        32))<0) {
+      Error("Failed to fill_arrays %s", av_make_error_string(ret).c_str());
+      // Release the buffer too, so a retry does not overwrite (and leak) it.
+      av_freep(&buffer);
+      av_frame_free(&out_frame);
+      return nullptr;
+    }
 #else
    codec_imgsize = avpicture_get_size(
-        ctx->pix_fmt,
-        ctx->width,
-        ctx->height);
+        format,
+        width,
+        height);
    buffer = (uint8_t *)av_malloc(codec_imgsize);
+    if (!buffer) {
+      Error("Unable to allocate a frame buffer");
+      av_frame_free(&out_frame);
+      return nullptr;
+    }
    avpicture_fill(
        (AVPicture *)out_frame,
        buffer,
-        ctx->pix_fmt,
-        ctx->width,
-        ctx->height
+        format,
+        width,
+        height
        );
 #endif
-    out_frame->width = ctx->width;
-    out_frame->height = ctx->height;
-    out_frame->format = ctx->pix_fmt;
+    out_frame->width = width;
+    out_frame->height = height;
+    out_frame->format = format;
  }
  return out_frame;
 } // end AVFrame *ZMPacket::get_out_frame( AVCodecContext *ctx );
--- a/src/zm_packet.h
+++ b/src/zm_packet.h
@ -74,7 +74,8 @@ class ZMPacket {
    ZMPacket();
    ~ZMPacket();

-    AVFrame *get_out_frame(const AVCodecContext *ctx);
+    //AVFrame *get_out_frame(const AVCodecContext *ctx);
+    AVFrame *get_out_frame(int width, int height, AVPixelFormat format);
    int get_codec_imgsize() { return codec_imgsize; };
 };

--- a/src/zm_videostore.cpp
+++ b/src/zm_videostore.cpp
@ -28,11 +28,11 @@ extern "C" {
 }

 VideoStore::CodecData VideoStore::codec_data[] = {
-  { AV_CODEC_ID_H264, "h264", "h264_vaapi", AV_PIX_FMT_NV12 },
-  { AV_CODEC_ID_H264, "h264", "h264_omx", AV_PIX_FMT_YUV420P },
-  { AV_CODEC_ID_H264, "h264", "h264", AV_PIX_FMT_YUV420P },
-  { AV_CODEC_ID_H264, "h264", "libx264", AV_PIX_FMT_YUV420P },
-  { AV_CODEC_ID_MJPEG, "mjpeg", "mjpeg", AV_PIX_FMT_YUVJ422P },
+  // Columns: codec_id, codec_codec, codec_name, sw_pix_fmt, hw_pix_fmt, hwdevice_type.
+  // VideoStore::open() walks this table in order and keeps the first matching
+  // encoder that opens successfully, so earlier rows take priority.
+  { AV_CODEC_ID_H264, "h264", "h264_vaapi", AV_PIX_FMT_NV12, AV_PIX_FMT_VAAPI, AV_HWDEVICE_TYPE_VAAPI },
+  { AV_CODEC_ID_H264, "h264", "h264_omx", AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P,  AV_HWDEVICE_TYPE_NONE },
+  { AV_CODEC_ID_H264, "h264", "h264", AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P,  AV_HWDEVICE_TYPE_NONE },
+  { AV_CODEC_ID_H264, "h264", "libx264", AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P, AV_HWDEVICE_TYPE_NONE  },
+  { AV_CODEC_ID_MJPEG, "mjpeg", "mjpeg", AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ422P, AV_HWDEVICE_TYPE_NONE },
 };

 VideoStore::VideoStore(
@ -44,6 +44,7 @@ VideoStore::VideoStore(
    AVCodecContext *p_audio_in_ctx,
    Monitor *p_monitor
    ) :
+  chosen_codec_data(nullptr),
  monitor(p_monitor),
  out_format(nullptr),
  oc(nullptr),
@ -61,8 +62,10 @@ VideoStore::VideoStore(
  video_in_frame(nullptr),
  in_frame(nullptr),
  out_frame(nullptr),
+  hw_frame(nullptr),
  packets_written(0),
  frame_count(0),
+  hw_device_ctx(nullptr),
 #if defined(HAVE_LIBSWRESAMPLE) || defined(HAVE_LIBAVRESAMPLE)
  resample_ctx(nullptr),
 #if defined(HAVE_LIBSWRESAMPLE)
@ -163,14 +166,15 @@ bool VideoStore::open() {
      }
      std::string wanted_encoder = monitor->Encoder();

-      for ( unsigned int i = 0; i < sizeof(codec_data) / sizeof(*codec_data); i++ ) {
-        if ( wanted_encoder != "" and wanted_encoder != "auto" ) {
-          if ( wanted_encoder != codec_data[i].codec_name ) {
+      for (unsigned int i = 0; i < sizeof(codec_data) / sizeof(*codec_data); i++) {
+        chosen_codec_data = &codec_data[i];
+        if (wanted_encoder != "" and wanted_encoder != "auto") {
+          if (wanted_encoder != codec_data[i].codec_name) {
            Debug(1, "Not the right codec name %s != %s", codec_data[i].codec_name, wanted_encoder.c_str());
            continue;
          }
        }
-        if ( codec_data[i].codec_id != wanted_codec ) {
+        if (codec_data[i].codec_id != wanted_codec) {
          Debug(1, "Not the right codec %d %s != %d %s",
 							codec_data[i].codec_id,
 							avcodec_get_name(codec_data[i].codec_id),
@ -181,13 +185,13 @@ bool VideoStore::open() {
        }

        video_out_codec = avcodec_find_encoder_by_name(codec_data[i].codec_name);
-        if ( !video_out_codec ) {
+        if (!video_out_codec) {
          Debug(1, "Didn't find encoder for %s", codec_data[i].codec_name);
          continue;
        }
        Debug(1, "Found video codec for %s", codec_data[i].codec_name);
        video_out_ctx = avcodec_alloc_context3(video_out_codec);
-        if ( oc->oformat->flags & AVFMT_GLOBALHEADER ) {
+        if (oc->oformat->flags & AVFMT_GLOBALHEADER) {
 #if LIBAVCODEC_VERSION_CHECK(56, 35, 0, 64, 0)
          video_out_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 #else
@ -198,7 +202,7 @@ bool VideoStore::open() {
        // When encoding, we are going to use the timestamp values instead of packet pts/dts
        video_out_ctx->time_base = AV_TIME_BASE_Q;
        video_out_ctx->codec_id = codec_data[i].codec_id;
-        video_out_ctx->pix_fmt = codec_data[i].pix_fmt;
+        video_out_ctx->pix_fmt = codec_data[i].hw_pix_fmt;
        video_out_ctx->level = 32;

        // Don't have an input stream, so need to tell it what we are sending it, or are transcoding
@ -220,21 +224,53 @@ bool VideoStore::open() {
          video_out_ctx->mb_decision = 2;
        }

+        // Hardware encoders need a device context and a pool of hardware frames
+        // attached to the codec context before it is opened. When the device or
+        // the frame pool cannot be initialised we log and leave hw_frames_ctx
+        // unset, leaving it to the avcodec_open2() call that follows to decide
+        // whether this encoder is usable.
+        if (codec_data[i].hwdevice_type != AV_HWDEVICE_TYPE_NONE) {
+          ret = av_hwdevice_ctx_create(&hw_device_ctx,
+              codec_data[i].hwdevice_type,
+              NULL, NULL, 0);
+          if (ret < 0) {
+            // hw_device_ctx is not usable here; it must not reach av_hwframe_ctx_alloc().
+            Error("Failed to create hwaccel device context: %s", av_make_error_string(ret).c_str());
+          } else {
+            AVBufferRef *hw_frames_ref = av_hwframe_ctx_alloc(hw_device_ctx);
+            if (!hw_frames_ref) {
+              Error("Failed to create hwaccel frame context.");
+              // open() returns bool: a non-zero int here would read as success.
+              return false;
+            }
+            AVHWFramesContext *frames_ctx = (AVHWFramesContext *)(hw_frames_ref->data);
+            frames_ctx->format    = codec_data[i].hw_pix_fmt;
+            frames_ctx->sw_format = codec_data[i].sw_pix_fmt;
+            frames_ctx->width     = monitor->Width();
+            frames_ctx->height    = monitor->Height();
+            frames_ctx->initial_pool_size = 20;
+            if ((ret = av_hwframe_ctx_init(hw_frames_ref)) < 0) {
+              Error("Failed to initialize hwaccel frame context. "
+                  "Error code: %s",av_err2str(ret));
+            } else {
+              video_out_ctx->hw_frames_ctx = av_buffer_ref(hw_frames_ref);
+              if (!video_out_ctx->hw_frames_ctx) {
+                Error("Failed to allocate hw_frames_ctx");
+              }
+            }
+            // Drop the local reference; the codec context holds its own, if any.
+            av_buffer_unref(&hw_frames_ref);
+          }
+        }
+
        AVDictionary *opts = 0;
        std::string Options = monitor->GetEncoderOptions();
        Debug(2, "Options? %s", Options.c_str());
        ret = av_dict_parse_string(&opts, Options.c_str(), "=", ",#\n", 0);
-        if ( ret < 0 ) {
+        if (ret < 0) {
          Warning("Could not parse ffmpeg encoder options list '%s'\n", Options.c_str());
        } else {
          AVDictionaryEntry *e = nullptr;
-          while ( (e = av_dict_get(opts, "", e, AV_DICT_IGNORE_SUFFIX)) != NULL ) {
+          while ((e = av_dict_get(opts, "", e, AV_DICT_IGNORE_SUFFIX)) != NULL) {
            Debug(3, "Encoder Option %s=%s", e->key, e->value);
          }
        }

-        if ( (ret = avcodec_open2(video_out_ctx, video_out_codec, &opts)) < 0 ) {
-          if ( wanted_encoder != "" and wanted_encoder != "auto" ) {
+        if ((ret = avcodec_open2(video_out_ctx, video_out_codec, &opts)) < 0) {
+          if (wanted_encoder != "" and wanted_encoder != "auto") {
            Warning("Can't open video codec (%s) %s",
                video_out_codec->name,
                av_make_error_string(ret).c_str()
@ -248,27 +284,26 @@ bool VideoStore::open() {
          video_out_codec = nullptr;
        }

+        Debug(1, "Success");
        AVDictionaryEntry *e = nullptr;
-        while ( (e = av_dict_get(opts, "", e, AV_DICT_IGNORE_SUFFIX)) != nullptr ) {
+        while ((e = av_dict_get(opts, "", e, AV_DICT_IGNORE_SUFFIX)) != nullptr) {
          Warning("Encoder Option %s not recognized by ffmpeg codec", e->key);
        }
-        //av_dict_free(&opts);
-        if ( video_out_codec ) break;
+        if (video_out_codec) break;
        avcodec_free_context(&video_out_ctx);
-      } // end foreach codec
+        if (hw_device_ctx) av_buffer_unref(&hw_device_ctx);
+      }  // end foreach codec

-      if ( !video_out_codec ) {
+      if (!video_out_codec) {
        Error("Can't open video codec!");
 #if LIBAVCODEC_VERSION_CHECK(57, 64, 0, 64, 0)
        // We allocate and copy in newer ffmpeg, so need to free it
        avcodec_free_context(&video_out_ctx);
 #endif
-        //video_out_ctx = nullptr;
-
        return false;
-      } // end if can't open codec
+      }  // end if can't open codec
      Debug(2, "Success opening codec");
-    } // end if copying or transcoding
+    }  // end if copying or transcoding
    zm_dump_codec(video_out_ctx);
  }  // end if video_in_stream

@ -958,18 +993,18 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {
  frame_count += 1;

  // if we have to transcode
-  if ( monitor->GetOptVideoWriter() == Monitor::ENCODE ) {
+  if (monitor->GetOptVideoWriter() == Monitor::ENCODE) {
    Debug(3, "Have encoding video frame count (%d)", frame_count);

-    if ( !zm_packet->out_frame ) {
+    if (!zm_packet->out_frame) {
      Debug(3, "Have no out frame");
-      AVFrame *out_frame = zm_packet->get_out_frame(video_out_ctx);
-      if ( !out_frame ) {
+      AVFrame *out_frame = zm_packet->get_out_frame(video_out_ctx->width, video_out_ctx->height, chosen_codec_data->sw_pix_fmt);
+      if (!out_frame) {
        Error("Unable to allocate a frame");
        return 0;
      }

-      if ( zm_packet->image ) {
+      if (zm_packet->image) {
        Debug(2, "Have an image, convert it");
        //Go straight to out frame
        swscale.Convert(
@ -977,7 +1012,7 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {
            zm_packet->buffer,
            zm_packet->codec_imgsize,
            zm_packet->image->AVPixFormat(),
-            video_out_ctx->pix_fmt,
+            chosen_codec_data->sw_pix_fmt,
            video_out_ctx->width,
            video_out_ctx->height
            );
@ -1003,6 +1038,32 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {
      } // end if no in_frame
    } // end if no out_frame

+    // Frame handed to the encoder: the software out_frame, or (when the codec
+    // context has a hw frames pool) a hardware surface holding a copy of it.
+    AVFrame *frame = zm_packet->out_frame;
+
+    if (video_out_ctx->hw_frames_ctx) {
+      // hw_frame is a member that is only released at the very end of this
+      // function. An earlier call that returned before that point (e.g. when
+      // the encoder produced no packet) leaves it set, so release it before
+      // replacing it. av_frame_free() is a no-op when hw_frame is null.
+      av_frame_free(&hw_frame);
+      if (!(hw_frame = av_frame_alloc())) {
+        Error("Unable to allocate a hw frame");
+        ret = AVERROR(ENOMEM);
+        return ret;
+      }
+      if ((ret = av_hwframe_get_buffer(video_out_ctx->hw_frames_ctx, hw_frame, 0)) < 0) {
+        Error("Error code: %s", av_err2str(ret));
+        av_frame_free(&hw_frame);
+        return ret;
+      }
+      if (!hw_frame->hw_frames_ctx) {
+        Error("Outof ram!");
+        av_frame_free(&hw_frame);
+        return 0;
+      }
+      // Upload the software frame's pixels into the hardware surface.
+      if ((ret = av_hwframe_transfer_data(hw_frame, zm_packet->out_frame, 0)) < 0) {
+        Error("Error while transferring frame data to surface: %s.", av_err2str(ret));
+        av_frame_free(&hw_frame);
+        return ret;
+      }
+
+      frame = hw_frame;
+    }  // end if hwaccel
+
    //zm_packet->out_frame->coded_picture_number = frame_count;
    //zm_packet->out_frame->display_picture_number = frame_count;
    //zm_packet->out_frame->sample_aspect_ratio = (AVRational){ 0, 1 };
@ -1010,7 +1071,7 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {
    //zm_packet->out_frame->pict_type = AV_PICTURE_TYPE_NONE;
    //zm_packet->out_frame->key_frame = zm_packet->keyframe;
 #if LIBAVCODEC_VERSION_CHECK(57, 64, 0, 64, 0)
-    zm_packet->out_frame->pkt_duration = 0;
+    frame->pkt_duration = 0;
 #endif

    int64_t in_pts = zm_packet->timestamp->tv_sec * (uint64_t)1000000 + zm_packet->timestamp->tv_usec;
@ -1020,14 +1081,14 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {
            video_first_pts,
            static_cast<int64>(zm_packet->timestamp->tv_sec),
            static_cast<int64>(zm_packet->timestamp->tv_usec));
-      zm_packet->out_frame->pts = 0;
+      frame->pts = 0;
    } else {
      uint64_t useconds = in_pts - video_first_pts;
-      zm_packet->out_frame->pts = av_rescale_q(useconds, AV_TIME_BASE_Q, video_out_ctx->time_base);
+      frame->pts = av_rescale_q(useconds, AV_TIME_BASE_Q, video_out_ctx->time_base);
      Debug(2,
            "Setting pts for frame(%d) to (%" PRId64 ") from (start %" PRIu64 " - %" PRIu64 " - secs(%" PRIi64 ") usecs(%" PRIi64 ") @ %d/%d",
            frame_count,
-            zm_packet->out_frame->pts,
+            frame->pts,
            video_first_pts,
            useconds,
            static_cast<int64>(zm_packet->timestamp->tv_sec),
@ -1040,9 +1101,9 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {
    opkt.data = nullptr;
    opkt.size = 0;

-    ret = zm_send_frame_receive_packet(video_out_ctx, zm_packet->out_frame, opkt);
-    if ( ret <= 0 ) {
-      if ( ret < 0 ) {
+    ret = zm_send_frame_receive_packet(video_out_ctx, frame, opkt);
+    if (ret <= 0) {
+      if (ret < 0) {
        Error("Could not send frame (error '%s')", av_make_error_string(ret).c_str());
      }
      return ret;
@ -1132,6 +1193,7 @@ int VideoStore::writeVideoFramePacket(ZMPacket *zm_packet) {

  write_packet(&opkt, video_out_stream);
  zm_av_packet_unref(&opkt);
+  if (hw_frame) av_frame_free(&hw_frame);

  return 1;
 }  // end int VideoStore::writeVideoFramePacket( AVPacket *ipkt )
--- a/src/zm_videostore.h
+++ b/src/zm_videostore.h
@ -30,89 +30,95 @@ class VideoStore {
      const AVCodecID codec_id;
      const char *codec_codec;
      const char *codec_name;
-      const enum AVPixelFormat pix_fmt;
+      const enum AVPixelFormat sw_pix_fmt;
+      const enum AVPixelFormat hw_pix_fmt;
+      const AVHWDeviceType hwdevice_type;
    };

    static struct CodecData codec_data[];
+    CodecData *chosen_codec_data;

-  Monitor *monitor;
-	AVOutputFormat *out_format;
-	AVFormatContext *oc;
-	AVStream *video_out_stream;
-	AVStream *audio_out_stream;
+    Monitor *monitor;
+    AVOutputFormat *out_format;
+    AVFormatContext *oc;
+    AVStream *video_out_stream;
+    AVStream *audio_out_stream;

-  AVCodec *video_out_codec;
-  AVCodecContext *video_in_ctx;
-  AVCodecContext *video_out_ctx;
+    AVCodec *video_out_codec;
+    AVCodecContext *video_in_ctx;
+    AVCodecContext *video_out_ctx;

-  AVStream *video_in_stream;
-  AVStream *audio_in_stream;
+    AVStream *video_in_stream;
+    AVStream *audio_in_stream;

-  const AVCodec *audio_in_codec;
-  AVCodecContext *audio_in_ctx;
-  // The following are used when encoding the audio stream to AAC
-  AVCodec *audio_out_codec;
-  AVCodecContext *audio_out_ctx;
-  // Move this into the object so that we aren't constantly allocating/deallocating it on the stack
-  AVPacket opkt;
-  // we are transcoding
-  AVFrame *video_in_frame;
-  AVFrame *in_frame;
-  AVFrame *out_frame;
+    const AVCodec *audio_in_codec;
+    AVCodecContext *audio_in_ctx;
+    // The following are used when encoding the audio stream to AAC
+    AVCodec *audio_out_codec;
+    AVCodecContext *audio_out_ctx;
+    // Move this into the object so that we aren't constantly allocating/deallocating it on the stack
+    AVPacket opkt;
+    // we are transcoding
+    AVFrame *video_in_frame;
+    AVFrame *in_frame;
+    AVFrame *out_frame;
+    AVFrame *hw_frame;

-  SWScale swscale;
-  unsigned int packets_written;
-  unsigned int frame_count;
+    SWScale swscale;
+    unsigned int packets_written;
+    unsigned int frame_count;
+
+    AVBufferRef *hw_device_ctx;

 #ifdef HAVE_LIBSWRESAMPLE
-  SwrContext *resample_ctx;
-  AVAudioFifo *fifo;
+    SwrContext *resample_ctx;
+    AVAudioFifo *fifo;
 #else
 #ifdef HAVE_LIBAVRESAMPLE
-  AVAudioResampleContext* resample_ctx;
+    AVAudioResampleContext* resample_ctx;
 #endif
 #endif
-  uint8_t *converted_in_samples;
-    
-	const char *filename;
-	const char *format;
-    
-  // These are for in
-  int64_t video_first_pts;
-  int64_t video_first_dts;
-  int64_t audio_first_pts;
-  int64_t audio_first_dts;
-  int64_t video_last_pts;
-  int64_t audio_last_pts;
+    uint8_t *converted_in_samples;

-  // These are for out, should start at zero.  We assume they do not wrap because we just aren't going to save files that big.
-  int64_t *next_dts;
-  int64_t audio_next_pts;
+    const char *filename;
+    const char *format;

-  int max_stream_index;
+    // These are for in
+    int64_t video_first_pts;
+    int64_t video_first_dts;
+    int64_t audio_first_pts;
+    int64_t audio_first_dts;
+    int64_t video_last_pts;
+    int64_t audio_last_pts;

-  bool setup_resampler();
-  int write_packet(AVPacket *pkt, AVStream *stream);
+    // These are for out, should start at zero.  We assume they do not wrap because we just aren't going to save files that big.
+    int64_t *next_dts;
+    int64_t audio_next_pts;

-public:
-	VideoStore(
-      const char *filename_in,
-      const char *format_in,
-      AVStream *video_in_stream,
-      AVCodecContext  *video_in_ctx,
-      AVStream *audio_in_stream,
-      AVCodecContext  *audio_in_ctx,
-      Monitor * p_monitor);
-	~VideoStore();
-  bool  open();
+    int max_stream_index;

-  void write_video_packet(AVPacket &pkt);
-  void write_audio_packet(AVPacket &pkt);
-  int writeVideoFramePacket(ZMPacket *pkt);
-  int writeAudioFramePacket(ZMPacket *pkt);
-  int writePacket(ZMPacket *pkt);
-  int write_packets(PacketQueue &queue);
-  void flush_codecs();
+    bool setup_resampler();
+    int write_packet(AVPacket *pkt, AVStream *stream);
+
+  public:
+    VideoStore(
+        const char *filename_in,
+        const char *format_in,
+        AVStream *video_in_stream,
+        AVCodecContext  *video_in_ctx,
+        AVStream *audio_in_stream,
+        AVCodecContext  *audio_in_ctx,
+        Monitor * p_monitor);
+    ~VideoStore();
+    bool  open();
+
+    void write_video_packet(AVPacket &pkt);
+    void write_audio_packet(AVPacket &pkt);
+    int writeVideoFramePacket(ZMPacket *pkt);
+    int writeAudioFramePacket(ZMPacket *pkt);
+    int writePacket(ZMPacket *pkt);
+    int write_packets(PacketQueue &queue);
+    void flush_codecs();
 };

 #endif //havelibav