From 9e1f245708a157231c427c0ef9b91729d59a30e1 Mon Sep 17 00:00:00 2001
From: John Stebbins <jstebbins.hb@gmail.com>
Date: Tue, 28 Nov 2017 08:22:54 -0800
Subject: [PATCH] add sdtp (sample dependency) box to mp4 (#1006)

* add sdtp (sample dependency) box to mp4

The AppleTV 4K requires this box in order to play 2160p60 video.
---
 contrib/ffmpeg/A21-mp4-sdtp.patch | 126 ++++++++++++++++++++++++++++++++++++++
 libhb/encavcodec.c                |   8 ++-
 libhb/encx264.c                   |   7 +--
 libhb/encx265.c                   |   3 +
 libhb/muxavformat.c               |   4 ++
 5 files changed, 140 insertions(+), 8 deletions(-)
 create mode 100644 contrib/ffmpeg/A21-mp4-sdtp.patch

diff --git a/contrib/ffmpeg/A21-mp4-sdtp.patch b/contrib/ffmpeg/A21-mp4-sdtp.patch
new file mode 100644
index 000000000..5d2221c47
--- /dev/null
+++ b/contrib/ffmpeg/A21-mp4-sdtp.patch
@@ -0,0 +1,126 @@
+From c2204c579c8411771b53a4bc66324c7c5d07698a Mon Sep 17 00:00:00 2001
+From: John Stebbins <stebbins@jetheaddev.com>
+Date: Thu, 16 Nov 2017 14:40:12 -0800
+Subject: [PATCH] movenc: add sdtp (sample dependency) box to mp4
+
+The AppleTV 4K requires this box in order to play 2160p60 video.
+---
+ libavcodec/avcodec.h |  9 +++++++--
+ libavformat/movenc.c | 34 +++++++++++++++++++++++++++++++++-
+ libavformat/movenc.h |  6 ++++++
+ 3 files changed, 46 insertions(+), 3 deletions(-)
+
+diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
+index 5624835023..48e9d23679 100644
+--- a/libavcodec/avcodec.h
++++ b/libavcodec/avcodec.h
+@@ -1161,8 +1161,13 @@ typedef struct AVPacket {
+     int64_t convergence_duration;
+ #endif
+ } AVPacket;
+-#define AV_PKT_FLAG_KEY     0x0001 ///< The packet contains a keyframe
+-#define AV_PKT_FLAG_CORRUPT 0x0002 ///< The packet content is corrupted
++#define AV_PKT_FLAG_KEY         0x0001 ///< The packet contains a keyframe
++#define AV_PKT_FLAG_CORRUPT     0x0002 ///< The packet content is corrupted
++/**
++ * Flag is used to indicate packets that contain frames that can
++ * be discarded by the decoder.  I.e. Non-reference frames.
++ */
++#define AV_PKT_FLAG_DISPOSABLE  0x0004
+ 
+ enum AVSideDataParamChangeFlags {
+     AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT  = 0x0001,
+diff --git a/libavformat/movenc.c b/libavformat/movenc.c
+index a546fdfead..d263f444f4 100644
+--- a/libavformat/movenc.c
++++ b/libavformat/movenc.c
+@@ -227,6 +227,30 @@ static int mov_write_stss_tag(AVIOContext *pb, MOVTrack *track, uint32_t flag)
+     return update_size(pb, pos);
+ }
+ 
++/* Sample dependency atom */
++static int mov_write_sdtp_tag(AVIOContext *pb, MOVTrack *track)
++{
++    int i;
++    uint8_t leading, dependent, reference, redundancy;
++    int64_t pos = avio_tell(pb);
++    avio_wb32(pb, 0); // size
++    ffio_wfourcc(pb, "sdtp");
++    avio_wb32(pb, 0); // version & flags
++    for (i = 0; i < track->entry; i++) {
++        dependent = MOV_SAMPLE_DEPENDENCY_YES;
++        leading = reference = redundancy = MOV_SAMPLE_DEPENDENCY_UNKNOWN;
++        if (track->cluster[i].flags & MOV_DISPOSABLE_SAMPLE) {
++            reference = MOV_SAMPLE_DEPENDENCY_NO;
++        }
++        if (track->cluster[i].flags & MOV_SYNC_SAMPLE) {
++            dependent = MOV_SAMPLE_DEPENDENCY_NO;
++        }
++        avio_w8(pb, (leading << 6)   | (dependent << 4) |
++                    (reference << 2) | redundancy);
++    }
++    return update_size(pb, pos);
++}
++
+ static int mov_write_amr_tag(AVIOContext *pb, MOVTrack *track)
+ {
+     avio_wb32(pb, 0x11); /* size */
+@@ -1211,8 +1235,12 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
+     mov_write_stts_tag(pb, track);
+     if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
+          track->par->codec_tag == MKTAG('r','t','p',' ')) &&
+-        track->has_keyframes && track->has_keyframes < track->entry)
++        track->has_keyframes && track->has_keyframes < track->entry) {
+         mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE);
++    }
++    if (track->par->codec_type == AVMEDIA_TYPE_VIDEO && track->has_disposable) {
++        mov_write_sdtp_tag(pb, track);
++    }
+     if (track->mode == MODE_MOV && track->flags & MOV_TRACK_STPS)
+         mov_write_stss_tag(pb, track, MOV_PARTIAL_SYNC_SAMPLE);
+     if (track->par->codec_type == AVMEDIA_TYPE_VIDEO &&
+@@ -3647,6 +3675,10 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
+         if (trk->cluster[trk->entry].flags & MOV_SYNC_SAMPLE)
+             trk->has_keyframes++;
+     }
++    if (pkt->flags & AV_PKT_FLAG_DISPOSABLE) {
++        trk->cluster[trk->entry].flags |= MOV_DISPOSABLE_SAMPLE;
++        trk->has_disposable++;
++    }
+     trk->entry++;
+     trk->sample_count += samples_in_chunk;
+     mov->mdat_size    += size;
+diff --git a/libavformat/movenc.h b/libavformat/movenc.h
+index 008f4671c6..f4eb4e8078 100644
+--- a/libavformat/movenc.h
++++ b/libavformat/movenc.h
+@@ -51,6 +51,7 @@ typedef struct MOVIentry {
+     int          cts;
+ #define MOV_SYNC_SAMPLE         0x0001
+ #define MOV_PARTIAL_SYNC_SAMPLE 0x0002
++#define MOV_DISPOSABLE_SAMPLE   0x0004
+     uint32_t     flags;
+ } MOVIentry;
+ 
+@@ -85,6 +86,7 @@ typedef struct MOVTrack {
+     long        sample_count;
+     long        sample_size;
+     int         has_keyframes;
++    int         has_disposable;
+ #define MOV_TRACK_CTTS         0x0001
+ #define MOV_TRACK_STPS         0x0002
+ #define MOV_TRACK_ENABLED      0x0004
+@@ -180,6 +182,10 @@ typedef struct MOVMuxContext {
+     int missing_duration_warned;
+ } MOVMuxContext;
+ 
++#define MOV_SAMPLE_DEPENDENCY_UNKNOWN 0x0
++#define MOV_SAMPLE_DEPENDENCY_YES     0x1
++#define MOV_SAMPLE_DEPENDENCY_NO      0x2
++
+ #define FF_MOV_FLAG_RTP_HINT              (1 <<  0)
+ #define FF_MOV_FLAG_FRAGMENT              (1 <<  1)
+ #define FF_MOV_FLAG_EMPTY_MOOV            (1 <<  2)
+-- 
+2.13.6
+
diff --git a/libhb/encavcodec.c b/libhb/encavcodec.c
index 59d491148..9960a21b4 100644
--- a/libhb/encavcodec.c
+++ b/libhb/encavcodec.c
@@ -532,12 +532,14 @@ static void get_packets( hb_work_object_t * w, hb_buffer_list_t * list )
         out->s.duration = get_frame_duration(pv, frameno);
         out->s.stop     = out->s.stop + out->s.duration;
         // libav 12 deprecated context->coded_frame, so we can't determine
-        // the exact frame type any more.  Luckily for us, we really don't
-        // require it.
+        // the exact frame type any more. So until I can completely
+        // wire up ffmpeg with AV_PKT_DISPOSABLE_FRAME, all frames
+        // must be considered to potentially be reference frames
+        out->s.flags     = HB_FLAG_FRAMETYPE_REF;
         out->s.frametype = 0;
         if (pkt.flags & AV_PKT_FLAG_KEY)
         {
-            out->s.flags = HB_FLAG_FRAMETYPE_REF | HB_FLAG_FRAMETYPE_KEY;
+            out->s.flags |= HB_FLAG_FRAMETYPE_KEY;
             hb_chapter_dequeue(pv->chapter_queue, out);
         }
         out = process_delay_list(pv, out);
diff --git a/libhb/encx264.c b/libhb/encx264.c
index ce7879ec7..26645213a 100644
--- a/libhb/encx264.c
+++ b/libhb/encx264.c
@@ -701,6 +701,7 @@ static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out,
              frames we only get the duration of the first which will
              eventually screw up the muxer & decoder. */
     int i;
+    buf->s.flags &= ~HB_FLAG_FRAMETYPE_REF;
     for( i = 0; i < i_nal; i++ )
     {
         int size = nal[i].i_payload;
@@ -737,11 +738,7 @@ static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out,
          * Also, since libx264 doesn't tell us when B-frames are
          * themselves reference frames, figure it out on our own.
          */
-        if (nal[i].i_ref_idc == NAL_PRIORITY_DISPOSABLE)
-        {
-            buf->s.flags &= ~HB_FLAG_FRAMETYPE_REF;
-        }
-        else
+        if (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE)
         {
             if (buf->s.frametype == HB_FRAME_B)
             {
diff --git a/libhb/encx265.c b/libhb/encx265.c
index ec7b7d574..242dc6f1f 100644
--- a/libhb/encx265.c
+++ b/libhb/encx265.c
@@ -425,16 +425,19 @@ static hb_buffer_t* nal_encode(hb_work_object_t *w,
             buf->s.frametype = HB_FRAME_IDR;
             break;
         case X265_TYPE_P:
+            buf->s.flags |= HB_FLAG_FRAMETYPE_REF;
             buf->s.frametype = HB_FRAME_P;
             break;
         case X265_TYPE_B:
             buf->s.frametype = HB_FRAME_B;
             break;
         case X265_TYPE_BREF:
+            buf->s.flags |= HB_FLAG_FRAMETYPE_REF;
             buf->s.frametype = HB_FRAME_BREF;
             break;
         case X265_TYPE_I:
         default:
+            buf->s.flags |= HB_FLAG_FRAMETYPE_REF;
             buf->s.frametype = HB_FRAME_I;
             break;
     }
diff --git a/libhb/muxavformat.c b/libhb/muxavformat.c
index f85336dfe..62a270d5b 100644
--- a/libhb/muxavformat.c
+++ b/libhb/muxavformat.c
@@ -1197,6 +1197,10 @@ static int avformatMux(hb_mux_object_t *m, hb_mux_data_t *track, hb_buffer_t *bu
         {
             pkt.flags |= AV_PKT_FLAG_KEY;
         }
+        if (!(buf->s.flags & HB_FLAG_FRAMETYPE_REF))
+        {
+            pkt.flags |= AV_PKT_FLAG_DISPOSABLE;
+        }
     }
     else if (buf->s.frametype & HB_FRAME_MASK_KEY)
     {