From 9e1f245708a157231c427c0ef9b91729d59a30e1 Mon Sep 17 00:00:00 2001 From: John Stebbins Date: Tue, 28 Nov 2017 08:22:54 -0800 Subject: [PATCH] add sdtp (sample dependency) box to mp4 (#1006) * add sdtp (sample dependency) box to mp4 The AppleTV 4K requires this box in order to play 2160p60 video. --- contrib/ffmpeg/A21-mp4-sdtp.patch | 126 ++++++++++++++++++++++++++++++++++++++ libhb/encavcodec.c | 8 ++- libhb/encx264.c | 7 +-- libhb/encx265.c | 3 + libhb/muxavformat.c | 4 ++ 5 files changed, 140 insertions(+), 8 deletions(-) create mode 100644 contrib/ffmpeg/A21-mp4-sdtp.patch diff --git a/contrib/ffmpeg/A21-mp4-sdtp.patch b/contrib/ffmpeg/A21-mp4-sdtp.patch new file mode 100644 index 000000000..5d2221c47 --- /dev/null +++ b/contrib/ffmpeg/A21-mp4-sdtp.patch @@ -0,0 +1,126 @@ +From c2204c579c8411771b53a4bc66324c7c5d07698a Mon Sep 17 00:00:00 2001 +From: John Stebbins +Date: Thu, 16 Nov 2017 14:40:12 -0800 +Subject: [PATCH] movenc: add sdtp (sample dependency) box to mp4 + +The AppleTV 4K requires this box in order to play 2160p60 video. +--- + libavcodec/avcodec.h | 9 +++++++-- + libavformat/movenc.c | 34 +++++++++++++++++++++++++++++++++- + libavformat/movenc.h | 6 ++++++ + 3 files changed, 46 insertions(+), 3 deletions(-) + +diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h +index 5624835023..48e9d23679 100644 +--- a/libavcodec/avcodec.h ++++ b/libavcodec/avcodec.h +@@ -1161,8 +1161,13 @@ typedef struct AVPacket { + int64_t convergence_duration; + #endif + } AVPacket; +-#define AV_PKT_FLAG_KEY 0x0001 ///< The packet contains a keyframe +-#define AV_PKT_FLAG_CORRUPT 0x0002 ///< The packet content is corrupted ++#define AV_PKT_FLAG_KEY 0x0001 ///< The packet contains a keyframe ++#define AV_PKT_FLAG_CORRUPT 0x0002 ///< The packet content is corrupted ++/** ++ * Flag is used to indicate packets that contain frames that can ++ * be discarded by the decoder. I.e. Non-reference frames. ++ */ ++#define AV_PKT_FLAG_DISPOSABLE 0x0004 + + enum AVSideDataParamChangeFlags { + AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT = 0x0001, +diff --git a/libavformat/movenc.c b/libavformat/movenc.c +index a546fdfead..d263f444f4 100644 +--- a/libavformat/movenc.c ++++ b/libavformat/movenc.c +@@ -227,6 +227,30 @@ static int mov_write_stss_tag(AVIOContext *pb, MOVTrack *track, uint32_t flag) + return update_size(pb, pos); + } + ++/* Sample dependency atom */ ++static int mov_write_sdtp_tag(AVIOContext *pb, MOVTrack *track) ++{ ++ int i; ++ uint8_t leading, dependent, reference, redundancy; ++ int64_t pos = avio_tell(pb); ++ avio_wb32(pb, 0); // size ++ ffio_wfourcc(pb, "sdtp"); ++ avio_wb32(pb, 0); // version & flags ++ for (i = 0; i < track->entry; i++) { ++ dependent = MOV_SAMPLE_DEPENDENCY_YES; ++ leading = reference = redundancy = MOV_SAMPLE_DEPENDENCY_UNKNOWN; ++ if (track->cluster[i].flags & MOV_DISPOSABLE_SAMPLE) { ++ reference = MOV_SAMPLE_DEPENDENCY_NO; ++ } ++ if (track->cluster[i].flags & MOV_SYNC_SAMPLE) { ++ dependent = MOV_SAMPLE_DEPENDENCY_NO; ++ } ++ avio_w8(pb, (leading << 6) | (dependent << 4) | ++ (reference << 2) | redundancy); ++ } ++ return update_size(pb, pos); ++} ++ + static int mov_write_amr_tag(AVIOContext *pb, MOVTrack *track) + { + avio_wb32(pb, 0x11); /* size */ +@@ -1211,8 +1235,12 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra + mov_write_stts_tag(pb, track); + if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO || + track->par->codec_tag == MKTAG('r','t','p',' ')) && +- track->has_keyframes && track->has_keyframes < track->entry) ++ track->has_keyframes && track->has_keyframes < track->entry) { + mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE); ++ } ++ if (track->par->codec_type == AVMEDIA_TYPE_VIDEO && track->has_disposable) { ++ mov_write_sdtp_tag(pb, track); ++ } + if (track->mode == MODE_MOV && track->flags & MOV_TRACK_STPS) + mov_write_stss_tag(pb, track, MOV_PARTIAL_SYNC_SAMPLE); + if (track->par->codec_type == AVMEDIA_TYPE_VIDEO && +@@ -3647,6 +3675,10 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) + if (trk->cluster[trk->entry].flags & MOV_SYNC_SAMPLE) + trk->has_keyframes++; + } ++ if (pkt->flags & AV_PKT_FLAG_DISPOSABLE) { ++ trk->cluster[trk->entry].flags |= MOV_DISPOSABLE_SAMPLE; ++ trk->has_disposable++; ++ } + trk->entry++; + trk->sample_count += samples_in_chunk; + mov->mdat_size += size; +diff --git a/libavformat/movenc.h b/libavformat/movenc.h +index 008f4671c6..f4eb4e8078 100644 +--- a/libavformat/movenc.h ++++ b/libavformat/movenc.h +@@ -51,6 +51,7 @@ typedef struct MOVIentry { + int cts; + #define MOV_SYNC_SAMPLE 0x0001 + #define MOV_PARTIAL_SYNC_SAMPLE 0x0002 ++#define MOV_DISPOSABLE_SAMPLE 0x0004 + uint32_t flags; + } MOVIentry; + +@@ -85,6 +86,7 @@ typedef struct MOVTrack { + long sample_count; + long sample_size; + int has_keyframes; ++ int has_disposable; + #define MOV_TRACK_CTTS 0x0001 + #define MOV_TRACK_STPS 0x0002 + #define MOV_TRACK_ENABLED 0x0004 +@@ -180,6 +182,10 @@ typedef struct MOVMuxContext { + int missing_duration_warned; + } MOVMuxContext; + ++#define MOV_SAMPLE_DEPENDENCY_UNKNOWN 0x0 ++#define MOV_SAMPLE_DEPENDENCY_YES 0x1 ++#define MOV_SAMPLE_DEPENDENCY_NO 0x2 ++ + #define FF_MOV_FLAG_RTP_HINT (1 << 0) + #define FF_MOV_FLAG_FRAGMENT (1 << 1) + #define FF_MOV_FLAG_EMPTY_MOOV (1 << 2) +-- +2.13.6 + diff --git a/libhb/encavcodec.c b/libhb/encavcodec.c index 59d491148..9960a21b4 100644 --- a/libhb/encavcodec.c +++ b/libhb/encavcodec.c @@ -532,12 +532,14 @@ static void get_packets( hb_work_object_t * w, hb_buffer_list_t * list ) out->s.duration = get_frame_duration(pv, frameno); out->s.stop = out->s.stop + out->s.duration; // libav 12 deprecated context->coded_frame, so we can't determine - // the exact frame type any more. Luckily for us, we really don't - // require it. + // the exact frame type any more. So until I can completely + // wire up ffmpeg with AV_PKT_DISPOSABLE_FRAME, all frames + // must be considered to potentially be reference frames + out->s.flags = HB_FLAG_FRAMETYPE_REF; out->s.frametype = 0; if (pkt.flags & AV_PKT_FLAG_KEY) { - out->s.flags = HB_FLAG_FRAMETYPE_REF | HB_FLAG_FRAMETYPE_KEY; + out->s.flags |= HB_FLAG_FRAMETYPE_KEY; hb_chapter_dequeue(pv->chapter_queue, out); } out = process_delay_list(pv, out); diff --git a/libhb/encx264.c b/libhb/encx264.c index ce7879ec7..26645213a 100644 --- a/libhb/encx264.c +++ b/libhb/encx264.c @@ -701,6 +701,7 @@ static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out, frames we only get the duration of the first which will eventually screw up the muxer & decoder. */ int i; + buf->s.flags &= ~HB_FLAG_FRAMETYPE_REF; for( i = 0; i < i_nal; i++ ) { int size = nal[i].i_payload; @@ -737,11 +738,7 @@ static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out, * Also, since libx264 doesn't tell us when B-frames are * themselves reference frames, figure it out on our own. */ - if (nal[i].i_ref_idc == NAL_PRIORITY_DISPOSABLE) - { - buf->s.flags &= ~HB_FLAG_FRAMETYPE_REF; - } - else + if (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE) { if (buf->s.frametype == HB_FRAME_B) { diff --git a/libhb/encx265.c b/libhb/encx265.c index ec7b7d574..242dc6f1f 100644 --- a/libhb/encx265.c +++ b/libhb/encx265.c @@ -425,16 +425,19 @@ static hb_buffer_t* nal_encode(hb_work_object_t *w, buf->s.frametype = HB_FRAME_IDR; break; case X265_TYPE_P: + buf->s.flags |= HB_FLAG_FRAMETYPE_REF; buf->s.frametype = HB_FRAME_P; break; case X265_TYPE_B: buf->s.frametype = HB_FRAME_B; break; case X265_TYPE_BREF: + buf->s.flags |= HB_FLAG_FRAMETYPE_REF; buf->s.frametype = HB_FRAME_BREF; break; case X265_TYPE_I: default: + buf->s.flags |= HB_FLAG_FRAMETYPE_REF; buf->s.frametype = HB_FRAME_I; break; } diff --git a/libhb/muxavformat.c b/libhb/muxavformat.c index f85336dfe..62a270d5b 100644 --- a/libhb/muxavformat.c +++ b/libhb/muxavformat.c @@ -1197,6 +1197,10 @@ static int avformatMux(hb_mux_object_t *m, hb_mux_data_t *track, hb_buffer_t *bu { pkt.flags |= AV_PKT_FLAG_KEY; } + if (!(buf->s.flags & HB_FLAG_FRAMETYPE_REF)) + { + pkt.flags |= AV_PKT_FLAG_DISPOSABLE; + } } else if (buf->s.frametype & HB_FRAME_MASK_KEY) {