Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: This patch adds support for DTS:X Profile 2 audio in MP4 files. #1303

Merged
merged 8 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions packager/app/test/packager_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,13 @@ def testAacHe(self):
self._GetFlags(output_dash=True))
self._CheckTestResults('acc-he')

def testDtsx(self):
  """Packages the audio stream of bear-dtsx.mp4 with DASH output enabled and
  checks the results recorded under 'dtsx-dash'."""
  streams = self._GetStreams(['audio'], test_files=['bear-dtsx.mp4'])
  flags = self._GetFlags(output_dash=True)
  self.assertPackageSuccess(streams, flags)
  self._CheckTestResults('dtsx-dash')

def testVideoAudioWebVTT(self):
audio_video_streams = self._GetStreams(['audio', 'video'])
text_stream = self._GetStreams(['text'], test_files=['bear-english.vtt'])
Expand Down
Binary file not shown.
15 changes: 15 additions & 0 deletions packager/app/test/testdata/dtsx-dash/output.mpd
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>-->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" minBufferTime="PT2S" type="static" mediaPresentationDuration="PT3.114667S">
<Period id="0">
<AdaptationSet id="0" contentType="audio" subsegmentAlignment="true">
<Representation id="0" bandwidth="227665" codecs="dtsx" mimeType="audio/mp4" audioSamplingRate="48000">
<AudioChannelConfiguration schemeIdUri="tag:dts.com,2018:uhd:audio_channel_configuration" value="0000003F"/>
<BaseURL>bear-dtsx-audio.mp4</BaseURL>
<SegmentBase indexRange="742-821" timescale="48000">
<Initialization range="0-741"/>
</SegmentBase>
</Representation>
</AdaptationSet>
</Period>
</MPD>
2 changes: 2 additions & 0 deletions packager/media/base/audio_stream_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ std::string AudioStreamInfo::GetCodecString(Codec codec,
return "dts-";
case kCodecDTSP:
return "dts+";
case kCodecDTSX:
return "dtsx";
case kCodecEAC3:
return "ec-3";
case kCodecAC4:
Expand Down
6 changes: 4 additions & 2 deletions packager/media/base/fourccs.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ enum FourCC : uint32_t {
FOURCC_dtsl = 0x6474736c,
FOURCC_dtsm = 0x6474732d, // "dts-"
FOURCC_dtsp = 0x6474732b, // "dts+"
FOURCC_dtsx = 0x64747378, // "dtsx"
FOURCC_dvcC = 0x64766343,
FOURCC_dvh1 = 0x64766831,
FOURCC_dvhe = 0x64766865,
Expand Down Expand Up @@ -151,8 +152,9 @@ enum FourCC : uint32_t {
FOURCC_trex = 0x74726578,
FOURCC_trun = 0x7472756e,
FOURCC_udta = 0x75647461,
FOURCC_url = 0x75726c20, // "url "
FOURCC_urn = 0x75726e20, // "urn "
FOURCC_udts = 0x75647473, // "udts"
FOURCC_url = 0x75726c20, // "url "
FOURCC_urn = 0x75726e20, // "urn "
FOURCC_uuid = 0x75756964,
FOURCC_vide = 0x76696465,
FOURCC_vlab = 0x766c6162,
Expand Down
1 change: 1 addition & 0 deletions packager/media/base/stream_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ enum Codec {
kCodecDTSL,
kCodecDTSM,
kCodecDTSP,
kCodecDTSX,
kCodecEAC3,
kCodecFlac,
kCodecOpus,
Expand Down
1 change: 1 addition & 0 deletions packager/media/codecs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ add_library(media_codecs STATIC
avc_decoder_configuration_record.cc
decoder_configuration_record.cc
dovi_decoder_configuration_record.cc
dts_audio_specific_config.cc
ec3_audio_util.cc
ac4_audio_util.cc
es_descriptor.cc
Expand Down
28 changes: 28 additions & 0 deletions packager/media/codecs/dts_audio_specific_config.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright (c) 2023 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <packager/media/codecs/dts_audio_specific_config.h>

#include <packager/media/base/bit_reader.h>
#include <packager/media/base/rcheck.h>

namespace shaka {
namespace media {

// Extracts the 32-bit ChannelMask field from |udts| and stores it in |mask|.
// Returns true once both bit-reader operations have succeeded. RCHECK is
// presumably an early "return false" on failure (e.g. when |udts| holds fewer
// than 6 bytes) -- confirm against rcheck.h.
bool GetDTSXChannelMask(const std::vector<uint8_t>& udts, uint32_t& mask) {
  // udts is the DTS-UHD Specific Box: ETSI TS 103 491 V1.2.1 Table B-2
  // DecoderProfileCode(6 bits)
  // FrameDurationCode(2 bits)
  // MaxPayloadCode(3 bits)
  // NumPresentationsCode(5 bits)
  // ChannelMask (32 bits)
  BitReader bit_reader(udts.data(), udts.size());
  // The four fields listed above (6 + 2 + 3 + 5 = 16 bits) precede
  // ChannelMask, so they are skipped without being interpreted.
  RCHECK(bit_reader.SkipBits(16));
  // The next 32 bits are read directly into the caller's |mask|.
  RCHECK(bit_reader.ReadBits(32, &mask));
  return true;
}

} // namespace media
} // namespace shaka
24 changes: 24 additions & 0 deletions packager/media/codecs/dts_audio_specific_config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright (c) 2023 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef PACKAGER_MEDIA_CODECS_DTS_AUDIO_SPECIFIC_CONFIG_H_
#define PACKAGER_MEDIA_CODECS_DTS_AUDIO_SPECIFIC_CONFIG_H_

#include <stddef.h>
#include <stdint.h>

#include <vector>

namespace shaka {
namespace media {

// NOTE(review): no declaration in this header refers to BitReader, so this
// forward declaration looks unused here -- confirm before removing.
class BitReader;

// Reads the DTS:X channel mask out of |udts|, the payload of the DTS-UHD
// Specific Box ('udts').
// @param udts raw bytes of the box payload.
// @param mask receives the 32-bit ChannelMask on success.
// @return true if the mask was read; false when |udts| is too short to hold
//         the 16-bit header plus the 32-bit mask.
bool GetDTSXChannelMask(const std::vector<uint8_t>& udts, uint32_t& mask);

} // namespace media
} // namespace shaka

#endif // PACKAGER_MEDIA_CODECS_DTS_AUDIO_SPECIFIC_CONFIG_H_
37 changes: 37 additions & 0 deletions packager/media/codecs/dts_audio_specific_config_unittest.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2023 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <gtest/gtest.h>

#include "packager/media/codecs/dts_audio_specific_config.h"

namespace shaka {
namespace media {

// A channel mask that only occupies the low byte (0x3F) must be returned
// unchanged from the four bytes that follow the 16-bit header.
TEST(DTSAudioSpecificConfigTest, BasicProfileTest) {
  const std::vector<uint8_t> data = {0x01, 0x20, 0x00, 0x00,
                                     0x00, 0x3F, 0x80, 0x00};
  // Zero-initialized so that a failed parse cannot make the comparison below
  // read an indeterminate value.
  uint32_t mask = 0;
  EXPECT_TRUE(GetDTSXChannelMask(data, mask));
  // Unsigned literal: |mask| is uint32_t, so this keeps EXPECT_EQ from
  // comparing a signed value with an unsigned one.
  EXPECT_EQ(0x3Fu, mask);
}

// All four mask bytes must be assembled most-significant byte first:
// 0x12 0x34 0x56 0x78 -> 0x12345678.
TEST(DTSAudioSpecificConfigTest, ChannelMaskBytes) {
  const std::vector<uint8_t> data = {0x01, 0x20, 0x12, 0x34,
                                     0x56, 0x78, 0x80, 0x00};
  // Zero-initialized so that a failed parse cannot make the comparison below
  // read an indeterminate value.
  uint32_t mask = 0;
  EXPECT_TRUE(GetDTSXChannelMask(data, mask));
  // Unsigned literal to match the uint32_t on the other side of EXPECT_EQ.
  EXPECT_EQ(0x12345678u, mask);
}

// Five bytes leave only 24 bits after the 16-bit header, which is not enough
// for the 32-bit channel mask, so parsing has to report failure.
TEST(DTSAudioSpecificConfigTest, Truncated) {
  const std::vector<uint8_t> data = {0x01, 0x20, 0x00, 0x00, 0x00};
  uint32_t mask;
  EXPECT_FALSE(GetDTSXChannelMask(data, mask));
}

} // namespace media
} // namespace shaka
11 changes: 11 additions & 0 deletions packager/media/event/muxer_listener_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <packager/media/base/text_stream_info.h>
#include <packager/media/base/video_stream_info.h>
#include <packager/media/codecs/ac4_audio_util.h>
#include <packager/media/codecs/dts_audio_specific_config.h>
#include <packager/media/codecs/ec3_audio_util.h>
#include <packager/mpd/base/media_info.pb.h>
#include <packager/utils/bytes_to_string_view.h>
Expand Down Expand Up @@ -165,6 +166,16 @@ void AddAudioInfo(const AudioStreamInfo* audio_stream_info,
codec_data->set_ac4_ims_flag(ac4_ims_flag);
codec_data->set_ac4_cbi_flag(ac4_cbi_flag);
}

if (audio_stream_info->codec() == kCodecDTSX) {
auto* codec_data = audio_info->mutable_codec_specific_data();
uint32_t channel_mask;
if (!GetDTSXChannelMask(codec_config, channel_mask)) {
LOG(ERROR) << "Failed to parse DTSX channel mask.";
return;
}
codec_data->set_channel_mask(channel_mask);
}
}

void AddTextInfo(const TextStreamInfo& text_stream_info,
Expand Down
18 changes: 18 additions & 0 deletions packager/media/event/muxer_listener_internal_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,24 @@ TEST_F(MuxerListenerInternalVideoStreamTest, TransferCharacteristics) {
EXPECT_EQ(18u, media_info.video_info().transfer_characteristics());
}

// Fixture used by the audio-stream test cases below. It adds no members of
// its own on top of MuxerListenerInternalTest.
class MuxerListenerInternalAudioStreamTest : public MuxerListenerInternalTest {
};

// AddAudioInfo function should parse the channel mask out of the DTS:X codec
// config and expose it through the audio info's codec specific data.
TEST_F(MuxerListenerInternalAudioStreamTest, DTSX) {
  MediaInfo media_info;
  std::shared_ptr<AudioStreamInfo> audio_info = CreateAudioStreamInfo(
      GetAudioStreamInfoParams(kCodecDTSX, "dtsx",
                               {0x01, 0x20, 0x00, 0x00, 0x0, 0x3F, 0x80,
                                0x00}));  // Channel mask = 3F
  ASSERT_TRUE(GenerateMediaInfo(MuxerOptions(), *audio_info,
                                kReferenceTimeScale,
                                MuxerListener::kContainerMp4, &media_info));
  // Read through the const accessors (as the video test above does) so the
  // check itself never creates sub-messages, and compare against an unsigned
  // literal to match the unsigned channel mask.
  EXPECT_EQ(0x3Fu,
            media_info.audio_info().codec_specific_data().channel_mask());
}

} // namespace internal
} // namespace media
} // namespace shaka
48 changes: 48 additions & 0 deletions packager/media/event/muxer_listener_test_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,5 +104,53 @@ std::vector<ProtectionSystemSpecificInfo> GetDefaultKeySystemInfo() {
std::end(kExpectedDefaultPsshBox) - 1}}};
}

// Out-of-line special members; neither does any work beyond what the
// compiler generates.
AudioStreamInfoParameters::AudioStreamInfoParameters() = default;
AudioStreamInfoParameters::~AudioStreamInfoParameters() = default;

// Builds an AudioStreamInfo from the fields of |param|, forwarding each field
// to the AudioStreamInfo constructor in the order that constructor expects.
std::shared_ptr<AudioStreamInfo> CreateAudioStreamInfo(
    const AudioStreamInfoParameters& param) {
  // The codec config is handed over as a (pointer, length) pair.
  const std::vector<uint8_t>& config = param.codec_config;
  return std::make_shared<AudioStreamInfo>(
      param.track_id,
      param.time_scale,
      param.duration,
      param.codec,
      param.codec_string,
      config.data(),
      config.size(),
      param.sample_bits,
      param.num_channels,
      param.sampling_frequency,
      param.seek_preroll_ns,
      param.codec_delay_ns,
      param.max_bitrate,
      param.avg_bitrate,
      param.language,
      param.is_encrypted);
}

// Returns audio stream parameters for tests: the codec, codec string and codec
// config come from the caller, every other field gets a fixed test value.
AudioStreamInfoParameters GetAudioStreamInfoParams(
    Codec codec,
    const char* codec_string,
    const std::vector<uint8_t>& codec_config) {
  AudioStreamInfoParameters result;

  // Caller-supplied codec description.
  result.codec = codec;
  result.codec_string = codec_string;
  result.codec_config = codec_config;

  // Track identity and timing.
  result.track_id = 0;
  result.time_scale = 10;
  result.duration = 200;
  result.language = "und";  // Undefined language.

  // Audio format.
  result.sample_bits = 16;
  result.num_channels = 6;
  result.sampling_frequency = 48000;
  result.seek_preroll_ns = 0;
  result.codec_delay_ns = 0;

  // Bitrates and encryption.
  result.max_bitrate = 0;
  result.avg_bitrate = 0;
  result.is_encrypted = false;

  return result;
}

} // namespace media
} // namespace shaka
34 changes: 34 additions & 0 deletions packager/media/event/muxer_listener_test_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <cstdint>
#include <vector>

#include "packager/media/base/audio_stream_info.h"
#include <packager/media/base/key_source.h>
#include <packager/media/base/muxer_options.h>
#include <packager/media/base/stream_info.h>
Expand Down Expand Up @@ -93,6 +94,29 @@ struct VideoStreamInfoParameters {
bool is_encrypted;
};

// Struct that gets passed to CreateAudioStreamInfo() to create an
// AudioStreamInfo instance. Useful for generating multiple AudioStreamInfo
// with slightly different parameters.
//
// Every scalar member has a default member initializer, so a
// default-constructed instance never holds indeterminate values even when a
// caller fills in only some of the fields.
struct AudioStreamInfoParameters {
  AudioStreamInfoParameters();
  ~AudioStreamInfoParameters();
  int track_id = 0;
  int32_t time_scale = 0;
  int64_t duration = 0;
  Codec codec{};  // Value-initialized, i.e. the enumerator with value 0.
  std::string codec_string;
  std::vector<uint8_t> codec_config;
  uint8_t sample_bits = 0;
  uint8_t num_channels = 0;
  uint32_t sampling_frequency = 0;
  uint64_t seek_preroll_ns = 0;
  uint64_t codec_delay_ns = 0;
  uint32_t max_bitrate = 0;
  uint32_t avg_bitrate = 0;
  std::string language;
  bool is_encrypted = false;
};

struct OnNewSegmentParameters {
std::string file_name;
int64_t start_time;
Expand All @@ -113,6 +137,16 @@ std::shared_ptr<VideoStreamInfo> CreateVideoStreamInfo(
// Returns the "default" VideoStreamInfoParameters for testing.
VideoStreamInfoParameters GetDefaultVideoStreamInfoParams();

// Creates StreamInfo instance from AudioStreamInfoParameters.
std::shared_ptr<AudioStreamInfo> CreateAudioStreamInfo(
const AudioStreamInfoParameters& param);

// Returns the "default" configuration for testing given codec and parameters.
AudioStreamInfoParameters GetAudioStreamInfoParams(
Codec codec,
const char* codec_string,
const std::vector<uint8_t>& codec_config);

// Returns the "default" values for OnMediaEnd().
OnMediaEndParameters GetDefaultOnMediaEndParams();

Expand Down
24 changes: 23 additions & 1 deletion packager/media/formats/mp4/box_definitions.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1811,6 +1811,27 @@ size_t DTSSpecific::ComputeSizeInternal() {
sizeof(kDdtsExtraData);
}

// UDTSSpecific ('udts') keeps its payload as an uninterpreted byte vector.
UDTSSpecific::UDTSSpecific() = default;
UDTSSpecific::~UDTSSpecific() = default;

FourCC UDTSSpecific::BoxType() const {
  return FOURCC_udts;
}

// Processes the box header and then the payload bytes. When reading, all
// bytes left in |buffer| become the payload; otherwise exactly data.size()
// bytes are processed.
bool UDTSSpecific::ReadWriteInternal(BoxBuffer* buffer) {
  RCHECK(ReadWriteHeaderInternal(buffer) &&
         buffer->ReadWriteVector(
             &data, buffer->Reading() ? buffer->BytesLeft() : data.size()));
  return true;
}

// Returns the box size: header plus payload, or 0 when there is no payload.
size_t UDTSSpecific::ComputeSizeInternal() {
  // This box is optional. A size of 0 skips it if it was never initialized.
  return data.empty() ? 0 : HeaderSize() + data.size();
}

AC3Specific::AC3Specific() = default;
AC3Specific::~AC3Specific() = default;

Expand Down Expand Up @@ -1983,6 +2004,7 @@ bool AudioSampleEntry::ReadWriteInternal(BoxBuffer* buffer) {

RCHECK(buffer->TryReadWriteChild(&esds));
RCHECK(buffer->TryReadWriteChild(&ddts));
RCHECK(buffer->TryReadWriteChild(&udts));
RCHECK(buffer->TryReadWriteChild(&dac3));
RCHECK(buffer->TryReadWriteChild(&dec3));
RCHECK(buffer->TryReadWriteChild(&dac4));
Expand Down Expand Up @@ -2014,7 +2036,7 @@ size_t AudioSampleEntry::ComputeSizeInternal() {
sizeof(samplesize) + sizeof(samplerate) + sinf.ComputeSize() +
esds.ComputeSize() + ddts.ComputeSize() + dac3.ComputeSize() +
dec3.ComputeSize() + dops.ComputeSize() + dfla.ComputeSize() +
dac4.ComputeSize() + mhac.ComputeSize() +
dac4.ComputeSize() + mhac.ComputeSize() + udts.ComputeSize() +
// Reserved and predefined bytes.
6 + 8 + // 6 + 8 bytes reserved.
4; // 4 bytes predefined.
Expand Down
7 changes: 7 additions & 0 deletions packager/media/formats/mp4/box_definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,12 @@ struct DTSSpecific : Box {
std::vector<uint8_t> extra_data;
};

// Box type 'udts'. The payload is not interpreted by the box itself; it is
// carried as raw bytes.
struct UDTSSpecific : Box {
  DECLARE_BOX_METHODS(UDTSSpecific);

  // Raw payload bytes of the box (everything after the box header).
  std::vector<uint8_t> data;
};

struct AC3Specific : Box {
DECLARE_BOX_METHODS(AC3Specific);

Expand Down Expand Up @@ -396,6 +402,7 @@ struct AudioSampleEntry : Box {

ElementaryStreamDescriptor esds;
DTSSpecific ddts;
UDTSSpecific udts;
AC3Specific dac3;
EC3Specific dec3;
AC4Specific dac4;
Expand Down
Loading
Loading