Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V1.1 test vectors #120

Merged
merged 10 commits into from
Sep 3, 2024
Merged
  •  
  •  
  •  
25 changes: 23 additions & 2 deletions proto/audio_element.proto
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,28 @@ enum LoudspeakerLayout {
LOUDSPEAKER_LAYOUT_7_1_4_CH = 8;
LOUDSPEAKER_LAYOUT_3_1_2_CH = 9;
LOUDSPEAKER_LAYOUT_BINAURAL = 10;
LOUDSPEAKER_LAYOUT_RESERVED_BEGIN = 11;
LOUDSPEAKER_LAYOUT_RESERVED_END = 16;
LOUDSPEAKER_LAYOUT_RESERVED_10 = 11;
LOUDSPEAKER_LAYOUT_RESERVED_14 = 15;
LOUDSPEAKER_LAYOUT_EXPANDED = 16;
}

// Expanded loudspeaker layouts. Used as the type of
// `ChannelAudioLayerConfig.expanded_loudspeaker_layout`.
//
// NOTE(review): the two `RESERVED_n` values below are numbered `n + 1`
// (`RESERVED_13 = 14`, `RESERVED_255 = 256`), presumably because 0 is taken by
// `INVALID` so every enum number is shifted by one relative to the value it
// stands for — confirm against the bitstream mapping.
enum ExpandedLoudspeakerLayout {
// First-listed value, numbered 0.
EXPANDED_LOUDSPEAKER_LAYOUT_INVALID = 0;
EXPANDED_LOUDSPEAKER_LAYOUT_LFE = 1;
EXPANDED_LOUDSPEAKER_LAYOUT_STEREO_S = 2;
EXPANDED_LOUDSPEAKER_LAYOUT_STEREO_SS = 3;
EXPANDED_LOUDSPEAKER_LAYOUT_STEREO_RS = 4;
EXPANDED_LOUDSPEAKER_LAYOUT_STEREO_TF = 5;
EXPANDED_LOUDSPEAKER_LAYOUT_STEREO_TB = 6;
EXPANDED_LOUDSPEAKER_LAYOUT_TOP_4_CH = 7;
EXPANDED_LOUDSPEAKER_LAYOUT_3_0_CH = 8;
EXPANDED_LOUDSPEAKER_LAYOUT_9_1_6_CH = 9;
EXPANDED_LOUDSPEAKER_LAYOUT_STEREO_F = 10;
EXPANDED_LOUDSPEAKER_LAYOUT_STEREO_SI = 11;
EXPANDED_LOUDSPEAKER_LAYOUT_STEREO_TP_SI = 12;
EXPANDED_LOUDSPEAKER_LAYOUT_TOP_6_CH = 13;
// NOTE(review): presumably the first and last entries of a reserved range;
// intermediate values are not enumerated here.
EXPANDED_LOUDSPEAKER_LAYOUT_RESERVED_13 = 14;
EXPANDED_LOUDSPEAKER_LAYOUT_RESERVED_255 = 256;
}

message ChannelAudioLayerConfig {
Expand All @@ -65,6 +85,7 @@ message ChannelAudioLayerConfig {
optional uint32 output_gain_flag = 7;
optional uint32 reserved_b = 8;
optional int32 output_gain = 9;
optional ExpandedLoudspeakerLayout expanded_loudspeaker_layout = 11;
}

message ScalableChannelLayoutConfig {
Expand Down
12 changes: 11 additions & 1 deletion proto/codec_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,20 @@ enum OpusApplicationFlag {

// Settings to configure `libopus`.
message OpusEncoderMetadata {
optional int32 target_bitrate_per_channel = 1;
optional OpusApplicationFlag application = 2;
// Defaults to true when unset.
optional bool use_float_api = 3 [default = true];

// Fields to control the bitrate.
//
// - If the substream is present in `substream_id_to_bitrate_override`, the
// bitrate override is used.
// - Otherwise a bitrate is calculated based on the number of channels.
// - One channel: `target_bitrate_per_channel`.
// - Two channels: `target_bitrate_per_channel * 2 *
// coupling_rate_adjustment`.
optional int32 target_bitrate_per_channel = 1;
// Multiplier applied to the two-channel bitrate (see above). Defaults to 1.0,
// i.e. exactly twice the per-channel target.
optional float coupling_rate_adjustment = 4 [default = 1.0];
// Keyed by substream ID; the value replaces the calculated bitrate for that
// substream. NOTE(review): bitrate units are not stated here — confirm.
map<uint32, int32> substream_id_to_bitrate_override = 5;
}

message OpusDecoderConfig {
Expand Down
1 change: 1 addition & 0 deletions proto/ia_sequence_header.proto
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ enum ProfileVersion {
PROFILE_VERSION_SIMPLE = 1;
PROFILE_VERSION_BASE = 2;
PROFILE_VERSION_BASE_ENHANCED = 3;
PROFILE_VERSION_RESERVED_255 = 256;
}

message IASequenceHeaderObuMetadata {
Expand Down
55 changes: 49 additions & 6 deletions proto/mix_presentation.proto
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,20 @@ import "obu_header.proto";
import "param_definitions.proto";

// Deprecated wrapper around a mix presentation's friendly label.
//
// The field of this type, `mix_presentation_annotations_array` in
// `MixPresentationObuMetadata`, is superseded by
// `localized_presentation_annotations`.
message MixPresentationAnnotations {
option deprecated = true;

optional string mix_presentation_friendly_label = 1;
}

// Deprecated wrapper around an audio element's friendly label.
//
// The field of this type, `mix_presentation_element_annotations_array` in
// `SubMixAudioElement`, is superseded by `localized_element_annotations`.
message MixPresentationElementAnnotations {
option deprecated = true;

optional string audio_element_friendly_label = 1;
}

// Deprecated wrapper around an element's mix gain.
//
// The field of this type, `SubMixAudioElement.element_mix_config`, is
// superseded by `element_mix_gain`, which holds the `MixGainParamDefinition`
// directly.
message ElementMixConfig {
option deprecated = true;

optional MixGainParamDefinition mix_gain = 1;
}

Expand All @@ -45,13 +51,21 @@ message RenderingConfig {
// An audio element that participates in a sub-mix, identified by
// `audio_element_id`, together with its annotations, rendering config and
// mix gain.
message SubMixAudioElement {
optional uint32 audio_element_id = 1;
reserved 2;
// Superseded by `localized_element_annotations`.
repeated MixPresentationElementAnnotations
mix_presentation_element_annotations_array = 4;
mix_presentation_element_annotations_array = 4 [deprecated = true];
// Length should be equal to `count_label` in the OBU.
repeated string localized_element_annotations = 6;

optional RenderingConfig rendering_config = 5;
optional ElementMixConfig element_mix_config = 3;
// Superseded by `element_mix_gain`.
optional ElementMixConfig element_mix_config = 3 [deprecated = true];
// Replacement for the deprecated `element_mix_config`; holds the gain
// definition directly rather than inside a wrapper message.
optional MixGainParamDefinition element_mix_gain = 7;
}

// Deprecated wrapper around a sub-mix's output mix gain.
//
// The field of this type, `MixPresentationSubMix.output_mix_config`, is
// superseded by `output_mix_gain`, which holds the `MixGainParamDefinition`
// directly.
message OutputMixConfig {
option deprecated = true;

optional MixGainParamDefinition output_mix_gain = 1;
}

Expand Down Expand Up @@ -147,18 +161,47 @@ message MixPresentationLayout {
// One sub-mix of a mix presentation: its audio elements, output mix gain and
// layouts.
message MixPresentationSubMix {
// NOTE(review): presumably the number of entries in `audio_elements` —
// confirm.
optional uint32 num_audio_elements = 1;
repeated SubMixAudioElement audio_elements = 2;
optional OutputMixConfig output_mix_config = 3;

// Superseded by `output_mix_gain`.
optional OutputMixConfig output_mix_config = 3 [deprecated = true];
// Replacement for the deprecated `output_mix_config`; holds the gain
// definition directly rather than inside a wrapper message.
optional MixGainParamDefinition output_mix_gain = 6;

// NOTE(review): presumably the number of entries in `layouts` — confirm.
optional uint32 num_layouts = 4;
repeated MixPresentationLayout layouts = 5;
}

// A single name/value pair. Collected in `MixPresentationTags.tags`.
message MixPresentationTag {
// Name (key) of the tag.
optional string tag_name = 1;
// Value associated with `tag_name`.
optional string tag_value = 2;
}

// Collection of tags for a mix presentation. Held in
// `MixPresentationObuMetadata.mix_presentation_tags`, which is examined only
// when `include_mix_presentation_tags` is true.
message MixPresentationTags {
// NOTE(review): presumably the number of entries in `tags` — confirm.
optional uint32 num_tags = 1;
repeated MixPresentationTag tags = 2;
}

// Top-level metadata for a mix presentation: its ID, localized annotations,
// sub-mixes, optional tags and OBU header.
message MixPresentationObuMetadata {
reserved 2;
optional uint32 mix_presentation_id = 1;
// Expected length of the annotation lists below (`annotations_language` and
// `localized_presentation_annotations`).
optional uint32 count_label = 5;
repeated string language_labels = 6;
reserved 2;
repeated MixPresentationAnnotations mix_presentation_annotations_array = 7;

// Superseded by `annotations_language`.
repeated string language_labels = 6 [deprecated = true];
// Length should be equal to `count_label`.
repeated string annotations_language = 9;

// Superseded by `localized_presentation_annotations`.
repeated MixPresentationAnnotations mix_presentation_annotations_array = 7
[deprecated = true];
// Length should be equal to `count_label`.
repeated string localized_presentation_annotations = 10;

// NOTE(review): presumably the number of entries in `sub_mixes` — confirm.
optional uint32 num_sub_mixes = 3;
repeated MixPresentationSubMix sub_mixes = 4;

// When true, `mix_presentation_tags` will be examined.
optional bool include_mix_presentation_tags = 11;
optional MixPresentationTags mix_presentation_tags = 12;

optional ObuHeaderMetadata obu_header = 8;
}
27 changes: 27 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,33 @@ Title | Summary
`sine_1000_48kHz_512ms.wav` | Sine wave. | 2 | 48kHz | pcm_s16le | 512ms
`sine_1000_48kHz.wav` | Sine wave. | 2 | 48kHz | pcm_s16le | 500ms
`sine_1000_4oa_48kHz.wav` | Sine wave using fourth-order ambisonics. | 25 | 48kHz | pcm_s16le | 5000ms
`sine_1500_stereo_48khz_-15dBFS.wav` | Sine wave at -15dBFS. | 2 | 48kHz | pcm_s16le | 5000ms
`stereo_8_samples_48khz_s16le.wav` | Tiny test file. The first channel encodes 1, 2, ... 8. The second channel encodes 65535, 65534, ... 65528. | 2 | 48kHz | pcm_s16le | 8 samples
`stereo_8_samples_48khz_s24le.wav` | Tiny test file. The first channel encodes 1, 2, ... 8. The second channel encodes 16777216, 16777215, ... 16777209. | 2 | 48kHz | pcm_s24le | 8 samples
`Transport_TOA_5s.wav` | Short clip of vehicles driving by using third-order ambisonics. | 16 | 48kHz | pcm_s16le | 5s
`Transport_9.1.6_5s.wav` | Short clip of vehicles driving by using 9.1.6. | 16 | 48kHz | pcm_s16le | 5s

# Output WAV files

Output wav files are based on the
[layout](https://aomediacodec.github.io/iamf/#syntax-layout) in the mix
presentation. Typically the ordering of channels is based on the related
[ITU-2051-3](https://www.itu.int/rec/R-REC-BS.2051) layout.

Mix Presentation Layout | Channel Order Convention | Channel Order
----------------------- | ------------------------ | -------------
Sound System A (0+2+0) | ITU-2051-3 | L, R
Sound System B (0+5+0) | ITU-2051-3 | L, R, C, LFE, Ls, Rs
Sound System C (2+5+0) | ITU-2051-3 | L, R, C, LFE, Ls, Rs, Ltf, Rtf
Sound System D (4+5+0) | ITU-2051-3 | L, R, C, LFE, Ls, Rs, Ltf, Rtf, Ltr, Rtr
Sound System E (4+5+1) | ITU-2051-3 | L, R, C, LFE, Ls, Rs, Ltf, Rtf, Ltr, Rtr, Cbf
Sound System F (3+7+0) | ITU-2051-3 | C, L, R, LH, RH, LS, LB, RB, CH, LFE1, LFE2
Sound System G (4+9+0) | ITU-2051-3 | L, R, C, LFE, Lss, Rss, Lrs, Rrs, Ltf, Rtf, Ltb, Rtb, Lsc, Rsc
Sound System H (9+10+3) | ITU-2051-3 | FL, FR, FC, LFE1, BL, BR, FLc, FRc, BC, LFE2, SiL, SiR, TpFL, TpFR, TpFC, TpC, TpBL, TpBR, TpSiL, TpSiR, TpBC, BtFC, BtFL, BtFR
Sound System I (0+7+0) | ITU-2051-3 | L, R, C, LFE, Lss, Rss, Lrs, Rrs
Sound System J (4+7+0) | ITU-2051-3 | L, R, C, LFE, Lss, Rss, Lrs, Rrs, Ltf, Rtf, Ltb, Rtb
Sound System 10 | IAMF | L7, R7, C, LFE, Lss7, Rss7, Lrs7, Rrs7, Ltf2, Rtf2
Sound System 11 | IAMF | L3, R3, C, LFE, Ltf3, Rtf3
Sound System 12 | IAMF | C
Sound System 13 | IAMF | FL, FR, FC, LFE, BL, BR, FLc, FRc, SiL, SiR, TpFL, TpFR, TpBL, TpBR, TpSiL, TpSiR
Binaural Layout | IAMF | L2, R2
Binary file added tests/Transport_9.1.6_5s.wav
Binary file not shown.
Loading