diff --git a/code/include/IAMF_defines.h b/code/include/IAMF_defines.h index d4584f54..f6c122be 100755 --- a/code/include/IAMF_defines.h +++ b/code/include/IAMF_defines.h @@ -58,7 +58,7 @@ typedef enum IAMF_SoundSystem { SOUND_SYSTEM_J, // 4+7+0, 1 SOUND_SYSTEM_EXT_712, // 2+7+0, 1 SOUND_SYSTEM_EXT_312, // 2+3+0, 1 - SOUND_SYSTEM_MONO, // 0+1+0, 1 + SOUND_SYSTEM_MONO, // 0+1+0, 0 SOUND_SYSTEM_END } IAMF_SoundSystem; diff --git a/code/src/common/audio_defines.h b/code/src/common/audio_defines.h index 1fe14478..185ace07 100755 --- a/code/src/common/audio_defines.h +++ b/code/src/common/audio_defines.h @@ -25,20 +25,6 @@ #define LIMITER_ReleaseSec 0.200f #define LIMITER_LookAhead 240 -typedef enum { - CHANNELUNKNOWN = 0, - CHANNELMONO, - CHANNELSTEREO, - CHANNEL51, - CHANNEL512, - CHANNEL514, - CHANNEL71, - CHANNEL712, - CHANNEL714, - CHANNEL312, - CHANNELBINAURAL -} channelLayout; - #define MAX_CHANNELS 12 #define MAX_OUTPUT_CHANNELS 24 #define MAX_DELAYSIZE 4096 diff --git a/code/src/iamf_dec/IAMF_OBU.c b/code/src/iamf_dec/IAMF_OBU.c index b3c5b449..17733253 100755 --- a/code/src/iamf_dec/IAMF_OBU.c +++ b/code/src/iamf_dec/IAMF_OBU.c @@ -254,6 +254,10 @@ static int _valid_profile(uint8_t primary, uint8_t addional) { IAMF_Version *iamf_version_new(IAMF_OBU *obu) { IAMF_Version *ver = 0; BitStream b; + union { + uint32_t _id; + uint8_t _4cc[4]; + } code = {._4cc = {'i', 'a', 'm', 'f'}}; ver = IAMF_MALLOCZ(IAMF_Version, 1); if (!ver) { @@ -273,6 +277,12 @@ IAMF_Version *iamf_version_new(IAMF_OBU *obu) { "%u.", (char *)&ver->iamf_code, ver->primary_profile, ver->additional_profile); + if (ver->iamf_code != code._id) { + ia_loge("ia sequence header object: Invalid iamf code %.4s.", + (char *)&ver->iamf_code); + goto version_fail; + } + if (!_valid_profile(ver->primary_profile, ver->additional_profile)) { ia_loge( "ia sequence header object: Invalid primary profile %u or additional " @@ -296,6 +306,18 @@ static int _valid_codec(uint32_t codec) { return iamf_codec_check(iamf_codec_4cc_get_codecID(codec)); } +#define OPUS_VERSION_MAX 15 +static int _valid_decoder_config(uint32_t codec, uint8_t *conf, size_t size) { + if (iamf_codec_4cc_get_codecID(codec) == IAMF_CODEC_OPUS) { + if (conf[0] > OPUS_VERSION_MAX) { + ia_logw("opus config invalid: version %u should less than %u.", conf[0], + OPUS_VERSION_MAX); + return 0; + } + } + return 1; +} + IAMF_CodecConf *iamf_codec_conf_new(IAMF_OBU *obu) { IAMF_CodecConf *conf = 0; BitStream b; @@ -335,6 +357,12 @@ IAMF_CodecConf *iamf_codec_conf_new(IAMF_OBU *obu) { goto codec_conf_fail; } + if (!_valid_decoder_config(conf->codec_id, conf->decoder_conf, + conf->decoder_conf_size)) { + ia_logw("decoder config is invalid, codec: %.4s", (char *)&conf->codec_id); + goto codec_conf_fail; + } + #if SUPPORT_VERIFIER vlog_obu(IAMF_OBU_CODEC_CONFIG, conf, 0, 0); #endif diff --git a/code/src/iamf_dec/IAMF_decoder.c b/code/src/iamf_dec/IAMF_decoder.c index 0c4fbbf5..e1d83863 100755 --- a/code/src/iamf_dec/IAMF_decoder.c +++ b/code/src/iamf_dec/IAMF_decoder.c @@ -729,9 +729,15 @@ static int iamf_codec_conf_get_sampling_rate(IAMF_CodecConf *c) { static int sf[] = {96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350, 0, 0, 0}; - /* DecoderConfigDescriptor (14 bytes) + DecSpecificInfoTag (1 byte) */ - if (c->decoder_conf_size < 16) return IAMF_ERR_BAD_ARG; - bs(&b, c->decoder_conf + 15, c->decoder_conf_size - 15); + /* DecoderConfigDescriptor */ + bs(&b, c->decoder_conf, c->decoder_conf_size); + bs_get32b(&b, 8); + bs_getExpandableSize(&b); + bs_skipABytes(&b, 13); + + /* DecSpecificInfoTag */ + bs_get32b(&b, 8); + bs_getExpandableSize(&b); type = bs_get32b(&b, 5); if (type == 31) bs_get32b(&b, 6); diff --git a/code/src/iamf_dec/aac/IAMF_aac_decoder.c b/code/src/iamf_dec/aac/IAMF_aac_decoder.c index a5a55a13..af6b93d2 100755 --- a/code/src/iamf_dec/aac/IAMF_aac_decoder.c +++ b/code/src/iamf_dec/aac/IAMF_aac_decoder.c @@ -23,6 +23,7 @@ #include "IAMF_debug.h" #include "IAMF_types.h" #include "aac_multistream_decoder.h" +#include "bitstream.h" #ifdef IA_TAG #undef IA_TAG @@ -58,35 +59,28 @@ typedef struct IAMF_AAC_Context { static int iamf_aac_init(IAMF_CodecContext *ths) { IAMF_AAC_Context *ctx = (IAMF_AAC_Context *)ths->priv; uint8_t *config = ths->cspec; - int len = ths->clen; + BitStream b; int ret = 0; - if (!ths->cspec || ths->clen <= 0) { - return IAMF_ERR_BAD_ARG; - } + bs(&b, ths->cspec, ths->clen); + if (bs_get32b(&b, 8) != 0x04) return IAMF_ERR_BAD_ARG; + bs_getExpandableSize(&b); - int idx = 1; - if (config[idx] != 0x40 /* Audio ISO/IEC 14496-3 */ - || (config[idx + 1] >> 2 & 0x3f) != 5 /* AudioStream */ - || (config[idx + 1] >> 1 & 0x1) != 0) { /* upstream */ + if (bs_get32b(&b, 8) != 0x40 || bs_get32b(&b, 6) != 5 || bs_get32b(&b, 1)) return IAMF_ERR_BAD_ARG; - } - idx += 13; - if (config[idx] != 0x05) { - return IAMF_ERR_BAD_ARG; // MP4DecSpecificDescrTag - } - ++idx; - ths->cspec = &config[idx]; - ths->clen = len - idx; + bs_skipABytes(&b, 11); + + if (bs_get32b(&b, 8) != 0x05) return IAMF_ERR_BAD_ARG; + + ths->clen = bs_getExpandableSize(&b); + ths->cspec = config + bs_tell(&b); ia_logd("aac codec spec info size %d", ths->clen); ctx->dec = aac_multistream_decoder_open(ths->cspec, ths->clen, ths->streams, ths->coupled_streams, AUDIO_FRAME_PLANE, &ret); - if (!ctx->dec) { - return IAMF_ERR_INVALID_STATE; - } + if (!ctx->dec) return IAMF_ERR_INVALID_STATE; ctx->out = (short *)malloc(sizeof(short) * MAX_AAC_FRAME_SIZE * (ths->streams + ths->coupled_streams)); diff --git a/code/src/iamf_dec/bitstream.c b/code/src/iamf_dec/bitstream.c index 0a460814..04672c31 100755 --- a/code/src/iamf_dec/bitstream.c +++ b/code/src/iamf_dec/bitstream.c @@ -141,6 +141,23 @@ uint64_t bs_getAleb128(BitStream *b) { return ret; } +/// @brief Read descriptor size of ISO/IEC 14496-1. +uint32_t bs_getExpandableSize(BitStream *b) { + uint32_t ret = 0; + uint8_t byte; + + for (uint32_t i = 0; i < 4; i++) { + byte = b->data[b->b8sp + i]; + ret = (ret << 7) | (byte & 0x7f); + if (!(byte & 0x80)) { + b->b8sp += (i + 1); + break; + } + } + + return ret; +} + int32_t bs_read(BitStream *b, uint8_t *data, int n) { bs_align(b); if (data) memcpy(data, &b->data[b->b8sp], n); diff --git a/code/src/iamf_dec/bitstream.h b/code/src/iamf_dec/bitstream.h index f0f1719c..8a18342c 100755 --- a/code/src/iamf_dec/bitstream.h +++ b/code/src/iamf_dec/bitstream.h @@ -42,6 +42,7 @@ uint32_t bs_getA8b(BitStream *b); uint32_t bs_getA16b(BitStream *b); uint32_t bs_getA32b(BitStream *b); uint64_t bs_getAleb128(BitStream *b); +uint32_t bs_getExpandableSize(BitStream *b); int32_t bs_read(BitStream *b, uint8_t *data, int n); int32_t bs_readString(BitStream *b, char *data, int n); uint32_t bs_tell(BitStream *b); diff --git a/code/src/iamf_dec/h2m_rdr.c b/code/src/iamf_dec/h2m_rdr.c index 8679ef43..5bd144ab 100755 --- a/code/src/iamf_dec/h2m_rdr.c +++ b/code/src/iamf_dec/h2m_rdr.c @@ -14,7 +14,7 @@ AOM-IAMF Standard Deliverable Status: This software module is out of scope and not part of the IAMF Final Deliverable. */ - + /** * @file h2m_rdr.c * @brief HOA to Multichannels rendering. @@ -486,20 +486,17 @@ float soa_bs470[][9] = {{1.31381003e-01, 1.48448976e-01, -1.90212725e-01, 1.58493458e-01, -2.13843703e-01, 2.85769131e-04}}; float soa_iamf312[][9] = { - {2.327932e-01, 2.848056e-01, -2.581401e-02, 3.031344e-01, 3.715644e-01, - -3.044674e-02, 8.758713e-02, 1.078822e-03, -1.160062e-02}, // L - {2.327919e-01, -2.848050e-01, -2.582280e-02, 3.031372e-01, -3.715555e-01, - 3.046395e-02, 8.761200e-02, 1.071232e-03, -1.159248e-02}, // R - {1.02002565e-01, 1.16679273e-05, -1.32667792e-01, 1.91514833e-01, - 3.22324409e-05, -9.39024240e-06, 3.59275434e-02, -1.19192733e-01, - 2.13323852e-01}, // C - {4.54071480e-01, 4.40034892e-01, 8.54014466e-03, -4.09726522e-01, - -9.36552906e-02, 4.92427716e-03, 9.13601796e-02, 4.00474901e-03, - -8.49272007e-02}, // Hfl - {4.54065786e-01, -4.40013199e-01, 8.55303445e-03, -4.09744042e-01, - 9.36608249e-02, -4.92139383e-03, 9.13448355e-02, 4.01492543e-03, - -8.49451616e-02} // Hfr -}; + {4.726751e-01, 5.264195e-01, -1.779691e-01, -8.917566e-02, 2.168611e-01, + -1.274446e-01, 6.197195e-02, -1.513111e-01, -9.806416e-02}, + {4.726787e-01, -5.264100e-01, -1.779698e-01, -8.916373e-02, -2.168667e-01, + 1.274678e-01, 6.195992e-02, -1.513142e-01, -9.804430e-02}, + {1.020026e-01, 1.166793e-05, -1.326678e-01, 1.915148e-01, 3.223244e-05, + -9.390242e-06, 3.592754e-02, -1.191927e-01, 2.133239e-01}, + {1.148273e-01, 9.834126e-02, 2.237200e-01, 1.450837e-01, 1.251282e-01, + 1.420999e-01, 1.275855e-01, 2.195166e-01, 3.735070e-02}, + {1.148147e-01, -9.833212e-02, 2.237213e-01, 1.450532e-01, -1.251021e-01, + -1.421055e-01, 1.276224e-01, 2.195205e-01, 3.731617e-02}}; + float soa_iamf712[][9] = { {1.31381003e-01, 1.48448976e-01, -1.90212725e-01, 1.74601956e-01, 2.49201104e-01, -1.12860844e-01, 3.90171076e-02, -1.38956589e-01, @@ -930,26 +927,27 @@ float toa_bs470[][16] = { -3.35050564e-02, -9.10396767e-02, 1.32770538e-03, 2.85003118e-02}}; float toa_iamf312[][16] = { - {4.600323e-01, 5.123392e-01, -1.732089e-01, -8.679045e-02, 2.110607e-01, - -1.240358e-01, 6.031437e-02, -1.472639e-01, -9.544121e-02, 9.626710e-03, + {4.600323e-01, 5.123393e-01, -1.732089e-01, -8.679045e-02, 2.110607e-01, + -1.240358e-01, 6.031437e-02, -1.472639e-01, -9.544121e-02, 9.626709e-03, -1.475499e-01, -2.170848e-02, -1.757221e-02, -3.459537e-02, 5.974536e-03, -1.740531e-01}, {4.600358e-01, -5.123299e-01, -1.732095e-01, -8.677885e-02, -2.110661e-01, 1.240584e-01, 6.030266e-02, -1.472670e-01, -9.542189e-02, -9.640919e-03, 1.475400e-01, 2.169693e-02, -1.758859e-02, -3.460532e-02, 5.974916e-03, -1.740174e-01}, - {9.92742752e-02, 1.13558421e-05, -1.29119291e-01, 1.86392335e-01, - 3.13703112e-05, -9.13907907e-06, 3.49665798e-02, -1.16004653e-01, - 2.07618022e-01, 3.94596117e-05, -2.87941415e-05, 4.17936507e-06, - -6.57885105e-02, -1.79100086e-02, -8.36478901e-02, 2.21708712e-01}, - {4.41926308e-01, 4.28265161e-01, 8.31171912e-03, -3.98767457e-01, - -9.11502674e-02, 4.79256619e-03, 8.89165443e-02, 3.89763292e-03, - -8.26556301e-02, -1.64741393e-01, 7.34130565e-03, 4.55127066e-02, - 1.57174239e-03, -4.06651499e-02, -7.74208653e-03, -2.84149500e-02}, - {4.41920767e-01, -4.28244047e-01, 8.32426414e-03, -3.98784508e-01, - 9.11556537e-02, -4.78975999e-03, 8.89016107e-02, 3.90753714e-03, - -8.26731106e-02, 1.64745462e-01, -7.35804577e-03, -4.55056466e-02, - 1.56664042e-03, -4.06703815e-02, -7.75179027e-03, -2.83998198e-02}}; + {9.927428e-02, 1.135584e-05, -1.291193e-01, 1.863923e-01, 3.137031e-05, + -9.139079e-06, 3.496658e-02, -1.160047e-01, 2.076180e-01, 3.945961e-05, + -2.879414e-05, 4.179365e-06, -6.578851e-02, -1.791001e-02, -8.364789e-02, + 2.217087e-01}, + {1.117560e-01, 9.571090e-02, 2.177361e-01, 1.412031e-01, 1.217814e-01, + 1.382991e-01, 1.241730e-01, 2.136451e-01, 3.635167e-02, 6.785955e-02, + 1.732515e-01, 6.485935e-02, -1.129651e-02, 1.236321e-01, 4.362752e-02, + -5.882758e-03}, + {1.117437e-01, -9.570201e-02, 2.177374e-01, 1.411734e-01, -1.217560e-01, + -1.383046e-01, 1.242088e-01, 2.136489e-01, 3.631807e-02, -6.782208e-02, + -1.732464e-01, -6.488207e-02, -1.128750e-02, 1.236767e-01, 4.363238e-02, + -5.932943e-03}, +}; float toa_iamf712[][16] = { {1.24843144e-01, 1.41061771e-01, -1.80747248e-01, 1.65913311e-01, @@ -1150,8 +1148,7 @@ int IAMF_element_renderer_render_H2M(struct h2m_rdr_t *h2mMatrix, float *in[], out[lfe1][j] = output * 0.5; else out[lfe1][j] = output / sqrt(n_size); - } - else { // lfe off + } else { // lfe off out[lfe1][j] = 0; } } @@ -1162,8 +1159,7 @@ int IAMF_element_renderer_render_H2M(struct h2m_rdr_t *h2mMatrix, float *in[], if (lfe) { // lfe on if (lfe1 >= 0) { // already compute lfe out[lfe2][j] = out[lfe1][j]; - } - else { // compute lfe + } else { // compute lfe float output; output = lfefilter_update(lfe, in[0][j]); // use W if (n_size <= 2) @@ -1171,8 +1167,7 @@ int IAMF_element_renderer_render_H2M(struct h2m_rdr_t *h2mMatrix, float *in[], else out[lfe2][j] = output / sqrt(n_size); } - } - else { // lfe off + } else { // lfe off out[lfe2][j] = 0; } } diff --git a/code/src/iamf_dec/vlogging_tool_sr.c b/code/src/iamf_dec/vlogging_tool_sr.c index 16fe8d63..0cdf0451 100755 --- a/code/src/iamf_dec/vlogging_tool_sr.c +++ b/code/src/iamf_dec/vlogging_tool_sr.c @@ -753,7 +753,7 @@ static void write_mix_presentation_log(uint64_t idx, void* obu, char* log) { // loudness log += write_yaml_form(log, 3, "loudness:"); - log += write_yaml_form(log, 4, "info_type: %u", + log += write_yaml_form(log, 4, "info_type_bit_masks: [%u]", submix->loudness[j].info_type); log += write_yaml_form(log, 4, "integrated_loudness: %d", submix->loudness[j].integrated_loudness);