diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 5871409..e88d479 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -21,6 +21,7 @@ jobs: is-bot: ${{ steps.check-author.outputs.is_bot }} is-cfc: ${{ steps.check-cfc.outputs.is_cfc }} is-page: ${{ steps.check-page.outputs.is_page }} + is-skip: ${{ steps.check-skip.outputs.is_skip }} steps: - name: Checkout uses: actions/checkout@v3 @@ -57,13 +58,23 @@ jobs: fi done + - name: Check if action is label but it is unrelated + id: check-skip + run: | + if [[ '${{ github.event.action }}' == 'labeled' ]]; then + if [[ '${{ github.event.label.name }}' != 'conformance-file' ]]; then + echo "is_skip=true" >> $GITHUB_OUTPUT + fi + fi + cfc: needs: changes if: | github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'conformance-file') && needs.changes.outputs.is-cfc == 'true' && - needs.changes.outputs.is-bot != 'true' + needs.changes.outputs.is-bot != 'true' && + needs.changes.outputs.is-skip != 'true' uses: ./.github/workflows/cfc.yml secrets: inherit @@ -72,7 +83,8 @@ jobs: if: | always() && (needs.cfc.result == 'success' || needs.cfc.result == 'skipped') && - needs.cfc.outputs.created != 'true' + needs.cfc.outputs.created != 'true' && + needs.changes.outputs.is-skip != 'true' uses: ./.github/workflows/test.yml with: test-page: ${{ github.event_name != 'pull_request' || needs.changes.outputs.is-page == 'true' }} diff --git a/data/schemas/gpac-extension.schema.json b/data/schemas/gpac-extension.schema.json index 2158354..75a307c 100644 --- a/data/schemas/gpac-extension.schema.json +++ b/data/schemas/gpac-extension.schema.json @@ -2,6 +2,20 @@ "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "GPAC MP4Box extension declaration schema", "$defs": { + "box_descendant": { + "allOf": [ + { + "type": "object", + "properties": { + "@Type": { + "type": "string" + } + }, + "required": ["@Type"] + }, + { "$ref": "#/$defs/box" } + ] + }, "box": { "type": "object", "properties": { @@ -30,19 +44,23 @@ "description": "Combined flags in base 10" }, "@Specification": { "type": "string" }, - "@Container": { "type": "string" }, - "@data": { - "type": "string", - "description": "dump of the box in hexadecimal format", - "pattern": "^0x[0-9a-fA-F]+$" - }, - "children": { - "type": "array", - "items": { "$ref": "#/$defs/box" } + "@Container": { "type": "string" } + }, + "patternProperties": { + "^(?!@).+": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/box_descendant" + } + }, + { "$ref": "#/$defs/box_descendant" } + ] } }, "additionalProperties": true, - "required": ["@Type", "@data"] + "required": ["@Type"] } }, "type": "object", diff --git a/src/construct/coverage.py b/src/construct/coverage.py index 330ca09..25a7cf5 100644 --- a/src/construct/coverage.py +++ b/src/construct/coverage.py @@ -5,6 +5,26 @@ from common import * +def get_all_fourccs_inside(root): + fourccs = set([root["@Type"]]) + + def crawl(hierarchy): + for key, value in hierarchy.items(): + if isinstance(value, dict): + if "@Type" not in value: + continue + fourcc = value["@Type"] + + fourccs.add(fourcc) + crawl(value) + elif isinstance(value, list): + for item in value: + crawl({key: item}) + + crawl(root) + return fourccs + + def main(): with open("output/dictionary.json", "r", encoding="utf-8") as f: dictionary = json.load(f) @@ -80,11 +100,13 @@ def main(): for extension in extensions: with open(extension, "r", encoding="utf-8") as f: ext_data = json.load(f) - fourcc_in_extensions.update([e["box"]["@Type"] for e in ext_data["extensions"]]) missing_extensions.update( [f"{e['location']}.{e['box']['@Type']}" for e in ext_data["extensions"]] ) + for e in ext_data["extensions"]: + fourcc_in_extensions.update(get_all_fourccs_inside(e["box"])) + # Remove all known and unknown paths from missing extensions (to reduce redundancy) missing_extensions.difference_update(set(files["not_found"])) missing_extensions.difference_update(set(files["path_file_map"].keys())) diff --git a/src/construct/files.py b/src/construct/files.py index 0ce1bdb..983097d 100644 --- a/src/construct/files.py +++ b/src/construct/files.py @@ -98,15 +98,30 @@ def add(path, variant): paths[path].add(variant) def crawl(root, path): - if mp4ra_check and path[-1] not in get_mp4ra_boxes(): - return - add_variant(add, root, path) - if "children" in root: - for child in root["children"]: - crawl(child, path + [root["@Type"]]) + for key, value in root.items(): + if isinstance(value, dict): + if "@Type" not in value: + continue + fourcc = value["@Type"] + + if mp4ra_check and fourcc not in get_mp4ra_boxes(): + continue + + add_variant(add, value, path) + crawl(value, path + [fourcc]) + elif isinstance(value, list): + for item in value: + crawl({key: item}, path) for ext in extension["extensions"]: - crawl(ext["box"], ext["location"].split(".")) + root = ext["box"] + path = ext["location"].split(".") + + if mp4ra_check and path[-1] not in get_mp4ra_boxes(): + continue + + add_variant(add, root, path) + crawl(root, path + [root["@Type"]]) return paths diff --git a/src/feature_extractor/file_features.py b/src/feature_extractor/file_features.py index 4b06d86..3e520fe 100644 --- a/src/feature_extractor/file_features.py +++ b/src/feature_extractor/file_features.py @@ -722,7 +722,13 @@ def _extract_file_features(args, exit_on_error=True): with open(gpac_path, "r", encoding="utf-8") as f: gpac_dict_gt = json.load(f) - if gpac_dict_gt["IsoMediaFile"] != gpac_dict["IsoMediaFile"]: + # Check if this was manual dump + manual_dump = gpac_dict_gt.get("manualDump", False) + + if ( + gpac_dict_gt["IsoMediaFile"] != gpac_dict["IsoMediaFile"] + and not manual_dump + ): print( f'WARNING: GPAC file for "{input_path}" already exists but the contents have been changed. Forcing overwrite!' ) @@ -733,10 +739,19 @@ def _extract_file_features(args, exit_on_error=True): else: dump_to_json(gpac_path, gpac_dict) + # If it was a manual dump, use that as the GPAC file + if not args.overwrite and manual_dump: + print(f'WARNING: "{gpac_path}" was a manual dump. Using that as the GPAC file.') + gpac_dict = gpac_dict_gt + # Create GPAC extension mp4ra_check = "under_consideration" not in os.path.dirname(metadata_path) unknown_boxes = traverse_gpac_dict(gpac_dict, mp4ra_check) if len(unknown_boxes) == 0: + # Remove GPAC extension if there are no unknown boxes anymore + if os.path.exists(gpac_extension_path): + os.remove(gpac_extension_path) + if exit_on_error: sys.exit(0) else: diff --git a/src/tests/test_files.py b/src/tests/test_files.py index d7e265d..ec6e12b 100644 --- a/src/tests/test_files.py +++ b/src/tests/test_files.py @@ -314,10 +314,8 @@ def test_gpac_ext_consistency(check): gpac_ext_dict = json.load(f) # Test if boxes are the same - gt_locations = [(ub["location"], ub["box"]["@data"]) for ub in unknown_boxes] - ref_locations = [ - (ub["location"], ub["box"]["@data"]) for ub in gpac_ext_dict["extensions"] - ] + gt_locations = [ub["location"] for ub in unknown_boxes] + ref_locations = [ub["location"] for ub in gpac_ext_dict["extensions"]] # Reference must match exactly with check: