Skip to content

Commit

Permalink
Merge pull request #1150 from microbiomedata/issue-1149
Browse files: browse the repository at this point in the history
`local/mongo_as_unvalidated_nmdc_database.yaml` target needs pure dev input and pure prod input modes
  • Loading branch information
turbomam authored Oct 3, 2023
2 parents 4af3bcf + 827be0b commit 11b80fc
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 5 deletions.
19 changes: 15 additions & 4 deletions nmdc_schema/migration_recursion.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
doi_url_pattern = r'^https?:\/\/[a-zA-Z\.]+\/10\.'
curie_pattern = r'^[a-zA-Z_][a-zA-Z0-9_-]*:[a-zA-Z0-9_][a-zA-Z0-9_.-]*$'

#migration_plan = {
#
#}


class Migrator:
def __init__(self):
Expand Down Expand Up @@ -194,28 +198,35 @@ def main(schema_path, input_path, output_path, salvage_prefix):
for tdk, tdv in total_dict.items():
logger.info(f"Starting migration of {tdk}")
end_dict[tdk] = migrator.apply_changes_recursively_by_key(tdv, set(migrateable_slots))

# if tdk == "study_set":
# logger.info(f"Starting {tdk}-specific migrations")
# for current_study in tdv:
# migrator.migrate_studies_7_7_2_to_7_8(current_study)

####

# if tdk == "biosample_set":
# logger.info(f"Starting {tdk}-specific migrations")
# for current_biosample in tdv:
# migrator.migrate_uc_gold_biosample_identifiers_7_8_0_to_8_0_0(current_biosample)

# if tdk == "extraction_set":
# logger.info(f"Starting {tdk}-specific migrations")
# for current_extraction in tdv:
# migrator.migrate_extractions_7_8_0_to_8_0_0(current_extraction)

# if tdk == "omics_processing_set":
# logger.info(f"Starting {tdk}-specific migrations")
# for current_omics_processing in tdv:
# migrator.migrate_uc_gold_sequencing_project_identifiers_7_8_0_to_8_0_0(current_omics_processing)
# if tdk == "biosample_set":
# logger.info(f"Starting {tdk}-specific migrations")
# for current_biosample in tdv:
# migrator.migrate_uc_gold_biosample_identifiers_7_8_0_to_8_0_0(current_biosample)

# if tdk == "study_set":
# logger.info(f"Starting {tdk}-specific migrations")
# for current_study in tdv:
# migrator.migrate_uc_gold_study_identifiers_7_8_0_to_8_0_0(current_study)


logger.info(f"Saving migrated data to {output_path}")
with open(output_path, "w") as f:
yaml.dump(end_dict, f)
Expand Down
7 changes: 6 additions & 1 deletion project.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -417,9 +417,14 @@ nmdc_schema/nmdc_schema_accepting_legacy_ids.py: nmdc_schema/nmdc_schema_accepti

make-rdf: rdf-clean local/mongo_as_nmdc_database_validation.log local/mongo_as_nmdc_database_cuire_repaired.ttl

# --selected-collections functional_annotation_agg \ # huge, no publically avaiaible reference data (kegg)
temp:

# --selected-collections functional_annotation_agg \ # huge, no publicly available reference data (kegg)
# --selected-collections metaproteomics_analysis_activity_set \ # next slowest

# when connecting to the dev MongoDB, also use --client-base-url https://api-dev.microbiomedata.org
# TODO: add pre-composed prod and dev Makefile targets, since each requires the user to provide a matching Mongo port and API URL

local/mongo_as_unvalidated_nmdc_database.yaml:
date # 276.50 seconds on 2023-08-30 without functional_annotation_agg or metaproteomics_analysis_activity_set
time $(RUN) pure-export \
Expand Down

0 comments on commit 11b80fc

Please sign in to comment.