Merge pull request #1150 from microbiomedata/issue-1149

`local/mongo_as_unvalidated_nmdc_database.yaml` target needs pure dev input and pure prod input modes
microbiomedata · Oct 3, 2023 · 11b80fc · 11b80fc
2 parents 4af3bcf + 827be0b
commit 11b80fc
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 5 deletions.
diff --git a/nmdc_schema/migration_recursion.py b/nmdc_schema/migration_recursion.py
@@ -14,6 +14,10 @@
 doi_url_pattern = r'^https?:\/\/[a-zA-Z\.]+\/10\.'
 curie_pattern = r'^[a-zA-Z_][a-zA-Z0-9_-]*:[a-zA-Z0-9_][a-zA-Z0-9_.-]*$'
 
+#migration_plan = {
+#
+#}
+
 
 class Migrator:
     def __init__(self):
@@ -194,28 +198,35 @@ def main(schema_path, input_path, output_path, salvage_prefix):
     for tdk, tdv in total_dict.items():
         logger.info(f"Starting migration of {tdk}")
         end_dict[tdk] = migrator.apply_changes_recursively_by_key(tdv, set(migrateable_slots))
+
         # if tdk == "study_set":
         #     logger.info(f"Starting {tdk}-specific migrations")
         #     for current_study in tdv:
         #         migrator.migrate_studies_7_7_2_to_7_8(current_study)
+
+        ####
+
+        # if tdk == "biosample_set":
+        #     logger.info(f"Starting {tdk}-specific migrations")
+        #     for current_biosample in tdv:
+        #         migrator.migrate_uc_gold_biosample_identifiers_7_8_0_to_8_0_0(current_biosample)
 
         # if tdk == "extraction_set":
         #     logger.info(f"Starting {tdk}-specific migrations")
         #     for current_extraction in tdv:
         #         migrator.migrate_extractions_7_8_0_to_8_0_0(current_extraction)
+
         # if tdk == "omics_processing_set":
         #     logger.info(f"Starting {tdk}-specific migrations")
         #     for current_omics_processing in tdv:
         #         migrator.migrate_uc_gold_sequencing_project_identifiers_7_8_0_to_8_0_0(current_omics_processing)
-        # if tdk == "biosample_set":
-        #     logger.info(f"Starting {tdk}-specific migrations")
-        #     for current_biosample in tdv:
-        #         migrator.migrate_uc_gold_biosample_identifiers_7_8_0_to_8_0_0(current_biosample)
+
         # if tdk == "study_set":
         #     logger.info(f"Starting {tdk}-specific migrations")
         #     for current_study in tdv:
         #         migrator.migrate_uc_gold_study_identifiers_7_8_0_to_8_0_0(current_study)
 
+
     logger.info(f"Saving migrated data to {output_path}")
     with open(output_path, "w") as f:
         yaml.dump(end_dict, f)

diff --git a/project.Makefile b/project.Makefile
@@ -417,9 +417,14 @@ nmdc_schema/nmdc_schema_accepting_legacy_ids.py: nmdc_schema/nmdc_schema_accepti
 
 make-rdf: rdf-clean local/mongo_as_nmdc_database_validation.log local/mongo_as_nmdc_database_cuire_repaired.ttl
 
-#   		--selected-collections functional_annotation_agg \ # huge, no publically avaiaible reference data (kegg)
+temp:
+
+#   		--selected-collections functional_annotation_agg \ # huge, no publicly available reference data (kegg)
 #   		--selected-collections metaproteomics_analysis_activity_set \ # next slowest
 
+# When connecting to the dev MongoDB, also pass --client-base-url https://api-dev.microbiomedata.org
+# TODO: add pre-composed prod and dev Makefile targets, because each mode requires the user to provide a matching Mongo port and API URL
+
 local/mongo_as_unvalidated_nmdc_database.yaml:
 	date  # 276.50 seconds on 2023-08-30 without functional_annotation_agg or metaproteomics_analysis_activity_set
 	time $(RUN) pure-export \