From 6165fbc82159cb354f34a362c5376eaf33a76582 Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 10:21:58 -0800 Subject: [PATCH 01/15] Update nmdc.yaml Make subclasses of DataGeneration --- src/schema/nmdc.yaml | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index e63264417b..9f4551e126 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -144,6 +144,48 @@ subsets: classes: + NucleotideSequencing: + description: A DataGeneration class for generating sequencing data. (ie from an Illumina or Pacific Biosciences instrument). + is_a: DataGeneration + slots: + - gold_sequencing_project_identifiers + - seq_meth + - ncbi_project_name + - insdc_bioproject_identifiers + - insdc_experiment_identifiers + DiMs: + description: A DataGeneration class for generating direct infusion mass spectrometry data. + is_a: DataGeneration + aliases: + - DI-MS + - Direct Infusion Mass Spectrometry + LcMs: + description: A DataGeneration class for generating liquid chromatography-mass spectrometry data. + is_a: DataGeneration + aliases: + - Liquid Chromatography mass spectrometry + - LC-MS + LcMsMs: + description: A DataGeneration class for generating liquid chromatography with tandem mass spectrometry data. + is_a: DataGeneration + aliases: + - Liquid Chromatography with tandem mass spectrometry + - LC-MS-MS + - LC-MS/MS + GcMs: + description: A DataGeneration class for generating gas chromatography-mass spectrometry data. + is_a: DataGeneration + aliases: + - Gas Chromatography mass spectrometry + - GC-MS + GcMsMs: + description: A DataGeneration class for generating gas chromatography with tandem mass spectrometry data. + is_a: DataGeneration + aliases: + - Gas Chromatography with tandem mass spectrometry + - GC-MS MS + - GS-MS/MS + FunctionalAnnotationAggMember: slots: - metagenome_annotation_id From e96fee1293428631bc5a32d33ae625394d2d1a87 Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 10:38:00 -0800 Subject: [PATCH 02/15] Update nmdc.yaml Update DataGenerate subclasses description and add exact mappings. --- src/schema/nmdc.yaml | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 9f4551e126..fc8ead2868 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -145,8 +145,9 @@ subsets: classes: NucleotideSequencing: - description: A DataGeneration class for generating sequencing data. (ie from an Illumina or Pacific Biosciences instrument). + description: A DataGeneration in which the sequence of DNA or RNA molecules is generated. is_a: DataGeneration + comments: For example data generated from an Illumina or Pacific Biosciences instrument. slots: - gold_sequencing_project_identifiers - seq_meth @@ -154,37 +155,45 @@ classes: - insdc_bioproject_identifiers - insdc_experiment_identifiers DiMs: - description: A DataGeneration class for generating direct infusion mass spectrometry data. + description: A DataGeneration in which direct infusion mass spectrometry data is generated is_a: DataGeneration aliases: - DI-MS - Direct Infusion Mass Spectrometry LcMs: - description: A DataGeneration class for generating liquid chromatography-mass spectrometry data. + description: A DataGeneration in which liquid chromatography-mass spectrometry data is generated. is_a: DataGeneration aliases: - Liquid Chromatography mass spectrometry - LC-MS + exact_mapping: + - CHMO:0000524 LcMsMs: - description: A DataGeneration class for generating liquid chromatography with tandem mass spectrometry data. + description: A DataGeneration in which liquid chromatography with tandem mass spectrometry data is generated. is_a: DataGeneration aliases: - Liquid Chromatography with tandem mass spectrometry - LC-MS-MS - LC-MS/MS + exact_mapping: + - CHMO:0000701 GcMs: - description: A DataGeneration class for generating gas chromatography-mass spectrometry data. + description: A DataGeneration in which gas chromatography-mass spectrometry data is generated. is_a: DataGeneration aliases: - Gas Chromatography mass spectrometry - - GC-MS + - GC-MS + exact_mapping: + - CHMO:0000497 GcMsMs: - description: A DataGeneration class for generating gas chromatography with tandem mass spectrometry data. + description: A DataGeneration in which gas chromatography with tandem mass spectrometry data is generated. is_a: DataGeneration aliases: - Gas Chromatography with tandem mass spectrometry - GC-MS MS - GS-MS/MS + exact_mapping: + - CHMO:0002862 FunctionalAnnotationAggMember: slots: From bdfb4dae6127737dd5859e8a776ceb1176552185 Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 10:43:45 -0800 Subject: [PATCH 03/15] Update nmdc.yaml Fix whitespace issue. --- src/schema/nmdc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index fc8ead2868..8ed706e3a7 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -185,7 +185,7 @@ classes: - GC-MS exact_mapping: - CHMO:0000497 - GcMsMs: + GcMsMs: description: A DataGeneration in which gas chromatography with tandem mass spectrometry data is generated. is_a: DataGeneration aliases: From ea8969b04c45ad974056f70f3d73573705eeab86 Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 10:53:03 -0800 Subject: [PATCH 04/15] Update nmdc.yaml witespace debugging --- src/schema/nmdc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 8ed706e3a7..806d614ea6 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -149,7 +149,7 @@ classes: is_a: DataGeneration comments: For example data generated from an Illumina or Pacific Biosciences instrument. slots: - - gold_sequencing_project_identifiers + - gold_sequencing_project_identifiers - seq_meth - ncbi_project_name - insdc_bioproject_identifiers From d3d8c4dcb64761264446553180de833e10ec3996 Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 10:58:08 -0800 Subject: [PATCH 05/15] Update nmdc.yaml Fixing exact_mapping to exact_mappings for DataGeneration subclasses --- src/schema/nmdc.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 806d614ea6..56d079113f 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -166,7 +166,7 @@ classes: aliases: - Liquid Chromatography mass spectrometry - LC-MS - exact_mapping: + exact_mappings: - CHMO:0000524 LcMsMs: description: A DataGeneration in which liquid chromatography with tandem mass spectrometry data is generated. @@ -175,7 +175,7 @@ classes: - Liquid Chromatography with tandem mass spectrometry - LC-MS-MS - LC-MS/MS - exact_mapping: + exact_mappings: - CHMO:0000701 GcMs: description: A DataGeneration in which gas chromatography-mass spectrometry data is generated. @@ -183,7 +183,7 @@ classes: aliases: - Gas Chromatography mass spectrometry - GC-MS - exact_mapping: + exact_mappings: - CHMO:0000497 GcMsMs: description: A DataGeneration in which gas chromatography with tandem mass spectrometry data is generated. @@ -192,7 +192,7 @@ classes: - Gas Chromatography with tandem mass spectrometry - GC-MS MS - GS-MS/MS - exact_mapping: + exact_mappings: - CHMO:0002862 FunctionalAnnotationAggMember: From 73a94e971125658fd425899fa86be40aaa7f1b4b Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 14:12:19 -0800 Subject: [PATCH 06/15] Update nmdc.yaml Remove tandem MS subclasses, add structured_pattern slots for DataGeneration subclasses --- src/schema/nmdc.yaml | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 56d079113f..35a1815a97 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -153,13 +153,22 @@ classes: - seq_meth - ncbi_project_name - insdc_bioproject_identifiers - - insdc_experiment_identifiers + slot_usage: + id: + structured_pattern: + syntax: "{id_nmdc_prefix}:dgns-{id_shoulder}-{id_blade}{id_version}{id_locus}" + interpolated: true DiMs: description: A DataGeneration in which direct infusion mass spectrometry data is generated is_a: DataGeneration aliases: - DI-MS - Direct Infusion Mass Spectrometry + slot_usage: + id: + structured_pattern: + syntax: "{id_nmdc_prefix}:dgdims-{id_shoulder}-{id_blade}{id_version}{id_locus}" + interpolated: true LcMs: description: A DataGeneration in which liquid chromatography-mass spectrometry data is generated. is_a: DataGeneration @@ -168,15 +177,11 @@ classes: - LC-MS exact_mappings: - CHMO:0000524 - LcMsMs: - description: A DataGeneration in which liquid chromatography with tandem mass spectrometry data is generated. - is_a: DataGeneration - aliases: - - Liquid Chromatography with tandem mass spectrometry - - LC-MS-MS - - LC-MS/MS - exact_mappings: - - CHMO:0000701 + slot_usage: + id: + structured_pattern: + syntax: "{id_nmdc_prefix}:dglcms-{id_shoulder}-{id_blade}{id_version}{id_locus}" + interpolated: true GcMs: description: A DataGeneration in which gas chromatography-mass spectrometry data is generated. is_a: DataGeneration @@ -185,15 +190,11 @@ classes: - GC-MS exact_mappings: - CHMO:0000497 - GcMsMs: - description: A DataGeneration in which gas chromatography with tandem mass spectrometry data is generated. - is_a: DataGeneration - aliases: - - Gas Chromatography with tandem mass spectrometry - - GC-MS MS - - GS-MS/MS - exact_mappings: - - CHMO:0002862 + slot_usage: + id: + structured_pattern: + syntax: "{id_nmdc_prefix}:dggcms-{id_shoulder}-{id_blade}{id_version}{id_locus}" + interpolated: true FunctionalAnnotationAggMember: slots: From 83258ce8dd12a2e16fcee85fdc30e29667a4c179 Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 14:46:53 -0800 Subject: [PATCH 07/15] Update nmdc.yaml Updates to DataGeneration subclasses, adding MassSpectrometry, updating subclass names. --- src/schema/nmdc.yaml | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 35a1815a97..4117bbdde2 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -158,9 +158,17 @@ classes: structured_pattern: syntax: "{id_nmdc_prefix}:dgns-{id_shoulder}-{id_blade}{id_version}{id_locus}" interpolated: true - DiMs: - description: A DataGeneration in which direct infusion mass spectrometry data is generated + MassSpectrometry: + description: Spectrometry where the sample is converted into gaseous ions which are characterised by their mass-to-charge ratio and relative abundanc is_a: DataGeneration + abstract: true + exact_mappings: CHMO:0000470 + slots: + - aquisition_category + - resolution_category + DirectInfusionMassSpectrometry: + description: A DataGeneration in which direct infusion mass spectrometry data is generated + is_a: MassSpectrometry aliases: - DI-MS - Direct Infusion Mass Spectrometry @@ -169,9 +177,9 @@ classes: structured_pattern: syntax: "{id_nmdc_prefix}:dgdims-{id_shoulder}-{id_blade}{id_version}{id_locus}" interpolated: true - LcMs: + LiquidChromatographyMassSpectrometry: description: A DataGeneration in which liquid chromatography-mass spectrometry data is generated. - is_a: DataGeneration + is_a: MassSpectrometry aliases: - Liquid Chromatography mass spectrometry - LC-MS @@ -182,9 +190,9 @@ classes: structured_pattern: syntax: "{id_nmdc_prefix}:dglcms-{id_shoulder}-{id_blade}{id_version}{id_locus}" interpolated: true - GcMs: + GasChromatographyMassSpectrometry: description: A DataGeneration in which gas chromatography-mass spectrometry data is generated. - is_a: DataGeneration + is_a: MassSpectrometry aliases: - Gas Chromatography mass spectrometry - GC-MS From ed7c3a2a6f7f41311f0c4b19ff2cc4337a673fc5 Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 14:58:45 -0800 Subject: [PATCH 08/15] Update basic_slots.yaml New slots acquisition_category and resolution_category --- src/schema/basic_slots.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/schema/basic_slots.yaml b/src/schema/basic_slots.yaml index 4dd7e4b3ef..66d76f6f6b 100644 --- a/src/schema/basic_slots.yaml +++ b/src/schema/basic_slots.yaml @@ -28,6 +28,15 @@ types: pattern: '^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$' slots: + acquisition_category: + domain: MassSpectrometry + range: AcquisitionCategoryEnum + description: Captures which type of aquisition category was on a mass spectrometry run. + comments: DDA vs DIA + resolution_category: + domain: MassSpectrometry + range: ResolutionCategoryEnum + description: Captures the resolution of the mass spectrometry run. processing_institution: domain: PlannedProcess range: processing_institution_enum From ac436a5738832843c9c3312353f1d40c8180a865 Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 15:10:00 -0800 Subject: [PATCH 09/15] Update nmdc.yaml Adding enumerations for MassSpectrometry slots --- src/schema/nmdc.yaml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 4117bbdde2..2c7fa19895 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -164,7 +164,7 @@ classes: abstract: true exact_mappings: CHMO:0000470 slots: - - aquisition_category + - acquisition_category - resolution_category DirectInfusionMassSpectrometry: description: A DataGeneration in which direct infusion mass spectrometry data is generated @@ -1791,6 +1791,18 @@ classes: - https://casrai.org/credit/ enums: + AcquisitionCategoryEnum: + permissible_values: + Full_Scan: + alias: + - MS + Tandem_MassSpec: + alias: + -MSn + ResolutionCategoryEnum: + permissible_values: + High: { } + Low: { } InstrumentModelEnum: permissible_values: Orbitrap: From f17b1621040e4f3c54eb09d623468c9c87b593b0 Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 15:13:46 -0800 Subject: [PATCH 10/15] Update nmdc.yaml Fixing typo of spelling of aliases --- src/schema/nmdc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 2c7fa19895..f79a589a94 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -1794,7 +1794,7 @@ enums: AcquisitionCategoryEnum: permissible_values: Full_Scan: - alias: + aliases: - MS Tandem_MassSpec: alias: From 6e9a143bbf531c23d179c3cd5aac98d2a2909081 Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 15:20:21 -0800 Subject: [PATCH 11/15] Update nmdc.yaml Fixing more misspellings of aliases --- src/schema/nmdc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index f79a589a94..60001185a1 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -1797,7 +1797,7 @@ enums: aliases: - MS Tandem_MassSpec: - alias: + aliases: -MSn ResolutionCategoryEnum: permissible_values: From d59b527b12167bf44c98d2f41c2f74be2751e181 Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 15:39:53 -0800 Subject: [PATCH 12/15] Update nmdc.yaml updates to permissible values for AcuisitionCategoryEnum and ResolutionCategoryEnum --- src/schema/nmdc.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 60001185a1..d62a211f4d 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -159,7 +159,7 @@ classes: syntax: "{id_nmdc_prefix}:dgns-{id_shoulder}-{id_blade}{id_version}{id_locus}" interpolated: true MassSpectrometry: - description: Spectrometry where the sample is converted into gaseous ions which are characterised by their mass-to-charge ratio and relative abundanc + description: Spectrometry where the sample is converted into gaseous ions which are characterised by their mass-to-charge ratio and relative abundance. is_a: DataGeneration abstract: true exact_mappings: CHMO:0000470 @@ -1793,16 +1793,16 @@ classes: enums: AcquisitionCategoryEnum: permissible_values: - Full_Scan: + full_scan: aliases: - MS - Tandem_MassSpec: + tandem_mass_spectrum: aliases: -MSn ResolutionCategoryEnum: permissible_values: - High: { } - Low: { } + high: { } + low: { } InstrumentModelEnum: permissible_values: Orbitrap: From e611de709f104149248d637c776d329ab59faa59 Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 15:41:20 -0800 Subject: [PATCH 13/15] Update basic_slots.yaml updates to comments for slot acquisition_category --- src/schema/basic_slots.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/basic_slots.yaml b/src/schema/basic_slots.yaml index 66d76f6f6b..8b41686de5 100644 --- a/src/schema/basic_slots.yaml +++ b/src/schema/basic_slots.yaml @@ -32,7 +32,7 @@ slots: domain: MassSpectrometry range: AcquisitionCategoryEnum description: Captures which type of aquisition category was on a mass spectrometry run. - comments: DDA vs DIA + comments: MS vs MSn resolution_category: domain: MassSpectrometry range: ResolutionCategoryEnum From 6a44dac1d0f501f1ed39b3d30de681217da9d325 Mon Sep 17 00:00:00 2001 From: aclum Date: Tue, 12 Dec 2023 15:59:56 -0800 Subject: [PATCH 14/15] Update nmdc.yaml Getting rid of ms subclasses in favor of just MassSpectrometry --- src/schema/nmdc.yaml | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index a4e280fb1d..5d3bc835b7 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -161,47 +161,14 @@ classes: MassSpectrometry: description: Spectrometry where the sample is converted into gaseous ions which are characterised by their mass-to-charge ratio and relative abundance. is_a: DataGeneration - abstract: true exact_mappings: CHMO:0000470 slots: - acquisition_category - resolution_category - DirectInfusionMassSpectrometry: - description: A DataGeneration in which direct infusion mass spectrometry data is generated - is_a: MassSpectrometry - aliases: - - DI-MS - - Direct Infusion Mass Spectrometry - slot_usage: - id: - structured_pattern: - syntax: "{id_nmdc_prefix}:dgdims-{id_shoulder}-{id_blade}{id_version}{id_locus}" - interpolated: true - LiquidChromatographyMassSpectrometry: - description: A DataGeneration in which liquid chromatography-mass spectrometry data is generated. - is_a: MassSpectrometry - aliases: - - Liquid Chromatography mass spectrometry - - LC-MS - exact_mappings: - - CHMO:0000524 - slot_usage: - id: - structured_pattern: - syntax: "{id_nmdc_prefix}:dglcms-{id_shoulder}-{id_blade}{id_version}{id_locus}" - interpolated: true - GasChromatographyMassSpectrometry: - description: A DataGeneration in which gas chromatography-mass spectrometry data is generated. - is_a: MassSpectrometry - aliases: - - Gas Chromatography mass spectrometry - - GC-MS - exact_mappings: - - CHMO:0000497 slot_usage: id: structured_pattern: - syntax: "{id_nmdc_prefix}:dggcms-{id_shoulder}-{id_blade}{id_version}{id_locus}" + syntax: "{id_nmdc_prefix}:dgms-{id_shoulder}-{id_blade}{id_version}{id_locus}" interpolated: true FunctionalAnnotationAggMember: From 6af010f7bf1f129f8abf137cee40aeeff8acc660 Mon Sep 17 00:00:00 2001 From: aclum Date: Wed, 13 Dec 2023 09:05:54 -0800 Subject: [PATCH 15/15] Update basic_slots.yaml Fix spelling typo. --- src/schema/basic_slots.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/basic_slots.yaml b/src/schema/basic_slots.yaml index 8b41686de5..9432e15abe 100644 --- a/src/schema/basic_slots.yaml +++ b/src/schema/basic_slots.yaml @@ -31,7 +31,7 @@ slots: acquisition_category: domain: MassSpectrometry range: AcquisitionCategoryEnum - description: Captures which type of aquisition category was on a mass spectrometry run. + description: Captures which type of acquisition category was on a mass spectrometry run. comments: MS vs MSn resolution_category: domain: MassSpectrometry