From 39d98c09f7c381a4266a3399c620c0f96b0ad3aa Mon Sep 17 00:00:00 2001 From: "Mark A. Miller" Date: Mon, 11 Sep 2023 10:16:15 -0400 Subject: [PATCH 1/3] #1105 --- .../classes_with_no_database_aggregation.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 nmdc_schema/classes_with_no_database_aggregation.py diff --git a/nmdc_schema/classes_with_no_database_aggregation.py b/nmdc_schema/classes_with_no_database_aggregation.py new file mode 100644 index 0000000000..f14c23a65b --- /dev/null +++ b/nmdc_schema/classes_with_no_database_aggregation.py @@ -0,0 +1,28 @@ +import pprint + +from linkml_runtime import SchemaView +from linkml_runtime.dumpers import yaml_dumper + +schema_file = "../src/schema/nmdc.yaml" +target_class = "Database" + +nmdc_view = SchemaView(schema_file) + +nmdc_classes = nmdc_view.all_classes() + +nmdc_class_names = [] +for ck, cv in nmdc_classes.items(): + nmdc_class_names.append(cv.name) + +database_slots = nmdc_view.class_induced_slots(target_class) + +database_slots_to_ranges = {} +database_slot_ranges = [] +for i in database_slots: + database_slots_to_ranges[i.name] = i.range + database_slot_ranges.append(i.range) + + +non_database_classes = list(set(nmdc_class_names) - set(database_slot_ranges)) +non_database_classes.sort() +pprint.pprint(non_database_classes) From cd0d699290624bedcd2a6bb047a1671f3c15f2b5 Mon Sep 17 00:00:00 2001 From: Mark Andrew Miller Date: Thu, 26 Oct 2023 11:55:27 -0400 Subject: [PATCH 2/3] for testing with binder --- notebooks/incremental_test_for_binder.ipynb | 62 +++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 notebooks/incremental_test_for_binder.ipynb diff --git a/notebooks/incremental_test_for_binder.ipynb b/notebooks/incremental_test_for_binder.ipynb new file mode 100644 index 0000000000..1abc09487b --- /dev/null +++ b/notebooks/incremental_test_for_binder.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2023-10-26T15:54:39.234636532Z", + "start_time": "2023-10-26T15:54:39.212299021Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3\n" + ] + } + ], + "source": [ + "x = 1\n", + "y = 2\n", + "z = x + y\n", + "print(z)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "15b8c7c5a5cbe272" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 121e5336a8842ae21c687a29a03305212e80d71f Mon Sep 17 00:00:00 2001 From: Mark Andrew Miller Date: Thu, 26 Oct 2023 13:37:25 -0400 Subject: [PATCH 3/3] As notebook. jupyter not added to pyproject.toml --- .../classes_with_no_database_aggregation.py | 28 -- notebooks/in-repo-anaggregateds-report.ipynb | 248 ++++++++++++++++++ notebooks/incremental_test_for_binder.ipynb | 62 ----- 3 files changed, 248 insertions(+), 90 deletions(-) delete mode 100644 nmdc_schema/classes_with_no_database_aggregation.py create mode 100644 notebooks/in-repo-anaggregateds-report.ipynb delete mode 100644 notebooks/incremental_test_for_binder.ipynb diff --git a/nmdc_schema/classes_with_no_database_aggregation.py b/nmdc_schema/classes_with_no_database_aggregation.py deleted file mode 100644 index f14c23a65b..0000000000 --- a/nmdc_schema/classes_with_no_database_aggregation.py +++ /dev/null @@ -1,28 +0,0 @@ -import pprint - -from linkml_runtime import SchemaView -from linkml_runtime.dumpers import yaml_dumper - -schema_file = "../src/schema/nmdc.yaml" -target_class = "Database" - -nmdc_view = SchemaView(schema_file) - -nmdc_classes = nmdc_view.all_classes() - -nmdc_class_names = [] -for ck, cv in nmdc_classes.items(): - nmdc_class_names.append(cv.name) - -database_slots = nmdc_view.class_induced_slots(target_class) - -database_slots_to_ranges = {} -database_slot_ranges = [] -for i in database_slots: - database_slots_to_ranges[i.name] = i.range - database_slot_ranges.append(i.range) - - -non_database_classes = list(set(nmdc_class_names) - set(database_slot_ranges)) -non_database_classes.sort() -pprint.pprint(non_database_classes) diff --git a/notebooks/in-repo-anaggregateds-report.ipynb b/notebooks/in-repo-anaggregateds-report.ipynb new file mode 100644 index 0000000000..1a291a9c8b --- /dev/null +++ b/notebooks/in-repo-anaggregateds-report.ipynb @@ -0,0 +1,248 @@ +{ + "cells": [ + { + "cell_type": "raw", + "source": [ + "## This notebook just reports the nmdc-schema classes for which there's not slot to aggregate the class instances in a Database" + ], + "metadata": { + "collapsed": false + }, + "id": "c5ab1e5a4842b2c7" + }, + { + "cell_type": "code", + "execution_count": 13, + "outputs": [], + "source": [ + "from linkml_runtime import SchemaView" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:48.257190841Z", + "start_time": "2023-10-26T17:35:48.213037258Z" + } + }, + "id": "9f4bdf7c057f2b62" + }, + { + "cell_type": "code", + "execution_count": 14, + "outputs": [], + "source": [ + "local_schema_file = \"../src/schema/nmdc.yaml\"" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:48.727871066Z", + "start_time": "2023-10-26T17:35:48.713324998Z" + } + }, + "id": "15b8c7c5a5cbe272" + }, + { + "cell_type": "code", + "execution_count": 15, + "outputs": [], + "source": [ + "target_class = \"Database\"" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:49.158342115Z", + "start_time": "2023-10-26T17:35:49.154137528Z" + } + }, + "id": "6e8eacd9ea93f492" + }, + { + "cell_type": "code", + "execution_count": 16, + "outputs": [], + "source": [ + "schema_view = SchemaView(local_schema_file)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:50.067096805Z", + "start_time": "2023-10-26T17:35:49.550408252Z" + } + }, + "id": "58bc42c5d64a43" + }, + { + "cell_type": "code", + "execution_count": 17, + "outputs": [ + { + "data": { + "text/plain": "'NMDC'" + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "schema_view.schema.name" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:50.078747622Z", + "start_time": "2023-10-26T17:35:50.072438952Z" + } + }, + "id": "f0ec13d56a627848" + }, + { + "cell_type": "code", + "execution_count": 18, + "outputs": [], + "source": [ + "nmdc_classes = schema_view.all_classes()\n", + "\n", + "nmdc_class_names = []" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:53.534377790Z", + "start_time": "2023-10-26T17:35:50.623681692Z" + } + }, + "id": "2edc737a2ecca55" + }, + { + "cell_type": "code", + "execution_count": 19, + "outputs": [], + "source": [ + "for ck, cv in nmdc_classes.items():\n", + " nmdc_class_names.append(cv.name)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:53.585166952Z", + "start_time": "2023-10-26T17:35:53.537178124Z" + } + }, + "id": "7f51633981a8ed3c" + }, + { + "cell_type": "code", + "execution_count": 20, + "outputs": [], + "source": [ + "database_slots = schema_view.class_induced_slots(target_class)\n", + "\n", + "database_slots_to_ranges = {}\n", + "database_slot_ranges = []" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:53.629344497Z", + "start_time": "2023-10-26T17:35:53.585007303Z" + } + }, + "id": "4f0dad8c4e071cd9" + }, + { + "cell_type": "code", + "execution_count": 21, + "outputs": [], + "source": [ + "for i in database_slots:\n", + " database_slots_to_ranges[i.name] = i.range\n", + " database_slot_ranges.append(i.range)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:53.629584895Z", + "start_time": "2023-10-26T17:35:53.629062467Z" + } + }, + "id": "9dc38dc6f6ed34a4" + }, + { + "cell_type": "code", + "execution_count": 22, + "outputs": [], + "source": [ + "non_database_classes = list(set(nmdc_class_names) - set(database_slot_ranges))\n", + "non_database_classes.sort()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:53.629714870Z", + "start_time": "2023-10-26T17:35:53.629308789Z" + } + }, + "id": "f1c81b6f02d0c47c" + }, + { + "cell_type": "code", + "execution_count": 23, + "outputs": [ + { + "data": { + "text/plain": "['Activity',\n 'AnalyticalSample',\n 'AttributeValue',\n 'BiosampleProcessing',\n 'BooleanValue',\n 'ChemicalEntity',\n 'ControlledIdentifiedTermValue',\n 'ControlledTermValue',\n 'CreditAssociation',\n 'Database',\n 'EnvironmentalMaterialTerm',\n 'FunctionalAnnotationTerm',\n 'GeneProduct',\n 'GeolocationValue',\n 'ImageValue',\n 'Instrument',\n 'InstrumentValue',\n 'IntegerValue',\n 'LabDevice',\n 'MagBin',\n 'MaterialContainer',\n 'MaterialEntity',\n 'MetaboliteQuantification',\n 'MetatranscriptomeAnnotationActivity',\n 'MetatranscriptomeAssembly',\n 'NamedThing',\n 'OntologyClass',\n 'OrthologyGroup',\n 'Pathway',\n 'PeptideQuantification',\n 'PersonValue',\n 'PlannedProcess',\n 'ProteinQuantification',\n 'Protocol',\n 'QualityControlReport',\n 'QuantityValue',\n 'Reaction',\n 'ReactionParticipant',\n 'Site',\n 'TextValue',\n 'TimestampValue',\n 'UrlValue']" + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "non_database_classes" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:53.630475480Z", + "start_time": "2023-10-26T17:35:53.629442737Z" + } + }, + "id": "bf7d109513dd46db" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "3971d41bfafd02bf" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/incremental_test_for_binder.ipynb b/notebooks/incremental_test_for_binder.ipynb deleted file mode 100644 index 1abc09487b..0000000000 --- a/notebooks/incremental_test_for_binder.ipynb +++ /dev/null @@ -1,62 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "initial_id", - "metadata": { - "collapsed": true, - "ExecuteTime": { - "end_time": "2023-10-26T15:54:39.234636532Z", - "start_time": "2023-10-26T15:54:39.212299021Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3\n" - ] - } - ], - "source": [ - "x = 1\n", - "y = 2\n", - "z = x + y\n", - "print(z)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [], - "metadata": { - "collapsed": false - }, - "id": "15b8c7c5a5cbe272" - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}