diff --git a/notebooks/in-repo-anaggregateds-report.ipynb b/notebooks/in-repo-anaggregateds-report.ipynb new file mode 100644 index 0000000000..1a291a9c8b --- /dev/null +++ b/notebooks/in-repo-anaggregateds-report.ipynb @@ -0,0 +1,248 @@ +{ + "cells": [ + { + "cell_type": "raw", + "source": [ + "## This notebook just reports the nmdc-schema classes for which there's not slot to aggregate the class instances in a Database" + ], + "metadata": { + "collapsed": false + }, + "id": "c5ab1e5a4842b2c7" + }, + { + "cell_type": "code", + "execution_count": 13, + "outputs": [], + "source": [ + "from linkml_runtime import SchemaView" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:48.257190841Z", + "start_time": "2023-10-26T17:35:48.213037258Z" + } + }, + "id": "9f4bdf7c057f2b62" + }, + { + "cell_type": "code", + "execution_count": 14, + "outputs": [], + "source": [ + "local_schema_file = \"../src/schema/nmdc.yaml\"" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:48.727871066Z", + "start_time": "2023-10-26T17:35:48.713324998Z" + } + }, + "id": "15b8c7c5a5cbe272" + }, + { + "cell_type": "code", + "execution_count": 15, + "outputs": [], + "source": [ + "target_class = \"Database\"" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:49.158342115Z", + "start_time": "2023-10-26T17:35:49.154137528Z" + } + }, + "id": "6e8eacd9ea93f492" + }, + { + "cell_type": "code", + "execution_count": 16, + "outputs": [], + "source": [ + "schema_view = SchemaView(local_schema_file)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:50.067096805Z", + "start_time": "2023-10-26T17:35:49.550408252Z" + } + }, + "id": "58bc42c5d64a43" + }, + { + "cell_type": "code", + "execution_count": 17, + "outputs": [ + { + "data": { + "text/plain": "'NMDC'" + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "schema_view.schema.name" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:50.078747622Z", + "start_time": "2023-10-26T17:35:50.072438952Z" + } + }, + "id": "f0ec13d56a627848" + }, + { + "cell_type": "code", + "execution_count": 18, + "outputs": [], + "source": [ + "nmdc_classes = schema_view.all_classes()\n", + "\n", + "nmdc_class_names = []" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:53.534377790Z", + "start_time": "2023-10-26T17:35:50.623681692Z" + } + }, + "id": "2edc737a2ecca55" + }, + { + "cell_type": "code", + "execution_count": 19, + "outputs": [], + "source": [ + "for ck, cv in nmdc_classes.items():\n", + " nmdc_class_names.append(cv.name)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:53.585166952Z", + "start_time": "2023-10-26T17:35:53.537178124Z" + } + }, + "id": "7f51633981a8ed3c" + }, + { + "cell_type": "code", + "execution_count": 20, + "outputs": [], + "source": [ + "database_slots = schema_view.class_induced_slots(target_class)\n", + "\n", + "database_slots_to_ranges = {}\n", + "database_slot_ranges = []" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:53.629344497Z", + "start_time": "2023-10-26T17:35:53.585007303Z" + } + }, + "id": "4f0dad8c4e071cd9" + }, + { + "cell_type": "code", + "execution_count": 21, + "outputs": [], + "source": [ + "for i in database_slots:\n", + " database_slots_to_ranges[i.name] = i.range\n", + " database_slot_ranges.append(i.range)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:53.629584895Z", + "start_time": "2023-10-26T17:35:53.629062467Z" + } + }, + "id": "9dc38dc6f6ed34a4" + }, + { + "cell_type": "code", + "execution_count": 22, + "outputs": [], + "source": [ + "non_database_classes = list(set(nmdc_class_names) - set(database_slot_ranges))\n", + "non_database_classes.sort()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:53.629714870Z", + "start_time": "2023-10-26T17:35:53.629308789Z" + } + }, + "id": "f1c81b6f02d0c47c" + }, + { + "cell_type": "code", + "execution_count": 23, + "outputs": [ + { + "data": { + "text/plain": "['Activity',\n 'AnalyticalSample',\n 'AttributeValue',\n 'BiosampleProcessing',\n 'BooleanValue',\n 'ChemicalEntity',\n 'ControlledIdentifiedTermValue',\n 'ControlledTermValue',\n 'CreditAssociation',\n 'Database',\n 'EnvironmentalMaterialTerm',\n 'FunctionalAnnotationTerm',\n 'GeneProduct',\n 'GeolocationValue',\n 'ImageValue',\n 'Instrument',\n 'InstrumentValue',\n 'IntegerValue',\n 'LabDevice',\n 'MagBin',\n 'MaterialContainer',\n 'MaterialEntity',\n 'MetaboliteQuantification',\n 'MetatranscriptomeAnnotationActivity',\n 'MetatranscriptomeAssembly',\n 'NamedThing',\n 'OntologyClass',\n 'OrthologyGroup',\n 'Pathway',\n 'PeptideQuantification',\n 'PersonValue',\n 'PlannedProcess',\n 'ProteinQuantification',\n 'Protocol',\n 'QualityControlReport',\n 'QuantityValue',\n 'Reaction',\n 'ReactionParticipant',\n 'Site',\n 'TextValue',\n 'TimestampValue',\n 'UrlValue']" + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "non_database_classes" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-26T17:35:53.630475480Z", + "start_time": "2023-10-26T17:35:53.629442737Z" + } + }, + "id": "bf7d109513dd46db" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "3971d41bfafd02bf" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}