diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 2b41e5bb4..f61e575ed 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 7.3.2 +current_version = 7.3.3 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P[A-z0-9-]+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b33121d32..cf230c8af 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,36 +31,16 @@ repos: metamist/audit/README\.md )$ - - repo: https://github.com/pycqa/isort - rev: 5.13.2 - hooks: - - id: isort - name: isort (python) - - - repo: https://github.com/ambv/black - rev: 23.12.1 - hooks: - - id: black - args: [.] - pass_filenames: false - always_run: true - exclude: ^metamist/ - - - repo: https://github.com/PyCQA/flake8 - rev: "6.1.0" - hooks: - - id: flake8 - additional_dependencies: [flake8-bugbear, flake8-quotes] - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. rev: v0.5.2 hooks: # Run the linter. - - id: ruff - args: [ --fix ] + # - id: ruff + # args: [ --fix ] # Run the formatter. - # - id: ruff-format + - id: ruff-format # Using system installation of pylint to support checking python module imports - repo: local diff --git a/api/graphql/schema.py b/api/graphql/schema.py index c95e9fefd..582a0230c 100644 --- a/api/graphql/schema.py +++ b/api/graphql/schema.py @@ -5,6 +5,7 @@ Note, we silence a lot of linting here because GraphQL looks at type annotations and defaults to decide the GraphQL schema, so it might not necessarily look correct. """ + import datetime from inspect import isclass diff --git a/api/routes/analysis.py b/api/routes/analysis.py index 5dc0f8a88..3d267ffcb 100644 --- a/api/routes/analysis.py +++ b/api/routes/analysis.py @@ -124,7 +124,11 @@ async def update_analysis( """Update status of analysis""" atable = AnalysisLayer(connection) await atable.update_analysis( - analysis_id, status=analysis.status, output=analysis.output, meta=analysis.meta + analysis_id, + status=analysis.status, + output=analysis.output, + meta=analysis.meta, + active=analysis.active, ) return True diff --git a/api/routes/project_insights.py b/api/routes/project_insights.py index ff35e6236..d74135b17 100644 --- a/api/routes/project_insights.py +++ b/api/routes/project_insights.py @@ -7,7 +7,12 @@ ProjectInsightsSummary, ) -router = APIRouter(prefix='/project-insights', tags=['project-insights',]) +router = APIRouter( + prefix='/project-insights', + tags=[ + 'project-insights', + ], +) @router.post( diff --git a/api/server.py b/api/server.py index 76671d340..4e61ba6bd 100644 --- a/api/server.py +++ b/api/server.py @@ -25,7 +25,7 @@ from db.python.utils import get_logger # This tag is automatically updated by bump2version -_VERSION = '7.3.2' +_VERSION = '7.3.3' logger = get_logger() diff --git a/codecov.yml b/codecov.yml index 5c34a4dc1..957eb68fe 100644 --- a/codecov.yml +++ b/codecov.yml @@ -5,7 +5,11 @@ coverage: # basic target: auto threshold: 0% + informational: true paths: - "db" - "scripts" - "metamist/parser" + patch: + default: + informational: true diff --git a/db/backup/backup.py b/db/backup/backup.py index 79feb8ec2..b9b751ad1 100644 --- a/db/backup/backup.py +++ b/db/backup/backup.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 # pylint: disable=broad-exception-caught,broad-exception-raised -""" Daily back up function for databases within a local -MariaDB instance """ +"""Daily back up function for databases within a local +MariaDB instance""" import json import os diff --git a/db/backup/recovery_test.py b/db/backup/recovery_test.py index 0c79fd7c1..f568b2992 100644 --- a/db/backup/recovery_test.py +++ b/db/backup/recovery_test.py @@ -1,4 +1,4 @@ -""" A script to test that tests the validity of a database backup +"""A script to test that tests the validity of a database backup in the event of recovery. NOTE: DO NOT RUN THIS SCRIPT ON A PRODUCTION SERVER. It will drop the local mysql database after each run. diff --git a/db/backup/restore.py b/db/backup/restore.py index f4c7c446c..3ee18230b 100644 --- a/db/backup/restore.py +++ b/db/backup/restore.py @@ -1,5 +1,5 @@ -""" A script to restore the database instance to the latest -backup """ +"""A script to restore the database instance to the latest +backup""" import os import subprocess diff --git a/db/deploy/main.py b/db/deploy/main.py index b77b465a1..46ef73164 100644 --- a/db/deploy/main.py +++ b/db/deploy/main.py @@ -22,10 +22,14 @@ changelog_file = 'project.xml' -def read_db_credentials(env: Literal['prod', 'dev']) -> Dict[Literal['dbname', 'username', 'password', 'host'], str]: +def read_db_credentials( + env: Literal['prod', 'dev'], +) -> Dict[Literal['dbname', 'username', 'password', 'host'], str]: """Get database credentials from Secret Manager.""" try: - secret_path = SECRET_CLIENT.secret_version_path(SECRET_PROJECT, SECRET_NAME, 'latest') + secret_path = SECRET_CLIENT.secret_version_path( + SECRET_PROJECT, SECRET_NAME, 'latest' + ) response = SECRET_CLIENT.access_secret_version(request={'name': secret_path}) return json.loads(response.payload.data.decode('UTF-8'))[env] except Exception as e: # Broad exception for example; refine as needed @@ -35,7 +39,10 @@ def read_db_credentials(env: Literal['prod', 'dev']) -> Dict[Literal['dbname', ' @app.post('/execute-liquibase') -async def execute_liquibase(request: Request, environment: Literal['prod', 'dev'] = Query(default='dev', regex='^(prod|dev)$')): +async def execute_liquibase( + request: Request, + environment: Literal['prod', 'dev'] = Query(default='dev', regex='^(prod|dev)$'), +): """Endpoint to remotely trigger Liquibase commands on a GCP VM using XML content.""" xml_content = await request.body() @@ -67,10 +74,25 @@ async def execute_liquibase(request: Request, environment: Literal['prod', 'dev' try: # Execute the gcloud command - result = subprocess.run(liquibase_command, check=True, capture_output=True, text=True, env={'LIQUIBASE_COMMAND_PASSWORD': db_password, 'LIQUIBASE_COMMAND_USERNAME': db_username, **os.environ},) - logger.log_text(f'Liquibase update successful: {result.stdout}', severity='INFO') + result = subprocess.run( + liquibase_command, + check=True, + capture_output=True, + text=True, + env={ + 'LIQUIBASE_COMMAND_PASSWORD': db_password, + 'LIQUIBASE_COMMAND_USERNAME': db_username, + **os.environ, + }, + ) + logger.log_text( + f'Liquibase update successful: {result.stdout}', severity='INFO' + ) os.remove(temp_file_path) - return {'message': 'Liquibase update executed successfully', 'output': result.stdout} + return { + 'message': 'Liquibase update executed successfully', + 'output': result.stdout, + } except subprocess.CalledProcessError as e: text = f'Failed to execute Liquibase update: {e.stderr}' logger.log_text(text, severity='ERROR') @@ -79,4 +101,5 @@ async def execute_liquibase(request: Request, environment: Literal['prod', 'dev' if __name__ == '__main__': import uvicorn + uvicorn.run(app, host='0.0.0.0', port=int(os.environ.get('PORT', 8080))) diff --git a/db/python/connect.py b/db/python/connect.py index 0fe2abf90..ece50699c 100644 --- a/db/python/connect.py +++ b/db/python/connect.py @@ -3,6 +3,7 @@ """ Code for connecting to Postgres database """ + import abc import asyncio import json @@ -229,7 +230,6 @@ async def audit_log_id(self): async with self._audit_log_lock: if not self._audit_log_id: - # make this import here, otherwise we'd have a circular import from db.python.tables.audit_log import ( # pylint: disable=import-outside-toplevel,R0401 AuditLogTable, @@ -328,9 +328,9 @@ def get_connection_string(self): if self.port: _host += f':{self.port}' - options: dict[str, str | int] = ( - {} - ) # {'min_size': self.min_pool_size, 'max_size': self.max_pool_size} + options: dict[ + str, str | int + ] = {} # {'min_size': self.min_pool_size, 'max_size': self.max_pool_size} _options = '&'.join(f'{k}={v}' for k, v in options.items()) url = f'mysql://{u_p}@{_host}/{self.dbname}?{_options}' diff --git a/db/python/layers/analysis.py b/db/python/layers/analysis.py index f136d49b1..823b66a4b 100644 --- a/db/python/layers/analysis.py +++ b/db/python/layers/analysis.py @@ -572,6 +572,7 @@ async def update_analysis( status: AnalysisStatus, meta: dict[str, Any] = None, output: str | None = None, + active: bool | None = None, ): """ Update the status of an analysis, set timestamp_completed if relevant @@ -586,4 +587,5 @@ async def update_analysis( status=status, meta=meta, output=output, + active=active, ) diff --git a/db/python/layers/family.py b/db/python/layers/family.py index fcaa218a0..1c027d3b8 100644 --- a/db/python/layers/family.py +++ b/db/python/layers/family.py @@ -275,11 +275,13 @@ async def import_pedigree( missing_external_family_ids = [ f for f in external_family_ids if f not in external_family_id_map ] - external_participant_ids_map = await participant_table.get_id_map_by_external_ids( - list(external_participant_ids), - project=self.connection.project_id, - # Allow missing participants if we're creating them - allow_missing=create_missing_participants, + external_participant_ids_map = ( + await participant_table.get_id_map_by_external_ids( + list(external_participant_ids), + project=self.connection.project_id, + # Allow missing participants if we're creating them + allow_missing=create_missing_participants, + ) ) async with self.connection.connection.transaction(): diff --git a/db/python/layers/project_insights.py b/db/python/layers/project_insights.py index fd4b9719b..c7ec38355 100644 --- a/db/python/layers/project_insights.py +++ b/db/python/layers/project_insights.py @@ -1,5 +1,5 @@ # mypy: disable-error-code="attr-defined,arg-type,index,call-overload" -# pylint: disable=too-many-arguments,too-many-locals,missing-class-docstring +# pylint: disable=too-many-arguments,too-many-locals,missing-class-docstring,too-many-lines import asyncio import itertools import json @@ -151,11 +151,16 @@ def convert_to_external_ids(self, external_ids_value: str | list[str]) -> list[s return [external_ids_value] return external_ids_value - def parse_project_seqtype_technology_keyed_rows(self, rows: list[Record], value_field: str) -> dict[ProjectSeqTypeTechnologyKey, Any]: + def parse_project_seqtype_technology_keyed_rows( + self, rows: list[Record], value_field: str + ) -> dict[ProjectSeqTypeTechnologyKey, Any]: """ Parse rows that are keyed by project, sequencing type, and sequencing technology """ - parsed_rows: dict[ProjectSeqTypeTechnologyKey, dict[str, Any] | list[SequencingGroupInternalId]] = {} + parsed_rows: dict[ + ProjectSeqTypeTechnologyKey, + dict[str, Any] | list[SequencingGroupInternalId], + ] = {} for row in rows: key = ProjectSeqTypeTechnologyKey( row['project'], @@ -191,7 +196,9 @@ async def _get_sequencing_groups_by_analysis_ids( 'analysis_ids': analysis_ids, }, ) - sequencing_groups_by_analysis_id: dict[AnalysisId, list[SequencingGroupInternalId]] = {} + sequencing_groups_by_analysis_id: dict[ + AnalysisId, list[SequencingGroupInternalId] + ] = {} for row in _query_results: sequencing_groups_by_analysis_id[row['analysis_id']] = [ int(sgid) for sgid in row['sequencing_group_ids'].split(',') @@ -199,7 +206,9 @@ async def _get_sequencing_groups_by_analysis_ids( return sequencing_groups_by_analysis_id - async def get_analysis_sequencing_groups(self, grouped_analysis_rows: list[AnalysisRow]): + async def get_analysis_sequencing_groups( + self, grouped_analysis_rows: list[AnalysisRow] + ): """ Get the analysis IDs from the group analysis rows, which is a list of analysis record dicts """ @@ -211,7 +220,13 @@ async def get_analysis_sequencing_groups(self, grouped_analysis_rows: list[Analy analyses_to_query_sequencing_groups ) - def get_report_url(self, project_name: str, sequencing_group_id: SequencingGroupInternalId, output: str, stage: str): + def get_report_url( + self, + project_name: str, + sequencing_group_id: SequencingGroupInternalId, + output: str, + stage: str, + ): """Converts an analysis output gs path to a web report link""" sg_id = sequencing_group_id_format(sequencing_group_id) if 'main-web' in output: @@ -240,21 +255,33 @@ def get_sg_web_report_links( if stripy_report := sequencing_group_stripy_reports.get(report_key): report_links['stripy'] = { - 'url': self.get_report_url(project.name, sequencing_group_id, stripy_report.output, 'Stripy'), + 'url': self.get_report_url( + project.name, sequencing_group_id, stripy_report.output, 'Stripy' + ), 'outliers_detected': stripy_report.outliers_detected, - 'outlier_loci': json.loads(stripy_report.outlier_loci) if stripy_report.outlier_loci else None, + 'outlier_loci': ( + json.loads(stripy_report.outlier_loci) + if stripy_report.outlier_loci + else None + ), 'timestamp_completed': stripy_report.timestamp_completed.isoformat(), } if mito_report := sequencing_group_mito_reports.get(report_key): report_links['mito'] = { - 'url': self.get_report_url(project.name, sequencing_group_id, mito_report.output, 'MitoReport'), + 'url': self.get_report_url( + project.name, sequencing_group_id, mito_report.output, 'MitoReport' + ), 'timestamp_completed': mito_report.timestamp_completed.isoformat(), } return report_links - def get_analysis_stats_internal_from_record(self, analysis_row: AnalysisRow | None, analysis_sequencing_groups: dict[AnalysisId, list[SequencingGroupInternalId]]) -> AnalysisStatsInternal | None: + def get_analysis_stats_internal_from_record( + self, + analysis_row: AnalysisRow | None, + analysis_sequencing_groups: dict[AnalysisId, list[SequencingGroupInternalId]], + ) -> AnalysisStatsInternal | None: """Transforms an analysis row record into an AnalysisStatsInternal object""" if not analysis_row: return None @@ -280,7 +307,9 @@ def get_insights_summary_internal_row( latest_sv_es_index_analysis: AnalysisRow | None, ) -> ProjectInsightsSummaryInternal: """Returns a ProjectInsightsSummaryInternal object from the given data""" - latest_annotate_dataset = self.get_analysis_stats_internal_from_record(latest_annotate_dataset_analysis, analysis_sequencing_groups) + latest_annotate_dataset = self.get_analysis_stats_internal_from_record( + latest_annotate_dataset_analysis, analysis_sequencing_groups + ) latest_snv_es_index = self.get_analysis_stats_internal_from_record( latest_snv_es_index_analysis, analysis_sequencing_groups ) @@ -325,12 +354,20 @@ def get_insights_details_internal_row( project, sequencing_group_details.sequencing_group_id, ) - sgs_in_latest_annotate_dataset = analysis_sequencing_groups.get(latest_annotate_dataset_id, []) - sgs_in_latest_snv_es_index = analysis_sequencing_groups.get(latest_snv_es_index_id, []) - sgs_in_latest_sv_es_index = analysis_sequencing_groups.get(latest_sv_es_index_id, []) + sgs_in_latest_annotate_dataset = analysis_sequencing_groups.get( + latest_annotate_dataset_id, [] + ) + sgs_in_latest_snv_es_index = analysis_sequencing_groups.get( + latest_snv_es_index_id, [] + ) + sgs_in_latest_sv_es_index = analysis_sequencing_groups.get( + latest_sv_es_index_id, [] + ) # participant_ext_ids = self.convert_to_external_ids(sequencing_group_details.participant_external_id) - sample_ext_ids = self.convert_to_external_ids(sequencing_group_details.sample_external_ids) + sample_ext_ids = self.convert_to_external_ids( + sequencing_group_details.sample_external_ids + ) return ProjectInsightsDetailsInternal( project=project.id, dataset=project.name, @@ -345,10 +382,14 @@ def get_insights_details_internal_row( sample_id=sequencing_group_details.sample_id, sample_ext_ids=sample_ext_ids, sequencing_group_id=sequencing_group_details.sequencing_group_id, - completed_cram=sequencing_group_details.sequencing_group_id in sequencing_groups_with_crams, - in_latest_annotate_dataset=sequencing_group_details.sequencing_group_id in sgs_in_latest_annotate_dataset, - in_latest_snv_es_index=sequencing_group_details.sequencing_group_id in sgs_in_latest_snv_es_index, - in_latest_sv_es_index=sequencing_group_details.sequencing_group_id in sgs_in_latest_sv_es_index, + completed_cram=sequencing_group_details.sequencing_group_id + in sequencing_groups_with_crams, + in_latest_annotate_dataset=sequencing_group_details.sequencing_group_id + in sgs_in_latest_annotate_dataset, + in_latest_snv_es_index=sequencing_group_details.sequencing_group_id + in sgs_in_latest_snv_es_index, + in_latest_sv_es_index=sequencing_group_details.sequencing_group_id + in sgs_in_latest_sv_es_index, web_reports=web_reports, ) @@ -383,7 +424,9 @@ async def _total_families_by_project_id_and_seq_fields( 'sequencing_types': sequencing_types, }, ) - return self.parse_project_seqtype_technology_keyed_rows(_query_results, 'num_families') + return self.parse_project_seqtype_technology_keyed_rows( + _query_results, 'num_families' + ) async def _total_participants_by_project_id_and_seq_fields( self, project_ids: list[ProjectId], sequencing_types: list[SequencingType] @@ -413,7 +456,9 @@ async def _total_participants_by_project_id_and_seq_fields( 'sequencing_types': sequencing_types, }, ) - return self.parse_project_seqtype_technology_keyed_rows(_query_results, 'num_participants') + return self.parse_project_seqtype_technology_keyed_rows( + _query_results, 'num_participants' + ) async def _total_samples_by_project_id_and_seq_fields( self, project_ids: list[ProjectId], sequencing_types: list[SequencingType] @@ -442,7 +487,9 @@ async def _total_samples_by_project_id_and_seq_fields( 'sequencing_types': sequencing_types, }, ) - return self.parse_project_seqtype_technology_keyed_rows(_query_results, 'num_samples') + return self.parse_project_seqtype_technology_keyed_rows( + _query_results, 'num_samples' + ) async def _total_sequencing_groups_by_project_id_and_seq_fields( self, project_ids: list[ProjectId], sequencing_types: list[SequencingType] @@ -471,7 +518,9 @@ async def _total_sequencing_groups_by_project_id_and_seq_fields( 'sequencing_types': sequencing_types, }, ) - return self.parse_project_seqtype_technology_keyed_rows(_query_results, 'num_sgs') + return self.parse_project_seqtype_technology_keyed_rows( + _query_results, 'num_sgs' + ) async def _crams_by_project_id_and_seq_fields( self, @@ -506,7 +555,9 @@ async def _crams_by_project_id_and_seq_fields( 'sequencing_types': sequencing_types, }, ) - return self.parse_project_seqtype_technology_keyed_rows(_query_results, 'sequencing_group_ids') + return self.parse_project_seqtype_technology_keyed_rows( + _query_results, 'sequencing_group_ids' + ) async def _latest_annotate_dataset_by_project_id_and_seq_type( self, project_ids: list[ProjectId], sequencing_types: list[str] @@ -548,10 +599,14 @@ async def _latest_annotate_dataset_by_project_id_and_seq_type( 'sequencing_types': sequencing_types, }, ) - latest_annotate_dataset_by_project_id_and_seq_type: dict[ProjectSeqTypeKey, AnalysisRow] = {} + latest_annotate_dataset_by_project_id_and_seq_type: dict[ + ProjectSeqTypeKey, AnalysisRow + ] = {} for row in _query_results: key = ProjectSeqTypeKey(row['project'], row['sequencing_type']) - latest_annotate_dataset_by_project_id_and_seq_type[key] = self.get_analysis_row(row) + latest_annotate_dataset_by_project_id_and_seq_type[key] = ( + self.get_analysis_row(row) + ) return latest_annotate_dataset_by_project_id_and_seq_type async def _latest_es_indices_by_project_id_and_seq_type_and_stage( @@ -589,10 +644,16 @@ async def _latest_es_indices_by_project_id_and_seq_type_and_stage( 'sequencing_types': sequencing_types, }, ) - latest_es_indices_by_project_id_and_seq_type_and_stage: dict[ProjectSeqTypeStageKey, AnalysisRow] = {} + latest_es_indices_by_project_id_and_seq_type_and_stage: dict[ + ProjectSeqTypeStageKey, AnalysisRow + ] = {} for row in _query_results: - key = ProjectSeqTypeStageKey(row['project'], row['sequencing_type'], row['stage']) - latest_es_indices_by_project_id_and_seq_type_and_stage[key] = self.get_analysis_row(row) + key = ProjectSeqTypeStageKey( + row['project'], row['sequencing_type'], row['stage'] + ) + latest_es_indices_by_project_id_and_seq_type_and_stage[key] = ( + self.get_analysis_row(row) + ) return latest_es_indices_by_project_id_and_seq_type_and_stage # Project Insights details queries @@ -640,7 +701,9 @@ async def _sequencing_group_details_by_project_and_seq_fields( 'sequencing_types': sequencing_types, }, ) - sequencing_group_details_by_project_id_and_seq_fields: dict[ProjectSeqTypeTechnologyPlatformKey, list[SequencingGroupDetailRow]] = {} + sequencing_group_details_by_project_id_and_seq_fields: dict[ + ProjectSeqTypeTechnologyPlatformKey, list[SequencingGroupDetailRow] + ] = {} for row in _query_results: key = ProjectSeqTypeTechnologyPlatformKey( row['project'], @@ -665,7 +728,9 @@ async def _sequencing_group_details_by_project_and_seq_fields( return sequencing_group_details_by_project_id_and_seq_fields - async def _details_stripy_reports(self, project_ids: list[ProjectId]) -> dict[ProjectSeqGroupKey, StripyReportRow]: + async def _details_stripy_reports( + self, project_ids: list[ProjectId] + ) -> dict[ProjectSeqGroupKey, StripyReportRow]: """Get stripy web report links""" _query = """ SELECT @@ -709,7 +774,9 @@ async def _details_stripy_reports(self, project_ids: list[ProjectId]) -> dict[Pr return stripy_reports - async def _details_mito_reports(self, project_ids: list[ProjectId]) -> dict[ProjectSeqGroupKey, AnalysisRow]: + async def _details_mito_reports( + self, project_ids: list[ProjectId] + ) -> dict[ProjectSeqGroupKey, AnalysisRow]: """Get mito web report links""" _query = """ SELECT @@ -750,25 +817,43 @@ def get_latest_grouped_analyses( project: Project, sequencing_type: SequencingType, sequencing_technology: SequencingTechnology, - latest_annotate_dataset_by_project_id_and_seq_type: dict[ProjectSeqTypeKey, AnalysisRow], - latest_es_indices_by_project_id_and_seq_type_and_stage: dict[ProjectSeqTypeStageKey, AnalysisRow], + latest_annotate_dataset_by_project_id_and_seq_type: dict[ + ProjectSeqTypeKey, AnalysisRow + ], + latest_es_indices_by_project_id_and_seq_type_and_stage: dict[ + ProjectSeqTypeStageKey, AnalysisRow + ], ): """Returns the latest grouped analyses for a project, sequencing type, and technology""" if sequencing_technology == 'short-read': - latest_annotate_dataset_row = latest_annotate_dataset_by_project_id_and_seq_type.get( - ProjectSeqTypeKey(project.id, sequencing_type) + latest_annotate_dataset_row = ( + latest_annotate_dataset_by_project_id_and_seq_type.get( + ProjectSeqTypeKey(project.id, sequencing_type) + ) ) - latest_snv_es_index_row = latest_es_indices_by_project_id_and_seq_type_and_stage.get( - ProjectSeqTypeStageKey(project.id, sequencing_type, 'MtToEs') + latest_snv_es_index_row = ( + latest_es_indices_by_project_id_and_seq_type_and_stage.get( + ProjectSeqTypeStageKey(project.id, sequencing_type, 'MtToEs') + ) ) - latest_sv_es_index_row = latest_es_indices_by_project_id_and_seq_type_and_stage.get( - ProjectSeqTypeStageKey(project.id, sequencing_type, SV_INDEX_SEQ_TYPE_STAGE_MAP.get(sequencing_type)) + latest_sv_es_index_row = ( + latest_es_indices_by_project_id_and_seq_type_and_stage.get( + ProjectSeqTypeStageKey( + project.id, + sequencing_type, + SV_INDEX_SEQ_TYPE_STAGE_MAP.get(sequencing_type), + ) + ) ) else: latest_annotate_dataset_row = None latest_snv_es_index_row = None latest_sv_es_index_row = None - return latest_annotate_dataset_row, latest_snv_es_index_row, latest_sv_es_index_row + return ( + latest_annotate_dataset_row, + latest_snv_es_index_row, + latest_sv_es_index_row, + ) # Main functions async def get_project_insights_summary( @@ -814,8 +899,8 @@ async def get_project_insights_summary( # Get the sequencing groups for each of the analyses in the grouped analyses rows analysis_sequencing_groups = await self.get_analysis_sequencing_groups( ( - list(latest_annotate_dataset_by_project_id_and_seq_type.values()) + - list(latest_es_indices_by_project_id_and_seq_type_and_stage.values()) + list(latest_annotate_dataset_by_project_id_and_seq_type.values()) + + list(latest_es_indices_by_project_id_and_seq_type_and_stage.values()) ) ) @@ -827,15 +912,17 @@ async def get_project_insights_summary( response = [] for project, seq_type, seq_tech in combinations: - rowkey = ProjectSeqTypeTechnologyKey( - project.id, seq_type, seq_tech - ) + rowkey = ProjectSeqTypeTechnologyKey(project.id, seq_type, seq_tech) - total_sequencing_groups = total_sequencing_groups_by_project_id_and_seq_fields.get(rowkey, 0) + total_sequencing_groups = ( + total_sequencing_groups_by_project_id_and_seq_fields.get(rowkey, 0) + ) if total_sequencing_groups == 0: continue - crams_in_project_with_sequencing_fields = crams_by_project_id_and_seq_fields.get(rowkey, []) + crams_in_project_with_sequencing_fields = ( + crams_by_project_id_and_seq_fields.get(rowkey, []) + ) ( latest_annotate_dataset_row, latest_snv_es_index_row, @@ -857,7 +944,9 @@ async def get_project_insights_summary( summary_row_key=rowkey, project=project, total_families=total_families_by_project_id_and_seq_fields[rowkey], - total_participants=total_participants_by_project_id_and_seq_fields[rowkey], + total_participants=total_participants_by_project_id_and_seq_fields[ + rowkey + ], total_samples=total_samples_by_project_id_and_seq_fields[rowkey], total_sequencing_groups=total_sequencing_groups, crams=crams_in_project_with_sequencing_fields, @@ -887,7 +976,9 @@ async def get_project_insights_details( sequencing_group_stripy_reports, sequencing_group_mito_reports, ) = await asyncio.gather( - self._sequencing_group_details_by_project_and_seq_fields(project_ids, sequencing_types), + self._sequencing_group_details_by_project_and_seq_fields( + project_ids, sequencing_types + ), self._crams_by_project_id_and_seq_fields(project_ids, sequencing_types), self._latest_annotate_dataset_by_project_id_and_seq_type( project_ids, sequencing_types @@ -901,15 +992,13 @@ async def get_project_insights_details( # Get the sequencing groups for each of the analyses in the grouped analyses rows analysis_sequencing_groups = await self.get_analysis_sequencing_groups( ( - list(latest_annotate_dataset_by_project_id_and_seq_type.values()) + - list(latest_es_indices_by_project_id_and_seq_type_and_stage.values()) + list(latest_annotate_dataset_by_project_id_and_seq_type.values()) + + list(latest_es_indices_by_project_id_and_seq_type_and_stage.values()) ) ) sequencing_platforms = await SeqPlatformTable(self._connection).get() - sequencing_technologies = await SeqTechTable( - self._connection - ).get() + sequencing_technologies = await SeqTechTable(self._connection).get() # Get all possible combinations of the projects, sequencing types, platforms, and technologies combinations = itertools.product( @@ -924,20 +1013,27 @@ async def get_project_insights_details( seq_tech, ) in combinations: details_rows: list[SequencingGroupDetailRow] - if not (details_rows := sequencing_group_details_by_project_id_and_seq_fields.get((project.id, seq_type, seq_platform, seq_tech))): + if not ( + details_rows + := sequencing_group_details_by_project_id_and_seq_fields.get( + (project.id, seq_type, seq_platform, seq_tech) + ) + ): continue - sequencing_groups_with_crams = crams_by_project_id_and_seq_fields.get((project.id, seq_type, seq_tech), []) + sequencing_groups_with_crams = crams_by_project_id_and_seq_fields.get( + (project.id, seq_type, seq_tech), [] + ) ( latest_annotate_dataset_row, latest_snv_es_index_row, - latest_sv_es_index_row + latest_sv_es_index_row, ) = self.get_latest_grouped_analyses( project, seq_type, seq_tech, latest_annotate_dataset_by_project_id_and_seq_type, - latest_es_indices_by_project_id_and_seq_type_and_stage + latest_es_indices_by_project_id_and_seq_type_and_stage, ) for details_row in details_rows: @@ -952,9 +1048,21 @@ async def get_project_insights_details( sequencing_group_details=details_row, sequencing_groups_with_crams=sequencing_groups_with_crams, analysis_sequencing_groups=analysis_sequencing_groups, - latest_annotate_dataset_id=latest_annotate_dataset_row.id if latest_annotate_dataset_row else None, - latest_snv_es_index_id=latest_snv_es_index_row.id if latest_snv_es_index_row else None, - latest_sv_es_index_id=latest_sv_es_index_row.id if latest_sv_es_index_row else None, + latest_annotate_dataset_id=( + latest_annotate_dataset_row.id + if latest_annotate_dataset_row + else None + ), + latest_snv_es_index_id=( + latest_snv_es_index_row.id + if latest_snv_es_index_row + else None + ), + latest_sv_es_index_id=( + latest_sv_es_index_row.id + if latest_sv_es_index_row + else None + ), stripy_reports=sequencing_group_stripy_reports, mito_reports=sequencing_group_mito_reports, ) diff --git a/db/python/layers/sample.py b/db/python/layers/sample.py index 1b4c86fc7..3018edd71 100644 --- a/db/python/layers/sample.py +++ b/db/python/layers/sample.py @@ -361,7 +361,6 @@ def unwrap_nested_samples( new_round = [] round_idx += 1 for root, parent, nested_samples in prev_round: - for sample in nested_samples: retval.append( SampleLayer.UnwrappedSample( diff --git a/db/python/tables/project.py b/db/python/tables/project.py index 74fa1c74f..6efd859d9 100644 --- a/db/python/tables/project.py +++ b/db/python/tables/project.py @@ -288,7 +288,6 @@ async def set_project_members( """ async with self.connection.transaction(): - # Get existing rows so that we can keep the existing audit log ids existing_rows = await self.connection.fetch_all( """ diff --git a/db/python/tables/sample.py b/db/python/tables/sample.py index 54135a957..f228ec944 100644 --- a/db/python/tables/sample.py +++ b/db/python/tables/sample.py @@ -176,7 +176,7 @@ async def get_sample_by_id( ) if not samples: raise NotFoundError( - f'Couldn\'t find sample with internal id {internal_id} (CPG id: {sample_id_format(internal_id)})' + f"Couldn't find sample with internal id {internal_id} (CPG id: {sample_id_format(internal_id)})" ) return projects.pop(), samples.pop() @@ -197,7 +197,7 @@ async def get_single_by_external_id( if not samples: raise NotFoundError( - f'Couldn\'t find sample with external id {external_id} in project {project}' + f"Couldn't find sample with external id {external_id} in project {project}" ) return samples.pop() diff --git a/deploy/python/version.txt b/deploy/python/version.txt index eab246c06..44e98ada9 100644 --- a/deploy/python/version.txt +++ b/deploy/python/version.txt @@ -1 +1 @@ -7.3.2 +7.3.3 diff --git a/metamist/audit/audithelper.py b/metamist/audit/audithelper.py index 7b58fb2a1..cf568698c 100644 --- a/metamist/audit/audithelper.py +++ b/metamist/audit/audithelper.py @@ -16,9 +16,7 @@ class AuditHelper(CloudHelper): """General helper class for bucket auditing""" EXCLUDED_SGS: set[str] = set( - sg for sg in - os.getenv('SM_AUDIT_EXCLUDED_SGS', '').split(',') - if sg + sg for sg in os.getenv('SM_AUDIT_EXCLUDED_SGS', '').split(',') if sg ) @staticmethod diff --git a/metamist/audit/generic_auditor.py b/metamist/audit/generic_auditor.py index c69eacf40..c154db431 100644 --- a/metamist/audit/generic_auditor.py +++ b/metamist/audit/generic_auditor.py @@ -137,7 +137,7 @@ async def get_participant_data_for_dataset(self) -> list[dict]: @staticmethod def get_most_recent_analyses_by_sg( - analyses_list: list[dict[str, Any]] + analyses_list: list[dict[str, Any]], ) -> dict[str, dict[str, Any]]: """ Takes a list of completed analyses for a number of sequencing groups and returns the latest diff --git a/metamist/graphql/__init__.py b/metamist/graphql/__init__.py index d4324067f..1bfd6fee3 100644 --- a/metamist/graphql/__init__.py +++ b/metamist/graphql/__init__.py @@ -146,7 +146,7 @@ def validate(doc: DocumentNode, client=None, use_local_schema=False): backoff.fibo, exception=(HTTPError, JSONDecodeError, TransportServerError), max_time=55, - max_value=55 + max_value=55, ) def query( _query: str | DocumentNode, diff --git a/metamist/parser/generic_metadata_parser.py b/metamist/parser/generic_metadata_parser.py index 1f4f5ded2..1cd8c3ba2 100644 --- a/metamist/parser/generic_metadata_parser.py +++ b/metamist/parser/generic_metadata_parser.py @@ -819,10 +819,12 @@ async def get_assays_from_group( assays = [] - read_filenames, read_checksums, reference_assemblies = ( - await self.get_read_and_ref_files_and_checksums( - sample.primary_external_id, rows - ) + ( + read_filenames, + read_checksums, + reference_assemblies, + ) = await self.get_read_and_ref_files_and_checksums( + sample.primary_external_id, rows ) # strip in case collaborator put "file1, file2" diff --git a/metamist/parser/generic_parser.py b/metamist/parser/generic_parser.py index 631226253..0aece2af4 100644 --- a/metamist/parser/generic_parser.py +++ b/metamist/parser/generic_parser.py @@ -475,9 +475,7 @@ def wrapper(*args, **kwargs): return wrapper -class GenericParser( - CloudHelper -): # pylint: disable=too-many-public-methods,too-many-arguments +class GenericParser(CloudHelper): # pylint: disable=too-many-public-methods,too-many-arguments """Parser for ingesting rows of metadata""" def __init__( # pylint: disable=too-many-arguments @@ -1671,13 +1669,14 @@ def key_selector(kv: tuple[str, str]) -> tuple[str, str]: _, fastq_file_components = kv fastq_suffix = fastq_file_components[1] try: - common_component = fastq_suffix.split('_')[ + common_component = fastq_suffix.split( + '_' + )[ 1 ] # get the component of the file suffix after the first underscore if there except IndexError: - common_component = fastq_suffix.split('.', 1)[ - 1 - ] # or get the file extension if there is no trailing underscore after R1/R2 + # or get the file extension if there is no trailing underscore after R1/R2 + common_component = fastq_suffix.split('.', 1)[1] return fastq_suffix, common_component diff --git a/metamist/parser/sample_file_map_parser.py b/metamist/parser/sample_file_map_parser.py index 421aebd5e..13b16a566 100644 --- a/metamist/parser/sample_file_map_parser.py +++ b/metamist/parser/sample_file_map_parser.py @@ -31,9 +31,32 @@ READS_COL_NAME: ['filename', 'filenames', 'files', 'file'], SEQ_TYPE_COL_NAME: ['type', 'types', 'sequencing type', 'sequencing_type'], SEQ_FACILITY_COL_NAME: ['facility', 'sequencing facility', 'sequencing_facility'], - SEQ_LIBRARY_COL_NAME: ['library', 'library_prep', 'library prep', 'library type', 'library_type', 'sequencing_library', 'sequencing library'], - READ_END_TYPE_COL_NAME: ['read_end_type', 'read end type', 'read_end_types', 'read end types', 'end type', 'end_type', 'end_types', 'end types'], - READ_LENGTH_COL_NAME: ['length', 'read length', 'read_length', 'read lengths', 'read_lengths'], + SEQ_LIBRARY_COL_NAME: [ + 'library', + 'library_prep', + 'library prep', + 'library type', + 'library_type', + 'sequencing_library', + 'sequencing library', + ], + READ_END_TYPE_COL_NAME: [ + 'read_end_type', + 'read end type', + 'read_end_types', + 'read end types', + 'end type', + 'end_type', + 'end_types', + 'end types', + ], + READ_LENGTH_COL_NAME: [ + 'length', + 'read length', + 'read_length', + 'read lengths', + 'read_lengths', + ], CHECKSUM_COL_NAME: ['md5', 'checksum'], } diff --git a/metamist_infrastructure/__main__.py b/metamist_infrastructure/__main__.py index efdf0c08d..6813e2d89 100644 --- a/metamist_infrastructure/__main__.py +++ b/metamist_infrastructure/__main__.py @@ -5,6 +5,7 @@ development gcloud project. """ + import os from typing import NamedTuple diff --git a/metamist_infrastructure/slack_notification.py b/metamist_infrastructure/slack_notification.py index b6843e014..02df5001e 100644 --- a/metamist_infrastructure/slack_notification.py +++ b/metamist_infrastructure/slack_notification.py @@ -3,6 +3,7 @@ Make metamist architecture available to production pulumi stack so it can be centrally deployed. Do this through a plugin, and submodule. """ + from enum import Enum from functools import cached_property from pathlib import Path diff --git a/models/enums/web.py b/models/enums/web.py index b76613f10..80aff72a8 100644 --- a/models/enums/web.py +++ b/models/enums/web.py @@ -15,6 +15,6 @@ class SeqrDatasetType(Enum): """Type of dataset (es-index) that can be POSTed to Seqr""" SNV_INDEL = 'SNV_INDEL' # Haplotypecaller in seqr UI - SV = 'SV' # SV Caller in seqr UI (WGS projects) - GCNV = 'SV_WES' # SV Caller in seqr UI (WES projects) - MITO = 'MITO' # Mitochondria Caller in seqr UI + SV = 'SV' # SV Caller in seqr UI (WGS projects) + GCNV = 'SV_WES' # SV Caller in seqr UI (WES projects) + MITO = 'MITO' # Mitochondria Caller in seqr UI diff --git a/models/models/project_insights.py b/models/models/project_insights.py index 4a184a27e..adf6ba110 100644 --- a/models/models/project_insights.py +++ b/models/models/project_insights.py @@ -10,6 +10,7 @@ @dataclass class AnalysisStatsInternal: """Model for Analysis Sequencing Group Stats""" + id: int | None = None name: str | None = None sg_count: int | None = None @@ -19,7 +20,11 @@ def to_external(self) -> Optional['AnalysisStats']: """Convert to transport model""" if self.id is None: return None - timestamp = self.timestamp if isinstance(self.timestamp, str) else self.timestamp.isoformat() + timestamp = ( + self.timestamp + if isinstance(self.timestamp, str) + else self.timestamp.isoformat() + ) return AnalysisStats( id=self.id, name=self.name, @@ -75,7 +80,9 @@ def to_external(self): participant_ext_id=self.participant_ext_id, sample_id=sample_id_format.sample_id_format(self.sample_id), sample_ext_ids=self.sample_ext_ids, - sequencing_group_id=sequencing_group_id_format.sequencing_group_id_format(self.sequencing_group_id), + sequencing_group_id=sequencing_group_id_format.sequencing_group_id_format( + self.sequencing_group_id + ), completed_cram=self.completed_cram, in_latest_annotate_dataset=self.in_latest_annotate_dataset, in_latest_snv_es_index=self.in_latest_snv_es_index, @@ -136,9 +143,15 @@ def to_external(self): total_samples=self.total_samples, total_sequencing_groups=self.total_sequencing_groups, total_crams=self.total_crams, - latest_annotate_dataset=self.latest_annotate_dataset.to_external() if self.latest_annotate_dataset else None, - latest_snv_es_index=self.latest_snv_es_index.to_external() if self.latest_snv_es_index else None, - latest_sv_es_index=self.latest_sv_es_index.to_external() if self.latest_sv_es_index else None, + latest_annotate_dataset=self.latest_annotate_dataset.to_external() + if self.latest_annotate_dataset + else None, + latest_snv_es_index=self.latest_snv_es_index.to_external() + if self.latest_snv_es_index + else None, + latest_sv_es_index=self.latest_sv_es_index.to_external() + if self.latest_sv_es_index + else None, ) diff --git a/models/models/sample.py b/models/models/sample.py index bf7549a93..23a7f92d9 100644 --- a/models/models/sample.py +++ b/models/models/sample.py @@ -118,9 +118,8 @@ class SampleUpsertInternal(SMBase): def update_participant_id(self, participant_id: int): """Update the participant ID for the samples""" self.participant_id = participant_id - if self.nested_samples: - for s in self.nested_samples: - s.participant_id = participant_id + for s in self.nested_samples or []: + s.participant_id = participant_id def to_external(self): """Convert to transport model""" diff --git a/models/utils/cohort_template_id_format.py b/models/utils/cohort_template_id_format.py index 5f0864c49..e573af119 100644 --- a/models/utils/cohort_template_id_format.py +++ b/models/utils/cohort_template_id_format.py @@ -15,18 +15,20 @@ def cohort_template_id_format(cohort_template_id: int | str) -> str: if isinstance(cohort_template_id, str) and not cohort_template_id.isdigit(): if cohort_template_id.startswith(COHORT_TEMPLATE_PREFIX): return cohort_template_id - raise ValueError(f'Unexpected format for cohort template identifier {cohort_template_id!r}') + raise ValueError( + f'Unexpected format for cohort template identifier {cohort_template_id!r}' + ) cohort_template_id = int(cohort_template_id) - checksum = luhn_compute( - cohort_template_id, offset=COHORT_TEMPLATE_CHECKSUM_OFFSET - ) + checksum = luhn_compute(cohort_template_id, offset=COHORT_TEMPLATE_CHECKSUM_OFFSET) return f'{COHORT_TEMPLATE_PREFIX}{cohort_template_id}{checksum}' -def cohort_template_id_format_list(cohort_template_ids: Iterable[int | str]) -> list[str]: +def cohort_template_id_format_list( + cohort_template_ids: Iterable[int | str], +) -> list[str]: """ Transform LIST of raw (int) cohort template identifier to format (CTPLXXX) where: - CTPL is the prefix @@ -59,20 +61,26 @@ def cohort_template_id_transform_to_raw(cohort_template_id: int | str) -> int: f'Invalid prefix found for {COHORT_TEMPLATE_PREFIX} cohort template identifier {cohort_template_id!r}' ) - stripped_identifier = cohort_template_id[len(COHORT_TEMPLATE_PREFIX):] + stripped_identifier = cohort_template_id[len(COHORT_TEMPLATE_PREFIX) :] if not stripped_identifier.isdigit(): - raise ValueError(f'Invalid identifier found for cohort template identifier {cohort_template_id!r}') + raise ValueError( + f'Invalid identifier found for cohort template identifier {cohort_template_id!r}' + ) cohort_template_id = int(stripped_identifier) if not luhn_is_valid(cohort_template_id, offset=COHORT_TEMPLATE_CHECKSUM_OFFSET): - raise ValueError(f'Invalid checksum found for cohort template identifier {cohort_template_id!r}') + raise ValueError( + f'Invalid checksum found for cohort template identifier {cohort_template_id!r}' + ) return int(stripped_identifier[:-1]) -def cohort_template_id_transform_list_to_raw(cohort_template_ids: Iterable[int | str]) -> list[int]: +def cohort_template_id_transform_list_to_raw( + cohort_template_ids: Iterable[int | str], +) -> list[int]: """ Transform LIST of STRING cohort template identifier (CTPLXXXH) to XXX by: - validating prefix diff --git a/models/utils/sequencing_group_id_format.py b/models/utils/sequencing_group_id_format.py index 5be3e164d..738c157dc 100644 --- a/models/utils/sequencing_group_id_format.py +++ b/models/utils/sequencing_group_id_format.py @@ -48,7 +48,9 @@ def sequencing_group_id_transform_to_raw(identifier: int | str, strict=True) -> if not luhn_is_valid( sequencing_group_id_with_checksum, offset=SEQUENCING_GROUP_CHECKSUM_OFFSET ): - raise ValueError(f'The provided sequencing group ID was not valid: {identifier!r}') + raise ValueError( + f'The provided sequencing group ID was not valid: {identifier!r}' + ) return int(stripped_identifier[:-1]) diff --git a/pyproject.toml b/pyproject.toml index 8f44aa747..f83f5eb05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,40 @@ -[tool.black] +[tool.ruff] line-length = 88 -skip-string-normalization = true -exclude = "metamist/" -include = "metamist/parser/" + +exclude = [ + "metamist/api/*", + "metamist/apis/*", + "metamist/model/*", + "metamist/models/*", + +] + +[tool.ruff.format] +quote-style = "single" + + +[tool.ruff.lint.isort] + +section-order = ["future", "standard-library", "third-party", "hail", "cpg", "first-party", "local-folder"] + +[tool.ruff.lint.isort.sections] +hail = [ + "hail", + "hailtop", +] +# Adjust these for each repository, e.g., removing those that should be +# local rather than CPG. Also fill in extend_skip below if there are any +# subdirectories that should be ignored. +cpg = [ + "analysis_runner", + "cpg_infra", + "cpg_utils", + "cpg_workflows", + "gnomad", + "hail_scripts", + "metamist", +] + [tool.isort] py_version = 311 @@ -30,8 +62,6 @@ known_cpg = [ "hail_scripts", ] -[tool.ruff] -line-length = 88 [tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401"] diff --git a/scripts/20230420_sequencinggroupmigration.py b/scripts/20230420_sequencinggroupmigration.py index 3feeecdd5..095009021 100644 --- a/scripts/20230420_sequencinggroupmigration.py +++ b/scripts/20230420_sequencinggroupmigration.py @@ -23,6 +23,7 @@ Noting, this script WILL modify the database, it's not easy to generate a list of SQL statements to run, because the script requires inserted IDS. """ + import asyncio import json from collections import defaultdict @@ -355,9 +356,9 @@ async def migrate_analyses(connection: Database, dry_run: bool = True): sequencing_group_ids_of_duplicate_samples = await connection.fetch_all( sequencing_group_ids_of_duplicate_samples_query ) - duplicate_sg_id_map: Dict[ - SampleId, Dict[SequenceType, SequenceGroupId] - ] = defaultdict(dict) + duplicate_sg_id_map: Dict[SampleId, Dict[SequenceType, SequenceGroupId]] = ( + defaultdict(dict) + ) for row in sequencing_group_ids_of_duplicate_samples: duplicate_sg_id_map[row['sample_id']][row['type']] = row['id'] diff --git a/scripts/add_cram_size.py b/scripts/add_cram_size.py index 1e00b04ef..794dd8c0b 100644 --- a/scripts/add_cram_size.py +++ b/scripts/add_cram_size.py @@ -3,6 +3,7 @@ This script goes through all CRAMS in sample-metadata, gets the size, and updates the meta['size'] attribute on the analysis. """ + import asyncio import logging import os @@ -119,9 +120,9 @@ async def process_project(project: str): for blob in client.list_blobs(bucket, prefix=prefix): if not blob.name.endswith('.cram'): continue - file_size_by_name[ - os.path.join(bucket_path, blob.name) - ] = blob.size + file_size_by_name[os.path.join(bucket_path, blob.name)] = ( + blob.size + ) except NotFound: continue except NotFound: diff --git a/scripts/arbitrary_sm.py b/scripts/arbitrary_sm.py index 9753534a2..cd0e9f4d8 100755 --- a/scripts/arbitrary_sm.py +++ b/scripts/arbitrary_sm.py @@ -11,6 +11,7 @@ --json '{"project": "acute-care", "external_id": ""}' """ + import argparse import json import logging diff --git a/scripts/back_populate_library_type.py b/scripts/back_populate_library_type.py index 8d065106f..5754ed6d2 100644 --- a/scripts/back_populate_library_type.py +++ b/scripts/back_populate_library_type.py @@ -5,6 +5,7 @@ updates the assay.meta field with `facility` and `library_type` annotations. Also updates the sequencing group meta fields with the same information, if the flag is set. """ + import asyncio import logging import re @@ -132,12 +133,16 @@ async def update_assays_async(assays_to_update: list[dict]): for assay_id, assay_meta_fields_to_update in assay.items(): if assay_meta_fields_to_update: updates.append( - asapi.update_assay_async(AssayUpsert(id=assay_id, meta=assay_meta_fields_to_update),) + asapi.update_assay_async( + AssayUpsert(id=assay_id, meta=assay_meta_fields_to_update), + ) ) return await asyncio.gather(*updates) -async def update_sequencing_groups_async(sequencing_groups_to_update: list[dict], project: str): +async def update_sequencing_groups_async( + sequencing_groups_to_update: list[dict], project: str +): """Update sequencing groups with new meta fields.""" sgapi = SequencingGroupApi() updates = [] @@ -148,7 +153,9 @@ async def update_sequencing_groups_async(sequencing_groups_to_update: list[dict] sgapi.update_sequencing_group_async( sequencing_group_id=sequencing_group_id, project=project, - sequencing_group_meta_update_model=SequencingGroupMetaUpdateModel(meta=sg_meta_fields_to_update) + sequencing_group_meta_update_model=SequencingGroupMetaUpdateModel( + meta=sg_meta_fields_to_update + ), ) ) return await asyncio.gather(*updates) @@ -181,13 +188,18 @@ async def update_sequencing_groups_async(sequencing_groups_to_update: list[dict] default=False, help='Do not save changes to metamist', ) -def main(project: str, sequencing_type: str, update_sequencing_groups: bool, dry_run: bool): +def main( + project: str, sequencing_type: str, update_sequencing_groups: bool, dry_run: bool +): """Back populate facility and library_type meta fields for existing assays and sequencing groups.""" sg_assays: defaultdict[str, list[dict]] = defaultdict(list) sg_meta: dict[str, dict] = {} # Get all sequencing groups and their assays for the project (for a specific sequencing type) - project_sg_assays: dict[str, dict] = query(_sg_assays_query, variables={'project': project, 'sequencingType': sequencing_type}) + project_sg_assays: dict[str, dict] = query( + _sg_assays_query, + variables={'project': project, 'sequencingType': sequencing_type}, + ) for sequencing_group in project_sg_assays['project']['sequencingGroups']: # pylint: disable=unsubscriptable-object sg_id = sequencing_group['id'] sg_meta[sg_id] = sequencing_group['meta'] @@ -212,13 +224,17 @@ def main(project: str, sequencing_type: str, update_sequencing_groups: bool, dry for assay_id, assay_meta_fields_to_update in assay_meta.items(): if assay_meta_fields_to_update: if update_sequencing_groups: - sg_meta_fields_to_update['facility'] = assay_meta_fields_to_update['facility'] + sg_meta_fields_to_update['facility'] = assay_meta_fields_to_update[ + 'facility' + ] if library_type := assay_meta_fields_to_update.get('library_type'): sg_meta_fields_to_update['library_type'] = library_type assays_to_update.append({assay_id: assay_meta_fields_to_update}) if sg_meta_fields_to_update and update_sequencing_groups: - sequencing_groups_to_update.append({sequencing_group: sg_meta_fields_to_update}) + sequencing_groups_to_update.append( + {sequencing_group: sg_meta_fields_to_update} + ) if dry_run: logging.info( @@ -232,7 +248,9 @@ def main(project: str, sequencing_type: str, update_sequencing_groups: bool, dry asyncio.run(update_assays_async(assays_to_update)) logging.info(f'Updated {len(assays_to_update)} assays.\n\n{assays_to_update}') if update_sequencing_groups: - asyncio.run(update_sequencing_groups_async(sequencing_groups_to_update, project)) + asyncio.run( + update_sequencing_groups_async(sequencing_groups_to_update, project) + ) logging.info( f'\n\nUpdated {len(sequencing_groups_to_update)} sequencing groups.\n\n{sequencing_groups_to_update}' ) diff --git a/scripts/check_sequence_files.py b/scripts/check_sequence_files.py index e8c22414f..5661906d6 100755 --- a/scripts/check_sequence_files.py +++ b/scripts/check_sequence_files.py @@ -4,6 +4,7 @@ Find sequencing files that exist in the bucket, but are not ingested. This pairs well will the cleanup_fastqs.py script. """ + import asyncio import logging import os diff --git a/scripts/create_custom_cohort.py b/scripts/create_custom_cohort.py index f2fe9bfe2..8e626fcfe 100644 --- a/scripts/create_custom_cohort.py +++ b/scripts/create_custom_cohort.py @@ -1,4 +1,4 @@ -""" A script to create a custom cohort """ +"""A script to create a custom cohort""" import argparse @@ -45,7 +45,9 @@ def main( def get_cohort_spec( - cohort_name: str | None, cohort_description: str | None, cohort_template_id: str | None + cohort_name: str | None, + cohort_description: str | None, + cohort_template_id: str | None, ) -> CohortBody: """Get the cohort spec""" @@ -62,7 +64,6 @@ def get_cohort_spec( if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Create a custom cohort') parser.add_argument( '--project', type=str, help='The project to create the cohort in' @@ -112,7 +113,9 @@ def get_cohort_spec( parser.add_argument( '--sample_type', required=False, type=list[str], help='sample type' ) - parser.add_argument('--dry-run', '--dry_run', action='store_true', help='Dry run mode') + parser.add_argument( + '--dry-run', '--dry_run', action='store_true', help='Dry run mode' + ) args = parser.parse_args() diff --git a/scripts/create_or_validate_md5s.py b/scripts/create_or_validate_md5s.py index d1ec45127..01bfc047c 100644 --- a/scripts/create_or_validate_md5s.py +++ b/scripts/create_or_validate_md5s.py @@ -7,7 +7,9 @@ from cpg_utils.hail_batch import config_retrieve, copy_common_env, get_batch -def get_blobs_in_directory(gs_dir: str, billing_project: str, storage_client: storage.Client): +def get_blobs_in_directory( + gs_dir: str, billing_project: str, storage_client: storage.Client +): """Get all blobs in a directory as a set of full gs:// URIs""" bucket_name, *components = gs_dir[5:].split('/') bucket = storage_client.bucket(bucket_name, user_project=billing_project) @@ -70,7 +72,7 @@ def create_and_validate_md5s_for_files_in_directory( force_recreate_existing: bool, billing_project: str, driver_image: str, - storage_client: storage.Client + storage_client: storage.Client, ): """ Create OR validate MD5s for files in the provided gs directory, skipping files with certain extensions. @@ -86,7 +88,7 @@ def create_and_validate_md5s_for_files_in_directory( md5_exists = f'{filepath}.md5' in files - if mode == 'create' : + if mode == 'create': if not md5_exists or force_recreate_existing: print('Creating md5 for', filepath) job = b.new_job(f'Create {os.path.basename(filepath)}.md5') @@ -109,15 +111,27 @@ def create_and_validate_md5s_for_files_in_directory( @click.command() @click.option('--billing-project', '-b', default=None) @click.option('--skip-filetypes', '-s', default=('.crai', '.tbi'), multiple=True) -@click.option('--mode', '-m', type=click.Choice(['create', 'validate']), default='create', help='Whether to validate existing md5s or create new ones.') -@click.option('--force-recreate-existing', '-f', is_flag=True, default=False, help='Re-create md5s even if they already exist.') +@click.option( + '--mode', + '-m', + type=click.Choice(['create', 'validate']), + default='create', + help='Whether to validate existing md5s or create new ones.', +) +@click.option( + '--force-recreate-existing', + '-f', + is_flag=True, + default=False, + help='Re-create md5s even if they already exist.', +) @click.argument('gs_dir') def main( billing_project: str | None, skip_filetypes: tuple[str, str], mode: str, force_recreate_existing: bool, - gs_dir: str + gs_dir: str, ): """ Scans the directory for files and creates md5 checksums for them, OR validates existing md5s. diff --git a/scripts/create_test_subset.py b/scripts/create_test_subset.py index 6dfab4f22..c801fec40 100755 --- a/scripts/create_test_subset.py +++ b/scripts/create_test_subset.py @@ -506,7 +506,6 @@ def transfer_analyses( ) existing_sgid = existing_sg.get('id') if existing_sg else None for analysis in sg['analyses']: - existing_analysis: dict = {} if existing_sgid: existing_analysis = get_existing_analysis( diff --git a/scripts/fix_family_ids.py b/scripts/fix_family_ids.py index c714916cd..fbd5b9ea9 100644 --- a/scripts/fix_family_ids.py +++ b/scripts/fix_family_ids.py @@ -1,6 +1,7 @@ """ Small script to update external family IDs """ + import json import click diff --git a/scripts/fix_participant_ids.py b/scripts/fix_participant_ids.py index f8ed64dcb..ea110b459 100644 --- a/scripts/fix_participant_ids.py +++ b/scripts/fix_participant_ids.py @@ -1,6 +1,7 @@ """ Small script to update external participant IDs """ + import json import click diff --git a/scripts/fix_pbmc_sample_participant.py b/scripts/fix_pbmc_sample_participant.py index c3fe32b08..e7f851a64 100644 --- a/scripts/fix_pbmc_sample_participant.py +++ b/scripts/fix_pbmc_sample_participant.py @@ -19,12 +19,20 @@ def main(): ) ) - sample_map_by_external_id = {eid: s for s in all_samples for eid in s['external_ids'].values()} + sample_map_by_external_id = { + eid: s for s in all_samples for eid in s['external_ids'].values() + } - pbmc_samples = [s for s in all_samples if any('-PBMC' in eid for eid in s['external_ids'].values())] + pbmc_samples = [ + s + for s in all_samples + if any('-PBMC' in eid for eid in s['external_ids'].values()) + ] for sample in pbmc_samples: - external_id = next(eid for eid in sample['external_ids'].values() if '-PBMC' in eid) + external_id = next( + eid for eid in sample['external_ids'].values() if '-PBMC' in eid + ) non_pbmc_id = external_id.strip('-PBMC') non_pbmc_sample = sample_map_by_external_id.get(non_pbmc_id) pbmc_sample = sample_map_by_external_id.get(external_id) diff --git a/scripts/parse_ont_sheet.py b/scripts/parse_ont_sheet.py index 9af36b22c..c4634ef00 100644 --- a/scripts/parse_ont_sheet.py +++ b/scripts/parse_ont_sheet.py @@ -97,7 +97,9 @@ def parse_fastqs_structure(fastqs) -> List[List[str]]: """ return [fastqs] - async def get_sample_sequencing_groups(self, sample: ParsedSample) -> list[ParsedSequencingGroup]: + async def get_sample_sequencing_groups( + self, sample: ParsedSample + ) -> list[ParsedSequencingGroup]: sequencing_groups = await super().get_sample_sequencing_groups(sample) for sequencing_group in sequencing_groups: @@ -105,7 +107,8 @@ async def get_sample_sequencing_groups(self, sample: ParsedSample) -> list[Parse for r in sequencing_group.rows: parsed_failed_fastqs = await self.parse_files( - sequencing_group.sample.primary_external_id, r[Columns.FAIL_FASTQ_FILENAME] + sequencing_group.sample.primary_external_id, + r[Columns.FAIL_FASTQ_FILENAME], ) if 'reads' not in parsed_failed_fastqs: raise ValueError( diff --git a/scripts/parse_ped.py b/scripts/parse_ped.py index 831ddd402..e12a4f625 100644 --- a/scripts/parse_ped.py +++ b/scripts/parse_ped.py @@ -1,4 +1,4 @@ -""" A really simple script to import a pedigree file """ +"""A really simple script to import a pedigree file""" import click from cloudpathlib import AnyPath diff --git a/setup.py b/setup.py index f4cfc6ff4..26125bd67 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ setup( name=PKG, # This tag is automatically updated by bump2version - version='7.3.2', + version='7.3.3', description='Python API for interacting with the Sample API system', long_description=readme, long_description_content_type='text/markdown', diff --git a/test/data/generate_data.py b/test/data/generate_data.py index 079d8a1cd..a4e63507c 100755 --- a/test/data/generate_data.py +++ b/test/data/generate_data.py @@ -26,7 +26,7 @@ PRIMARY_EXTERNAL_ORG = '' -EMOJIS = [':)', ':(', ':/', ':\'('] +EMOJIS = [':)', ':(', ':/', ":'("] default_ped_location = str(Path(__file__).parent / 'greek-myth-forgeneration.ped') diff --git a/test/data/generate_ourdna_data.py b/test/data/generate_ourdna_data.py index 9772957a4..eb12856f3 100644 --- a/test/data/generate_ourdna_data.py +++ b/test/data/generate_ourdna_data.py @@ -7,6 +7,7 @@ If you want to regenerate the data you would need to delete records from table sample and participant first """ + import argparse import asyncio diff --git a/test/data/generate_seqr_project_data.py b/test/data/generate_seqr_project_data.py index 1e367bcda..6df77002d 100644 --- a/test/data/generate_seqr_project_data.py +++ b/test/data/generate_seqr_project_data.py @@ -311,13 +311,9 @@ async def generate_sample_entries( 'short-read', 'long-read', 'bulk-rna-seq', - 'single-cell-rna-seq' - ] - sequencing_platforms = [ - 'illumina', - 'oxford-nanopore', - 'pacbio' + 'single-cell-rna-seq', ] + sequencing_platforms = ['illumina', 'oxford-nanopore', 'pacbio'] sequencing_types = ['genome', 'exome', 'transcriptome'] # Arbitrary distribution for number of samples, sequencing groups, assays @@ -388,7 +384,9 @@ async def generate_sample_entries( await sapi.upsert_samples_async(project, samples) -async def generate_cram_analyses(project: str, analyses_to_insert: list[Analysis]) -> list[dict]: +async def generate_cram_analyses( + project: str, analyses_to_insert: list[Analysis] +) -> list[dict]: """ Queries the list of sequencing groups for a project and randomly selects some to generate CRAM analysis entries for. @@ -418,9 +416,7 @@ async def generate_cram_analyses(project: str, analyses_to_insert: list[Analysis ).isoformat(), meta={ # random size between 5, 25 GB - 'size': random.randint(5 * 1024, 25 * 1024) - * 1024 - * 1024, + 'size': random.randint(5 * 1024, 25 * 1024) * 1024 * 1024, }, ) for sg in aligned_sgs @@ -430,16 +426,18 @@ async def generate_cram_analyses(project: str, analyses_to_insert: list[Analysis return aligned_sgs -async def generate_web_report_analyses(project: str, - aligned_sequencing_groups: list[dict], - analyses_to_insert: list[Analysis] - ): +async def generate_web_report_analyses( + project: str, + aligned_sequencing_groups: list[dict], + analyses_to_insert: list[Analysis], +): """ Queries the list of sequencing groups for a project and generates web analysis (STRipy and MITO report) entries for those with completed a CRAM analysis. Stripy analyses have a random chance of having outliers detected, and a random number of loci flagged as outliers. """ + def get_stripy_outliers(): """ Generate a the outliers_detected bool, and then the outlier_loci dict @@ -450,10 +448,7 @@ def get_stripy_outliers(): for loci in random.sample(LOCI, k=random.randint(1, len(LOCI))): outlier_loci[loci] = random.choice(['1', '2', '3']) - return { - 'outliers_detected': outliers_detected, - 'outlier_loci': outlier_loci - } + return {'outliers_detected': outliers_detected, 'outlier_loci': outlier_loci} # Insert completed web analyses for the aligned sequencing groups for sg in aligned_sequencing_groups: @@ -466,7 +461,8 @@ def get_stripy_outliers(): status=AnalysisStatus('completed'), output=f'FAKE://{project}/stripy/{sg["id"]}.stripy.html', timestamp_completed=( - datetime.datetime.now() - datetime.timedelta(days=random.randint(1, 15)) + datetime.datetime.now() + - datetime.timedelta(days=random.randint(1, 15)) ).isoformat(), meta={ 'stage': 'Stripy', @@ -474,7 +470,7 @@ def get_stripy_outliers(): # random size between 5, 50 MB 'size': random.randint(5 * 1024, 25 * 1024) * 1024, 'outliers_detected': stripy_outliers['outliers_detected'], - 'outlier_loci': stripy_outliers['outlier_loci'] + 'outlier_loci': stripy_outliers['outlier_loci'], }, ), Analysis( @@ -483,7 +479,8 @@ def get_stripy_outliers(): status=AnalysisStatus('completed'), output=f'FAKE://{project}/mito/mitoreport-{sg["id"]}/index.html', timestamp_completed=( - datetime.datetime.now() - datetime.timedelta(days=random.randint(1, 15)) + datetime.datetime.now() + - datetime.timedelta(days=random.randint(1, 15)) ).isoformat(), meta={ 'stage': 'MitoReport', @@ -491,7 +488,7 @@ def get_stripy_outliers(): # random size between 5, 50 MB 'size': random.randint(5 * 1024, 25 * 1024) * 1024, }, - ) + ), ] ) diff --git a/test/test_bq_billing_ar_batch.py b/test/test_bq_billing_ar_batch.py index 2bfb4b913..54c6bd27d 100644 --- a/test/test_bq_billing_ar_batch.py +++ b/test/test_bq_billing_ar_batch.py @@ -60,7 +60,7 @@ def test_error_no_connection(self): BillingArBatchTable(None) self.assertTrue( - 'No connection was provided to the table \'BillingArBatchTable\'' + "No connection was provided to the table 'BillingArBatchTable'" in str(context.exception) ) diff --git a/test/test_bq_billing_daily.py b/test/test_bq_billing_daily.py index 969b7c936..2c1ae3493 100644 --- a/test/test_bq_billing_daily.py +++ b/test/test_bq_billing_daily.py @@ -60,7 +60,7 @@ def test_error_no_connection(self): BillingDailyTable(None) self.assertTrue( - 'No connection was provided to the table \'BillingDailyTable\'' + "No connection was provided to the table 'BillingDailyTable'" in str(context.exception) ) diff --git a/test/test_bq_billing_daily_extended.py b/test/test_bq_billing_daily_extended.py index 183d6c8c5..ae306591e 100644 --- a/test/test_bq_billing_daily_extended.py +++ b/test/test_bq_billing_daily_extended.py @@ -57,7 +57,7 @@ def test_error_no_connection(self): BillingDailyExtendedTable(None) self.assertTrue( - 'No connection was provided to the table \'BillingDailyExtendedTable\'' + "No connection was provided to the table 'BillingDailyExtendedTable'" in str(context.exception) ) diff --git a/test/test_bq_billing_gcp_daily.py b/test/test_bq_billing_gcp_daily.py index 99e3caa10..ff0047662 100644 --- a/test/test_bq_billing_gcp_daily.py +++ b/test/test_bq_billing_gcp_daily.py @@ -65,7 +65,7 @@ def test_error_no_connection(self): BillingGcpDailyTable(None) self.assertTrue( - 'No connection was provided to the table \'BillingGcpDailyTable\'' + "No connection was provided to the table 'BillingGcpDailyTable'" in str(context.exception) ) diff --git a/test/test_bq_billing_raw.py b/test/test_bq_billing_raw.py index e1a5af50d..028cf7e1c 100644 --- a/test/test_bq_billing_raw.py +++ b/test/test_bq_billing_raw.py @@ -56,7 +56,7 @@ def test_error_no_connection(self): BillingRawTable(None) self.assertTrue( - 'No connection was provided to the table \'BillingRawTable\'' + "No connection was provided to the table 'BillingRawTable'" in str(context.exception) ) diff --git a/test/test_cohort_builder.py b/test/test_cohort_builder.py index 0918741a9..512b1ad6e 100644 --- a/test/test_cohort_builder.py +++ b/test/test_cohort_builder.py @@ -21,7 +21,9 @@ async def mock_ccfc(self, project, body_create_cohort_from_criteria): self.assertEqual(project, self.project_name) return await api.routes.cohort.create_cohort_from_criteria( CohortBody(**body_create_cohort_from_criteria['cohort_spec'].to_dict()), - CohortCriteria(**body_create_cohort_from_criteria['cohort_criteria'].to_dict()), + CohortCriteria( + **body_create_cohort_from_criteria['cohort_criteria'].to_dict() + ), self.connection, body_create_cohort_from_criteria['dry_run'], ) @@ -43,7 +45,9 @@ async def test_empty_main(self, mock): mock.side_effect = self.mock_ccfc result = main( project=self.project_name, - cohort_body_spec=metamist.models.CohortBody(name='Empty cohort', description='No criteria'), + cohort_body_spec=metamist.models.CohortBody( + name='Empty cohort', description='No criteria' + ), projects=['test'], sg_ids_internal=[], excluded_sg_ids=[], @@ -66,7 +70,9 @@ async def test_epic_main(self, mock): mock.side_effect = self.mock_ccfc result = main( project=self.project_name, - cohort_body_spec=metamist.models.CohortBody(name='Epic cohort', description='Every criterion'), + cohort_body_spec=metamist.models.CohortBody( + name='Epic cohort', description='Every criterion' + ), projects=['test'], sg_ids_internal=['CPGLCL33'], excluded_sg_ids=['CPGLCL17', 'CPGLCL25'], diff --git a/test/test_external_id.py b/test/test_external_id.py index 63055ebff..c62468ba2 100644 --- a/test/test_external_id.py +++ b/test/test_external_id.py @@ -22,7 +22,11 @@ async def setUp(self): self.p1 = await self.player.upsert_participant( ParticipantUpsertInternal( - external_ids={PRIMARY_EXTERNAL_ORG: 'P1', 'CONTROL': '86', 'KAOS': 'shoe'}, + external_ids={ + PRIMARY_EXTERNAL_ORG: 'P1', + 'CONTROL': '86', + 'KAOS': 'shoe', + }, ) ) self.p1_external_ids = {k.lower(): v for k, v in self.p1.external_ids.items()} @@ -78,19 +82,29 @@ async def test_insert(self): _ = await self.player.upsert_participant( ParticipantUpsertInternal( external_ids={PRIMARY_EXTERNAL_ORG: None}, - ) ) + ) with self.assertRaises(ValueError): - _ = await self.player.upsert_participant(ParticipantUpsertInternal(external_ids={'OTHER': 'P1'})) + _ = await self.player.upsert_participant( + ParticipantUpsertInternal(external_ids={'OTHER': 'P1'}) + ) result = await self.player.upsert_participant( ParticipantUpsertInternal( - external_ids={PRIMARY_EXTERNAL_ORG: 'P10', 'A': 'A1', 'B': None, 'C': 'C1'}, - ) + external_ids={ + PRIMARY_EXTERNAL_ORG: 'P10', + 'A': 'A1', + 'B': None, + 'C': 'C1', + }, ) + ) participants = await self.player.get_participants_by_ids([result.id]) - self.assertDictEqual(participants[0].external_ids, {PRIMARY_EXTERNAL_ORG: 'P10', 'a': 'A1', 'c': 'C1'}) + self.assertDictEqual( + participants[0].external_ids, + {PRIMARY_EXTERNAL_ORG: 'P10', 'a': 'A1', 'c': 'C1'}, + ) @run_as_sync async def test_update(self): @@ -100,17 +114,26 @@ async def test_update(self): ParticipantUpsertInternal( id=self.p1.id, external_ids={PRIMARY_EXTERNAL_ORG: None}, - ) ) + ) result = await self.player.upsert_participant( ParticipantUpsertInternal( id=self.p1.id, - external_ids={PRIMARY_EXTERNAL_ORG: 'P1B', 'CONTROL': '86B', 'KAOS': None, 'B': None, 'C': 'A1'}, - ) + external_ids={ + PRIMARY_EXTERNAL_ORG: 'P1B', + 'CONTROL': '86B', + 'KAOS': None, + 'B': None, + 'C': 'A1', + }, ) + ) participants = await self.player.get_participants_by_ids([result.id]) - self.assertDictEqual(participants[0].external_ids, {PRIMARY_EXTERNAL_ORG: 'P1B', 'control': '86B', 'c': 'A1'}) + self.assertDictEqual( + participants[0].external_ids, + {PRIMARY_EXTERNAL_ORG: 'P1B', 'control': '86B', 'c': 'A1'}, + ) @run_as_sync async def test_fill_in_missing(self): @@ -118,17 +141,25 @@ async def test_fill_in_missing(self): slayer = SampleLayer(self.connection) s1 = await slayer.upsert_sample( - SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'E1'}, participant_id=self.p1.id), + SampleUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'E1'}, participant_id=self.p1.id + ), + ) + sa = await slayer.upsert_sample( + SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EA', 'foo': 'FA'}) + ) + sb = await slayer.upsert_sample( + SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EB', 'foo': 'FB'}) ) - sa = await slayer.upsert_sample(SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EA', 'foo': 'FA'})) - sb = await slayer.upsert_sample(SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EB', 'foo': 'FB'})) result = await self.player.fill_in_missing_participants() self.assertEqual(result, 'Updated 2 records') samples = await slayer.get_samples_by(sample_ids=[s1.id, sa.id, sb.id]) - participants = await self.player.get_participants_by_ids([s.participant_id for s in samples]) + participants = await self.player.get_participants_by_ids( + [s.participant_id for s in samples] + ) p_map = {p.id: p for p in participants} for s in samples: @@ -142,7 +173,9 @@ async def test_get_by_families(self): fid = await flayer.create_family(external_id='Jones') child = await self.player.upsert_participant( - ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'P20', 'd': 'D20'}), + ParticipantUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'P20', 'd': 'D20'} + ), ) await self.player.add_participant_to_family( @@ -156,19 +189,28 @@ async def test_get_by_families(self): result = await self.player.get_participants_by_families([fid]) self.assertEqual(len(result), 1) self.assertEqual(len(result[fid]), 1) - self.assertDictEqual(result[fid][0].external_ids, {PRIMARY_EXTERNAL_ORG: 'P20', 'd': 'D20'}) + self.assertDictEqual( + result[fid][0].external_ids, {PRIMARY_EXTERNAL_ORG: 'P20', 'd': 'D20'} + ) @run_as_sync async def test_update_many(self): """Exercise update_many_participant_external_ids() method""" - result = await self.player.update_many_participant_external_ids({self.p1.id: 'P1B', self.p2.id: 'P2B'}) + result = await self.player.update_many_participant_external_ids( + {self.p1.id: 'P1B', self.p2.id: 'P2B'} + ) self.assertTrue(result) - participants = await self.player.get_participants_by_ids([self.p1.id, self.p2.id]) + participants = await self.player.get_participants_by_ids( + [self.p1.id, self.p2.id] + ) p_map = {p.id: p for p in participants} outp1 = p_map[self.p1.id] outp2 = p_map[self.p2.id] - self.assertDictEqual(outp1.external_ids, {PRIMARY_EXTERNAL_ORG: 'P1B', 'control': '86', 'kaos': 'shoe'}) + self.assertDictEqual( + outp1.external_ids, + {PRIMARY_EXTERNAL_ORG: 'P1B', 'control': '86', 'kaos': 'shoe'}, + ) self.assertDictEqual(outp2.external_ids, {PRIMARY_EXTERNAL_ORG: 'P2B'}) @run_as_sync @@ -177,7 +219,9 @@ async def test_get_etoi_map(self): slayer = SampleLayer(self.connection) _ = await slayer.upsert_sample( - SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'SE1'}, participant_id=self.p1.id), + SampleUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'SE1'}, participant_id=self.p1.id + ), ) s2 = await slayer.upsert_sample( @@ -195,7 +239,9 @@ async def test_get_etoi_map(self): ), ) - result = await self.player.get_external_participant_id_to_internal_sequencing_group_id_map(self.project_id) + result = await self.player.get_external_participant_id_to_internal_sequencing_group_id_map( + self.project_id + ) self.assertEqual(len(result), 3) for eid, sgid in result: self.assertIn(eid, self.p1.external_ids.values()) @@ -212,7 +258,11 @@ async def setUp(self): self.s1 = await self.slayer.upsert_sample( SampleUpsertInternal( - external_ids={PRIMARY_EXTERNAL_ORG: 'S1', 'CONTROL': '86', 'KAOS': 'shoe'}, + external_ids={ + PRIMARY_EXTERNAL_ORG: 'S1', + 'CONTROL': '86', + 'KAOS': 'shoe', + }, ) ) @@ -267,19 +317,28 @@ async def test_insert(self): _ = await self.slayer.upsert_sample( SampleUpsertInternal( external_ids={PRIMARY_EXTERNAL_ORG: None}, - ) ) + ) with self.assertRaises(ValueError): - _ = await self.slayer.upsert_sample(SampleUpsertInternal(external_ids={'OTHER': 'S1'})) + _ = await self.slayer.upsert_sample( + SampleUpsertInternal(external_ids={'OTHER': 'S1'}) + ) result = await self.slayer.upsert_sample( SampleUpsertInternal( - external_ids={PRIMARY_EXTERNAL_ORG: 'S10', 'A': 'A1', 'B': None, 'C': 'C1'}, - ) + external_ids={ + PRIMARY_EXTERNAL_ORG: 'S10', + 'A': 'A1', + 'B': None, + 'C': 'C1', + }, ) + ) sample = await self.slayer.get_sample_by_id(result.id) - self.assertDictEqual(sample.external_ids, {PRIMARY_EXTERNAL_ORG: 'S10', 'a': 'A1', 'c': 'C1'}) + self.assertDictEqual( + sample.external_ids, {PRIMARY_EXTERNAL_ORG: 'S10', 'a': 'A1', 'c': 'C1'} + ) @run_as_sync async def test_update(self): @@ -289,23 +348,34 @@ async def test_update(self): SampleUpsertInternal( id=self.s1.id, external_ids={PRIMARY_EXTERNAL_ORG: None}, - ) ) + ) result = await self.slayer.upsert_sample( SampleUpsertInternal( id=self.s1.id, - external_ids={PRIMARY_EXTERNAL_ORG: 'S1B', 'CONTROL': '86B', 'KAOS': None, 'B': None, 'C': 'A1'}, - ) + external_ids={ + PRIMARY_EXTERNAL_ORG: 'S1B', + 'CONTROL': '86B', + 'KAOS': None, + 'B': None, + 'C': 'A1', + }, ) + ) sample = await self.slayer.get_sample_by_id(result.id) - self.assertDictEqual(sample.external_ids, {PRIMARY_EXTERNAL_ORG: 'S1B', 'control': '86B', 'c': 'A1'}) + self.assertDictEqual( + sample.external_ids, + {PRIMARY_EXTERNAL_ORG: 'S1B', 'control': '86B', 'c': 'A1'}, + ) @run_as_sync async def test_get_single(self): """Exercise get_single_by_external_id() method""" with self.assertRaises(NotFoundError): - _ = await self.slayer.get_single_by_external_id('non-existent', self.project_id) + _ = await self.slayer.get_single_by_external_id( + 'non-existent', self.project_id + ) result = await self.slayer.get_single_by_external_id('86', self.project_id) self.assertEqual(result.id, self.s1.id) @@ -316,28 +386,34 @@ async def test_get_single(self): @run_as_sync async def test_get_internal_to_external(self): """Exercise get_internal_to_external_sample_id_map() method""" - result = await self.slayer.get_internal_to_external_sample_id_map([self.s1.id, self.s2.id]) + result = await self.slayer.get_internal_to_external_sample_id_map( + [self.s1.id, self.s2.id] + ) self.assertDictEqual(result, {self.s1.id: 'S1', self.s2.id: 'S2'}) @run_as_sync async def test_get_all(self): """Exercise get_all_sample_id_map_by_internal_ids() method""" - result = await self.slayer.get_all_sample_id_map_by_internal_ids(self.project_id) + result = await self.slayer.get_all_sample_id_map_by_internal_ids( + self.project_id + ) self.assertDictEqual(result, {self.s1.id: 'S1', self.s2.id: 'S2'}) @run_as_sync async def test_get_history(self): """Exercise get_history_of_sample() method""" # First create some history - await self.slayer.upsert_sample(SampleUpsertInternal(id=self.s1.id, meta={'foo': 'bar'})) + await self.slayer.upsert_sample( + SampleUpsertInternal(id=self.s1.id, meta={'foo': 'bar'}) + ) await self.slayer.upsert_sample( SampleUpsertInternal( id=self.s1.id, external_ids={'fruit': 'apple'}, meta={'fruit': 'banana'}, - ) ) + ) sample = await self.slayer.get_sample_by_id(self.s1.id) diff --git a/test/test_graphql.py b/test/test_graphql.py index 703da2755..6677c6b53 100644 --- a/test/test_graphql.py +++ b/test/test_graphql.py @@ -114,7 +114,8 @@ def test_validate_provided_schema(self): (strawberry has an as_str() method) """ client = configure_sync_client( - schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' # type: ignore + schema=api.graphql.schema.schema.as_str(), + auth_token='FAKE', # type: ignore ) validate(TEST_QUERY, client=client) @@ -279,7 +280,9 @@ async def test_participant_phenotypes(self): """ # insert participant p = await self.player.upsert_participant( - ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'Demeter'}, meta={}, samples=[]) + ParticipantUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'Demeter'}, meta={}, samples=[] + ) ) phenotypes = {'phenotype1': 'value1', 'phenotype2': {'number': 123}} diff --git a/test/test_import_individual_metadata.py b/test/test_import_individual_metadata.py index 447be8535..c8d546507 100644 --- a/test/test_import_individual_metadata.py +++ b/test/test_import_individual_metadata.py @@ -14,7 +14,9 @@ async def test_import_many_hpo_terms(self): """Test import hpo terms from many columns""" pl = ParticipantLayer(self.connection) - await pl.upsert_participant(ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'TP01'})) + await pl.upsert_participant( + ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'TP01'}) + ) headers = [ 'Individual ID', diff --git a/test/test_models.py b/test/test_models.py index e066eeb33..7a9ffb0c9 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -23,7 +23,9 @@ def test_participant_to_internal_basic(self): def test_participant_to_external_basic(self): """Test converting a basic participant to external model""" - internal = ParticipantUpsertInternal(id=1, external_ids={PRIMARY_EXTERNAL_ORG: 'hey-hey'}) + internal = ParticipantUpsertInternal( + id=1, external_ids={PRIMARY_EXTERNAL_ORG: 'hey-hey'} + ) external = internal.to_external() self.assertIsInstance(external, ParticipantUpsert) @@ -44,7 +46,9 @@ def test_sample_to_internal_basic(self): def test_sample_to_external_basic(self): """Test converting a basic sample to external model""" - internal = SampleUpsertInternal(id=1, external_ids={PRIMARY_EXTERNAL_ORG: 'hey-hey'}) + internal = SampleUpsertInternal( + id=1, external_ids={PRIMARY_EXTERNAL_ORG: 'hey-hey'} + ) external = internal.to_external() self.assertIsInstance(external, SampleUpsert) diff --git a/test/test_parse_generic_metadata.py b/test/test_parse_generic_metadata.py index 299b6c750..a53d15a5d 100644 --- a/test/test_parse_generic_metadata.py +++ b/test/test_parse_generic_metadata.py @@ -98,7 +98,8 @@ def test_queries(self): # only need to apply schema to the first client to create, then it gets cached client = configure_sync_client( - schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' # type: ignore + schema=api.graphql.schema.schema.as_str(), + auth_token='FAKE', # type: ignore ) validate(QUERY_MATCH_PARTICIPANTS) validate(QUERY_MATCH_SAMPLES, client=client) diff --git a/test/test_pedigree.py b/test/test_pedigree.py index 328651e95..ad5ec52c0 100644 --- a/test/test_pedigree.py +++ b/test/test_pedigree.py @@ -56,7 +56,9 @@ async def test_pedigree_without_family(self): ) ) await pl.upsert_participant( - ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX02'}, reported_sex=None) + ParticipantUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'EX02'}, reported_sex=None + ) ) rows = await fl.get_pedigree( diff --git a/test/test_project_groups.py b/test/test_project_groups.py index e7db46e9e..faec8e509 100644 --- a/test/test_project_groups.py +++ b/test/test_project_groups.py @@ -225,9 +225,10 @@ async def test_get_my_projects(self): members=[ProjectMemberUpdate(member=self.author, roles=['contributor'])], ) - project_id_map, project_name_map = ( - await self.pttable.get_projects_accessible_by_user(user=self.author) - ) + ( + project_id_map, + project_name_map, + ) = await self.pttable.get_projects_accessible_by_user(user=self.author) # Get projects with at least a read access role my_projects = self.connection.projects_with_role( diff --git a/test/test_search.py b/test/test_search.py index ad3d3c07f..0f14a776a 100644 --- a/test/test_search.py +++ b/test/test_search.py @@ -53,7 +53,9 @@ async def test_search_unavailable_sample_by_internal_id(self): Mock this in testing by limiting scope to non-existent project IDs """ sample = await self.slayer.upsert_sample( - SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX001'}, type='blood') + SampleUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'EX001'}, type='blood' + ) ) cpg_id = sample_id_format(sample.id) @@ -68,7 +70,9 @@ async def test_search_isolated_sample_by_id(self): Search by valid CPG sample ID (special case) """ sample = await self.slayer.upsert_sample( - SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX001'}, type='blood') + SampleUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'EX001'}, type='blood' + ) ) cpg_id = sample_id_format(sample.id) results = await self.schlay.search(query=cpg_id, project_ids=[self.project_id]) @@ -88,7 +92,9 @@ async def test_search_isolated_sequencing_group_by_id(self): Search by valid CPG sequencing group ID (special case) """ sample = await self.slayer.upsert_sample( - SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EXS001'}, type='blood') + SampleUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'EXS001'}, type='blood' + ) ) sg = await self.sglayer.upsert_sequencing_groups( [ @@ -135,7 +141,9 @@ async def test_search_isolated_sample_by_external_id(self): should only return one result """ sample = await self.slayer.upsert_sample( - SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX001'}, type='blood') + SampleUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'EX001'}, type='blood' + ) ) results = await self.schlay.search(query='EX001', project_ids=[self.project_id]) diff --git a/test/test_update_participant_family.py b/test/test_update_participant_family.py index f0a6902eb..1774fb3c7 100644 --- a/test/test_update_participant_family.py +++ b/test/test_update_participant_family.py @@ -22,17 +22,23 @@ async def setUp(self) -> None: pl = ParticipantLayer(self.connection) self.pid = ( await pl.upsert_participant( - ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX01'}, reported_sex=2) + ParticipantUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'EX01'}, reported_sex=2 + ) ) ).id self.pat_pid = ( await pl.upsert_participant( - ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX01_pat'}, reported_sex=1) + ParticipantUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'EX01_pat'}, reported_sex=1 + ) ) ).id self.mat_pid = ( await pl.upsert_participant( - ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX01_mat'}, reported_sex=2) + ParticipantUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'EX01_mat'}, reported_sex=2 + ) ) ).id diff --git a/test/test_upsert.py b/test/test_upsert.py index 3c3fc2519..286b82513 100644 --- a/test/test_upsert.py +++ b/test/test_upsert.py @@ -255,7 +255,9 @@ async def test_insert_participants(self): self.assertEqual('Apollo', db_participants[1]['external_id']) self.assertEqual('Athena', db_participants[2]['external_id']) - participant_id_map = {p['external_id']: p['participant_id'] for p in db_participants} + participant_id_map = { + p['external_id']: p['participant_id'] for p in db_participants + } db_samples = await self.connection.connection.fetch_all( """ diff --git a/test/testbase.py b/test/testbase.py index 8f54b0329..2557ab5cd 100644 --- a/test/testbase.py +++ b/test/testbase.py @@ -219,9 +219,10 @@ async def setup(): ) # Get the new project map now that project membership is updated - project_id_map, project_name_map = ( - await ppt.get_projects_accessible_by_user(user=cls.author) - ) + ( + project_id_map, + project_name_map, + ) = await ppt.get_projects_accessible_by_user(user=cls.author) cls.project_id_map = project_id_map cls.project_name_map = project_name_map diff --git a/tob_metadata_harmonisation.py b/tob_metadata_harmonisation.py index 4a259297e..2c1b0c5f3 100644 --- a/tob_metadata_harmonisation.py +++ b/tob_metadata_harmonisation.py @@ -7,6 +7,7 @@ If the field is in the dictionary and the value is not None, the field is added to the new_assay_data dictionary with the new field name as the key and the value as the value. If there are multiple mappings to the same field, the value is compared to the value already in the new_assay_data dictionary. If the values are different, the field is printed to the console. """ + from typing import Dict from metamist.apis import AssayApi diff --git a/web/package.json b/web/package.json index 436d95662..234fbfc43 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "metamist", - "version": "7.3.2", + "version": "7.3.3", "private": true, "dependencies": { "@apollo/client": "^3.7.3",