diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100755 index 00000000..4a13da18 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,72 @@ +# relecov-tools: Contributing Guidelines + +## Contribution workflow + +If you'd like to write or modify some code for relecov-tools, the standard workflow is as follows: + +1. Check that there isn't already an issue about your idea in the [relecov-tools issues](https://github.com/BU-ISCIII/relecov-tools/issues) to avoid duplicating work. **If there isn't one already, please create one so that others know you're working on this**. +2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [relecov-tools repository](https://github.com/BU-ISCIII/relecov-tools/) to your GitHub account. +3. Make the necessary changes / additions within your forked repository following the [code style guidelines](#code-style-guidelines). +4. Modify the [`CHANGELOG`](../CHANGELOG.md) file according to your changes in the appropriate section ([X.X.Xdev]). You should register your changes regarding: + 1. Added enhancements + 2. New modules + 3. Fixes + 4. Removed stuff + 5. Requirements added or version update +5. Update any documentation as needed. +6. [Submit a Pull Request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) against the `develop` branch and send the url to the #pipelines-dev channel in slack (if you are not in the slack channel just wait for the PR to be reviewed and rebased). + +If you're not used to this workflow with git, you can start with: + +- Some [docs in the bu-isciii wiki](https://github.com/BU-ISCIII/BU-ISCIII/wiki/Github--gitflow). +- [Some slides](https://docs.google.com/presentation/d/1PruqGxPQVxtNcuEbOd86mylXorgYIU5a/edit?pli=1#slide=id.p1) (in Spanish). 
+- Some generic [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests). +- Even their [excellent `git` resources](https://try.github.io/). + +### relecov-tools repo branches + +relecov-tools repo works with a two-branch scheme: `main` and `develop`. + +- `main`: stable code only for releases. +- `develop`: new code development for the different modules. + +You always need to submit your PR against `develop`. Once approved, these changes must be **`rebased`** so we do not create empty unwanted merges. + +## Tests + +When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. +Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. + +There are typically two types of tests that run: + +### Lint tests + +We use black and flake8 linting based on PEP8 guidelines for python coding. You can check more information [here](https://github.com/BU-ISCIII/BU-ISCIII/wiki/Python#linting). + +### Code tests + +Download, read-lab-metadata, map and validate modules are executed using a test dataset. + +In any case, you should always submit locally tested code!! + +### New version bumping and release + +In order to create a new release you need to follow the next steps: + +1. Set the new version according to [semantic versioning](https://semver.org/). In our particular case, changes in the `hotfix` branch will change the PATCH version (third one), and changes in `develop` will typically change the MINOR version, unless the developing team decides otherwise. +2. Create a PR bumping the new version against `develop`. For bumping a new version just change [this line](https://github.com/BU-ISCIII/relecov-tools/blob/09c00c1ddd11f7489de7757841aff506ef4b7e1d/setup.py#L5) with the new version. +3. Once that PR is merged, create via web another PR against `main` (origin `develop`). This PR would need 2 approvals. 
+4. [Create a new release](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository) copying the appropriate notes from the `CHANGELOG`. +5. Once the release is approved and merged, you're all set! + +PRs from one branch to another, as in a release, should be **`merged`** not rebased, so we avoid conflicts and the branch merge is correctly visualized in the commit history. + +### Code style guidelines + +We follow PEP8 conventions as code style guidelines, please check [here](https://github.com/BU-ISCIII/BU-ISCIII/wiki/Python#pep-8-guidelines-read-the-full-pep-8-documentation) for more detail. + +When developing new code, we strongly recommend implementing LogSum() functions from log_summary.py instead of the classic python logging in order to keep track of all the warnings and errors that may appear during any of the processes. + +## Getting help + +For further information/help, please ask on the `#pipelines-dev` slack channel or write us an email! ([bioinformatica@isciii.es](mailto:bioinformatica@isciii.es)). 
\ No newline at end of file diff --git a/.github/workflows/pypi_publish.yml b/.github/workflows/pypi_publish.yml new file mode 100644 index 00000000..350681b6 --- /dev/null +++ b/.github/workflows/pypi_publish.yml @@ -0,0 +1,88 @@ +name: Publish package python distribution to Pypi + +on: + push: + branches: "main" + +jobs: + build: + name: Build distribution + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.9.16" + - name: Install pypi/build + run: >- + python3 -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: python3 -m build + - name: Store the distribution packages + uses: actions/upload-artifact@v3 + with: + name: python-package-distributions + path: dist/ + + publish-to-pypi: + name: Publish dist to PyPI + if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/relecov-tools + permissions: + id-token: write + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: python-package-distributions + path: dist/ + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + github-release: + name: Sign dist with Sigstore and upload to GitHub Release + needs: + - publish-to-pypi + runs-on: ubuntu-latest + permissions: + contents: write + id-token: write + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: python-package-distributions + path: dist/ + - name: Sign the dists with Sigstore + uses: sigstore/gh-action-sigstore-python@v2.1.1 + with: + inputs: >- + ./dist/*.tar.gz + ./dist/*.whl + - name: Create GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + run: >- + gh release create + '${{ github.ref_name }}' + --repo '${{ github.repository }}' + --notes "" + - name: Upload artifact signatures to GitHub Release 
+ env: + GITHUB_TOKEN: ${{ github.token }} + # Upload to GitHub Release using the `gh` CLI. + # `dist/` contains the built packages, and the + # sigstore-produced signatures and certificates. + run: >- + gh release upload + '${{ github.ref_name }}' dist/** + --repo '${{ github.repository }}' \ No newline at end of file diff --git a/.github/workflows/python_lint.yml b/.github/workflows/python_lint.yml new file mode 100644 index 00000000..9d043bbd --- /dev/null +++ b/.github/workflows/python_lint.yml @@ -0,0 +1,35 @@ +name: python_lint + +on: + push: + paths: + - '**.py' + pull_request: + paths: + - '**.py' + +jobs: + flake8_py3: + runs-on: ubuntu-latest + steps: + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: 3.9.x + architecture: x64 + - name: Checkout PyTorch + uses: actions/checkout@master + - name: Install flake8 + run: pip install flake8 + - name: Run flake8 + run: flake8 --ignore E501,W503,E203,W605 + + black_lint: + runs-on: ubuntu-latest + steps: + - name: Setup + uses: actions/checkout@v2 + - name: Install black in jupyter + run: pip install black[jupyter] + - name: Check code lints with Black + uses: psf/black@stable diff --git a/.github/workflows/test_modules.yml b/.github/workflows/test_modules.yml new file mode 100755 index 00000000..35a96a01 --- /dev/null +++ b/.github/workflows/test_modules.yml @@ -0,0 +1,79 @@ +name: test_modules + +on: + push: + branches: "**" + pull_request: + types: [opened, reopened, synchronize, closed] + branches: "**" + +jobs: + test_map: + runs-on: ubuntu-latest + strategy: + max-parallel: 2 + matrix: + map_args: ["-d 'ENA' -f relecov_tools/schema/ena_schema.json", "-d 'GISAID' -f relecov_tools/schema/gisaid_schema.json"] + steps: + - name: Set up Python 3.9.16 + uses: actions/setup-python@v3 + with: + python-version: '3.9.16' + - name: Checkout code + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + - name: Install package and dependencies 
+ run: | + pip install -r requirements.txt + pip install . + - name: Run each module tests + run: | + relecov-tools map -j tests/data/map_validate/processed_metadata_lab_test.json -p relecov_tools/schema/relecov_schema.json ${{ matrix.map_args }} -o . + env: + OUTPUT_LOCATION: ${{ github.workspace }}/tests/ + - name: Upload output file + uses: actions/upload-artifact@v2 + with: + name: test-output + path: output.txt + + test_all_modules: + runs-on: ubuntu-latest + strategy: + max-parallel: 3 + matrix: + modules: ["read-lab-metadata", "read-bioinfo-metadata", "validate"] + steps: + + - name: Set up Python 3.9.16 + uses: actions/setup-python@v3 + with: + python-version: '3.9.16' + + - name: Checkout code + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Install package and dependencies + run: | + pip install -r requirements.txt + pip install . + + - name: Run each module tests + run: | + if [ "${{ matrix.modules }}" == "read-lab-metadata" ]; then + relecov-tools read-lab-metadata -m tests/data/read_lab_metadata/metadata_lab_test.xlsx -s tests/data/read_lab_metadata/samples_data_test.json -o $OUTPUT_LOCATION + elif [ "${{ matrix.modules }}" == "validate" ]; then + relecov-tools validate -j tests/data/map_validate/processed_metadata_lab_test.json -s relecov_tools/schema/relecov_schema.json -m tests/data/map_validate/metadata_lab_test.xlsx -o $OUTPUT_LOCATION + fi + env: + OUTPUT_LOCATION: ${{ github.workspace }}/tests/ + - name: Upload output file + uses: actions/upload-artifact@v2 + with: + name: test-output + path: output.txt diff --git a/.github/workflows/test_sftp_handle.yml b/.github/workflows/test_sftp_handle.yml new file mode 100644 index 00000000..2d6e5ebd --- /dev/null +++ b/.github/workflows/test_sftp_handle.yml @@ -0,0 +1,65 @@ +name: test_sftp_handle + +on: + push: + branches: "**" + pull_request_target: + types: [opened, reopened, synchronize, closed] + branches: "**" + +jobs: + 
security_check: + runs-on: ubuntu-latest + steps: + - name: Get User Permission + id: checkAccess + uses: actions-cool/check-user-permission@v2 + with: + require: write + username: ${{ github.triggering_actor }} + - name: Check User Permission + if: steps.checkAccess.outputs.require-result == 'false' + run: | + echo "${{ github.triggering_actor }} does not have permissions on this repo." + echo "Current permission level is ${{ steps.checkAccess.outputs.user-permission }}" + echo "Job originally triggered by ${{ github.actor }}" + exit 1 + + test_sftp_handle: + needs: security_check + concurrency: + group: ${{ github.repository }}-test_sftp_handle + cancel-in-progress: false + runs-on: ubuntu-latest + strategy: + max-parallel: 1 + matrix: + download_options: ["download_only", "download_clean", "delete_only"] + target_folders: ["", "-t COD-test-1"] + + steps: + - name: Set up Python 3.9.16 + uses: actions/setup-python@v3 + with: + python-version: '3.9.16' + + - name: Checkout code + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Install package and dependencies + run: | + pip install -r requirements.txt + pip install . + + - name: Run sftp_handle tests + run: | + python3 tests/test_sftp_handle.py --download_option ${{ matrix.download_options }} ${{ matrix.target_folders }} + env: + TEST_USER: ${{ secrets.TEST_USER }} + TEST_PASSWORD: ${{ secrets.TEST_PASSWORD }} + TEST_PORT: ${{ secrets.TEST_PORT }} + OUTPUT_LOCATION: ${{ github.workspace }}/tests/ + \ No newline at end of file diff --git a/.gitignore b/.gitignore index b6e47617..08cefe8a 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,6 @@ dmypy.json # Pyre type checker .pyre/ + +# (Ad-hoc files) +.nextflow* diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100755 index 00000000..8913ab80 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,38 @@ +# relecov-tools Changelog + +All notable changes to this project will be documented in this file. 
+ +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [1.1.0dev] - 2024-0X-0X : https://github.com/BU-ISCIII/relecov-tools/releases/tag/1.1.X + +### Credits + +Code contributions to the hotfix: + +### Modules + +#### Added enhancements + +#### Fixes + +#### Changed + +#### Removed + +### Requirements + +## [1.0.0] - 2024-0X-0X : https://github.com/BU-ISCIII/relecov-tools/releases/tag/1.0.0 + +### Credits + +Code contributions to the inital release: + +- [Sara Monzón](https://github.com/saramonzon) +- [Sarai Varona](https://github.com/svarona) +- [Guillermo Gorines](https://github.com/GuilleGorines) +- [Pablo Mata](https://github.com/Shettland) +- [Luis Chapado](https://github.com/luissian) +- [Erika Kvalem](https://github.com/ErikaKvalem) +- [Alberto Lema](https://github.com/Alema91) +- [Daniel Valle](https://github.com/Daniel-VM) diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..37ee93fc --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,6 @@ +include LICENSE +include README.md +graft relecov_tools/assets +graft relecov_tools/schema +graft relecov_tools/conf +include requirements.txt diff --git a/README.md b/README.md index 5848093e..e3524bc8 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,330 @@ # relecov-tools -[![Python tests](https://github.com/nf-core/tools/workflows/Python%20tests/badge.svg?branch=master&event=push)](https://github.com/nf-core/tools/actions?query=workflow%3A%22Python+tests%22+branch%3Amaster) -[![codecov](https://codecov.io/gh/nf-core/tools/branch/master/graph/badge.svg)](https://codecov.io/gh/nf-core/tools) +[![python_lint](https://github.com/BU-ISCIII/relecov-tools/actions/workflows/python_lint.yml/badge.svg)](https://github.com/BU-ISCIII/relecov-tools/actions/workflows/python_lint.yml) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 
-[![install with Bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](https://bioconda.github.io/recipes/nf-core/README.html) -[![install with PyPI](https://img.shields.io/badge/install%20with-PyPI-blue.svg)](https://pypi.org/project/nf-core/) +> THIS REPO IS UNDER ACTIVE DEVELOPMENT. -A python package with helper tools for the relecov project. +relecov-tools is a set of helper tools for the assembly of the different elements in the RELECOV platform (Spanish Network for genomic surveillance of SARS-Cov-2) as data download, processing, validation and upload to public databases, as well as analysis runs and database storage. +## Table of contents -## Table of contents +- [relecov-tools](#relecov-tools) + - [Table of contents](#table-of-contents) + - [Installation](#installation) + - [Bioconda](#bioconda) + - [Pip](#pip) + - [Development version](#development-version) + - [Usage](#usage) + - [Command-line](#command-line) + - [download](#download) + - [read-lab-metadata](#read-lab-metadata) + - [read-bioinfo-metadata](#read-bioinfo-metadata) + - [validate](#validate) + - [map](#map) + - [upload-to-ena](#upload-to-ena) + - [upload-to-gisaid](#upload-to-gisaid) + - [update-db](#update-db) + - [build-schema](#build-schema) + - [Mandatory Fields](#mandatory-fields) + - [launch-pipeline](#launch-pipeline) + - [custom logs](#custom-logs) + - [Python package mode](#python-package-mode) + - [Acknowledgements](#acknowledgements) -* [ installation][*installation] +## Installation +### Bioconda +Soon -## Installation +### Pip +relecov-tools is available in Pypi and can be installed via pip: +``` +pip install relecov-tools +``` + +### Development version +If you want to install the latest code in the repository: + +``` +conda create -n relecov_dev pip +pip install --force-reinstall --upgrade git+https://github.com/bu-isciii/relecov-tools.git@develop +``` + +## Usage + +### Command-line +relecov-tools provides a command-line version with help 
descriptions and params prompt if needed. + +``` +$ relecov-tools --help + ___ ___ ___ ___ ___ +\ |--| | \ | | | | | | \ / +\ \ / |__ / |__ | |___ | | | \ / +/ / \ | \ | | | | | | \ / +/ |--| | \ |___ |___ |___ |___ |___| \/ +RELECOV-tools version 1.0.0 +Usage: relecov-tools [OPTIONS] COMMAND [ARGS]... + +Options: +--version Show the version and exit. +-v, --verbose Print verbose output to the console. +-l, --log-file Save a verbose log to a file. +--help Show this message and exit. + +Commands: + download Download files located in sftp server. + read-lab-metadata Create the json compliant to the relecov schema from... + read-bioinfo-metadata Create the json compliant to the relecov schema with Bioinfo Metadata. + validate Validate json file against schema. + map Convert data between phage plus schema to ENA,... + upload-to-ena parsed data to create xml files to upload to ena + upload-to-gisaid parsed data to create files to upload to gisaid + update-db feed database with metadata jsons + build-schema Generates and updates JSON Schema files from... + launch-pipeline Create the symbolic links for the samples which... +``` +#### download +The command `download` connects to a transfer protocol (currently sftp) and downloads all files in the different available folders in the passed credentials. In addition, it checks if the files in the current folder match the files in the metadata file and also checks if there are md5sum for each file. Else, it creates one before storing in the final repository. + +``` +$ relecov-tools download --help +Usage: relecov-tools download [OPTIONS] + + Download files located in sftp server. + + Options: + -u, --user User name for login to sftp server + -p, --password Password for the user to login + -d, --download_option Select the download option: [download_only, download_clean, delete_only]. 
+ download_only will only download the files + download_clean will remove files from sftp after download + delete_only will only delete the files + -o, --output_location Flag: Select location for downloaded files, overrides config file location + -t, --target_folders Flag: Select which sftp folders will be targeted giving [paths] or via prompt + -f, --conf_file Configuration file in yaml format (no params file) + --help Show this message and exit. +``` + +Configuration can be passed in several formats: +- if no config_file is passed, default values are fetched from conf/configuration.json, and user and password are asked in prompt. +- Default values can be overwritten using a yml config file, so you can input user, password, sftp_server, etc. + +Config file example with all available options: +``` +sftp_server: "sftprelecov.isciii.es" +sftp_port: "22" +sftp_user : "user" +sftp_passwd : "pass" +storage_local_folder: "/tmp/relecov" +tmp_folder_for_metadata: "/tmp/relecov/tmp" +allowed_sample_extensions: + - .fastq.gz + - .fasta +``` + +#### read-lab-metadata +`read-lab-metadata` command reads the excel file with laboratory metadata and processes it adding additional needed fields. + +``` +$ relecov-tools read-lab-metadata --help +Usage: relecov-tools read-metadata [OPTIONS] + + Create the json compliant to the relecov schema from the Metadata file. + + Options: + -m, --metadata_file PATH file containing metadata in xlsx format. + -s, --sample_list_file PATH Json with the additional metadata to add to the + received user metadata. + -o, --metadata-out PATH Path to save output metadata file in json format. + --help Show this message and exit. +``` + + +An example for the metadata excel file can be found [here](./relecov_tools/example_data/METADATA_LAB_TEST.xlsx) + +#### read-bioinfo-metadata +`read-bioinfo-metadata` Include the results from the Bioinformatics analysis into the Json previously created with read-lab-metadata module. 
+ +``` +$ relecov-tools read-bioinfo-metadata --help +Usage: relecov-tools read-bioinfo-metadata [OPTIONS] + + Create the json compliant to the relecov schema with Bioinfo Metadata. + + Options: + -j, --json_file Json file containing lab metadata + -i, --input_folder Path to folder containing analysis results + -s, --software_name Name of the software employed in the bioinformatics analysis (default: viralrecon). + -o, --out_dir Path to save output file +``` +- Note: Software-specific configurations are available in [bioinfo_config.json](./relecov_tools/conf/bioinfo_config.json). + +#### validate +The `validate` command validates the data in json format output by the `read-lab-metadata` command against a json schema, in this case the relecov [schema specification](./relecov_tools/schema/relecov_schema.json). + +``` +$ relecov-tools validate --help +Usage: relecov-tools validate [OPTIONS] + + Validate json file against schema. + + Options: + -j, --json_file TEXT Json file to validate + -s, --json_schema TEXT Json schema (default: relecov-schema) + -m, --metadata PATH Origin file containing metadata + -o, --out_folder TEXT Path to save validate json file + --help Show this message and exit. + +``` + +#### map +The command `map` converts data in json format from the relecov data model to the ENA or GISAID data model using their own schemas according to their annotated ontology terms. + +``` +$ relecov-tools map --help +Usage: relecov-tools map [OPTIONS] + + Convert data between phage plus schema to ENA, GISAID, or any other schema + + Options: + -p, --origin_schema TEXT File with the origin (relecov) schema + -j, --json_data TEXT File with the json data to convert + -d, --destination_schema [ENA|GISAID|other] + schema to be mapped + -f, --schema_file TEXT file with the custom schema + -o, --output TEXT File name and path to store the mapped json + --help Show this message and exit. 
+``` + +#### upload-to-ena +The `upload-to-ena` command takes json data mapped to the ENA schema and uses the [ena_upload_cli](https://github.com/usegalaxy-eu/ena-upload-cli) package to upload raw data and metadata to the ENA db. + +``` +Usage: relecov-tools upload-to-ena [OPTIONS] + + parsed data to create xml files to upload to ena + + Options: + -u, --user user name for login to ena + -p, --password password for the user to login + -c, --center center name + -e, --ena_json where the validated json is + -t, --template_path path to folder containing ENA xml templates + -a, --action select one of the available options: [add|modify|cancel|release] + --dev Flag: Test submission + --upload_fastq Flag: Upload fastq files. Mandatory for "add" action + -m, --metadata_types List of metadata xml types to submit [study,experiment,run,sample] + -o, --output_path TEXT output folder for the xml generated files + --help Show this message and exit. + +``` + +#### upload-to-gisaid +The `upload-to-gisaid` command uses the json mapped to the GISAID schema to upload raw data and metadata to the GISAID db. + +``` +Usage: relecov-tools upload-to-gisaid [OPTIONS] + + parsed data to create files to upload to gisaid + + Options: + -u, --user user name for login + -p, --password password for the user to login + -c, --client_id client-ID provided by clisupport@gisaid.org + -t, --token path to authentication token + -e, --gisaid_json path to validated json mapped to GISAID + -i, --input_path path to fastas folder or multifasta file + -f, --frameshift frameshift notification: ["catch_all", "catch_none", "catch_novel"] + -x, --proxy_config introduce your proxy credentials as: username:password@proxy:port + --single Flag: input is a folder with several fasta files. + --gzip Flag: input fasta is gzipped. 
+``` + +#### update-db + -u, --user user name for login + -p, --password password for the user to login + -t, --type Select the type of information to upload to database [sample,bioinfodata,variantdata] + -d, --databaseServer Name of the database server receiving the data [iskylims,relecov] + +### build-schema +The `build-schema` module provides functionality to generate and manage JSON Schema files based on database definitions from Excel spreadsheets. It automates the creation of JSON Schemas, including validation, drafting, and comparison with existing schemas. + +``` +Usage: relecov-tools build-schema [OPTIONS] + + Generates and updates JSON Schema files from Excel-based database + definitions. + +Options: + -i, --input_file PATH Path to the Excel document containing the database + definition. This file must have a .xlsx extension. + [required] + -s, --schema_base PATH Path to the base schema file. This file is used as + a reference to compare it with the schema + generated using this module. (Default: installed + schema in 'relecov- + tools/relecov_tools/schema/relecov_schema.json') + -v, --draft_version TEXT Version of the JSON schema specification to be + used. Example: '2020-12'. See: https://json- + schema.org/specification-links + -d, --diff BOOLEAN Prints a changelog/diff between the base and + incoming versions of the schema. + -o, --out_dir PATH Path to save output file/s + --help Show this message and exit. +``` + +#### Mandatory Fields +Ensure that the fields below are properly defined as headers in your Excel sheet (database definition): + +``` +enum: List of possible values for enumeration. +examples: Example values for the property. +ontology_id: Identifier for ontology. +type: Data type of the property (e.g., string, integer). +description: Description of the property. +classification: Classification or category of the property. +label_name: Label or name for the property. +fill_mode: Mode for filling in the property (e.g., required, optional). 
+required (Y/N): Indicates if the property is required (Y) or optional (N). +complex_field (Y/N): Indicates if the property is a complex (nested) field (Y) or a standard field (N). +``` + +#### launch-pipeline +Create the folder structure to execute the given pipeline for the latest sample batches after executing download, read-lab-metadata and validate modules. This module will create symbolic links for each sample and generate the necessary files for pipeline execution using the information from validated_BATCH-NAME_DATE.json. +``` +Usage: relecov-tools launch-pipeline [OPTIONS] + + Create the symbolic links for the samples which are validated to prepare for + bioinformatics pipeline execution. + +Options: + -i, --input PATH Path to the input folder where sample files are located + -t, --template PATH Path to the pipeline template folder to be copied in the output folder + -c, --config PATH Path to the template config file + -o, --out_dir PATH Path to output folder + --help Show this message and exit. +``` + +#### custom logs +After executing each of these modules, you may find a custom log report in json format named "DATE_EXECUTED-MODULE_log_summary.json". These custom log summaries can be useful to detect errors in metadata in order to fix them and/or notify the users. + +### Python package mode +relecov-tools is designed in a way that you can import the different modules and use them in your own scripts, for example: + +``` +import relecov_tools.sftp_handle +user="admin" +passwd="1234" +conf_file="/path/to/conf" +sftp_connection = relecov_tools.sftp_handle.SftpHandle( + user, passwd, conf_file +) +sftp_connection.download() +``` +DOCs soon!! +## Acknowledgements +Python package idea and design are heavily inspired by [nf-core/tools](https://github.com/nf-core/tools). 
diff --git a/relecov_tools/__init__.py b/relecov_tools/__init__.py new file mode 100644 index 00000000..e176a03f --- /dev/null +++ b/relecov_tools/__init__.py @@ -0,0 +1,6 @@ +""" Main relecov package file. +""" + +import pkg_resources + +__version__ = pkg_resources.get_distribution("relecov_tools").version diff --git a/relecov_tools/__main__.py b/relecov_tools/__main__.py new file mode 100755 index 00000000..710066bb --- /dev/null +++ b/relecov_tools/__main__.py @@ -0,0 +1,527 @@ +#!/usr/bin/env python +import logging + +# import re + +# from rich.prompt import Confirm +import click +import relecov_tools.download_manager +import rich.console +import rich.logging +import rich.traceback + +import relecov_tools.utils +import relecov_tools.assets.pipeline_utils.viralrecon +import relecov_tools.read_lab_metadata +import relecov_tools.download_manager +import relecov_tools.json_validation +import relecov_tools.map_schema +import relecov_tools.upload_database +import relecov_tools.read_bioinfo_metadata +import relecov_tools.metadata_homogeneizer +import relecov_tools.gisaid_upload +import relecov_tools.upload_ena_protocol +import relecov_tools.pipeline_manager +import relecov_tools.build_schema + +log = logging.getLogger() + +# Set up rich stderr console +stderr = rich.console.Console( + stderr=True, force_terminal=relecov_tools.utils.rich_force_colors() +) + + +def run_relecov_tools(): + # Set up the rich traceback + rich.traceback.install(console=stderr, width=200, word_wrap=True, extra_lines=1) + + # Print nf-core header + # stderr.print("\n[green]{},--.[grey39]/[green],-.".format(" " * 42), highlight=False) + stderr.print( + "[blue] ___ ___ ___ ___ ___ ", + highlight=False, + ) + stderr.print( + "[blue] \ |-[grey39]-| [blue] | \ | | | | | | \ / ", + highlight=False, + ) + stderr.print( + "[blue] \ \ [grey39]/ [blue] |__ / |__ | |___ | | | \ / ", + highlight=False, + ) + stderr.print( + "[blue] / [grey39] / [blue] \ | \ | | | | | | \ / ", + highlight=False, + ) + 
stderr.print( + "[blue] / [grey39] |-[blue]-| | \ |___ |___ |___ |___ |___| \/ ", + highlight=False, + ) + + # stderr.print("[green] `._,._,'\n", highlight=False) + __version__ = "1.0.0" + stderr.print( + "\n" "[grey39] RELECOV-tools version {}".format(__version__), highlight=False + ) + + # Lanch the click cli + relecov_tools_cli() + + +# Customise the order of subcommands for --help +class CustomHelpOrder(click.Group): + def __init__(self, *args, **kwargs): + self.help_priorities = {} + super(CustomHelpOrder, self).__init__(*args, **kwargs) + + def get_help(self, ctx): + self.list_commands = self.list_commands_for_help + return super(CustomHelpOrder, self).get_help(ctx) + + def list_commands_for_help(self, ctx): + """reorder the list of commands when listing the help""" + commands = super(CustomHelpOrder, self).list_commands(ctx) + return ( + c[1] + for c in sorted( + (self.help_priorities.get(command, 1000), command) + for command in commands + ) + ) + + def command(self, *args, **kwargs): + """Behaves the same as `click.Group.command()` except capture + a priority for listing command names in help. 
+ """ + help_priority = kwargs.pop("help_priority", 1000) + help_priorities = self.help_priorities + + def decorator(f): + cmd = super(CustomHelpOrder, self).command(*args, **kwargs)(f) + help_priorities[cmd.name] = help_priority + return cmd + + return decorator + + +@click.group(cls=CustomHelpOrder) +@click.version_option(relecov_tools.__version__) +@click.option( + "-v", + "--verbose", + is_flag=True, + default=False, + help="Print verbose output to the console.", +) +@click.option( + "-l", "--log-file", help="Save a verbose log to a file.", metavar="" +) +def relecov_tools_cli(verbose, log_file): + # Set the base logger to output DEBUG + log.setLevel(logging.DEBUG) + + # Set up logs to a file if we asked for one + if log_file: + log_fh = logging.FileHandler(log_file, encoding="utf-8") + log_fh.setLevel(logging.DEBUG) + log_fh.setFormatter( + logging.Formatter( + "[%(asctime)s] %(name)-20s [%(levelname)-7s] %(message)s" + ) + ) + log.addHandler(log_fh) + + +# sftp +@relecov_tools_cli.command(help_priority=2) +@click.option("-u", "--user", help="User name for login to sftp server") +@click.option("-p", "--password", help="password for the user to login") +@click.option( + "-f", + "--conf_file", + help="Configuration file (not params file)", +) +@click.option( + "-d", + "--download_option", + default=None, + multiple=False, + help="Select the download option: [download_only, download_clean, delete_only]. 
\ + download_only will only download the files \ + download_clean will remove files from sftp after download \ + delete_only will only delete the files", +) +@click.option( + "-o", + "--output_location", + default=None, + help="Flag: Select location for downloaded files, overrides config file location", +) +@click.option( + "-t", + "--target_folders", + is_flag=False, + flag_value="ALL", + default=None, + help="Flag: Select which folders will be targeted giving [paths] or via prompt", +) +def download( + user, + password, + conf_file, + download_option, + output_location, + target_folders, +): + """Download files located in sftp server.""" + download_manager = relecov_tools.download_manager.DownloadManager( + user, + password, + conf_file, + download_option, + output_location, + target_folders, + ) + download_manager.execute_process() + + +# metadata +@relecov_tools_cli.command(help_priority=3) +@click.option( + "-m", + "--metadata_file", + type=click.Path(), + help="file containing metadata", +) +@click.option( + "-s", + "--sample_list_file", + type=click.Path(), + help="Json with the additional metadata to add to the received user metadata", +) +@click.option( + "-o", "--metadata-out", type=click.Path(), help="Path to save output metadata file" +) +def read_lab_metadata(metadata_file, sample_list_file, metadata_out): + """ + Create the json compliant to the relecov schema from the Metadata file. 
+ """ + new_metadata = relecov_tools.read_lab_metadata.RelecovMetadata( + metadata_file, sample_list_file, metadata_out + ) + relecov_json = new_metadata.create_metadata_json() + return relecov_json + + +# validation +@relecov_tools_cli.command(help_priority=4) +@click.option("-j", "--json_file", help="Json file to validate") +@click.option("-s", "--json_schema", help="Json schema") +@click.option( + "-m", + "--metadata", + type=click.Path(), + help="Origin file containing metadata", +) +@click.option("-o", "--out_folder", help="Path to save validate json file") +def validate(json_file, json_schema, metadata, out_folder): + """Validate json file against schema.""" + validation = relecov_tools.json_validation.SchemaValidation( + json_file, json_schema, metadata, out_folder + ) + validation.validate() + + +# mapping to ENA schema +@relecov_tools_cli.command(help_priority=5) +@click.option("-p", "--origin_schema", help="File with the origin (relecov) schema") +@click.option("-j", "--json_data", help="File with the json data to convert") +@click.option( + "-d", + "--destination_schema", + type=click.Choice(["ENA", "GISAID", "other"], case_sensitive=True), + help="schema to be mapped", +) +@click.option("-f", "--schema_file", help="file with the custom schema") +@click.option("-o", "--output", help="File name and path to store the mapped json") +def map(origin_schema, json_data, destination_schema, schema_file, output): + """Convert data between phage plus schema to ENA, GISAID, or any other schema""" + new_schema = relecov_tools.map_schema.MappingSchema( + origin_schema, json_data, destination_schema, schema_file, output + ) + new_schema.map_to_data_to_new_schema() + + +# upload to ENA +@relecov_tools_cli.command(help_priority=6) +@click.option("-u", "--user", help="user name for login to ena") +@click.option("-p", "--password", help="password for the user to login") +@click.option("-c", "--center", help="center name") +@click.option("-e", "--ena_json", help="where the 
validated json is") +@click.option("-t", "--template_path", help="Path to ENA templates folder") +@click.option( + "-a", + "--action", + type=click.Choice(["ADD", "MODIFY", "CANCEL", "RELEASE"], case_sensitive=False), + help="select one of the available options", +) +@click.option("--dev", is_flag=True, default=False, help="Test submission") +@click.option("--upload_fastq", is_flag=True, default=False, help="Upload fastq files") +@click.option("-m", "--metadata_types", help="List of metadata xml types to submit") +@click.option("-o", "--output_path", help="output folder for the xml generated files") +def upload_to_ena( + user, + password, + center, + ena_json, + template_path, + dev, + action, + metadata_types, + upload_fastq, + output_path, +): + """parse data to create xml files to upload to ena""" + upload_ena = relecov_tools.upload_ena_protocol.EnaUpload( + user=user, + passwd=password, + center=center, + source_json=ena_json, + template_path=template_path, + dev=dev, + action=action, + metadata_types=metadata_types, + upload_fastq=upload_fastq, + output_path=output_path, + ) + upload_ena.upload() + + +# upload to GISAID +@relecov_tools_cli.command(help_priority=7) +@click.option("-u", "--user", help="user name for login") +@click.option("-p", "--password", help="password for the user to login") +@click.option("-c", "--client_id", help="client-ID provided by clisupport@gisaid.org") +@click.option("-t", "--token", help="path to athentication token") +@click.option("-e", "--gisaid_json", help="path to validated json mapped to GISAID") +@click.option( + "-i", + "--input_path", + help="path to fastas folder or multifasta file", +) +@click.option("-o", "--output_path", help="output folder for log") +@click.option( + "-f", + "--frameshift", + type=click.Choice(["catch_all", "catch_none", "catch_novel"], case_sensitive=False), + help="frameshift notification", +) +@click.option( + "-x", + "--proxy_config", + help="introduce your proxy credentials as: 
username:password@proxy:port", + required=False, +) +@click.option( + "--single", + is_flag=True, + default=False, + help="input is a folder with several fasta files. Default: False", +) +@click.option( + "--gzip", + is_flag=True, + default=False, + help="input fasta is gziped. Default: False", +) +def upload_to_gisaid( + user, + password, + client_id, + token, + gisaid_json, + input_path, + output_path, + frameshift, + proxy_config, + single, + gzip, +): + """parsed data to create files to upload to gisaid""" + upload_gisaid = relecov_tools.gisaid_upload.GisaidUpload( + user, + password, + client_id, + token, + gisaid_json, + input_path, + output_path, + frameshift, + proxy_config, + single, + gzip, + ) + upload_gisaid.gisaid_upload() + + +@relecov_tools_cli.command(help_priority=9) +@click.option("-j", "--json", help="data in json format") +@click.option( + "-t", + "--type", + type=click.Choice(["sample", "bioinfodata", "variantdata"]), + multiple=False, + default=None, + help="Select the type of information to upload to database", +) +@click.option( + "-plat", + "--platform", + type=click.Choice( + [ + "iskylims", + "relecov", + ] + ), + multiple=False, + default=None, + help="name of the platform where data is uploaded", +) +@click.option("-u", "--user", help="user name for login") +@click.option("-p", "--password", help="password for the user to login") +@click.option("-s", "--server_url", help="url of the platform server") +@click.option( + "-f", + "--full_update", + is_flag=True, + default=False, + help="Sequentially run every update option", +) +def update_db(user, password, json, type, platform, server_url, full_update): + """upload the information included in json file to the database""" + update_database_obj = relecov_tools.upload_database.UpdateDatabase( + user, password, json, type, platform, server_url, full_update + ) + update_database_obj.update_db() + + +# read metadata bioinformatics +@relecov_tools_cli.command(help_priority=10) +@click.option( + 
"-j", + "--json_file", + type=click.Path(), + help="json file containing lab metadata", +) +@click.option("-i", "--input_folder", type=click.Path(), help="Path to input files") +@click.option("-o", "--out_dir", type=click.Path(), help="Path to save output file") +@click.option("-s", "--software_name", help="Name of the software/pipeline used.") +def read_bioinfo_metadata(json_file, input_folder, out_dir, software_name): + """ + Create the json compliant from the Bioinfo Metadata. + """ + new_bioinfo_metadata = relecov_tools.read_bioinfo_metadata.BioinfoMetadata( + json_file, + input_folder, + out_dir, + software_name, + ) + + new_bioinfo_metadata.create_bioinfo_file() + + +# read metadata bioinformatics +@relecov_tools_cli.command(help_priority=12) +@click.option( + "-i", + "--institution", + type=click.Choice(["isciii", "hugtip", "hunsc-iter"], case_sensitive=False), + help="select one of the available institution options", +) +@click.option( + "-d", + "--directory", + type=click.Path(), + help="Folder where are located the additional files", +) +@click.option("-o", "--output", type=click.Path(), help="Path to save json output") +def metadata_homogeneizer(institution, directory, output): + """Parse institution metadata lab to the one used in relecov""" + new_parse = relecov_tools.metadata_homogeneizer.MetadataHomogeneizer( + institution, directory, output + ) + new_parse.converting_metadata() + + +# creating symbolic links +@relecov_tools_cli.command(help_priority=13) +@click.option( + "-i", + "--input", + type=click.Path(), + help="select input folder where are located the sample files", +) +@click.option( + "-t", + "--template", + type=click.Path(), + help="select the pipeline template folder to be copied in the output folder", +) +@click.option( + "-c", + "--config", + type=click.Path(), + help="select the template config file", +) +@click.option("-o", "--output", type=click.Path(), help="select output folder") +def launch_pipeline(input, template, output, 
config): + """ + Create the symbolic links for the samples which are validated to prepare for + bioinformatics pipeline execution. + """ + new_launch = relecov_tools.pipeline_manager.LaunchPipeline( + input, template, output, config + ) + new_launch.pipeline_exc() + + +# schema builder +@relecov_tools_cli.command(help_priority=13) +@click.option( + "-i", + "--input_file", + type=click.Path(), + help="Path to the Excel document containing the database definition. This file must have a .xlsx extension.", + required=True, +) +@click.option( + "-s", + "--schema_base", + type=click.Path(), + help="Path to the base schema file. This file is used as a reference to compare it with the schema generated using this module. (Default: installed schema in 'relecov-tools/relecov_tools/schema/relecov_schema.json')", + required=False, +) +@click.option( + "-v", + "--draft_version", + type=click.STRING, + help="Version of the JSON schema specification to be used. Example: '2020-12'. See: https://json-schema.org/specification-links", +) +@click.option( + "-d", + "--diff", + is_flag=True, + help="Prints a changelog/diff between the base and incoming versions of the schema.", +) +@click.option("-o", "--out_dir", type=click.Path(), help="Path to save output file/s") +def build_schema(input_file, schema_base, draft_version, diff, out_dir): + """Generates and updates JSON Schema files from Excel-based database definitions.""" + schema_update = relecov_tools.build_schema.SchemaBuilder( + input_file, schema_base, draft_version, diff, out_dir + ) + schema_update.handle_build_schema() + + +if __name__ == "__main__": + run_relecov_tools() diff --git a/relecov_tools/assets/images/readBioinfoMetadata_processDiagram.png b/relecov_tools/assets/images/readBioinfoMetadata_processDiagram.png new file mode 100644 index 00000000..9c283702 Binary files /dev/null and b/relecov_tools/assets/images/readBioinfoMetadata_processDiagram.png differ diff --git a/relecov_tools/assets/pipeline_utils/viralrecon.py 
b/relecov_tools/assets/pipeline_utils/viralrecon.py new file mode 100644 index 00000000..dcb1cc31 --- /dev/null +++ b/relecov_tools/assets/pipeline_utils/viralrecon.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python +import json +import os +import sys +import re +import logging +import rich +import os.path + +from pathlib import Path +from datetime import datetime + +import relecov_tools.utils +from relecov_tools.config_json import ConfigJson +from relecov_tools.read_bioinfo_metadata import BioinfoReportLog + +log = logging.getLogger(__name__) +stderr = rich.console.Console( + stderr=True, + style="dim", + highlight=False, + force_terminal=relecov_tools.utils.rich_force_colors(), +) + + +# INIT Class +class LongTableParse: + """ + - parse_a_list_of_dictionaries() : returns generated_JSON + - saving_file(generated_JSON) + - parsing_csv() : It manages all this proccess: + - calling first to parse_a_list_of_dictionaries() and then calling to saving_file() + """ + + def __init__(self, file_path=None, output_directory=None): + if file_path is None: + self.file_path = relecov_tools.utils.prompt_path( + msg="Select the csv file which contains variant long table information" + ) + else: + self.file_path = file_path + + if not os.path.exists(self.file_path): + log.error("Variant long table file %s does not exist ", self.file_path) + stderr.print( + f"[red] Variant long table file {self.file_path} does not exist" + ) + sys.exit(1) + + if not self.file_path.endswith(".csv"): + log.error("Variant long table file %s is not a csv file ", self.file_path) + stderr.print( + f"[red] Variant long table file {self.file_path} must be a csv file" + ) + sys.exit(1) + + if output_directory is None: + use_default = relecov_tools.utils.prompt_yn_question("Use default path?: ") + if use_default: + self.output_directory = os.getcwd() + else: + self.output_directory = relecov_tools.utils.prompt_path( + msg="Select the output folder:" + ) + else: + self.output_directory = output_directory + 
Path(self.output_directory).mkdir(parents=True, exist_ok=True) + + json_file = os.path.join( + os.path.dirname(__file__), "..", "..", "conf", "bioinfo_config.json" + ) + config_json = ConfigJson(json_file) + self.software_config = config_json.get_configuration("viralrecon") + self.long_table_heading = self.software_config["variants_long_table"]["content"] + + def validate_file(self, heading): + """Check if long table file has all mandatory fields defined in + configuration file + """ + for field in self.long_table_heading: + if field not in heading: + log.error("Incorrect format file. %s is missing", field) + stderr.print(f"[red]Incorrect Format. {field} is missing in file") + sys.exit(1) + return True + + def parse_file(self): + """This function generates a json file from the csv file entered by + the user (long_table.csv). + Validate the file by checking the header line + """ + + with open(self.file_path, encoding="utf-8-sig") as fh: + lines = fh.readlines() + + stderr.print("[green]\tSuccessful checking heading fields") + heading_index = {} + headings_from_csv = lines[0].strip().split(",") + for heading in self.long_table_heading.values(): + heading_index[heading] = headings_from_csv.index(heading) + + samp_dict = {} + for line in lines[1:]: + line_s = line.strip().split(",") + + sample = line_s[heading_index["SAMPLE"]] + if sample not in samp_dict: + samp_dict[sample] = [] + + variant_dict = { + key: ( + {key2: line_s[heading_index[val2]] for key2, val2 in value.items()} + if isinstance(value, dict) + else line_s[heading_index[value]] + ) + for key, value in self.long_table_heading.items() + } + + if re.search("&", line_s[heading_index["GENE"]]): + # Example + # 215184,NC_045512.2,27886,AAACGAACATGAAATT,A,PASS,1789,1756,1552,0.87,ORF7b&ORF8,gene_fusion,n.27887_27901delAACGAACATGAAATT,.,.,ivar,B.1.1.318 + # This only occurs (for now) as gene fusion, so we just duplicate lines with same values + genes = re.split("&", line_s[heading_index["GENE"]]) + for gene in 
genes: + variant_dict_copy = variant_dict.copy() + variant_dict_copy["Gene"] = gene + samp_dict[sample].append(variant_dict_copy) + else: + variant_dict["Gene"] = line_s[heading_index["GENE"]] + samp_dict[sample].append(variant_dict) + stderr.print("[green]\tSuccessful parsing data") + return samp_dict + + def convert_to_json(self, samp_dict): + j_list = [] + # Grab date from filename + result_regex = re.search( + "variants_long_table(?:_\d{8})?\.csv", os.path.basename(self.file_path) + ) + if result_regex is None: + stderr.print( + "[red]\tWARN: Couldn't find variants long table file. Expected file name is:" + ) + stderr.print( + "[red]\t\t- variants_long_table.csv or variants_long_table_YYYYMMDD.csv. Aborting..." + ) + sys.exit(1) + else: + date_regex = re.search(r"(\d{8})", result_regex.group()) + if date_regex is not None: + analysis_date = date_regex.group() + stderr.print(f"[green]\tDate {analysis_date} found in {self.file_path}") + else: + analysis_date = "Not Provided [GENEPIO:0001668]" + stderr.print( + f"[yellow]\tWARN:No analysis date found in long table: {self.file_path}" + ) + for key, values in samp_dict.items(): + j_dict = {"sample_name": key, "analysis_date": analysis_date} + j_dict["variants"] = values + j_list.append(j_dict) + return j_list + + def save_to_file(self, j_list): + """Transform the parsed data into a json file""" + date_now = datetime.now().strftime("%Y%m%d%H%M%S") + file_name = "long_table_" + date_now + ".json" + file_path = os.path.join(self.output_directory, file_name) + + try: + with open(file_path, "w") as fh: + fh.write(json.dumps(j_list, indent=4)) + stderr.print("[green]\tParsed data successfully saved to file:", file_path) + except Exception as e: + stderr.print("[red]\tError saving parsed data to file:", str(e)) + + def parsing_csv(self): + """ + Function called when using the relecov-tools long-table-parse function. 
+ """ + # Parsing longtable file + parsed_data = self.parse_file() + j_list = self.convert_to_json(parsed_data) + # Saving long table data into a file + self.save_to_file(j_list) + stderr.print("[green]\tProcess completed") + return + + +# END of Class + + +# START util functions +def handle_pangolin_data(files_list): + """File handler to parse pangolin data (csv) into JSON structured format. + + Args: + files_list (list): A list with paths to pangolin files. + + Returns: + pango_data_processed: A dictionary containing pangolin data handled. + """ + method_name = f"{handle_pangolin_data.__name__}" + method_log_report = BioinfoReportLog() + + # Handling pangolin data + pango_data_processed = {} + valid_samples = [] + try: + files_list_processed = relecov_tools.utils.select_most_recent_files_per_sample( + files_list + ) + for pango_file in files_list_processed: + try: + pango_data = relecov_tools.utils.read_csv_file_return_dict( + pango_file, sep="," + ) + # Add custom content in pangolin + pango_data_key = next(iter(pango_data)) + pango_data[pango_data_key]["lineage_analysis_date"] = ( + relecov_tools.utils.get_file_date(pango_file) + ) + + # Rename key in f_data + pango_data_updated = { + key.split()[0]: value for key, value in pango_data.items() + } + pango_data_processed.update(pango_data_updated) + valid_samples.append(pango_data_key.split()[0]) + except (FileNotFoundError, IndexError) as e: + method_log_report.update_log_report( + method_name, + "warning", + f"Error occurred while processing file {pango_file}: {e}", + ) + continue + except Exception as e: + method_log_report.update_log_report( + method_name, "warning", f"Error occurred while processing files: {e}" + ) + if len(valid_samples) > 0: + method_log_report.update_log_report( + method_name, + "valid", + f"Successfully handled data in samples: {', '.join(valid_samples)}", + ) + method_log_report.print_log_report(method_name, ["valid", "warning"]) + return pango_data_processed + + +def 
parse_long_table(files_list): + """File handler to retrieve data from long table files and convert it into a JSON structured format. + This function utilizes the LongTableParse class to parse the long table data. + Since this utility handles and maps data using a custom way, it returns None to be avoid being transferred to method read_bioinfo_metadata.BioinfoMetadata.mapping_over_table(). + + Args: + files_list (list): A list of paths to long table files. + + Returns: + None: Indicates that the function does not return any meaningful value. + """ + method_name = f"{parse_long_table.__name__}" + method_log_report = BioinfoReportLog() + + # Handling long table data + if len(files_list) == 1: + files_list_processed = files_list[0] + if not os.path.isfile(files_list_processed): + method_log_report.update_log_report( + method_name, "error", f"{files_list_processed} given file is not a file" + ) + sys.exit(method_log_report.print_log_report(method_name, ["error"])) + long_table = LongTableParse(files_list_processed) + # Parsing long table data and saving it + long_table.parsing_csv() + elif len(files_list) > 1: + method_log_report.update_log_report( + method_name, + "warning", + f"Found {len(files_list)} variants_long_table files. This version is unable to process more than one variants long table each time.", + ) + # This needs to return none to avoid being parsed by method mapping-over-table + return None + + +def handle_consensus_fasta(files_list): + """File handler to parse consensus data (fasta) into JSON structured format. + + Args: + files_list (list): A list with paths to condensus files. + + Returns: + consensus_data_processed: A dictionary containing consensus data handled. 
+ """ + method_name = f"{handle_consensus_fasta.__name__}" + method_log_report = BioinfoReportLog() + + consensus_data_processed = {} + missing_consens = [] + for consensus_file in files_list: + try: + record_fasta = relecov_tools.utils.read_fasta_return_SeqIO_instance( + consensus_file + ) + except FileNotFoundError as e: + missing_consens.append(e.filename) + continue + sample_key = os.path.basename(consensus_file).split(".")[0] + + # Update consensus data for the sample key + consensus_data_processed[sample_key] = { + "sequence_name": record_fasta.description, + "genome_length": str(len(record_fasta)), + "sequence_filepath": os.path.dirname(consensus_file), + "sequence_filename": sample_key, + "sequence_md5": relecov_tools.utils.calculate_md5(consensus_file), + # TODO: Not sure this is correct. If not, recover previous version: https://github.com/BU-ISCIII/relecov-tools/blob/09c00c1ddd11f7489de7757841aff506ef4b7e1d/relecov_tools/read_bioinfo_metadata.py#L211-L218 + "number_of_base_pairs_sequenced": len(record_fasta.seq), + } + + # Report missing consensus + conserrs = len(missing_consens) + if conserrs >= 1: + method_log_report.update_log_report( + method_name, + "warning", + f"{conserrs} samples missing in consensus file: {missing_consens}", + ) + method_log_report.print_log_report(method_name, ["valid", "warning"]) + return consensus_data_processed diff --git a/relecov_tools/assets/schema_utils/jsonschema_draft.py b/relecov_tools/assets/schema_utils/jsonschema_draft.py new file mode 100644 index 00000000..96f2c750 --- /dev/null +++ b/relecov_tools/assets/schema_utils/jsonschema_draft.py @@ -0,0 +1,119 @@ +import logging +import sys +import rich.console + +import relecov_tools.utils +import pkg_resources +import jsonschema +from jsonschema import Draft202012Validator + +log = logging.getLogger(__name__) +stderr = rich.console.Console( + stderr=True, + style="dim", + highlight=False, + force_terminal=relecov_tools.utils.rich_force_colors(), +) + 
+SCHEMA_VALIDATORS = { + "2020-12": Draft202012Validator, +} + + +def check_valid_version(draft_version): + """Validate the provided draft version against available JSON Schema versions.""" + available_schemas = [version for version in SCHEMA_VALIDATORS.keys()] + try: + if not draft_version: + draft_version = relecov_tools.utils.prompt_selection( + "Choose a Json Schema valid version:", available_schemas + ) + elif draft_version not in available_schemas: + draft_version = relecov_tools.utils.prompt_selection( + f"Draft version '{draft_version}' not found. Choose a valid Json Schema version:", + available_schemas, + ) + except Exception as e: + stderr.print(f"[red]An error occurred while selecting the draft version: {e}") + sys.exit(1) + + if not draft_version: + stderr.print("[red]No valid draft version selected. Exiting.") + sys.exit(1) + + stderr.print(f"[green]Using draft version: {draft_version}") + return draft_version + + +def create_draft(draft_version, required_items=None): + """Creates a JSON Schema Draft template with required fields.""" + + def bump_version(version): + """Increment the patch version by 1.""" + major, minor, patch = map(int, version.split(".")) + return f"{major}.{minor}.{patch + 1}" + + # Check if the user wants to bump the version + current_version = pkg_resources.get_distribution("relecov_tools").version + version_sel_choice = relecov_tools.utils.prompt_selection( + "Use default or custom/bumped version?", + ["Use default", "Bump version", "Custom"], + ) + stderr.print(version_sel_choice) + if version_sel_choice == "Bump version": + pakage_version_str = bump_version(current_version) + if version_sel_choice == "Custom": + pakage_version_str = relecov_tools.utils.prompt_text( + "Write the desired version using semantic versioning:" + ) + if version_sel_choice == "Use default": + pakage_version_str = current_version + + # Get parameters to set create the schema + url_str = SCHEMA_VALIDATORS[draft_version].META_SCHEMA["$id"] + id_str = 
"https://github.com/BU-ISCIII/relecov-tools/blob/develop/relecov_tools/schema/relecov_schema.json" + description_str = "Json schema that specifies the structure, content, and validation rules for RELECOV metadata" + + # Construct the draft template + draft_template = { + "$schema": url_str, + "$id": id_str, + "title": "RELECOV schema", + "description": description_str, + "version": pakage_version_str, + "type": "object", + "properties": {}, + } + + # Include required fields if specified + if required_items: + draft_template["required"] = [] + + return draft_template + + +def check_schema_draft(schema_draft, draft_version): + """Validates the schema_draft against the JSON Schema Draft 2020-12 meta-schema.""" + if draft_version not in SCHEMA_VALIDATORS: + stderr.print(f"[red]Unsupported draft version: {draft_version}") + sys.exit(1) + + validator_class = SCHEMA_VALIDATORS[draft_version] + + try: + validator_class.check_schema(schema_draft) + stderr.print("[green]New schema is valid based on JSON Specification rules.") + except jsonschema.ValidationError: + stderr.print(f"[red] Json schema does not fulfill ${draft_version} Validation") + promp_answ = relecov_tools.utils.prompt_yn_question( + "Errors found during schema validation, proceed?:" + ) + if not promp_answ: + sys.exit(1) + except Exception as e: + stderr.print(f"[yellow]Error occurred during schema validation: {e}") + promp_answ = relecov_tools.utils.prompt_yn_question( + "Errors found during schema validation, proceed?:" + ) + if not promp_answ: + sys.exit(1) diff --git a/relecov_tools/assets/schema_utils/metadatalab_template.py b/relecov_tools/assets/schema_utils/metadatalab_template.py new file mode 100644 index 00000000..db4d4a1d --- /dev/null +++ b/relecov_tools/assets/schema_utils/metadatalab_template.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python +import logging +import rich.console +import pandas as pd + + +import relecov_tools.utils + + +log = logging.getLogger(__name__) +stderr = rich.console.Console( 
+ stderr=True, + style="dim", + highlight=False, + force_terminal=relecov_tools.utils.rich_force_colors(), +) + + +def schema_to_flatten_json(json_data): + """This function flattens schema when nested items are found""" + try: + flatten_json = {} + for property_id, features in json_data.items(): + try: + if features.get("type") == "array": + complex_properties = json_data[property_id]["items"].get( + "properties" + ) + for ( + complex_property_id, + complex_feature, + ) in complex_properties.items(): + flatten_json.update({complex_property_id: complex_feature}) + else: + flatten_json.update({property_id: features}) + except Exception as e: + stderr.print(f"[red]Error processing property {property_id}: {e}") + return flatten_json + except Exception as e: + stderr.print(f"[red]Error in schema_to_flatten_json: {e}") + return None + + +def schema_properties_to_df(json_data): + try: + # Initialize an empty list to store the rows of the DataFrame + rows = [] + + # Iterate over each property in the JSON data + for property_id, property_features in json_data.items(): + try: + # Create a dictionary to hold the property features + row = {"property_id": property_id} + row.update(property_features) + + # Append the row to the list of rows + rows.append(row) + except Exception as e: + stderr.print(f"[red]Error processing property {property_id}: {e}") + + # Create a DataFrame from the list of rows + df = pd.DataFrame(rows) + + # Return the DataFrame + return df + except Exception as e: + stderr.print(f"[red]Error in schema_properties_to_df: {e}") + return None + + +def excel_formater(df, writer, sheet, out_file, have_index=True, have_header=True): + try: + + # Write the DataFrame to the specified sheet + df.to_excel( + writer, sheet_name=sheet, startrow=1, index=have_index, header=have_header + ) + + # Get the xlsxwriter workbook and worksheet objects. 
+ workbook = writer.book + worksheet = writer.sheets[sheet] + + # setup excel format + worksheet.set_column(0, len(df.columns), 30) + header_formater = workbook.add_format( + { + "bold": True, + "text_wrap": False, + "valign": "top", + "fg_color": "#ADD8E6", + "border": 1, + "locked": True, + } + ) + first_col_formater = workbook.add_format( + { + "bold": True, + "text_wrap": False, + "valign": "center", + "fg_color": "#ADD8E6", + "border": 1, + "locked": True, + } + ) + + if sheet == "OVERVIEW": + # Write the column headers with the defined format. + for col_num, value in enumerate(df.columns.values): + try: + worksheet.write(0, col_num + 1, value, header_formater) + except Exception as e: + stderr.print(f"Error writing header at column {col_num + 1}: {e}") + + # Write the first column with the defined format. + for row_num in range(1, len(df) + 1): + try: + worksheet.write( + row_num, 0, df.iloc[row_num - 1, 0], first_col_formater + ) + except Exception as e: + stderr.print(f"Error writing first column at row {row_num}: {e}") + + if sheet == "METADATA_LAB" or sheet == "DATA_VALIDATION": + # Write the column headers with the defined format. + for col_num in range(0, len(df.columns)): + for row_num in range(0, len(df)): + if row_num < 3: + try: + worksheet.write( + row_num + 1, + col_num + 1, + df.iloc[row_num, col_num], + header_formater, + ) + except Exception as e: + stderr.print( + f"Error writing first column at row {row_num}: {e}" + ) + + # Write the first column with the defined format. 
class SchemaBuilder:
    """Build a JSON Schema and a lab metadata Excel template from an Excel database.

    The Excel file holds one row per schema property and one column per
    feature (type, enum, examples, ...). Properties flagged as complex are
    described in a dedicated sheet named after the property.
    """

    # Features (Excel columns) every property row must define.
    _MANDATORY_FEATURES = (
        "enum",
        "examples",
        "ontology_id",
        "type",
        "description",
        "classification",
        "label_name",
        "fill_mode",
        "required (Y/N)",
        "complex_field (Y/N)",
    )

    # Choices offered when the user asks to see the schema diff.
    # NOTE: the "sandard" spelling is kept on purpose, it is a runtime string.
    _DIFF_TO_STDOUT = "Print to sandard output (stdout)"
    _DIFF_TO_FILE = "Save to file"
    _DIFF_BOTH = "Both"

    def __init__(
        self,
        excel_file_path=None,
        base_schema_path=None,
        draft_version=None,
        show_diff=None,
        out_dir=None,
    ):
        """Validate the inputs and resolve the paths used to build the schema.

        Args:
            excel_file_path (str): Path to the .xlsx database definition.
            base_schema_path (str): Path to the schema to compare against. When
                None, the schema named in the package configuration is used.
            draft_version (str): JSON Schema draft version to generate.
            show_diff (bool): Whether to show the diff against the base schema.
            out_dir (str): Folder where the generated files are written.

        Raises:
            ValueError: If the Excel path is missing, not a file, or not .xlsx.
        """
        self.excel_file_path = excel_file_path
        # Validate input variables
        if not self.excel_file_path or not os.path.isfile(self.excel_file_path):
            raise ValueError("A valid Excel file path must be provided.")
        if not self.excel_file_path.endswith(".xlsx"):
            raise ValueError("The Excel file must have a .xlsx extension.")

        # Validate output folder creation. The output is a directory, so it has
        # to be checked with isdir: isfile is never true for a folder and made
        # every run go through the creation prompt.
        if not out_dir or not os.path.isdir(out_dir):
            self.output_folder = relecov_tools.utils.prompt_create_outdir(None, out_dir)
        else:
            self.output_folder = out_dir

        # Normalise show diff option to True / None
        self.show_diff = True if show_diff else None

        # Validate json schema draft version
        self.draft_version = (
            relecov_tools.assets.schema_utils.jsonschema_draft.check_valid_version(
                draft_version
            )
        )

        # Validate base schema
        if base_schema_path is not None:
            if relecov_tools.utils.file_exists(base_schema_path):
                self.base_schema_path = base_schema_path
            else:
                stderr.print(
                    f"[Error]Defined base schema file not found: {base_schema_path}. Exiting..."
                )
                sys.exit(1)
        else:
            # Fall back to the schema shipped with the package.
            try:
                config_json = ConfigJson()
                relecov_schema = config_json.get_topic_data(
                    "json_schemas", "relecov_schema"
                )
                try:
                    self.base_schema_path = os.path.join(
                        os.path.dirname(os.path.realpath(__file__)),
                        "schema",
                        relecov_schema,
                    )
                    if not relecov_tools.utils.file_exists(self.base_schema_path):
                        stderr.print(
                            "[Error]Fatal error. Relecov schema were not found in current relecov-tools installation. Make sure relecov-tools command is functioning. Exiting..."
                        )
                        sys.exit(1)
                    stderr.print(
                        "[green]RELECOV schema successfully found in the configuration."
                    )
                except FileNotFoundError as fnf_error:
                    stderr.print(f"[red]Configuration file not found: {fnf_error}")
                    sys.exit(1)
            except KeyError as key_error:
                stderr.print(f"[orange]Configuration key error: {key_error}")
                sys.exit(1)

    def validate_database_definition(self, json_data):
        """Check that every property defines all the mandatory features.

        Args:
            json_data (dict): Database definition, {property_id: {feature: value}}.

        Returns:
            dict or None: {property_id: [missing features]} for the properties
            that lack mandatory features, or None when nothing is missing.
        """
        notvalid_properties = {}
        for j_key, j_value in json_data.items():
            missing_features = [
                feature
                for feature in self._MANDATORY_FEATURES
                if feature not in j_value
            ]
            if missing_features:
                notvalid_properties[j_key] = missing_features
        return notvalid_properties or None

    def read_database_definition(self, sheet_id="main"):
        """Read one sheet of the Excel database and convert it to a dictionary.

        The first column is used as property id; the remaining columns become
        the features of that property. The process exits if the sheet is empty
        or any property lacks mandatory features.

        Args:
            sheet_id (str): The sheet name or ID in the Excel file. Defaults to "main".

        Returns:
            json_data (dict): {property_id: {feature: value}}.
        """
        # Name of the calling function, only used to prefix the log messages.
        caller_method = inspect.stack()[1][3]
        # Read excel file
        df = pd.read_excel(
            self.excel_file_path,
            sheet_name=sheet_id,
            na_values=["nan", "N/A", "NA", ""],
        )
        # Convert database to json format
        feature_names = df.columns[1:]
        json_data = {
            row[0]: dict(zip(feature_names, row[1:]))
            for row in df.itertuples(index=False)
        }

        # Check json is not empty
        if not json_data:
            stderr.print(
                f"({caller_method}:{sheet_id}) [red]No data found in xlsx database"
            )
            sys.exit(1)

        # Perform validation of database content
        validation_out = self.validate_database_definition(json_data)

        if validation_out:
            stderr.print(
                f"({caller_method}:{sheet_id}) [red]Validation of database content falied. Missing mandatory features in: {validation_out}"
            )
            sys.exit(1)
        else:
            stderr.print(
                f"({caller_method}:{sheet_id}) [green]Validation of database content passed."
            )
            return json_data

    def create_schema_draft_template(self):
        """Create the JSON Schema draft template for the selected draft version.

        Returns:
            draft_template (dict): The JSON Schema draft template.
        """
        return relecov_tools.assets.schema_utils.jsonschema_draft.create_draft(
            self.draft_version, True
        )

    def standard_jsonschema_object(self, data_dict, target_key):
        """Create the JSON Schema entry for one feature of a property.

        Args:
            data_dict (dict): Features of a single property.
            target_key (str): Feature to convert.

        Returns:
            json_dict (dict): {target_key: value}. "enum" values are split on
            ", " into a list and "examples" are wrapped in a one-item list; both
            are omitted (empty dict) when the cell is empty. Any other feature
            is returned as a string, empty when the cell is empty.
        """

        def handle_nan(value):
            # Empty cells come back from pandas as NaN (or its string forms).
            if pd.isna(value) or value in ["nan", "NaN", "None", "none"]:
                return ""
            return str(value)

        value = handle_nan(data_dict.get(target_key, ""))
        if target_key == "enum":
            # if no value, json key won't be necessary, then avoid adding it
            return {target_key: value.split(", ")} if value else {}
        if target_key == "examples":
            return {target_key: [value]} if value else {}
        return {target_key: value}

    def complex_jsonschema_object(self, property_id, features_dict):
        """Create a nested JSON Schema object for a complex property.

        The sub-properties are read from the Excel sheet named `property_id`.

        Args:
            property_id (str): Id of the complex property (also its sheet name).
            features_dict (dict): Maps database features to JSON Schema keys.

        Returns:
            json_dict (dict or None): {"type": "object", "properties": {...}},
            or None if reading the sheet raised ValueError.
        """
        json_dict = {"type": "object", "properties": {}}

        # Read tab-dedicated sheet in excel database
        try:
            complex_json_data = self.read_database_definition(sheet_id=property_id)
        except ValueError as e:
            stderr.print(f"[yellow]{e}")
            return None

        # Add sub property items
        for sub_property_id, sub_features in complex_json_data.items():
            complex_json_feature = {}
            for db_feature_key, json_key in features_dict.items():
                # "required" is not a per-sub-property schema keyword.
                if json_key == "required":
                    continue
                feature_schema = self.standard_jsonschema_object(
                    sub_features, db_feature_key
                )
                if feature_schema:
                    complex_json_feature[json_key] = feature_schema[db_feature_key]
            json_dict["properties"][sub_property_id] = complex_json_feature

        return json_dict

    def build_new_schema(self, json_data, schema_draft):
        """Fill the draft template with the properties of the database definition.

        Args:
            json_data (dict): The database definition, {property_id: {feature: value}}.
            schema_draft (dict): The JSON Schema draft template; it must contain
                a "properties" dict and is modified in place.

        Returns:
            schema_draft (dict): The same object, with "properties" filled and
            "required" set to the ids whose "required (Y/N)" value is "Y".

        Raises:
            Exception: Any error is reported to stderr and re-raised.
        """
        try:
            # Maps database features (keys) to json schema features (values).
            features_to_check = {
                "type": "type",
                "enum": "enum",
                "examples": "examples",
                "ontology_id": "ontology",
                "description": "description",
                "classification": "classification",
                "label_name": "label",
                "fill_mode": "fill_mode",
                "required (Y/N)": "required",
            }
            required_property_unique = []

            for property_id, db_features_dic in json_data.items():
                schema_property = {}

                # Properties flagged as complex are described in their own sheet.
                if db_features_dic.get("complex_field (Y/N)") == "Y":
                    complex_json_feature = self.complex_jsonschema_object(
                        property_id, features_to_check
                    )
                    if complex_json_feature:
                        # NOTE(review): "required"/"additionalProperties" are set
                        # next to "items" rather than inside it, and a complex
                        # property is never added to the top-level "required"
                        # list. Kept as is; confirm this is intended.
                        schema_property["type"] = "array"
                        schema_property["items"] = complex_json_feature
                        schema_property["additionalProperties"] = False
                        schema_property["required"] = list(
                            complex_json_feature["properties"]
                        )
                # For those that follow the standard format.
                else:
                    for db_feature_key, schema_feature_key in features_to_check.items():
                        # Verify that db_feature_key is present in the database
                        if db_feature_key not in db_features_dic:
                            stderr.print(
                                f"[INFO] Feature {db_feature_key} is not present in database ({self.excel_file_path})"
                            )
                            continue
                        # "required" is recorded at schema level, not per property.
                        if (
                            "required" in db_feature_key
                            or schema_feature_key == "required"
                        ):
                            is_required = str(db_features_dic[db_feature_key])
                            if (
                                is_required == "Y"
                                and property_id not in required_property_unique
                            ):
                                required_property_unique.append(property_id)
                            continue
                        std_json_feature = self.standard_jsonschema_object(
                            db_features_dic, db_feature_key
                        )
                        if std_json_feature:
                            schema_property[schema_feature_key] = std_json_feature[
                                db_feature_key
                            ]
                # Finally, send schema_property object to the new json schema draft.
                schema_draft["properties"][property_id] = schema_property

            schema_draft["required"] = required_property_unique
            return schema_draft

        except Exception as e:
            stderr.print(f"[red]Error building schema: {str(e)}")
            raise

    def verify_schema(self, schema):
        """Check the schema against the JSON Schema specification of the draft version.

        Args:
            schema (dict): The JSON Schema to be verified.
        """
        relecov_tools.assets.schema_utils.jsonschema_draft.check_schema_draft(
            schema, self.draft_version
        )

    def get_schema_diff(self, base_schema, new_schema):
        """Compare two schemas and, if they differ, let the user print/save the diff.

        Args:
            base_schema (dict): The base JSON Schema to compare against.
            new_schema (dict): The newly generated JSON Schema.

        Returns:
            True (via print_save_schema_diff) if differences were found,
            None otherwise.
        """
        # Set diff input
        base_schema_lines = json.dumps(base_schema, indent=4).splitlines()
        new_schema_lines = json.dumps(new_schema, indent=4).splitlines()

        # Get diff lines
        diff_lines = list(
            difflib.unified_diff(
                base_schema_lines,
                new_schema_lines,
                fromfile="base_schema.json",
                tofile="new_schema.json",
            )
        )

        if not diff_lines:
            stderr.print(
                "[yellow]No differencess were found between already installed and new generated schema. Exiting. No changes made"
            )
            return None
        stderr.print(
            "[yellow]Differences found between the existing schema and the newly generated schema."
        )
        return self.print_save_schema_diff(diff_lines)

    def print_save_schema_diff(self, diff_lines=None):
        """Ask the user how to output the schema diff and do it.

        Args:
            diff_lines (list): Unified diff lines to print and/or save.

        Returns:
            bool: True once the selected outputs have been produced.
        """
        diff_lines = diff_lines or []
        # Set user's choices
        choices = [self._DIFF_TO_STDOUT, self._DIFF_TO_FILE, self._DIFF_BOTH]
        diff_output_choice = relecov_tools.utils.prompt_selection(
            "How would you like to print the diff between schemes?:", choices
        )
        if diff_output_choice in (self._DIFF_TO_STDOUT, self._DIFF_BOTH):
            for line in diff_lines:
                print(line)
        # No early return above: "Both" must also reach the save step.
        if diff_output_choice in (self._DIFF_TO_FILE, self._DIFF_BOTH):
            diff_filepath = os.path.join(
                os.path.realpath(self.output_folder), "build_schema_diff.txt"
            )
            with open(diff_filepath, "w", encoding="utf-8") as diff_file:
                diff_file.write("\n".join(diff_lines))
            stderr.print(f"[green]Schema diff file saved to {diff_filepath}")
        return True

    # FIXME: Add version tag to file name
    def save_new_schema(self, json_data):
        """Save the generated JSON Schema as relecov_schema.json in the output folder.

        Args:
            json_data (dict): The JSON Schema to be saved.

        Returns:
            bool: True if the schema was successfully saved, False otherwise.
        """
        try:
            path_to_save = os.path.join(self.output_folder, "relecov_schema.json")
            # ensure_ascii=False emits raw non-ASCII characters, so the file
            # encoding must not depend on the platform default.
            with open(path_to_save, "w", encoding="utf-8") as schema_file:
                json.dump(json_data, schema_file, ensure_ascii=False, indent=4)
            stderr.print(f"[green]New JSON schema saved to: {path_to_save} ")
            return True
        except PermissionError as perm_error:
            stderr.print(f"[red]Permission error: {perm_error}")
        except IOError as io_error:
            stderr.print(f"[red]I/O error: {io_error}")
        except Exception as e:
            stderr.print(f"[red]An unexpected error occurred: {str(e)}")
        return False

    @staticmethod
    def _first_example(value):
        """Return the first item when value is a list, the value itself otherwise."""
        return value[0] if isinstance(value, list) else value

    # FIXME: overview-tab - FIX first column values
    # FIXME: overview-tab - Still need to add the column that maps to tab metadatalab
    def create_metadatalab_excel(self, json_schema):
        """Generate the Metadata LAB Excel template from a JSON Schema.

        The workbook has three sheets: OVERVIEW, METADATA_LAB and
        DATA_VALIDATION. Errors are reported to stderr and abort the process.

        Args:
            json_schema (dict): JSON Schema with "properties" and "required".

        Returns:
            None
        """
        template_utils = relecov_tools.assets.schema_utils.metadatalab_template
        try:
            # Set up metadatalab configuration
            out_file = os.path.join(
                self.output_folder, "metadatalab_template" + ".xlsx"
            )
            required_classification = [
                "Database Identifiers",
                "Sample collection and processing",
                "Host information",
                "Sequencing",
                "Pathogen Diagnostic testing",
                "Contributor Acknowledgement",
            ]
            # A schema without "required" means no property is mandatory.
            required_properties = json_schema.get("required") or []
            schema_properties = json_schema.get("properties")

            # Read json schema properties and convert it into pandas df
            try:
                schema_properties_flatten = template_utils.schema_to_flatten_json(
                    schema_properties
                )
                df = template_utils.schema_properties_to_df(schema_properties_flatten)
                # Copy so the new column is not written into a view of df.
                df = df[df["classification"].isin(required_classification)].copy()
                df["required"] = df["property_id"].apply(
                    lambda x: "Y" if x in required_properties else "N"
                )
            except Exception as e:
                stderr.print(f"Error processing schema properties: {e}")
                return None

            # Overview sheet
            try:
                overview_header = [
                    "Label name",
                    "Description",
                    "Group",
                    "Mandatory (Y/N)",
                    "Example",
                    "METADATA_LAB COLUMN",
                ]
                df_overview = pd.DataFrame(columns=overview_header)
                df_overview["Label name"] = df["label"]
                df_overview["Description"] = df["description"]
                df_overview["Group"] = df["classification"]
                df_overview["Mandatory (Y/N)"] = df["required"]
                df_overview["Example"] = df["examples"].apply(self._first_example)
            except Exception as e:
                stderr.print(f"Error creating overview sheet: {e}")
                return None

            # MetadataLab sheet
            try:
                metadatalab_header = ["EJEMPLOS", "DESCRIPCIÓN", "CAMPO"]
                df_metadata = pd.DataFrame(columns=metadatalab_header)
                df_metadata["EJEMPLOS"] = df["examples"].apply(self._first_example)
                df_metadata["DESCRIPCIÓN"] = df["description"]
                df_metadata["CAMPO"] = df["label"]
                df_metadata = df_metadata.transpose()
            except Exception as e:
                stderr.print(f"[red]Error creating MetadataLab sheet: {e}")
                return None

            # DataValidation sheet
            try:
                datavalidation_header = ["EJEMPLOS", "DESCRIPCIÓN", "CAMPO"]
                df_hasenum = df[(pd.notnull(df.enum))]
                df_validation = pd.DataFrame(columns=datavalidation_header)
                df_validation["tmp_property"] = df_hasenum["property_id"]
                df_validation["EJEMPLOS"] = df_hasenum["examples"].apply(
                    self._first_example
                )
                df_validation["DESCRIPCIÓN"] = df_hasenum["description"]
                df_validation["CAMPO"] = df_hasenum["label"]
            except Exception as e:
                stderr.print(f"[red]Error creating DataValidation sheet: {e}")
                return None

            try:
                # Enums have different lengths, so they are padded to the
                # longest one before building a rectangular DataFrame.
                enum_dict = {}
                for prop_id in df_hasenum["property_id"]:
                    enum_values = df_hasenum[df_hasenum["property_id"] == prop_id][
                        "enum"
                    ].values
                    # Copy the list: padding below must not alter the values
                    # stored in the DataFrame (presumably lists; TODO confirm).
                    enum_dict[prop_id] = (
                        list(enum_values[0]) if enum_values.size > 0 else []
                    )
                enum_maxitems = max(
                    (len(values) for values in enum_dict.values()), default=0
                )
                for values in enum_dict.values():
                    values.extend([""] * (enum_maxitems - len(values)))

                new_df = pd.DataFrame(enum_dict)

                valid_index = df_validation["tmp_property"].values
                valid_transposed = df_validation.transpose()
                valid_transposed.columns = valid_index

                df_validation = pd.concat([valid_transposed, new_df])
                df_validation = df_validation.drop(index=["tmp_property"])
            except Exception as e:
                stderr.print(f"[red]Error processing enums and combining data: {e}")
                return None

            # WRITE EXCEL
            try:
                sheets = (
                    (df_overview, "OVERVIEW", False),
                    (df_metadata, "METADATA_LAB", True),
                    (df_validation, "DATA_VALIDATION", True),
                )
                # The context manager closes the workbook even on error.
                with pd.ExcelWriter(out_file, engine="xlsxwriter") as writer:
                    for sheet_df, sheet_name, have_index in sheets:
                        template_utils.excel_formater(
                            sheet_df,
                            writer,
                            sheet_name,
                            out_file,
                            have_index=have_index,
                            have_header=False,
                        )
                stderr.print(
                    f"[green]Metadata lab template successfuly created in: {out_file}"
                )
            except Exception as e:
                stderr.print(f"[red]Error writing to Excel: {e}")
                return None
        except Exception as e:
            stderr.print(f"[red]Error in create_metadatalab_excel: {e}")
            return None

    def handle_build_schema(self):
        """Run the whole process: read database, build, verify, compare and save.

        The new schema is saved whenever it differs from the base schema; the
        diff itself is only shown/saved when `show_diff` was requested.
        """
        # Load xlsx database and convert into json format
        stderr.print("[white]Start reading xlsx database")
        database_dic = self.read_database_definition()

        # Verify current schema used by relecov-tools:
        base_schema_json = relecov_tools.utils.read_json_file(self.base_schema_path)
        if not base_schema_json:
            stderr.print("[red]Couldn't find relecov base schema. Exiting...)")
            sys.exit(1)

        # Create schema draft template
        schema_draft_template = self.create_schema_draft_template()

        # build new schema draft based on database definition.
        new_schema_json = self.build_new_schema(database_dic, schema_draft_template)

        # Verify new schema follows json schema specification rules.
        self.verify_schema(new_schema_json)

        # Compare base vs new schema and save the new JSON schema if it changed.
        if self.show_diff:
            schema_diff = self.get_schema_diff(base_schema_json, new_schema_json)
        else:
            # Same comparison get_schema_diff performs, without user prompts.
            schema_diff = json.dumps(base_schema_json, indent=4) != json.dumps(
                new_schema_json, indent=4
            )

        if schema_diff:
            self.save_new_schema(new_schema_json)
        else:
            stderr.print(
                f"[green]No changes found against base schema ({self.base_schema_path})."
            )

        # Create metadata lab template
        promp_answ = relecov_tools.utils.prompt_yn_question(
            "Do you want to create a metadata lab file?:"
        )
        if promp_answ:
            self.create_metadatalab_excel(new_schema_json)
Applicable", + "anatomical_material": "Not Applicable" + }, + "Alveolar sac Aspiration": { + "anatomical_part": "Alveolar sac", + "collection_method": "Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural sac Aspiration": { + "anatomical_part": "Pleural sac", + "collection_method": "Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural cavity Aspiration": { + "anatomical_part": "Pleural cavity", + "collection_method": "Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Trachea Aspiration": { + "anatomical_part": "Trachea", + "collection_method": "Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Upper respiratory tract Aspiration": { + "anatomical_part": "Upper respiratory tract", + "collection_method": "Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Anterior Nares Aspiration": { + "anatomical_part": "Anterior Nares", + "collection_method": "Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Esophagus Aspiration": { + "anatomical_part": "Esophagus", + "collection_method": "Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Ethmoid sinus Aspiration": { + "anatomical_part": "Ethmoid sinus", + "collection_method": "Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasal Cavity Aspiration": { + "anatomical_part": "Nasal Cavity", + "collection_method": "Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Middle Nasal Turbinate Aspiration": { + "anatomical_part": "Middle Nasal Turbinate", + "collection_method": "Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasopharynx 
Aspiration": { + "anatomical_part": "Nasopharynx", + "collection_method": "Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Oropharynx Aspiration": { + "anatomical_part": "Oropharynx", + "collection_method": "Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Lower respiratory tract Vacuum Aspiration": { + "anatomical_part": "Lower respiratory tract", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchus Vacuum Aspiration": { + "anatomical_part": "Bronchus", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Lung Vacuum Aspiration": { + "anatomical_part": "Lung", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchiole Vacuum Aspiration": { + "anatomical_part": "Bronchiole", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Alveolar sac Vacuum Aspiration": { + "anatomical_part": "Alveolar sac", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural sac Vacuum Aspiration": { + "anatomical_part": "Pleural sac", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural cavity Vacuum Aspiration": { + "anatomical_part": "Pleural cavity", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Trachea Vacuum Aspiration": { + "anatomical_part": "Trachea", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Upper respiratory tract Vacuum 
Aspiration": { + "anatomical_part": "Upper respiratory tract", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Anterior Nares Vacuum Aspiration": { + "anatomical_part": "Anterior Nares", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Esophagus Vacuum Aspiration": { + "anatomical_part": "Esophagus", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Ethmoid sinus Vacuum Aspiration": { + "anatomical_part": "Ethmoid sinus", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasal Cavity Vacuum Aspiration": { + "anatomical_part": "Nasal Cavity", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Middle Nasal Turbinate Vacuum Aspiration": { + "anatomical_part": "Middle Nasal Turbinate", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasopharynx Vacuum Aspiration": { + "anatomical_part": "Nasopharynx", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Oropharynx Vacuum Aspiration": { + "anatomical_part": "Oropharynx", + "collection_method": "Vacuum Aspiration", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Lower respiratory tract Biopsy": { + "anatomical_part": "Lower respiratory tract", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchus Biopsy": { + "anatomical_part": "Bronchus", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Lung 
Biopsy": { + "anatomical_part": "Lung", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchiole Biopsy": { + "anatomical_part": "Bronchiole", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Alveolar sac Biopsy": { + "anatomical_part": "Alveolar sac", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural sac Biopsy": { + "anatomical_part": "Pleural sac", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural cavity Biopsy": { + "anatomical_part": "Pleural cavity", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Trachea Biopsy": { + "anatomical_part": "Trachea", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Rectum Biopsy": { + "anatomical_part": "Rectum", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Skin Biopsy": { + "anatomical_part": "Skin", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Stomach Biopsy": { + "anatomical_part": "Stomach", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Upper respiratory tract Biopsy": { + "anatomical_part": "Upper respiratory tract", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Anterior Nares Biopsy": { + "anatomical_part": "Anterior Nares", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Esophagus Biopsy": { + "anatomical_part": "Esophagus", + 
"collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Ethmoid sinus Biopsy": { + "anatomical_part": "Ethmoid sinus", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasal Cavity Biopsy": { + "anatomical_part": "Nasal Cavity", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Middle Nasal Turbinate Biopsy": { + "anatomical_part": "Middle Nasal Turbinate", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Duodenum Biopsy": { + "anatomical_part": "Duodenum", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasopharynx Biopsy": { + "anatomical_part": "Nasopharynx", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Oropharynx Biopsy": { + "anatomical_part": "Oropharynx", + "collection_method": "Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Lower respiratory tract Needle Biopsy": { + "anatomical_part": "Lower respiratory tract", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchus Needle Biopsy": { + "anatomical_part": "Bronchus", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Lung Needle Biopsy": { + "anatomical_part": "Lung", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchiole Needle Biopsy": { + "anatomical_part": "Bronchiole", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Alveolar sac Needle 
Biopsy": { + "anatomical_part": "Alveolar sac", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural sac Needle Biopsy": { + "anatomical_part": "Pleural sac", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural cavity Needle Biopsy": { + "anatomical_part": "Pleural cavity", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Trachea Needle Biopsy": { + "anatomical_part": "Trachea", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Rectum Needle Biopsy": { + "anatomical_part": "Rectum", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Skin Needle Biopsy": { + "anatomical_part": "Skin", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Stomach Needle Biopsy": { + "anatomical_part": "Stomach", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Upper respiratory tract Needle Biopsy": { + "anatomical_part": "Upper respiratory tract", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Anterior Nares Needle Biopsy": { + "anatomical_part": "Anterior Nares", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Esophagus Needle Biopsy": { + "anatomical_part": "Esophagus", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Ethmoid sinus Needle Biopsy": { + "anatomical_part": "Ethmoid sinus", + "collection_method": "Needle 
Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasal Cavity Needle Biopsy": { + "anatomical_part": "Nasal Cavity", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Middle Nasal Turbinate Needle Biopsy": { + "anatomical_part": "Middle Nasal Turbinate", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Duodenum Needle Biopsy": { + "anatomical_part": "Duodenum", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasopharynx Needle Biopsy": { + "anatomical_part": "Nasopharynx", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Oropharynx Needle Biopsy": { + "anatomical_part": "Oropharynx", + "collection_method": "Needle Biopsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Lower respiratory tract Lavage": { + "anatomical_part": "Lower respiratory tract", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchus Lavage": { + "anatomical_part": "Bronchus", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Lung Lavage": { + "anatomical_part": "Lung", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural sac Lavage": { + "anatomical_part": "Pleural sac", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural cavity Lavage": { + "anatomical_part": "Pleural cavity", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Trachea Lavage": { + 
"anatomical_part": "Trachea", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Upper respiratory tract Lavage": { + "anatomical_part": "Upper respiratory tract", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Anterior Nares Lavage": { + "anatomical_part": "Anterior Nares", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Esophagus Lavage": { + "anatomical_part": "Esophagus", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Ethmoid sinus Lavage": { + "anatomical_part": "Ethmoid sinus", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasal Cavity Lavage": { + "anatomical_part": "Nasal Cavity", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Middle Nasal Turbinate Lavage": { + "anatomical_part": "Middle Nasal Turbinate", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasopharynx Lavage": { + "anatomical_part": "Nasopharynx", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Oropharynx Lavage": { + "anatomical_part": "Oropharynx", + "collection_method": "Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Lower respiratory tract Necropsy": { + "anatomical_part": "Lower respiratory tract", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchus Necropsy": { + "anatomical_part": "Bronchus", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" 
+ }, + "Lung Necropsy": { + "anatomical_part": "Lung", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchiole Necropsy": { + "anatomical_part": "Bronchiole", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Alveolar sac Necropsy": { + "anatomical_part": "Alveolar sac", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural sac Necropsy": { + "anatomical_part": "Pleural sac", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural cavity Necropsy": { + "anatomical_part": "Pleural cavity", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Trachea Necropsy": { + "anatomical_part": "Trachea", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Rectum Necropsy": { + "anatomical_part": "Rectum", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Skin Necropsy": { + "anatomical_part": "Skin", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Stomach Necropsy": { + "anatomical_part": "Stomach", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Upper respiratory tract Necropsy": { + "anatomical_part": "Upper respiratory tract", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Anterior Nares Necropsy": { + "anatomical_part": "Anterior Nares", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + 
"Esophagus Necropsy": { + "anatomical_part": "Esophagus", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Ethmoid sinus Necropsy": { + "anatomical_part": "Ethmoid sinus", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasal Cavity Necropsy": { + "anatomical_part": "Nasal Cavity", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Middle Nasal Turbinate Necropsy": { + "anatomical_part": "Middle Nasal Turbinate", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Duodenum Necropsy": { + "anatomical_part": "Duodenum", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasopharynx Necropsy": { + "anatomical_part": "Nasopharynx", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Oropharynx Necropsy": { + "anatomical_part": "Oropharynx", + "collection_method": "Necropsy", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Lower respiratory tract Rinsing": { + "anatomical_part": "Lower respiratory tract", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchus Rinsing": { + "anatomical_part": "Bronchus", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Lung Rinsing": { + "anatomical_part": "Lung", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchiole Rinsing": { + "anatomical_part": "Bronchiole", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not 
Applicable" + }, + "Alveolar sac Rinsing": { + "anatomical_part": "Alveolar sac", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural sac Rinsing": { + "anatomical_part": "Pleural sac", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural cavity Rinsing": { + "anatomical_part": "Pleural cavity", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Trachea Rinsing": { + "anatomical_part": "Trachea", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Upper respiratory tract Rinsing": { + "anatomical_part": "Upper respiratory tract", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Anterior Nares Rinsing": { + "anatomical_part": "Anterior Nares", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Esophagus Rinsing": { + "anatomical_part": "Esophagus", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Ethmoid sinus Rinsing": { + "anatomical_part": "Ethmoid sinus", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasal Cavity Rinsing": { + "anatomical_part": "Nasal Cavity", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Middle Nasal Turbinate Rinsing": { + "anatomical_part": "Middle Nasal Turbinate", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasopharynx Rinsing": { + "anatomical_part": "Nasopharynx", + "collection_method": "Rinsing", + "body_product": "Not 
Applicable", + "anatomical_material": "Not Applicable" + }, + "Oropharynx Rinsing": { + "anatomical_part": "Oropharynx", + "collection_method": "Rinsing", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchiole Extract": { + "anatomical_part": "Bronchiole", + "collection_method": "Extract", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Alveolar sac Extract": { + "anatomical_part": "Alveolar sac", + "collection_method": "Extract", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural sac Extract": { + "anatomical_part": "Pleural sac", + "collection_method": "Extract", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural cavity Extract": { + "anatomical_part": "Pleural cavity", + "collection_method": "Extract", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Trachea Extract": { + "anatomical_part": "Trachea", + "collection_method": "Extract", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Upper respiratory tract Extract": { + "anatomical_part": "Upper respiratory tract", + "collection_method": "Extract", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Anterior Nares Extract": { + "anatomical_part": "Anterior Nares", + "collection_method": "Extract", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Esophagus Extract": { + "anatomical_part": "Esophagus", + "collection_method": "Extract", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Ethmoid sinus Extract": { + "anatomical_part": "Ethmoid sinus", + "collection_method": "Extract", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasal Cavity Extract": { + "anatomical_part": "Nasal Cavity", + "collection_method": "Extract", + 
"body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Middle Nasal Turbinate Extract": { + "anatomical_part": "Middle Nasal Turbinate", + "collection_method": "Extract", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasopharynx Extract": { + "anatomical_part": "Nasopharynx", + "collection_method": "Extract", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Oropharynx Extract": { + "anatomical_part": "Oropharynx", + "collection_method": "Extract", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pharynx Swab": { + "anatomical_part": "Pharynx", + "collection_method": "Swab", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasopharynx swab": { + "anatomical_part": "Nasopharynx", + "collection_method": "Swab", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Oropharynx Swab": { + "anatomical_part": "Oropharynx", + "collection_method": "Swab", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Lower respiratory tract Wash": { + "anatomical_part": "Lower respiratory tract", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchus Wash": { + "anatomical_part": "Bronchus", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Lung Wash": { + "anatomical_part": "Lung", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Bronchiole Wash": { + "anatomical_part": "Bronchiole", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Alveolar sac Wash": { + "anatomical_part": "Alveolar sac", + "collection_method": "Wash", + "body_product": "Not Applicable", + 
"anatomical_material": "Not Applicable" + }, + "Pleural sac Wash": { + "anatomical_part": "Pleural sac", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Pleural cavity Wash": { + "anatomical_part": "Pleural cavity", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Trachea Wash": { + "anatomical_part": "Trachea", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Upper respiratory tract Wash": { + "anatomical_part": "Upper respiratory tract", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Anterior Nares Wash": { + "anatomical_part": "Anterior Nares", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Esophagus Wash": { + "anatomical_part": "Esophagus", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Ethmoid sinus Wash": { + "anatomical_part": "Ethmoid sinus", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasal Cavity Wash": { + "anatomical_part": "Nasal Cavity", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Middle Nasal Turbinate Wash": { + "anatomical_part": "Middle Nasal Turbinate", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Nasopharynx Wash": { + "anatomical_part": "Nasopharynx", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Oropharynx Wash": { + "anatomical_part": "Oropharynx", + "collection_method": "Wash", + "body_product": "Not Applicable", + "anatomical_material": "Not 
Applicable" + }, + "Bronchoalveolar Lavage": { + "anatomical_part": "Not Applicable", + "collection_method": "Bronchoalveolar Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Gastric Lavage": { + "anatomical_part": "Not Applicable", + "collection_method": "Gastric Lavage", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Feces": { + "anatomical_part": "Not Applicable", + "collection_method": "Not Applicable", + "body_product": "Feces", + "anatomical_material": "Not Applicable" + }, + "Mucus": { + "anatomical_part": "Not Applicable", + "collection_method": "Not Applicable", + "body_product": "Mucus", + "anatomical_material": "Not Applicable" + }, + "Sputum": { + "anatomical_part": "Not Applicable", + "collection_method": "Not Applicable", + "body_product": "Sputum", + "anatomical_material": "Not Applicable" + }, + "Sweat": { + "anatomical_part": "Not Applicable", + "collection_method": "Not Applicable", + "body_product": "Sweat", + "anatomical_material": "Not Applicable" + }, + "Tear": { + "anatomical_part": "Not Applicable", + "collection_method": "Not Applicable", + "body_product": "Tear", + "anatomical_material": "Not Applicable" + }, + "Urine": { + "anatomical_part": "Not Applicable", + "collection_method": "Not Applicable", + "body_product": "Urine", + "anatomical_material": "Not Applicable" + }, + "Blood": { + "anatomical_part": "Blood", + "collection_method": "Not Applicable", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Cerebrospinal Fluid": { + "anatomical_part": "Not Applicable", + "collection_method": "Not Applicable", + "body_product": "Not Applicable", + "anatomical_material": "Cerebrospinal Fluid" + }, + "Saliva": { + "anatomical_part": "Not Applicable", + "collection_method": "Not Applicable", + "body_product": "Not Applicable", + "anatomical_material": "Saliva" + }, + "Tissue": { + "anatomical_part": "Not Applicable", + 
"collection_method": "Not Applicable", + "body_product": "Not Applicable", + "anatomical_material": "Tissue" + }, + "Scraping": { + "anatomical_part": "Not Applicable", + "collection_method": "Scraping", + "body_product": "Not Applicable", + "anatomical_material": "Not Applicable" + }, + "Placenta": { + "anatomical_part": "Not Applicable", + "collection_method": "Not Applicable", + "body_product": "Not Applicable", + "anatomical_material": "Placenta" + }, + "Not Provided": { + "anatomical_part": "Not Provided", + "collection_method": "Not Provided", + "body_product": "Not Provided", + "anatomical_material": "Not Provided" + } +} diff --git a/relecov_tools/conf/bioinfo_config.json b/relecov_tools/conf/bioinfo_config.json new file mode 100644 index 00000000..56fe98f0 --- /dev/null +++ b/relecov_tools/conf/bioinfo_config.json @@ -0,0 +1,130 @@ +{ + "viralrecon": { + "mapping_stats": { + "fn": "mapping_illumina(?:_\\d{8})?\\.tab", + "sample_col_idx": 5, + "header_row_idx": 1, + "required": true, + "function": null, + "content": { + "analysis_date": "analysis_date", + "depth_of_coverage_value": "medianDPcoveragevirus", + "number_of_variants_in_consensus": "Variantsinconsensusx10", + "number_of_variants_with_effect": "MissenseVariants", + "per_genome_greater_10x": "Coverage>10x(%)", + "per_Ns": "%Ns10x", + "per_reads_host": "%readshost", + "per_reads_virus": "%readsvirus", + "per_unmapped": "%unmapedreads", + "qc_filtered": "totalreads", + "reference_genome_accession": "Virussequence", + "read_length": "read_length" + } + }, + "mapping_pangolin": { + "fn": ".pangolin(?:.*?)?\\.csv$", + "header_row_idx": 1, + "required": false, + "function": "handle_pangolin_data", + "content": { + "variant_name": "scorpio_call", + "lineage_name": "lineage", + "lineage_algorithm_software_version": "version", + "lineage_analysis_software_version": "pangolin_version", + "lineage_analysis_scorpio_version": "scorpio_version", + "lineage_analysis_constellation_version": 
"constellation_version", + "lineage_analysis_date":"lineage_analysis_date" + } + }, + "variants_long_table": { + "fn": "variants_long_table.csv", + "sample_col_idx": 1, + "header_row_idx": 1, + "required": true, + "function": "parse_long_table", + "content": { + "sample" : "SAMPLE", + "chromosome": "CHROM", + "pos": "POS", + "alt": "ALT", + "ref": "REF", + "Filter": "FILTER", + "dp": "DP", + "ref_dp": "REF_DP", + "alt_dp": "ALT_DP", + "af": "AF", + "effect": "EFFECT", + "hgvs_c": "HGVS_C", + "hgvs_p": "HGVS_P", + "hgvs_p_1_letter": "HGVS_P_1LETTER", + "caller" : "CALLER", + "lineage" : "LINEAGE", + "gene" : "GENE" + } + }, + "mapping_consensus": { + "fn": ".consensus.fa", + "required": false, + "function": "handle_consensus_fasta", + "content": { + "consensus_sequence_name" : "sequence_name", + "consensus_genome_length" : "genome_length", + "consensus_sequence_filename" : "sequence_filename", + "consensus_sequence_filepath" : "sequence_filepath", + "consensus_sequence_md5" : "sequence_md5", + "number_of_base_pairs_sequenced" : "number_of_base_pairs_sequenced" + } + }, + "summary_mqc": { + "fn": "summary_variants_metrics_mqc.csv", + "sample_col_idx": 1, + "header_row_idx": 1, + "required": true, + "function": null, + "content": { + "ns_per_100_kbp": "# Ns per 100kb consensus" + } + }, + "workflow_summary": { + "fn": "multiqc_report.html", + "required": true, + "function": null, + "content": { + "software_version": { + "bioinformatics_protocol_software_version": "nf-core/viralrecon", + "consensus_sequence_software_version": "bcftools", + "dehosting_method_software_version": "kraken2", + "mapping_software_version":"bowtie2", + "preprocessing_software_version":"fastp", + "variant_calling_software_version":"ivar" + }, + "software_name": { + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "consensus_sequence_software_name": "bcftools", + "dehosting_method_software_name": "kraken2", + "mapping_software_name":"bowtie2", + 
"preprocessing_software_name":"fastp", + "variant_calling_software_name":"ivar" + } + } + }, + "fixed_values": { + "assembly": "Not Provided [GENEPIO:0001668]", + "assembly_params": "Not Provided [GENEPIO:0001668]", + "commercial_open_source_both": "Open Source", + "consensus_params": "-p vcf -f", + "depth_of_coverage_threshold": ">10x", + "if_assembly_other": "Not Provided [GENEPIO:0001668]", + "if_bioinformatic_protocol_is_other_specify": "Not Provided [GENEPIO:0001668]", + "if_consensus_other": "Not Provided [GENEPIO:0001668]", + "if_lineage_identification_other": "Not Provided [GENEPIO:0001668]", + "if_mapping_other": "Not Provided [GENEPIO:0001668]", + "if_preprocessing_other": "Not Provided [GENEPIO:0001668]", + "lineage_analysis_software_name": "pangolin", + "mapping_params": "--seed 1", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m10", + "lineage_analysis_date": "Not Provided [GENEPIO:0001668]" + } + } +} diff --git a/relecov_tools/conf/configuration.json b/relecov_tools/conf/configuration.json new file mode 100755 index 00000000..2c4c20f7 --- /dev/null +++ b/relecov_tools/conf/configuration.json @@ -0,0 +1,451 @@ +{ + "lab_metadata": { + "fixed_fields": { + "host_disease": "COVID-19", + "tax_id": "2697049", + "organism": "Severe acute respiratory syndrome coronavirus 2", + "study_type": "Whole Genome Sequencing", + "collector_name": "Not Provided" + }, + "metadata_lab_heading": [ + "Public Health sample id (SIVIES)", + "Sample ID given by originating laboratory", + "Sample ID given by the submitting laboratory", + "Sample ID given in the microbiology lab", + "Sample ID given if multiple rna-extraction or passages", + "Sample ID given for sequencing", + "ENA Sample ID", + "GISAID Virus Name", + "GISAID id", + "Originating 
Laboratory", + "Submitting Institution", + "Sequencing Institution", + "Sample Collection Date", + "Sample Received Date", + "Purpose of sampling", + "Biological Sample Storage Condition", + "Specimen source", + "Environmental Material", + "Environmental System", + "Collection Device", + "Host", + "Host Age", + "Host Gender", + "Sequencing Date", + "Nucleic acid extraction protocol", + "Commercial All-in-one library kit", + "Library Preparation Kit", + "Enrichment Protocol", + "If Enrichment Protocol Is Other, Specify", + "Enrichment panel/assay", + "If Enrichment panel/assay Is Other, Specify", + "Enrichment panel/assay version", + "Number Of Samples In Run", + "Runid", + "Sequencing Instrument Model", + "Flowcell Kit", + "Source material", + "Capture method", + "Sequencing technique", + "Library Layout", + "Gene Name 1", + "Diagnostic Pcr Ct Value 1", + "Gene Name 2", + "Diagnostic Pcr Ct Value-2", + "Authors", + "Sequence file R1 fastq", + "Sequence file R2 fastq" + ], + "bioinfo_heading": [ + "Consensus sequence filename", + "VCF filename", + "Variant designation table filename", + "Bioinformatics protocol", + "If bioinformatics protocol Is Other, Specify", + "Bioinformatics protocol version", + "Commercial/Open-source/both", + "Preprocessing software", + "Preprocessing software version", + "If preprocessing Is Other, Specify", + "Preprocessing params", + "Mapping software", + "Mapping software version", + "If mapping Is Other, Specify", + "Mapping params", + "Assembly software", + "Assembly software version", + "If assembly Is Other, Specify", + "Assembly params", + "Variant calling software", + "Variant calling software version", + "If variant calling Is Other, Specify", + "Variant calling params", + "Consensus software", + "Consensus software version", + "If consensus Is Other, Specify", + "Consensus params", + "Clade/Type identification software", + "Clade/Type software version", + "If Clade/Type Is Other, Specify", + "Lineage identification software", + 
"Lineage software version", + "If lineage identification Is Other, Specify", + "Quality control metrics (sample discard criteria)" + ], + "lab_metadata_req_json": { + "laboratory_data": { + "file": "laboratory_address.json", + "map_field": "collecting_institution", + "adding_fields": [ + "collecting_institution_address", + "collecting_institution_email", + "geo_loc_state", + "geo_loc_region", + "geo_loc_city", + "geo_loc_country" + ] + }, + "geo_location_data": { + "file": "geo_loc_cities.json", + "map_field": "geo_loc_city", + "adding_fields": [ + "geo_loc_latitude", + "geo_loc_longitude" + ] + }, + "submitting_data": { + "file": "laboratory_address.json", + "map_field": "collecting_institution", + "adding_fields": [ + "submitting_institution", + "submitting_institution_address", + "submitting_institution_email" + ] + }, + "specimen_source_splitting": { + "file": "anatomical_material_collection_method.json", + "map_field": "specimen_source", + "adding_fields": [ + "anatomical_material", + "anatomical_part", + "body_product", + "collection_method" + ] + } + }, + "required_post_processing": { + "host_common_name": { + "Human": "host_scientific_name::Homo sapiens" + }, + "sequencing_instrument_model": { + "Illumina": "sequencing_instrument_platform::Illumina", + "PacBio": "sequencing_instrument_platform::PacBio", + "Ion Torrent": "sequencing_instrument_platform::Ion Torrent", + "Oxford Nanopore": "sequencing_instrument_platform::Oxford Nanopore" + } + }, + "required_copy_from_other_field": { + "isolate_sample_id": "sequencing_sample_id" + }, + "samples_json_fields": [ + "fastq_r1_md5", + "fastq_r2_md5", + "sequence_file_R1_fastq", + "sequence_file_R2_fastq", + "r1_fastq_filepath", + "r2_fastq_filepath" + ] + }, + "long_table_heading": [ + "SAMPLE", + "CHROM", + "POS", + "REF", + "ALT", + "FILTER", + "DP", + "REF_DP", + "ALT_DP", + "AF", + "GENE", + "EFFECT", + "HGVS_C", + "HGVS_P", + "HGVS_P_1LETTER", + "CALLER", + "LINEAGE" + ], + "long_table_parse_aux": { + 
"Chromosome": "CHROM", + "Variant": { + "pos": "POS", + "alt": "ALT", + "ref": "REF" + }, + "Filter": "FILTER", + "VariantInSample": { + "dp": "DP", + "ref_dp": "REF_DP", + "alt_dp": "ALT_DP", + "af": "AF" + }, + "Effect": "EFFECT", + "VariantAnnotation": { + "hgvs_c": "HGVS_C", + "hgvs_p": "HGVS_P", + "hgvs_p_1_letter": "HGVS_P_1LETTER" + } + }, + "gisaid_csv_headers": [ + "submitter", + "covv_virus_name", + "covv_type", + "covv_passage", + "covv_collection_date", + "covv_location", + "covv_add_location", + "covv_host", + "covv_add_host_info", + "covv_sampling_strategy", + "covv_gender", + "covv_patient_age", + "covv_patient_status", + "covv_specimen", + "covv_outbreak", + "covv_last_vaccinated", + "covv_treatment", + "covv_seq_technology", + "covv_assembly_method", + "covv_coverage", + "covv_orig_lab", + "covv_orig_lab_addr", + "covv_provider_sample_id", + "covv_subm_lab", + "covv_subm_lab_addr", + "covv_subm_sample_id", + "covv_authors" + ], + "json_schemas": { + "relecov_schema": "relecov_schema.json", + "ena_schema": "ena_schema.json", + "gisaid_schema": "gisaid_schema.json" + }, + "institution_mapping_file": { + "ISCIII": "ISCIII.json", + "HUGTiP": "HUGTiP.json" + }, + "sftp_handle": { + "sftp_connection": { + "sftp_server": "sftprelecov.isciii.es", + "sftp_port": "22" + }, + "metadata_processing": { + "header_flag": "CAMPO", + "excel_sheet": "METADATA_LAB" + }, + "abort_if_md5_mismatch": "False", + "platform_storage_folder": "/tmp/relecov", + "allowed_file_extensions": [ + ".fastq.gz", + ".fastq", + ".fq", + ".fq.gz", + ".fasta", + ".fasta.gz", + ".fa", + ".fa.gz", + "bam" + ], + "allowed_download_options": [ + "download_only", + "download_clean", + "delete_only" + ], + "skip_when_found": [ + "#", + "Hash", + "Path" + ] + }, + "GISAID_configuration": { + "submitter": "GISAID_ID" + }, + "upload_database": { + "platform":{ + "iskylims": { + "server_url": "http://relecov-iskylims.isciiides.es", + "api_url": "/wetlab/api/", + "store_samples": "create-sample", + 
"url_project_fields": "projects-fields", + "url_sample_fields": "sample-fields", + "param_sample_project": "project", + "project_name": "relecov", + "token": "" + }, + "relecov": { + "server_url": "http://relecov-platform.isciiides.es", + "api_url": "/api/", + "store_samples": "createSampleData", + "bioinfodata": "createBioinfoData", + "variantdata": "createVariantData", + "sftp_info": "sftpInfo", + "token": "" + } + }, + "iskylims_fixed_values": { + "patient_core": "", + "sample_project": "Relecov", + "only_recorded": "Yes", + "sample_location": "Not defined" + }, + "relecov_sample_metadata": [ + "authors", + "collecting_institution", + "collecting_lab_sample_id", + "ena_broker_name", + "ena_sample_accession", + "gisaid_accession_id", + "gisaid_virus_name", + "microbiology_lab_sample_id", + "r1_fastq_filepath", + "r2_fastq_filepath", + "schema_name", + "schema_version", + "sequencing_date", + "sequence_file_R1_md5", + "sequence_file_R2_md5", + "sequence_file_R1_fastq", + "sequence_file_R2_fastq", + "sequencing_sample_id", + "submitting_lab_sample_id" + ] + }, + "ENA_fields": { + "ENA_configuration": { + "study_alias": "RELECOV", + "design_description": "Design Description", + "experiment_title": "Project for ENA submission RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "Whole Genome Sequencing", + "study_id": "ERP137164", + "ena_broker_name": "Instituto de Salud Carlos III" + }, + "checklist": "ERC000033", + "templates_path": "", + "tool": { + "tool_name": "ena-upload-cli", + "tool_version": "0.5.3" + }, + "df_study_fields": [ + "study_alias", + "study_title", + "study_type", + "study_abstract" + ], + "df_sample_fields": [ + "sample_alias", + "sample_title", + "collection date", + "geographic location (country and/or sea)", + "sample_description", + "host common name", + "host scientific name", + "host sex", + "scientific_name", + "collector name", + "collecting institution", + "address", + "isolate", + "host 
subject id", + "host health state", + "authors", + "taxon_id" + ], + "df_run_fields": [ + "run_alias", + "experiment_alias", + "file_name", + "file_format", + "file_checksum", + "collecting institution" + ], + "df_experiment_fields": [ + "experiment_alias", + "experiment_title", + "sample_alias", + "study_alias", + "design_description", + "library_name", + "library_strategy", + "library_source", + "library_selection", + "library_layout", + "library_construction_protocol", + "insert_size", + "platform", + "instrument_model", + "collecting institution" + ], + "ena_fixed_fields": { + "broker_name": "Instituto de Salud Carlos III", + "file_format": "FASTQ", + "study_alias": "RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_abstract": "RELECOV is a Spanish Network for genomics surveillance", + "insert_size": "0" + }, + "accession_fields": [ + "ena_study_accession", + "ena_sample_accession", + "ena_experiment_accession", + "ena_run_accession" + ], + "additional_formating": { + "sample_description": [ + "host_common_name", + "anatomical_part", + "collection_method" + ], + "design_description": [ + "library_layout", + "library_preparation_kit", + "library_selection", + "library_strategy" + ], + "r1_fastq_filepath": [ + "r1_fastq_filepath", + "sequence_file_R1_fastq" + ], + "r2_fastq_filepath": [ + "r2_fastq_filepath", + "sequence_file_R2_fastq" + ], + "experiment_alias": [ + "isolate_sample_id", + "sample_collection_date" + ], + "run_alias": [ + "isolate_sample_id", + "sample_collection_date" + ], + "experiment_title": [ + "sequencing_instrument_model", + "isolate_sample_id" + ], + "file_name": [ + "sequence_file_R1_fastq", + "sequence_file_R2_fastq" + ], + "file_checksum": [ + "fastq_r1_md5", + "fastq_r2_md5" + ] + } + }, + "launch_pipeline": { + "analysis_name": "RELECOV_icasas_C", + "analysis_folder": "ANALYSIS", + "sample_stored_folder": "RAW", + "sample_link_folder": "00-reads" + } +} diff --git 
a/relecov_tools/conf/geo_loc_cities.json b/relecov_tools/conf/geo_loc_cities.json new file mode 100755 index 00000000..535a7b28 --- /dev/null +++ b/relecov_tools/conf/geo_loc_cities.json @@ -0,0 +1,419 @@ +{ + "Madrid": { + "geo_loc_latitude": "40.4167", + "geo_loc_longitude": "-3.7167" + }, + "Barcelona": { + "geo_loc_latitude": "41.3825", + "geo_loc_longitude": "2.1769" + }, + "Sevilla": { + "geo_loc_latitude": "37.3900", + "geo_loc_longitude": "-5.9900" + }, + "Malaga": { + "geo_loc_latitude": "36.7194", + "geo_loc_longitude": "-4.4200" + }, + "Valencia": { + "geo_loc_latitude": "39.4700", + "geo_loc_longitude": "-0.3764" + }, + "Zaragoza": { + "geo_loc_latitude": "41.6483", + "geo_loc_longitude": "-0.8830" + }, + "Palma de Mallorca": { + "geo_loc_latitude": "39.5667", + "geo_loc_longitude": "2.6500" + }, + "Murcia": { + "geo_loc_latitude": "37.9861", + "geo_loc_longitude": "-1.1303" + }, + "Las Palmas": { + "geo_loc_latitude": "28.1272", + "geo_loc_longitude": "-15.4314" + }, + "A Coruña": { + "geo_loc_latitude": "43.3713", + "geo_loc_longitude": "-8.4188" + }, + "Bilbao": { + "geo_loc_latitude": "43.2569", + "geo_loc_longitude": "-2.9236" + }, + "Alicante": { + "geo_loc_latitude": "38.3453", + "geo_loc_longitude": "-0.4831" + }, + "Cordoba": { + "geo_loc_latitude": "37.8845", + "geo_loc_longitude": "-4.7796" + }, + "Valladolid": { + "geo_loc_latitude": "41.6528", + "geo_loc_longitude": "-4.7236" + }, + "Vigo": { + "geo_loc_latitude": "42.2314", + "geo_loc_longitude": "-8.7124" + }, + "Gijon": { + "geo_loc_latitude": "43.5333", + "geo_loc_longitude": "-5.7000" + }, + "Vitoria": { + "geo_loc_latitude": "42.8500", + "geo_loc_longitude": "-2.6833" + }, + "Granada": { + "geo_loc_latitude": "37.1781", + "geo_loc_longitude": "-3.6008" + }, + "Asturias": { + "geo_loc_latitude": "43.3600", + "geo_loc_longitude": "-5.8450" + }, + "Santa Cruz de Tenerife": { + "geo_loc_latitude": "28.4667", + "geo_loc_longitude": "-16.2500" + }, + "Pamplona": { + "geo_loc_latitude": 
"42.8167", + "geo_loc_longitude": "-1.6500" + }, + "León": { + "geo_loc_latitude": "42.6056", + "geo_loc_longitude": "-5.5700" + }, + "Almería": { + "geo_loc_latitude": "36.8403", + "geo_loc_longitude": "-2.4681" + }, + "Logroño": { + "geo_loc_latitude": "42.4650", + "geo_loc_longitude": "-2.4456" + }, + "Donostia": { + "geo_loc_latitude": "43.3200", + "geo_loc_longitude": "-1.9800" + }, + "Albacete": { + "geo_loc_latitude": "38.9956", + "geo_loc_longitude": "-1.8558" + }, + "Castellón de la Plana": { + "geo_loc_latitude": "39.9831", + "geo_loc_longitude": "-0.0331" + }, + "Santander": { + "geo_loc_latitude": "43.4628", + "geo_loc_longitude": "-3.8050" + }, + "Burgos": { + "geo_loc_latitude": "42.3500", + "geo_loc_longitude": "-3.6822" + }, + "Guadalajara": { + "geo_loc_latitude": "40.6337", + "geo_loc_longitude": "-3.1674" + }, + "Badajoz": { + "geo_loc_latitude": "38.8803", + "geo_loc_longitude": "-6.9753" + }, + "Salamanca": { + "geo_loc_latitude": "40.9667", + "geo_loc_longitude": "-5.6639" + }, + "Huelva": { + "geo_loc_latitude": "37.2500", + "geo_loc_longitude": "-6.9500" + }, + "Ciudad de Melilla": { + "geo_loc_latitude": "35.2937", + "geo_loc_longitude": "-2.9383" + }, + "Tarragona": { + "geo_loc_latitude": "41.1187", + "geo_loc_longitude": "1.2453" + }, + "Cadiz": { + "geo_loc_latitude": "36.5350", + "geo_loc_longitude": "-6.2975" + }, + "Jaen": { + "geo_loc_latitude": "37.7667", + "geo_loc_longitude": "-3.7711" + }, + "Girona": { + "geo_loc_latitude": "41.9833", + "geo_loc_longitude": "2.8167" + }, + "Lugo": { + "geo_loc_latitude": "43.0167", + "geo_loc_longitude": "-7.5500" + }, + "Caceres": { + "geo_loc_latitude": "39.4833", + "geo_loc_longitude": "-6.3667" + }, + "Toledo": { + "geo_loc_latitude": "39.8567", + "geo_loc_longitude": "-4.0244" + }, + "Ceuta": { + "geo_loc_latitude": "35.8867", + "geo_loc_longitude": "-5.3000" + }, + "Melilla": { + "geo_loc_latitude": "35.2919", + "geo_loc_longitude": "-2.93848" + }, + "Zamora": { + "geo_loc_latitude": 
"41.5033", + "geo_loc_longitude": "-5.7556" + }, + "Avila": { + "geo_loc_latitude": "40.6500", + "geo_loc_longitude": "-4.6833" + }, + "Cuenca": { + "geo_loc_latitude": "40.0667", + "geo_loc_longitude": "-2.1500" + }, + "Huesca": { + "geo_loc_latitude": "42.1333", + "geo_loc_longitude": "-0.4167" + }, + "Teruel": { + "geo_loc_latitude": "40.3456", + "geo_loc_longitude": "-1.1065" + }, + "Ourense": { + "geo_loc_latitude": "42.3364", + "geo_loc_longitude": "-7.8633" + }, + "Palencia": { + "geo_loc_latitude": "42.0167", + "geo_loc_longitude": "-4.5333" + }, + "Lleida": { + "geo_loc_latitude": "41.6167", + "geo_loc_longitude": "0.6222" + }, + "Segovia": { + "geo_loc_latitude": "40.9481", + "geo_loc_longitude": "-4.1184" + }, + "Ciudad Real": { + "geo_loc_latitude": "38.9833", + "geo_loc_longitude": "-3.9167" + }, + "Soria": { + "geo_loc_latitude": "41.7667", + "geo_loc_longitude": "-2.4667" + }, + "Pontevedra": { + "geo_loc_latitude": "42.4333", + "geo_loc_longitude": "-8.6333" + }, + "Alcazar de San Juan": { + "geo_loc_latitude": "39.4056", + "geo_loc_longitude": "-3.2056" + }, + "Manzanares": { + "geo_loc_latitude": "38.9964", + "geo_loc_longitude": "-3.3731" + }, + "Valdepeñas": { + "geo_loc_latitude": "38.7667", + "geo_loc_longitude": "-3.4000" + }, + "Puertollano": { + "geo_loc_latitude": "38.6833", + "geo_loc_longitude": "-4.1167" + }, + "Talavera de la Reina": { + "geo_loc_latitude": "39.95", + "geo_loc_longitude": "-4.8333" + }, + "Badalona": { + "geo_loc_latitude": "41.4333", + "geo_loc_longitude": "2.2333" + }, + "L'Hospitalet de Llobregat": { + "geo_loc_latitude": "41.4217", + "geo_loc_longitude": "2.1897" + }, + "Sant Cugat del Valles": { + "geo_loc_latitude": "41.4667", + "geo_loc_longitude": "2.0833" + }, + "Móstoles": { + "geo_loc_latitude": "40.3223", + "geo_loc_longitude": "-3.865" + }, + "Leganés": { + "geo_loc_latitude": "40.3281", + "geo_loc_longitude": "-3.7644" + }, + "Getafe": { + "geo_loc_latitude": "40.3047", + "geo_loc_longitude": "-3.7311" + 
}, + "Cartagena": { + "geo_loc_latitude": "37.6000", + "geo_loc_longitude": "-0.9819" + }, + "Pozo Aledo": { + "geo_loc_latitude": "37.8167", + "geo_loc_longitude": "-0.85" + }, + "Lorca": { + "geo_loc_latitude": "37.6798", + "geo_loc_longitude": "-1.6944" + }, + "Cieza": { + "geo_loc_latitude": "38.2392", + "geo_loc_longitude": "-1.4189" + }, + "Merida": { + "geo_loc_latitude": "38.9000", + "geo_loc_longitude": "-6.3333" + }, + "Santiago de Compostela": { + "geo_loc_latitude": "42.8833", + "geo_loc_longitude": "-8.55" + }, + "Elche": { + "geo_loc_latitude": "38.2669", + "geo_loc_longitude": "-0.6983" + }, + "Gasteiz": { + "geo_loc_latitude": "42.85", + "geo_loc_longitude": "-2.6667" + }, + "Marbella": { + "geo_loc_latitude": "36.51543", + "geo_loc_longitude": "-4.88583" + }, + "Almeria": { + "geo_loc_latitude": "36.72016", + "geo_loc_longitude": "-4.42034" + }, + "El Ejido": { + "geo_loc_latitude": "36.772153", + "geo_loc_longitude": "-2.810602" + }, + "Antequera": { + "geo_loc_latitude": "37.030842", + "geo_loc_longitude": "-4.528663" + }, + "Huercal-Overa":{ + "geo_loc_latitude": "37.395605", + "geo_loc_longitude": "-1.942236" + }, + "Ronda":{ + "geo_loc_latitude": "36.745518", + "geo_loc_longitude": "-5.163095" + }, + "Velez-Malaga":{ + "geo_loc_latitude": "36.778904", + "geo_loc_longitude": "-4.100808" + }, + "Motril":{ + "geo_loc_latitude": "36.746111", + "geo_loc_longitude": "-3.515" + }, + "Mataró":{ + "geo_loc_latitude": "41.540444", + "geo_loc_longitude": "2.443782" + }, + "Calella":{ + "geo_loc_latitude": "41.612812", + "geo_loc_longitude": "2.653981" + }, + "Blanes":{ + "geo_loc_latitude": "41.675284", + "geo_loc_longitude": "2.789399" + }, + "Reus":{ + "geo_loc_latitude": "41.154857", + "geo_loc_longitude": "1.106481" + }, + "Tortosa":{ + "geo_loc_latitude": "40.814140", + "geo_loc_longitude": "0.522052" + }, + "Oviedo":{ + "geo_loc_latitude": "43.36316", + "geo_loc_longitude": "-5.85352" + }, + "Jarrio":{ + "geo_loc_latitude": "43.54444", + 
"geo_loc_longitude": "-6.74722" + }, + "Avilés":{ + "geo_loc_latitude": "43.551944", + "geo_loc_longitude": "-5.936111" + }, + "Gijón":{ + "geo_loc_latitude": "43.5345709", + "geo_loc_longitude": "-5.6675865" + }, + "Cangas del Narcea":{ + "geo_loc_latitude": "43.165833", + "geo_loc_longitude": "-6.541666" + }, + "Arriondas":{ + "geo_loc_latitude": "43.381388", + "geo_loc_longitude": "-5.184444" + }, + "Mieres":{ + "geo_loc_latitude": "43.226388", + "geo_loc_longitude": "-5.781388" + }, + "Langreo":{ + "geo_loc_latitude": "43.165833", + "geo_loc_longitude": "-6.541666" + }, + "Santa Coloma de Gramenet":{ + "geo_loc_latitude": "41.448215", + "geo_loc_longitude": "2.211027" + }, + "Montornès del Vallès":{ + "geo_loc_latitude": "41.546434", + "geo_loc_longitude": "2.268351" + }, + "Tudela":{ + "geo_loc_latitude": "42.060165", + "geo_loc_longitude": "-1.608065" + }, + "Estella":{ + "geo_loc_latitude": "42.672969", + "geo_loc_longitude": "-2.032974" + }, + "Viladecavalls":{ + "geo_loc_latitude": "41.552843", + "geo_loc_longitude": "1.952722" + }, + "Manresa":{ + "geo_loc_latitude": "41.727944", + "geo_loc_longitude": "1.823089" + }, + "Barakaldo":{ + "geo_loc_latitude": "43.282025", + "geo_loc_longitude": "-2.984338" + }, + "Gorliz":{ + "geo_loc_latitude": "43.416987", + "geo_loc_longitude": "-2.9448381" + }, + "Urdúliz":{ + "geo_loc_latitude": "43.375278", + "geo_loc_longitude": "-2.960000" + } + +} diff --git a/relecov_tools/conf/laboratory_address.json b/relecov_tools/conf/laboratory_address.json new file mode 100755 index 00000000..4acbf284 --- /dev/null +++ b/relecov_tools/conf/laboratory_address.json @@ -0,0 +1,1455 @@ +{ + "Instituto de Salud Carlos III ": { + "collecting_institution_address": "Crta. 
Pozuelo Majadahonda S/N", + "collecting_institution_email": "info@isciii.es", + "geo_loc_state": "Comunidad de Madrid", + "geo_loc_region": "Madrid", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "submitting_institution_email": "info@isciii.es" + }, + "Hospital de Antequera":{ + "collecting_institution_address": "Avenida Poeta Muñoz Rojas, s/n", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Malaga", + "geo_loc_city": "Antequera", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital La Inmaculada - Huércal Overa":{ + "collecting_institution_address": "Av. de la Dra. Ana Parra, s/n", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Almeria", + "geo_loc_city": "Huercal-Overa", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + + }, + "Hospital de la Serranía":{ + "collecting_institution_address": "Calle San Pedro, Km 2", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Malaga", + "geo_loc_city": "Ronda", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. 
Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital La Axarquía":{ + "collecting_institution_address": "Av. del Sol, 43", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Malaga", + "geo_loc_city": "Velez-Malaga", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital Santa Ana":{ + "collecting_institution_address": "Av. Enrique Martín Cuevas, s/n", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Granada", + "geo_loc_city": "Motril", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital Regional Universitario de Malaga":{ + "collecting_institution_address": "Av. de Carlos Haya, 84", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Malaga", + "geo_loc_city": "Malaga", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital Virgen de las Nieves":{ + "collecting_institution_address": "Av. 
de las Fuerzas Armadas, 2", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Granada", + "geo_loc_city": "Granada", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Complejo Hospitalario de Jaen":{ + "collecting_institution_address": "Avda. del Ejército Español nº10", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Jaen", + "geo_loc_city": "Jaen", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital de Poniente":{ + "collecting_institution_address": "Ctra Almerimar, 31", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Almeria", + "geo_loc_city": "El Ejido", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital Juan Ramon Jimenez":{ + "collecting_institution_address": "Ronda Norte, s/n", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Huelva", + "geo_loc_city": "Huelva", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. 
Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital Reina Sofia":{ + "collecting_institution_address": "Av. Menendez Pidal, s/n", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Cordoba", + "geo_loc_city": "Cordoba", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Plataforma de Medicina Computacional, Fundación Progreso y Salud":{ + "collecting_institution_address": "Avda. Américo Vespucio, 15", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Sevilla", + "geo_loc_city": "Sevilla", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital Puerta del Mar": { + "collecting_institution_address": "Av. Ana de Viya, 21", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Cadiz", + "geo_loc_city": "Cadiz", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. 
Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital Costa del Sol": { + "collecting_institution_address": "A-7, Km 187", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Malaga", + "geo_loc_city": "Marbella", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital Virgen de la Victoria": { + "collecting_institution_address": "Campus de Teatinos, S/N", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Malaga", + "geo_loc_city": "Malaga", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Complejo Hospitalario Torrecardenas": { + "collecting_institution_address": "C. Hermandad de Donantes de Sangre, s/n", + "collecting_institution_email": "joaquin.dopazo@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Almeria", + "geo_loc_city": "Almeria", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital Universitario San Cecilio": { + "collecting_institution_address": "Av. 
del Conocimiento S/N", + "collecting_institution_email": "fegarcia@ugr.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Granada", + "geo_loc_city": "Granada", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Microbiología HUC San Cecilio": { + "collecting_institution_address": "Av. del Conocimiento S/N", + "collecting_institution_email": "fegarcia@ugr.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Granada", + "geo_loc_city": "Granada", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Microbiología. Hospital Universitario Virgen del Rocio": { + "collecting_institution_address": "Avenida Manuel Siurot s/n", + "collecting_institution_email": "josea.lepe.sspa@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Sevilla", + "geo_loc_city": "Sevilla", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital Universitario Virgen del Rocio": { + "collecting_institution_address": "Avenida Manuel Siurot s/n", + "collecting_institution_email": "josea.lepe.sspa@juntadeandalucia.es", + "geo_loc_state": "Andalucia", + "geo_loc_region": "Sevilla", + "geo_loc_city": "Sevilla", + "geo_loc_country": "Spain", + "submitting_institution": "Plataforma de Medicina Computacional, Fundación Progreso y Salud", + "submitting_institution_address": "Avda. 
Américo Vespucio, 15, Sevilla", + "submitting_institution_email": "joaquin.dopazo@juntadeandalucia.es" + }, + "Hospital Clínico Universitario Lozano Blesa": { + "collecting_institution_address": "S. Juan Bosco, 15", + "collecting_institution_email": "rbenito@salud.aragon.es", + "geo_loc_state": "Aragon", + "geo_loc_region": "Zaragoza", + "geo_loc_city": "Zaragoza", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Clínico Universitario Lozano Blesa", + "submitting_institution_address": "S. Juan Bosco, 15", + "submitting_institution_email": "rbenito@salud.aragon.es" + }, + "Hospital Universitario Miguel Servet": { + "collecting_institution_address": "P.º Isabel la Católica, 1-3", + "collecting_institution_email": "rbenito@salud.aragon.es", + "geo_loc_state": "Aragon", + "geo_loc_region": "Zaragoza", + "geo_loc_city": "Zaragoza", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Miguel Servet", + "submitting_institution_address": "P.º Isabel la Católica, 1-3", + "submitting_institution_email": "rbenito@salud.aragon.es" + }, + "Centro de Investigación Biomédica de Aragón": { + "collecting_institution_address": "Avenida San Juan Bosco, 13", + "collecting_institution_email": "mhpstrunk.iacs@aragon.es", + "geo_loc_state": "Aragon", + "geo_loc_region": "Zaragoza", + "geo_loc_city": "Zaragoza", + "geo_loc_country": "Spain", + "submitting_institution": "Centro de Investigación Biomédica de Aragón", + "submitting_institution_address": "Avenida San Juan Bosco 13", + "submitting_institution_email": "mhpstrunk.iacs@aragon.es" + }, + "Hospital Universitario Central de Asturias": { + "collecting_institution_address": "Av. Roma, s/n", + "collecting_institution_email": "santiago.melon@sespa.es", + "geo_loc_state": "Asturias", + "geo_loc_region": "Asturias", + "geo_loc_city": "Oviedo", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Central de Asturias", + "submitting_institution_address": "Av. 
Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Servicio de Microbiologia HU Son Espases": { + "collecting_institution_address": " Ctra. Valldemossa 79", + "collecting_institution_email": "antonio.oliver@ssib.es", + "geo_loc_state": "Islas Baleares", + "geo_loc_region": "Palma de Mallorca", + "geo_loc_city": "Palma de Mallorca", + "geo_loc_country": "Spain", + "submitting_institution": "Servicio de Microbiologia HU Son Espases", + "submitting_institution_address": " Ctra. Valldemossa 79", + "submitting_institution_email": "antonio.oliver@ssib.es" + }, + "Hospital Universitario Ntra. Sra de Candelaria (HUNSC)": { + "collecting_institution_address": "Carretera del Rosario 145", + "collecting_institution_email": "jalcflo@gobiernodecanarias.org", + "geo_loc_state": "Islas Canarias", + "geo_loc_region": "Santa Cruz de Tenerife", + "geo_loc_city": "Santa Cruz de Tenerife", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Ntra. Sra de Candelaria (HUNSC)", + "submitting_institution_address": "Carretera del Rosario 145", + "submitting_institution_email": "jalcflo@gobiernodecanarias.org" + }, + "Instituto Tecnológico y de Energías Renovables": { + "collecting_institution_address": "Polígono Industrial de Granadilla, s/n", + "collecting_institution_email": "jlorenzo@iter.es", + "geo_loc_state": "Islas Canarias", + "geo_loc_region": "Santa Cruz de Tenerife", + "geo_loc_city": "Santa Cruz de Tenerife", + "geo_loc_country": "Spain", + "submitting_institution": "Instituto Tecnológico y de Energías Renovables", + "submitting_institution_address": "Polígono Industrial de Granadilla, s/n", + "submitting_institution_email": "jlorenzo@iter.es" + }, + "Hospital Universitario de Canarias": { + "collecting_institution_address": "Carretera Ofra s/n, La Laguna", + "collecting_institution_email": "", + "geo_loc_state": "Islas Canarias", + "geo_loc_region": "Santa Cruz de Tenerife", + "geo_loc_city": "Santa Cruz de Tenerife", + 
"geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario de Canarias", + "submitting_institution_address": "Carretera Ofra s/n, La Laguna", + "submitting_institution_email": "" + }, + "Hospital Universitario de Gran Canaria": { + "collecting_institution_address": "C. Pl. Barranco de la Ballena s/n", + "collecting_institution_email": "", + "geo_loc_state": "Islas Canarias", + "geo_loc_region": "Las Palmas", + "geo_loc_city": "Las Palmas", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario de Gran Canaria", + "submitting_institution_address": "C. Pl. Barranco de la Ballena s/n", + "submitting_institution_email": "" + }, + "Hospital Insular de Lanzarote": { + "collecting_institution_address": "C. Juan de Quesada, s/n, Arrecife", + "collecting_institution_email": "", + "geo_loc_state": "Islas Canarias", + "geo_loc_region": "Las Palmas", + "geo_loc_city": "Las Palmas", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Insular de Lanzarote", + "submitting_institution_address": "C. Juan de Quesada, s/n, Arrecife", + "submitting_institution_email": "" + }, + "Hospital Universitario Marqués de Valdecilla": { + "collecting_institution_address": "Av. Valdecilla s/n", + "collecting_institution_email": "", + "geo_loc_state": "Cantabria", + "geo_loc_region": "Cantabria", + "geo_loc_city": "Santander", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Marqués de Valdecilla", + "submitting_institution_address": "Av. Valdecilla s/n", + "submitting_institution_email": "" + }, + "Hospital Sierrallana": { + "collecting_institution_address": "Bo. Ganzo, s/n, Torrelavega", + "collecting_institution_email": "", + "geo_loc_state": "Cantabria", + "geo_loc_region": "Cantabria", + "geo_loc_city": "Santander", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Sierrallana", + "submitting_institution_address": "Bo. 
Ganzo, s/n, Torrelavega", + "submitting_institution_email": "" + }, + "Hospital General Universitario de Ciudad Real": { + "collecting_institution_address": "C. Obispo Rafael Torija, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Castilla-La Mancha", + "geo_loc_region": "Ciudad Real", + "geo_loc_city": "Ciudad Real", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital General Universitario de Ciudad Real", + "submitting_institution_address": "C. Obispo Rafael Torija, s/n", + "submitting_institution_email": "" + }, + "Hospital General La Mancha Centro": { + "collecting_institution_address": "Av. Constitución, 3", + "collecting_institution_email": "", + "geo_loc_state": "Castilla-La Mancha", + "geo_loc_region": "Ciudad Real", + "geo_loc_city": "Alcazar de San Juan", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital General La Mancha Centro", + "submitting_institution_address": "Av. Constitución, 3", + "submitting_institution_email": "" + }, + "Hospital Virgen de Altagracia": { + "collecting_institution_address": "Avda. D. Emiliano García Roldan, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Castilla-La Mancha", + "geo_loc_region": "Ciudad Real", + "geo_loc_city": "Manzanares", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Virgen de Altagracia", + "submitting_institution_address": "Avda. D. Emiliano García Roldan, s/n", + "submitting_institution_email": "" + }, + "Hospital General de Valdepeñas": { + "collecting_institution_address": "Av. de los Estudiantes, s/n, Valdepeñas", + "collecting_institution_email": "", + "geo_loc_state": "Castilla-La Mancha", + "geo_loc_region": "Ciudad Real", + "geo_loc_city": "Valdepeñas", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital General de Valdepeñas", + "submitting_institution_address": "Av. 
de los Estudiantes, s/n, Valdepeñas", + "submitting_institution_email": "" + }, + "Hospital Público Santa Barbara": { + "collecting_institution_address": "C. Malagón s/n", + "collecting_institution_email": "", + "geo_loc_state": "Castilla-La Mancha", + "geo_loc_region": "Ciudad Real", + "geo_loc_city": "Puertollano", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Público Santa Barbara", + "submitting_institution_address": "C. Malagón s/n", + "submitting_institution_email": "" + }, + "Hospital Virgen de la Luz": { + "collecting_institution_address": "Hermandad de Donantes de Sangre, 1", + "collecting_institution_email": "", + "geo_loc_state": "Castilla-La Mancha", + "geo_loc_region": "Cuenca", + "geo_loc_city": "Cuenca", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Virgen de la Luz", + "submitting_institution_address": "Hermandad de Donantes de Sangre, 1", + "submitting_institution_email": "" + }, + "Hospital Universitario de Guadalajara": { + "collecting_institution_address": "C. Donante de Sangre, S/N", + "collecting_institution_email": "", + "geo_loc_state": "Castilla-La Mancha", + "geo_loc_region": "Guadalajara", + "geo_loc_city": "Guadalajara", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario de Guadalajara", + "submitting_institution_address": "C. Donante de Sangre, S/N", + "submitting_institution_email": "" + }, + "Hospital Virgen de la Salud": { + "collecting_institution_address": "Calle Barber 30", + "collecting_institution_email": "", + "geo_loc_state": "Castilla-La Mancha", + "geo_loc_region": "Toledo", + "geo_loc_city": "Toledo", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Virgen de la Salud", + "submitting_institution_address": "Calle Barber 30", + "submitting_institution_email": "" + }, + "Hospital General Nuestra Sra. del Prado": { + "collecting_institution_address": "CTRA. MADRID, Av. 
Extremadura, KM 114", + "collecting_institution_email": "", + "geo_loc_state": "Castilla-La Mancha", + "geo_loc_region": "Toledo", + "geo_loc_city": "Talavera de la Reina", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital General Nuestra Sra. del Prado", + "submitting_institution_address": "CTRA. MADRID, Av. Extremadura, KM 114", + "submitting_institution_email": "" + }, + "Consejería de Sanidad": { + "collecting_institution_address": "Avda. de Francia, 4.", + "collecting_institution_email": "", + "geo_loc_state": "Castilla-La Mancha", + "geo_loc_region": "Toledo", + "geo_loc_city": "Toledo", + "geo_loc_country": "Spain", + "submitting_institution": "Consejería de Sanidad", + "submitting_institution_address": "Avda. de Francia, 4.", + "submitting_institution_email": "" + }, + "Hospital General Universitario de Albacete": { + "collecting_institution_address": "Calle Hermanos Falcó 37", + "collecting_institution_email": "", + "geo_loc_state": "Castilla-La Mancha", + "geo_loc_region": "Albacete", + "geo_loc_city": "Albacete", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital General Universitario de Albacete", + "submitting_institution_address": "Calle Hermanos Falcó 37", + "submitting_institution_email": "" + }, + "Consorcio LUCIA (SACYL,ITACYL UBU,UVa)": { + "collecting_institution_address": "Paseo de Belen 19", + "collecting_institution_email": "", + "geo_loc_state": "Castilla y León", + "geo_loc_region": "Valladolid", + "geo_loc_city": "Valladolid", + "geo_loc_country": "Spain", + "submitting_institution": "Consorcio LUCIA (SACYL,ITACYL UBU,UVa)", + "submitting_institution_address": "Paseo de Belen 19", + "submitting_institution_email": "" + }, + "Complejo Asistencial Universitario de Salamanca": { + "collecting_institution_address": "P.º de San Vicente, 58", + "collecting_institution_email": "", + "geo_loc_state": "Castilla y León", + "geo_loc_region": "Salamanca", + "geo_loc_city": "Salamanca", + "geo_loc_country": "Spain", + 
"submitting_institution": "Complejo Asistencial Universitario de Salamanca", + "submitting_institution_address": "P.º de San Vicente, 58", + "submitting_institution_email": "" + }, + "Gerencia de Salud de Area de Soria": { + "collecting_institution_address": "P.º el Espolón", + "collecting_institution_email": "", + "geo_loc_state": "Castilla y León", + "geo_loc_region": "Soria", + "geo_loc_city": "Soria", + "geo_loc_country": "Spain", + "submitting_institution": "Gerencia de Salud de Area de Soria", + "submitting_institution_address": "P.º el Espolón", + "submitting_institution_email": "" + }, + "Hospital General Río Carrión": { + "collecting_institution_address": "Av. Donantes de Sangre, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Castilla y León", + "geo_loc_region": "Palencia", + "geo_loc_city": "Palencia", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital General Río Carrión", + "submitting_institution_address": "Av. Donantes de Sangre, s/n", + "submitting_institution_email": "" + }, + "Hospital Nuestra Señora de Sonsoles": { + "collecting_institution_address": " Av. Juan Carlos I, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Castilla y León", + "geo_loc_region": "Avila", + "geo_loc_city": "Avila", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Nuestra Señora de Sonsoles", + "submitting_institution_address": " Av. Juan Carlos I, s/n", + "submitting_institution_email": "" + }, + "Hospital Universitario de Burgos": { + "collecting_institution_address": "Av. Islas Baleares, 3", + "collecting_institution_email": "", + "geo_loc_state": "Castilla y León", + "geo_loc_region": "Burgos", + "geo_loc_city": "Burgos", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario de Burgos", + "submitting_institution_address": "Av. 
Islas Baleares, 3", + "submitting_institution_email": "" + }, + "Hospital Universitari Vall d'Hebron": { + "collecting_institution_address": " Passeig de la Vall d'Hebron, 119-129", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Barcelona", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitari Vall d'Hebron", + "submitting_institution_address": " Passeig de la Vall d'Hebron, 119-129", + "submitting_institution_email": "" + }, + "Hospital Universitari Germans Trias i Pujol (HUGTiP)": { + "collecting_institution_address": " Crta del Canyet s/n", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Badalona", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitari Germans Trias i Pujol (HUGTiP)", + "submitting_institution_address": " Crta del Canyet s/n", + "submitting_institution_email": "" + }, + "Hospital Clínic de Barcelona (HCB)": { + "collecting_institution_address": "C/Villarroel 170", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Barcelona", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Clínic de Barcelona (HCB)", + "submitting_institution_address": "C/Villarroel 170", + "submitting_institution_email": "" + }, + "Hospital Universitari Bellvitge (HUB)": { + "collecting_institution_address": "Carrer Feixa Llarga s/n", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "L'Hospitalet de Llobregat", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitari Bellvitge", + "submitting_institution_address": "Carrer Feixa Llarga s/n", + "submitting_institution_email": "" + }, + "Lab Clínic ICS Camp de Tarragona-Terres de l'Ebre. 
Hospital Joan XXIII": { + "collecting_institution_address": "c/ Dr. Mallafrè Guasch, 4,", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Tarragona", + "geo_loc_city": "Tarragona", + "geo_loc_country": "Spain", + "submitting_institution": "Lab Clínic ICS Camp de Tarragona-Terres de l'Ebre. Hospital Joan XXIII", + "submitting_institution_address": "c/ Dr. Mallafrè Guasch, 4,", + "submitting_institution_email": "" + }, + "LABORATORI DE REFERENCIA DE CATALUNYA": { + "collecting_institution_address": "Carrer de la Selva, 10", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Barcelona", + "geo_loc_country": "Spain", + "submitting_institution": "LABORATORI DE REFERENCIA DE CATALUNYA", + "submitting_institution_address": "Carrer de la Selva, 10", + "submitting_institution_email": "" + }, + "Banc de Sang i Teixits Catalunya": { + "collecting_institution_address": "Passeig del Taulat, 116", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Barcelona", + "geo_loc_country": "Spain", + "submitting_institution": "Banc de Sang i Teixits Catalunya", + "submitting_institution_address": "Passeig del Taulat, 116", + "submitting_institution_email": "" + }, + "Laboratorio Echevarne Sant Cugat del Vallès": { + "collecting_institution_address": "Carrer de Martorell,20 Sant Cugat del Vallès", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Sant Cugat del Valles", + "geo_loc_country": "Spain", + "submitting_institution": "Laboratorio Echevarne Sant Cugat del Vallès", + "submitting_institution_address": "Carrer de Martorell,20 Sant Cugat del Vallès", + "submitting_institution_email": "" + }, + "Unitat de Genòmica i Medicina Personalitzada - Hospital Dr. 
Josep Trueta": { + "collecting_institution_address": "Avinguda de França, S/N,", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Girona", + "geo_loc_city": "Girona", + "geo_loc_country": "Spain", + "submitting_institution": "Unitat de Genòmica i Medicina Personalitzada - Hospital Dr. Josep Trueta", + "submitting_institution_address": "Avinguda de França, S/N,", + "submitting_institution_email": "" + }, + "Hospital Universitario San Jorge": { + "collecting_institution_address": "Av. Martínez de Velasco, 36", + "collecting_institution_email": "", + "geo_loc_state": "Aragon", + "geo_loc_region": "Huesca", + "geo_loc_city": "Huesca", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario San Jorge", + "submitting_institution_address": "Av. Martínez de Velasco, 36", + "submitting_institution_email": "" + }, + "Hospital Universitario de Ceuta": { + "collecting_institution_address": "Colmenar s/n", + "collecting_institution_email": "", + "geo_loc_state": "Ceuta", + "geo_loc_region": "Ceuta", + "geo_loc_city": "Ceuta", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario de Ceuta", + "submitting_institution_address": "Colmenar s/n", + "submitting_institution_email": "" + }, + "Hospital Comarcal de Melilla": { + "collecting_institution_address": "Remonta, 2", + "collecting_institution_email": "", + "geo_loc_state": "Melilla", + "geo_loc_region": "Melilla", + "geo_loc_city": "Melilla", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Comarcal de Melilla", + "submitting_institution_address": "Remonta, 2", + "submitting_institution_email": "" + }, + "Hospital de Mostoles": { + "collecting_institution_address": "C. Dr. 
Luis Montes, S/N", + "collecting_institution_email": "", + "geo_loc_state": "Comunidad de Madrid", + "geo_loc_region": "Madrid", + "geo_loc_city": "Mostoles", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital de Mostoles", + "submitting_institution_address": "C. Dr. Luis Montes, S/N", + "submitting_institution_email": "" + }, + "Hospital Clínico San Carlos": { + "collecting_institution_address": "Calle del Prof Martín Lagos", + "collecting_institution_email": "", + "geo_loc_state": "Comunidad de Madrid", + "geo_loc_region": "Madrid", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Clínico San Carlos", + "submitting_institution_address": "Calle del Prof Martín Lagos", + "submitting_institution_email": "" + }, + "Centro Militar de Veterinaria de la Defensa": { + "collecting_institution_address": "C. Darío Gazapo 3", + "collecting_institution_email": "", + "geo_loc_state": "Comunidad de Madrid", + "geo_loc_region": "Madrid", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain", + "submitting_institution": "Centro Militar de Veterinaria de la Defensa", + "submitting_institution_address": "C. Darío Gazapo 3", + "submitting_institution_email": "" + }, + "Hospital Universitario Severo Ochoa": { + "collecting_institution_address": "Av. de Orellana, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Comunidad de Madrid", + "geo_loc_region": "Madrid", + "geo_loc_city": "Leganés", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Severo Ochoa", + "submitting_institution_address": "Av. de Orellana, s/n", + "submitting_institution_email": "" + }, + "Hospital Universitario de Getafe": { + "collecting_institution_address": "Carr. 
Madrid - Toledo, Km 12,500, Getafe", + "collecting_institution_email": "", + "geo_loc_state": "Comunidad de Madrid", + "geo_loc_region": "Madrid", + "geo_loc_city": "Getafe", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario de Getafe", + "submitting_institution_address": "Carr. Madrid - Toledo, Km 12,500, Getafe", + "submitting_institution_email": "" + }, + "Presidencia de Gobierno": { + "collecting_institution_address": "", + "collecting_institution_email": "", + "geo_loc_state": "Comunidad de Madrid", + "geo_loc_region": "Madrid", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain", + "submitting_institution": "Presidencia de Gobierno", + "submitting_institution_address": "", + "submitting_institution_email": "" + }, + "Ministerio de Sanidad, Servicios Sociales e Igualdad": { + "collecting_institution_address": "P.º del Prado, 18", + "collecting_institution_email": "", + "geo_loc_state": "Comunidad de Madrid", + "geo_loc_region": "Madrid", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain", + "submitting_institution": "Ministerio de Sanidad, Servicios Sociales e Igualdad", + "submitting_institution_address": "P.º del Prado, 18", + "submitting_institution_email": "" + }, + "HU Virgen de la Arrixaca": { + "collecting_institution_address": "Ctra. Madrid-Cartagena, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Murcia", + "geo_loc_region": "Murcia", + "geo_loc_city": "Murcia", + "geo_loc_country": "Spain", + "submitting_institution": "HU Virgen de la Arrixaca", + "submitting_institution_address": "Ctra. Madrid-Cartagena, s/n", + "submitting_institution_email": "" + }, + "Hospital Universitario Reina Sofía": { + "collecting_institution_address": "Av. 
Intendente Jorge Palacios, 1", + "collecting_institution_email": "", + "geo_loc_state": "Murcia", + "geo_loc_region": "Murcia", + "geo_loc_city": "Murcia", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Reina Sofía", + "submitting_institution_address": "Av. Intendente Jorge Palacios, 1", + "submitting_institution_email": "" + }, + "Hospital General Universitario Santa Lucía": { + "collecting_institution_address": "C. Minarete, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Murcia", + "geo_loc_region": "Murcia", + "geo_loc_city": "Cartagena", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital General Universitario Santa Lucía", + "submitting_institution_address": "C. Minarete, s/n", + "submitting_institution_email": "" + }, + "Hospital General Universitario Los Arcos del Mar Menor": { + "collecting_institution_address": "Paraje Torre Octavio, 54, Pozo Aledo", + "collecting_institution_email": "", + "geo_loc_state": "Murcia", + "geo_loc_region": "Murcia", + "geo_loc_city": "Pozo Aledo", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital General Universitario Los Arcos del Mar Menor", + "submitting_institution_address": "Paraje Torre Octavio, 54, Pozo Aledo", + "submitting_institution_email": "" + }, + "Hospital General Universitario Morales Meseguer": { + "collecting_institution_address": "Av. Marqués de Los Vélez, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Murcia", + "geo_loc_region": "Murcia", + "geo_loc_city": "Murcia", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital General Universitario Morales Meseguer", + "submitting_institution_address": "Av. Marqués de Los Vélez, s/n", + "submitting_institution_email": "" + }, + "Hospital Clínico Universitario Virgen de la Arrixaca": { + "collecting_institution_address": "Ctra. 
Madrid-Cartagena, s/n, El Palmar", + "collecting_institution_email": "", + "geo_loc_state": "Murcia", + "geo_loc_region": "Murcia", + "geo_loc_city": "Murcia", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Clínico Universitario Virgen de la Arrixaca", + "submitting_institution_address": "Ctra. Madrid-Cartagena, s/n, El Palmar", + "submitting_institution_email": "" + }, + "Hospital Rafael Méndez de Lorca": { + "collecting_institution_address": "Ctra. N-340, Lorca", + "collecting_institution_email": "", + "geo_loc_state": "Murcia", + "geo_loc_region": "Murcia", + "geo_loc_city": "Lorca", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Rafael Méndez de Lorca", + "submitting_institution_address": "Ctra. N-340, Lorca", + "submitting_institution_email": "" + }, + "Hospital de la Vega Lorenzo Guirao": { + "collecting_institution_address": "Vereda de Morcillo, s/n, Cieza", + "collecting_institution_email": "", + "geo_loc_state": "Murcia", + "geo_loc_region": "Murcia", + "geo_loc_city": "Cieza", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital de la Vega Lorenzo Guirao", + "submitting_institution_address": "Vereda de Morcillo, s/n, Cieza", + "submitting_institution_email": "" + }, + "Hospital Universitario Donostia": { + "collecting_institution_address": "Begiristain Doktorea Pasealekua, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Pais Vasco", + "geo_loc_region": "Guipúzcoa", + "geo_loc_city": "Donostia", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Donostia", + "submitting_institution_address": "Begiristain Doktorea Pasealekua, s/n", + "submitting_institution_email": "" + }, + "Hospital Universitario de Badajoz": { + "collecting_institution_address": "Av. 
de Elvas, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Extremadura", + "geo_loc_region": "Badajoz", + "geo_loc_city": "Badajoz", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario de Badajoz", + "submitting_institution_address": "Av. de Elvas, s/n", + "submitting_institution_email": "" + }, + "Hospital de Mérida": { + "collecting_institution_address": "Av. Don Antonio Campos Hoyos, 26", + "collecting_institution_email": "", + "geo_loc_state": "Extremadura", + "geo_loc_region": "Badajoz", + "geo_loc_city": "Merida", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital de Mérida", + "submitting_institution_address": "Av. Don Antonio Campos Hoyos, 26", + "submitting_institution_email": "" + }, + "Hospital San Pedro de Alcantara": { + "collecting_institution_address": "Av. Pablo Naranjo Porras, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Extremadura", + "geo_loc_region": "Caceres", + "geo_loc_city": "Caceres", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital San Pedro de Alcantara", + "submitting_institution_address": "Av. Pablo Naranjo Porras, s/n", + "submitting_institution_email": "" + }, + "Gerencia de Atención Primaria Pontevedra Sur": { + "collecting_institution_address": "Av. das Camelias, 109, Vigo", + "collecting_institution_email": "", + "geo_loc_state": "Galicia", + "geo_loc_region": "Pontevedra", + "geo_loc_city": "Vigo", + "geo_loc_country": "Spain", + "submitting_institution": "Gerencia de Atención Primaria Pontevedra Sur", + "submitting_institution_address": "Av. das Camelias, 109, Vigo", + "submitting_institution_email": "" + }, + "Hospital Universitario Lucus Augusti": { + "collecting_institution_address": " Rúa Dr. 
Ulises Romero, 1", + "collecting_institution_email": "", + "geo_loc_state": "Galicia", + "geo_loc_region": "Lugo", + "geo_loc_city": "Lugo", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Lucus Augusti", + "submitting_institution_address": " Rúa Dr. Ulises Romero, 1", + "submitting_institution_email": "" + }, + "Hospital Universitario de A Coruña": { + "collecting_institution_address": "As Xubias, 84", + "collecting_institution_email": "", + "geo_loc_state": "Galicia", + "geo_loc_region": "A Coruña", + "geo_loc_city": "A Coruña", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario de A Coruña", + "submitting_institution_address": "As Xubias, 84", + "submitting_institution_email": "" + }, + "Hospital Clínico Universitario de Santiago": { + "collecting_institution_address": "Rúa da Choupana, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Galicia", + "geo_loc_region": "A Coruña", + "geo_loc_city": "Santiago de Compostela", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Clínico Universitario de Santiago", + "submitting_institution_address": "Rúa da Choupana, s/n", + "submitting_institution_email": "" + }, + "Hospital Universitario de Navarra": { + "collecting_institution_address": "C. de Irunlarrea, 3, Pamplona", + "collecting_institution_email": "", + "geo_loc_state": "Navarra", + "geo_loc_region": "Navarra", + "geo_loc_city": "Pamplona", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario de Navarra", + "submitting_institution_address": "C. 
de Irunlarrea, 3, Pamplona", + "submitting_institution_email": "" + }, + "Hospital General Universitario de Elx": { + "collecting_institution_address": "Carrer Almazara, 11, Elche", + "collecting_institution_email": "", + "geo_loc_state": "Comunitat Valenciana", + "geo_loc_region": "Alicante", + "geo_loc_city": "Elche", + "geo_loc_country": "Spain", + "submitting_institution": "FISABIO-Epidemiología Molecular", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital Universitario San Pedro": { + "collecting_institution_address": "C. Piqueras, 98", + "collecting_institution_email": "", + "geo_loc_state": "La Rioja", + "geo_loc_region": "La Rioja", + "geo_loc_city": "Logroño", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario San Pedro", + "submitting_institution_address": "C. Piqueras, 98", + "submitting_institution_email": "" + }, + "Hospital Universitario Txagorritxu": { + "collecting_institution_address": " Jose Atxotegi Kalea, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Pais Vasco", + "geo_loc_region": "Alava", + "geo_loc_city": "Gasteiz", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Txagorritxu", + "submitting_institution_address": " Jose Atxotegi Kalea, s/n", + "submitting_institution_email": "" + }, + "Hospital Universitari Arnau de Vilanova": { + "collecting_institution_address": "Av. Alcalde Rovira Roure, 80", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Lleida", + "geo_loc_city": "Lleida", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitari Arnau de Vilanova", + "submitting_institution_address": "Av. Alcalde Rovira Roure, 80", + "submitting_institution_email": "" + }, + "Hospital Clínico Universitario de Valencia": { + "collecting_institution_address": "Av. 
de Blasco Ibañez, 17", + "collecting_institution_email": "", + "geo_loc_state": "Comunitat Valenciana", + "geo_loc_region": "Valencia", + "geo_loc_city": "Valencia", + "geo_loc_country": "Spain", + "submitting_institution": "FISABIO-Epidemiología Molecular", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital Universitario y Politécnico La Fe de Valencia": { + "collecting_institution_address": "Avinguda de Fernando Abril Martorell, 106", + "collecting_institution_email": "", + "geo_loc_state": "Comunitat Valenciana", + "geo_loc_region": "Valencia", + "geo_loc_city": "Valencia", + "geo_loc_country": "Spain", + "submitting_institution": "FISABIO-Epidemiología Molecular", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital Doctor Peset de Valencia": { + "collecting_institution_address": "Av. de Gaspar Aguilar, 90", + "collecting_institution_email": "", + "geo_loc_state": "Comunitat Valenciana", + "geo_loc_region": "Valencia", + "geo_loc_city": "Valencia", + "geo_loc_country": "Spain", + "submitting_institution": "FISABIO-Epidemiología Molecular", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital de l'Esperança": { + "collecting_institution_address": "Ptge. 
de Sant Josep la Muntanya, 12", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Barcelona", + "geo_loc_country": "Spain", + "submitting_institution": "LABORATORI DE REFERENCIA DE CATALUNYA", + "submitting_institution_address": "Carrer de la Selva, 10", + "submitting_institution_email": "" + }, + "Centre Forum-Parc Salut Mar": { + "collecting_institution_address": "Carrer de Llull, 410", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Barcelona", + "geo_loc_country": "Spain", + "submitting_institution": "LABORATORI DE REFERENCIA DE CATALUNYA", + "submitting_institution_address": "Carrer de la Selva, 10", + "submitting_institution_email": "" + }, + "Hospital Municipal de Badalona": { + "collecting_institution_address": "Via Augusta, 9-13", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Badalona", + "geo_loc_country": "Spain", + "submitting_institution": "LABORATORI DE REFERENCIA DE CATALUNYA", + "submitting_institution_address": "Carrer de la Selva, 10", + "submitting_institution_email": "" + }, + "Hospital de Mataro": { + "collecting_institution_address": "Carrer de Cirera, 230", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Mataró", + "geo_loc_country": "Spain", + "submitting_institution": "LABORATORI DE REFERENCIA DE CATALUNYA", + "submitting_institution_address": "Carrer de la Selva, 10", + "submitting_institution_email": "" + }, + "Parc Sanitari Pere Virgili": { + "collecting_institution_address": "Carrer d'Esteve Terradas, 30", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Barcelona", + "geo_loc_country": "Spain", + "submitting_institution": "LABORATORI DE REFERENCIA DE CATALUNYA", + 
"submitting_institution_address": "Carrer de la Selva, 10", + "submitting_institution_email": "" + }, + "Hospital Comarcal St. Jaume de Calella": { + "collecting_institution_address": "Carrer de Sant Jaume, 209-217", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Calella", + "geo_loc_country": "Spain", + "submitting_institution": "LABORATORI DE REFERENCIA DE CATALUNYA", + "submitting_institution_address": "Carrer de la Selva, 10", + "submitting_institution_email": "" + }, + "Hospital Comarcal de la Selva": { + "collecting_institution_address": "Carrer Acces Cala Sant Francesc, 5", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Blanes", + "geo_loc_country": "Spain", + "submitting_institution": "LABORATORI DE REFERENCIA DE CATALUNYA", + "submitting_institution_address": "Carrer de la Selva, 10", + "submitting_institution_email": "" + }, + "Hospital Universitari Sant Joan de Reus": { + "collecting_institution_address": "Avinguda del Doctor Josep Laporte, 2", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Tarragona", + "geo_loc_city": "Reus", + "geo_loc_country": "Spain", + "submitting_institution": "LABORATORI DE REFERENCIA DE CATALUNYA", + "submitting_institution_address": "Carrer de la Selva, 10", + "submitting_institution_email": "" + }, + "Clinica Terres de l'Ebre": { + "collecting_institution_address": "Plasa 1 d'octubre, 6-7", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Tarragona", + "geo_loc_city": "Tortosa", + "geo_loc_country": "Spain", + "submitting_institution": "LABORATORI DE REFERENCIA DE CATALUNYA", + "submitting_institution_address": "Carrer de la Selva, 10", + "submitting_institution_email": "" + }, + "Hospital del Mar": { + "collecting_institution_address": "Passeig Marítim, 25-29", + "collecting_institution_email": 
"", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Barcelona", + "geo_loc_country": "Spain", + "submitting_institution": "LABORATORI DE REFERENCIA DE CATALUNYA", + "submitting_institution_address": "Carrer de la Selva, 10", + "submitting_institution_email": "" + }, + "Hospital de Jarrio": { + "collecting_institution_address": "Lugar Jarrio, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Asturias", + "geo_loc_region": "Asturias", + "geo_loc_city": "Jarrio", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Central de Asturias", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital Carmen y Severo Ochoa": { + "collecting_institution_address": "Lugar Sienra, 11", + "collecting_institution_email": "", + "geo_loc_state": "Asturias", + "geo_loc_region": "Asturias", + "geo_loc_city": "Cangas del Narcea", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Central de Asturias", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital Universitario San Agustín": { + "collecting_institution_address": "Calle Cabruñana, 19", + "collecting_institution_email": "", + "geo_loc_state": "Asturias", + "geo_loc_region": "Asturias", + "geo_loc_city": "Avilés", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Central de Asturias", + "submitting_institution_address": "Av. 
Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital Monte Naranco": { + "collecting_institution_address": "Avenida Doctores Fernández Vega, 107", + "collecting_institution_email": "", + "geo_loc_state": "Asturias", + "geo_loc_region": "Asturias", + "geo_loc_city": "Oviedo", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Central de Asturias", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital Universitario de Cabueñes": { + "collecting_institution_address": "Calle Prados, 395", + "collecting_institution_email": "", + "geo_loc_state": "Asturias", + "geo_loc_region": "Asturias", + "geo_loc_city": "Gijón", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Central de Asturias", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital de Jove": { + "collecting_institution_address": "Avenida Eduardo Castro, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Asturias", + "geo_loc_region": "Asturias", + "geo_loc_city": "Gijón", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Central de Asturias", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital del Oriente de Asturias F. Grande Covián": { + "collecting_institution_address": "Barrio Castañera, 42", + "collecting_institution_email": "", + "geo_loc_state": "Asturias", + "geo_loc_region": "Asturias", + "geo_loc_city": "Arriondas", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Central de Asturias", + "submitting_institution_address": "Av. 
Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital Vital Alvarez Buylla": { + "collecting_institution_address": "Calle Vistalegre, 2", + "collecting_institution_email": "", + "geo_loc_state": "Asturias", + "geo_loc_region": "Asturias", + "geo_loc_city": "Mieres", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Central de Asturias", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital Valle del Nalón": { + "collecting_institution_address": "Polígono de Riaño, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Asturias", + "geo_loc_region": "Asturias", + "geo_loc_city": "Langreo", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Central de Asturias", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "FISABIO-Epidemiología Molecular": { + "collecting_institution_address": "Av. de Catalunya, 21", + "collecting_institution_email": "", + "geo_loc_state": "Comunitat Valenciana", + "geo_loc_region": "Valencia", + "geo_loc_city": "Valencia", + "geo_loc_country": "Spain", + "submitting_institution": "FISABIO-Epidemiología Molecular", + "submitting_institution_address": "Av. de Catalunya, 21", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital General Universitario de Alicante": { + "collecting_institution_address": "Calle Pintor Baeza, 11", + "collecting_institution_email": "", + "geo_loc_state": "Comunitat Valenciana", + "geo_loc_region": "Valencia", + "geo_loc_city": "Alicante", + "geo_loc_country": "Spain", + "submitting_institution": "FISABIO-Epidemiología Molecular", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Consorcio Hospital General Universitario de Valencia": { + "collecting_institution_address": "Av. 
de les Tres Creus, 2", + "collecting_institution_email": "", + "geo_loc_state": "Comunitat Valenciana", + "geo_loc_region": "Valencia", + "geo_loc_city": "Valencia", + "geo_loc_country": "Spain", + "submitting_institution": "FISABIO-Epidemiología Molecular", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "Hospital General Universitario de Castellón": { + "collecting_institution_address": "Avinguda de Benicàssim, 128", + "collecting_institution_email": "", + "geo_loc_state": "Comunitat Valenciana", + "geo_loc_region": "Valencia", + "geo_loc_city": "Castellón de la Plana", + "geo_loc_country": "Spain", + "submitting_institution": "FISABIO-Epidemiología Molecular", + "submitting_institution_address": "Av. Roma, s/n", + "submitting_institution_email": "santiago.melon@sespa.es" + }, + "CAP La Salut EAP Badalona": { + "collecting_institution_address": " Passatge dels Encants, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Badalona", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitari Germans Trias i Pujol (HUGTiP)", + "submitting_institution_address": " Crta del Canyet s/n", + "submitting_institution_email": "" + }, + "CAP Mataró-6 (Gatassa)": { + "collecting_institution_address": "Camí del Mig, 36", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Mataró", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitari Germans Trias i Pujol (HUGTiP)", + "submitting_institution_address": " Crta del Canyet s/n", + "submitting_institution_email": "" + }, + "CAP Can Mariner Santa Coloma-1": { + "collecting_institution_address": "Carrer de Sant Carles, 79", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Tarragona", + "geo_loc_city": "Santa Coloma de Gramenet", + 
"geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitari Germans Trias i Pujol (HUGTiP)", + "submitting_institution_address": " Crta del Canyet s/n", + "submitting_institution_email": "" + }, + "CAP Montmeló (Montornès)": { + "collecting_institution_address": "Carrer de Can Parera, 7", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Tarragona", + "geo_loc_city": "Montornès del Vallès", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitari Germans Trias i Pujol (HUGTiP)", + "submitting_institution_address": " Crta del Canyet s/n", + "submitting_institution_email": "" + }, + "Hospital García Orcoyen": { + "collecting_institution_address": "Calle Sta. Soria, 22", + "collecting_institution_email": "", + "geo_loc_state": "Navarra", + "geo_loc_region": "Navarra", + "geo_loc_city": "Estella", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario de Navarra", + "submitting_institution_address": "C. de Irunlarrea, 3, Pamplona", + "submitting_institution_email": "" + }, + "Hospital Reina Sofía_Tudela": { + "collecting_institution_address": "Carretera de Tarazona, Km. 4", + "collecting_institution_email": "", + "geo_loc_state": "Navarra", + "geo_loc_region": "Navarra", + "geo_loc_city": "Tudela", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario de Navarra", + "submitting_institution_address": "C. de Irunlarrea, 3, Pamplona", + "submitting_institution_email": "" + }, + "Clínica Universidad de Navarra": { + "collecting_institution_address": "Av. de Pío XII, 36", + "collecting_institution_email": "", + "geo_loc_state": "Navarra", + "geo_loc_region": "Navarra", + "geo_loc_city": "Pamplona", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario de Navarra", + "submitting_institution_address": "C. 
de Irunlarrea, 3, Pamplona", + "submitting_institution_email": "" + }, + "CATLAB": { + "collecting_institution_address": "Vial St Jordi, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Viladecavalls", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitari Germans Trias i Pujol (HUGTiP)", + "submitting_institution_address": " Crta del Canyet s/n", + "submitting_institution_email": "" + }, + "Fundació Althaia-Manresa": { + "collecting_institution_address": "Carrer del Dr. Joan Soler, 1-3", + "collecting_institution_email": "", + "geo_loc_state": "Cataluña", + "geo_loc_region": "Barcelona", + "geo_loc_city": "Manresa", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitari Germans Trias i Pujol (HUGTiP)", + "submitting_institution_address": " Crta del Canyet s/n", + "submitting_institution_email": "" + }, + "H U RAMÓN Y CAJAL": { + "collecting_institution_address": "Ctra. Colmenar Viejo, km.9, 100", + "collecting_institution_email": "", + "geo_loc_state": "Comunidad de Madrid", + "geo_loc_region": "Madrid", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain", + "submitting_institution": "H U RAMÓN Y CAJAL", + "submitting_institution_address": "Ctra. Colmenar Viejo, km.9, 100", + "submitting_institution_email": "" + }, + "HU GREGORIO MARAÑÓN": { + "collecting_institution_address": "C. del Dr. Esquerdo, 46", + "collecting_institution_email": "", + "geo_loc_state": "Comunidad de Madrid", + "geo_loc_region": "Madrid", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain", + "submitting_institution": "HU GREGORIO MARAÑÓN", + "submitting_institution_address": "C. del Dr. 
Esquerdo, 46", + "submitting_institution_email": "" + }, + "Hospital Universitario Cruces": { + "collecting_institution_address": "Plaza Cruces, s/n", + "collecting_institution_email": "", + "geo_loc_state": "Pais Vasco", + "geo_loc_region": "Vizcaya", + "geo_loc_city": "Baracaldo", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Cruces", + "submitting_institution_address": "Plaza Cruces, s/n", + "submitting_institution_email": "" + }, + "Hospital de Urduliz-Alfredo Espinosa": { + "collecting_institution_address": "Goieta Kalea, 32", + "collecting_institution_email": "", + "geo_loc_state": "Pais Vasco", + "geo_loc_region": "Vizcaya", + "geo_loc_city": "Urdúliz", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Cruces", + "submitting_institution_address": "Plaza Cruces, s/n", + "submitting_institution_email": "" + }, + "Hospital San Eloy": { + "collecting_institution_address": "Av. A. Miranda, 5002", + "collecting_institution_email": "", + "geo_loc_state": "Pais Vasco", + "geo_loc_region": "Vizcaya", + "geo_loc_city": "Barakaldo", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Cruces", + "submitting_institution_address": "Plaza Cruces, s/n", + "submitting_institution_email": "" + }, + "Hospital de Gorliz": { + "collecting_institution_address": "Urezarantza ibiltokia, 1", + "collecting_institution_email": "", + "geo_loc_state": "Pais Vasco", + "geo_loc_region": "Vizcaya", + "geo_loc_city": "Gorliz", + "geo_loc_country": "Spain", + "submitting_institution": "Hospital Universitario Cruces", + "submitting_institution_address": "Plaza Cruces, s/n", + "submitting_institution_email": "" + }, + "Hospital Virgen Macarena": { + "collecting_institution_address": "Av. Dr. 
class ConfigJson:
    """Read-only accessor for the package JSON configuration.

    The file is parsed once at construction time. Its top-level keys are
    called "topics" and are listed in ``self.topic_config``.
    """

    def __init__(
        self,
        json_file=os.path.join(os.path.dirname(__file__), "conf", "configuration.json"),
    ):
        """Load the configuration file into memory.

        Args:
            json_file (str): Path to the JSON configuration file. Defaults to
                conf/configuration.json located next to this module.
        """
        # The context manager closes the handle even when parsing fails, and
        # the explicit encoding avoids depending on the locale default.
        with open(json_file, encoding="utf-8") as fh:
            self.json_data = json.load(fh)
        self.topic_config = list(self.json_data.keys())

    def get_configuration(self, topic):
        """Obtain the topic configuration from json data

        Args:
            topic (str): Top-level key of the configuration.

        Returns:
            The value stored under ``topic`` or None if the topic is unknown.
        """
        if topic in self.topic_config:
            return self.json_data[topic]
        return None

    def get_topic_data(self, topic, found):
        """Obtain from topic any forward items from json data

        The key is searched directly inside the topic first and then one level
        deeper, inside every value of the topic that is itself a dict.

        Args:
            topic (str): Top-level key of the configuration.
            found (str): Key to look for inside the topic.

        Returns:
            The first matching value, or None when the topic is unknown, the
            topic is not a dict, or the key is not present.
        """
        topic_data = self.get_configuration(topic)
        # Same "None when missing" contract as get_configuration, instead of
        # leaking a KeyError/AttributeError to the caller.
        if not isinstance(topic_data, dict):
            return None
        if found in topic_data:
            return topic_data[found]
        for value in topic_data.values():
            if isinstance(value, dict) and found in value:
                return value[found]
        return None
csv_writer, Error as CsvError +from openpyxl import load_workbook as openpyxl_load_workbook +from pandas import read_excel, ExcelWriter, concat +from pandas.errors import ParserError, EmptyDataError +from relecov_tools.config_json import ConfigJson +from relecov_tools.log_summary import LogSum + + +# from relecov_tools.rest_api import RestApi + +log = logging.getLogger(__name__) +stderr = rich.console.Console( + stderr=True, + style="dim", + highlight=False, + force_terminal=relecov_tools.utils.rich_force_colors(), +) + + +class MetadataError(Exception): + def __init__(self, message): + super().__init__(message) + + +class DownloadManager: + def __init__( + self, + user=None, + passwd=None, + conf_file=None, + download_option=None, + output_location=None, + target_folders=None, + ): + """Initializes the sftp object""" + config_json = ConfigJson() + self.allowed_file_ext = config_json.get_topic_data( + "sftp_handle", "allowed_file_extensions" + ) + sftp_user = user + sftp_passwd = passwd + self.target_folders = target_folders + self.allowed_download_options = config_json.get_topic_data( + "sftp_handle", "allowed_download_options" + ) + if download_option not in self.allowed_download_options: + self.download_option = relecov_tools.utils.prompt_selection( + "Options", self.allowed_download_options + ) + else: + self.download_option = download_option + if conf_file is None: + # self.sftp_server = config_json.get_topic_data("sftp_handle", "sftp_server") + # self.sftp_port = config_json.get_topic_data("sftp_handle", "sftp_port") + self.platform_storage_folder = config_json.get_topic_data( + "sftp_handle", "platform_storage_folder" + ) + self.abort_if_md5_mismatch = ( + True + if config_json.get_topic_data("sftp_handle", "abort_if_md5_mismatch") + == "True" + else False + ) + else: + if not os.path.isfile(conf_file): + log.error("Configuration file %s does not exists", conf_file) + stderr.print( + "[red] Configuration file does not exist. " + conf_file + "!" 
def create_local_folder(self, folder):
    """Create folder to download files in local path using date

    The local folder is created inside ``self.platform_storage_folder`` unless
    ``folder`` already is that storage folder. A trailing "_tmp_processing"
    marker is dropped from the folder name before the path is built.

    Args:
        folder (str): name of remote folder to be downloaded

    Returns:
        local_folder_path(str): path to the new created folder
    """
    log.info("Creating folder %s to download files", folder)
    platform_storage_folder = self.platform_storage_folder
    if platform_storage_folder == folder:
        local_folder_path = platform_storage_folder
    else:
        # str.strip() takes a set of characters, not a suffix: it would also
        # eat legitimate leading/trailing letters of the folder name
        # (e.g. "sergas_tmp_processing" -> "a"). Remove the exact suffix.
        tmp_suffix = "_tmp_processing"
        if folder.endswith(tmp_suffix):
            folder = folder[: -len(tmp_suffix)]
        local_folder_path = os.path.join(platform_storage_folder, folder)
    os.makedirs(local_folder_path, exist_ok=True)
    log.info("created the folder to download files %s", local_folder_path)
    return local_folder_path
def create_files_with_metadata_info(
    self, local_folder, samples_dict, md5_dict, metadata_file
):
    """Copy metadata file from folder, extend samples_dict with md5hash for
    each file. Then create a Json file with this dict

    The metadata file is renamed to "lab_metadata_<prefix>.xlsx" and the json
    is written to "samples_data_<prefix>.json", both inside ``local_folder``.
    ``<prefix>`` is built from the last two components of ``local_folder``
    with the "_tmp_processing" marker removed. ``samples_dict`` itself is not
    modified; a deep copy is extended and dumped.

    Args:
        local_folder (str): Path to folder with downloaded files and output
        samples_dict (dict{str:str}): same structure as validate_remote_files()
        md5_dict (dict(str:str)): Zipped dict of files_list and md5hash_list
        metadata_file (str): Name of the downloaded metadata file to rename it
    """
    # A trailing "/" would otherwise produce an empty last path component.
    folder_parts = local_folder.rstrip("/").split("/")[-2:]
    prefix_file_name = "_".join(folder_parts).replace("_tmp_processing", "")
    # TODO: Move these prefixes to configuration.json
    new_metadata_file = "lab_metadata_" + prefix_file_name + ".xlsx"
    sample_data_file = "samples_data_" + prefix_file_name + ".json"
    sample_data_path = os.path.join(local_folder, sample_data_file)
    os.rename(metadata_file, os.path.join(local_folder, new_metadata_file))
    error_text = "Sample %s incomplete. Not added to final Json"

    data = {}
    for sample, values in copy.deepcopy(samples_dict).items():
        # Check the VALUES of the sample dict: iterating the dict itself only
        # yields its keys, which are always truthy. A sample without R1 or
        # with any empty filename is reported and left out of the json.
        r1_name = values.get("sequence_file_R1_fastq")
        if not r1_name or not all(values.values()):
            self.include_error(str(error_text % sample), sample)
            continue
        # TODO: Move these keys to configuration.json
        values["r1_fastq_filepath"] = local_folder
        values["fastq_r1_md5"] = md5_dict.get(r1_name)
        r2_name = values.get("sequence_file_R2_fastq")
        if r2_name:
            values["r2_fastq_filepath"] = local_folder
            values["fastq_r2_md5"] = md5_dict.get(r2_name)
        data[sample] = values
    with open(sample_data_path, "w", encoding="utf-8") as fh:
        fh.write(json.dumps(data, indent=4, sort_keys=True, ensure_ascii=False))
    log.info("Successfully created json file with samples %s", sample_data_path)
    return
def read_metadata_file(self, meta_f_path, return_data=True):
    """Read excel file, check if the header matches with the one defined in config

    Args:
        meta_f_path (str): Path to the excel_file
        return_data (bool, optional): When False, only validate the header
            and return True instead of the sheet data. Defaults to True.

    Raises:
        MetadataError: If no row contains the configured header flag or the
            header in the excel is different from config

    Returns:
        ws_metadata_lab: openpyxl's workbook metadata sheet of the excel file
        metadata_header: column names of the header
        header_row: row where the header is located in the sheet (1-based)
    """
    # NOTE: this installs the filter for the whole process, not only for
    # the duration of this call.
    warnings.simplefilter(action="ignore", category=UserWarning)
    wb_file = openpyxl_load_workbook(meta_f_path, data_only=True)
    ws_metadata_lab = wb_file[self.metadata_processing.get("excel_sheet")]
    # The header is the first row holding a cell equal to the header flag.
    header_flag = self.metadata_processing.get("header_flag")
    header_row = next(
        (
            idx
            for idx, row in enumerate(ws_metadata_lab.values, start=1)
            if header_flag in row
        ),
        None,
    )
    if header_row is None:
        error_text = "Header could not be found for excel file %s"
        raise MetadataError(str(error_text % os.path.basename(meta_f_path)))
    for cell in ws_metadata_lab[header_row]:
        # Empty or non-text header cells have no strip(); leave them as they
        # are so a malformed header ends in the MetadataError below instead
        # of an AttributeError.
        if isinstance(cell.value, str):
            cell.value = cell.value.strip()
    metadata_header = [x.value for x in ws_metadata_lab[header_row]]
    meta_column_list = self.metadata_lab_heading
    # The first column of the sheet is not part of the configured heading.
    if meta_column_list != metadata_header[1:]:
        diffs = [
            x
            for x in set(metadata_header[1:] + meta_column_list)
            if x not in meta_column_list or x not in metadata_header
        ]
        log.error(
            "Config field metadata_lab_heading is different from .xlsx header"
        )
        stderr.print(
            "[red]Header in metadata file is different from config file, aborting"
        )
        stderr.print("[red]Differences: ", diffs)
        raise MetadataError(f"Metadata header different from config: {diffs}")
    if return_data:
        return ws_metadata_lab, metadata_header, header_row
    else:
        return True
Include these columns in config + index_sampleID = meta_header.index("Sample ID given for sequencing") + index_layout = meta_header.index("Library Layout") + index_fastq_r1 = meta_header.index("Sequence file R1 fastq") + index_fastq_r2 = meta_header.index("Sequence file R2 fastq") + counter = header_row + for row in islice(metadata_ws.values, header_row, metadata_ws.max_row): + counter += 1 + if row[index_sampleID] is not None: + row_complete = True + try: + s_name = str(row[index_sampleID]).strip() + except ValueError as e: + stderr.print("[red]Unable to convert to string. ", e) + continue + if s_name not in sample_file_dict: + sample_file_dict[s_name] = {} + else: + log_text = f"Found duplicated sample name: {s_name}. Skipped." + stderr.print(log_text) + self.include_warning(log_text, sample=s_name) + continue + if row[index_layout] == "paired" and row[index_fastq_r2] is None: + error_text = "Sample %s is paired-end, but no R2 given" + self.include_error(error_text % str(row[index_sampleID]), s_name) + row_complete = False + if row[index_layout] == "single" and row[index_fastq_r2] is not None: + error_text = "Sample %s is single-end, but R1&R2 given" + self.include_error(error_text % str(row[index_sampleID]), s_name) + row_complete = False + if row_complete: + if row[index_fastq_r1] is not None: + # TODO: move these keys to configuration.json + sample_file_dict[s_name]["sequence_file_R1_fastq"] = row[ + index_fastq_r1 + ].strip() + if row[index_fastq_r2] is not None: + sample_file_dict[s_name]["sequence_file_R2_fastq"] = row[ + index_fastq_r2 + ].strip() + else: + log_text = "Fastq_R1 not defined in Metadata for sample %s" + stderr.print(f"[red]{str(log_text % s_name)}") + self.include_error(entry=str(log_text % s_name), sample=s_name) + del sample_file_dict[s_name] + else: + self.include_warning(entry=f"Row {counter} skipped. 
No sample ID given") + # Remove duplicated files + clean_sample_dict = self.remove_duplicated_values(sample_file_dict) + return clean_sample_dict + + def get_metadata_file(self, remote_folder, local_folder): + """Check if the metadata file exists + + Args: + remote_folder (str): path to the folder in remote repository + local_folder (str): path to the local folder + + Raises: + FileNotFoundError: If missing metadata excel file or merging error. + + Returns: + local_meta_file: Path to downloaded metadata file / merged metadata file. + """ + remote_files_list = self.relecov_sftp.get_file_list(remote_folder) + meta_files = [fi for fi in remote_files_list if fi.endswith(".xlsx")] + + def download_remote_metafile(target_meta_file): + local_meta_file = os.path.join( + local_folder, os.path.basename(target_meta_file) + ) + try: + self.relecov_sftp.get_from_sftp(target_meta_file, local_meta_file) + except (IOError, PermissionError) as e: + raise type(e)(f"[red]Unable to fetch metadata file {e}") + log.info( + "Obtained metadata file %s from %s", + local_meta_file, + remote_folder, + ) + return local_meta_file + + if not meta_files: + raise FileNotFoundError(f"Missing metadata file for {remote_folder}") + os.makedirs(self.platform_storage_folder, exist_ok=True) + if len(meta_files) > 1: + # Merging multiple excel files into a single one + log.warning(f"[yellow]Merging multiple metadata files in {remote_folder}") + metadata_ws = self.metadata_processing.get("excel_sheet") + header_flag = self.metadata_processing.get("header_flag") + local_meta_list = [] + for remote_metafile in meta_files: + local_meta_file = download_remote_metafile(remote_metafile) + local_meta_list.append(local_meta_file) + meta_df_list = [] + for loc_meta in local_meta_list: + try: + loc_meta_df = self.excel_to_df(loc_meta, metadata_ws, header_flag) + meta_df_list.append(loc_meta_df) + except (ParserError, EmptyDataError, MetadataError, KeyError) as e: + error_text = f"Could not process 
def validate_remote_files(self, remote_folder, local_folder):
    """Check if the files in the remote folder are the ones defined in metadata file

    Args:
        remote_folder (str): Name of remote folder being validated
        local_folder (str): Name of folder where files are being downloaded

    Raises:
        FileNotFoundError: If the metadata file is not available locally or
            none of the files in remote folder are valid

    Returns:
        sample_files_dict (dict): same structure as self.get_sample_fastq_file_names
        local_meta_file (str): location of downloaded metadata excel file
    """
    local_meta_file = self.get_metadata_file(remote_folder, local_folder)
    out_folder = os.path.dirname(local_meta_file)
    allowed_extensions = tuple(self.allowed_file_ext)
    remote_files_list = [
        os.path.basename(file)
        for file in self.relecov_sftp.get_file_list(remote_folder)
    ]
    filtered_files_list = sorted(
        fi for fi in remote_files_list if fi.endswith(allowed_extensions)
    )
    sample_files_dict = self.get_sample_fastq_file_names(
        out_folder, local_meta_file
    )
    # get_sample_fastq_file_names() returns False when the metadata file is
    # not on disk; fail with the documented exception instead of crashing
    # on False.keys() below.
    if sample_files_dict is False:
        raise FileNotFoundError(
            "Metadata file %s not found after download" % local_meta_file
        )
    # Include the samples in the process log summary
    for sample in sample_files_dict:
        self.include_new_key(sample=sample)
    metafiles_list = sorted(
        fname for files in sample_files_dict.values() for fname in files.values()
    )
    if filtered_files_list == metafiles_list:
        log.info("Files in %s match with metadata file", remote_folder)
    else:
        log_text = "Some files in %s do not match the ones described in metadata"
        log.warning(log_text % remote_folder)
        stderr.print(f"[gold1]{log_text % remote_folder}")
        metadata_names = set(metafiles_list)
        remote_names = set(filtered_files_list)
        mismatch_files = [
            fi for fi in filtered_files_list if fi not in metadata_names
        ]
        # Sorted so the warning text is stable between runs.
        mismatch_rev = sorted(metadata_names - remote_names)

        if mismatch_files:
            error_text1 = "Files in folder missing in metadata %s"
            self.include_warning(error_text1 % str(mismatch_files))
        if mismatch_rev:
            error_text2 = "Files in metadata missing in folder %s"
            self.include_warning(error_text2 % str(mismatch_rev))
        # Try to check if the metadata filename lacks the proper extension
        log.info("Trying to match files without proper file extension")
        sample_files_dict = self.process_filedict(
            sample_files_dict, filtered_files_list
        )
    if not any(value for value in sample_files_dict.values()):
        raise FileNotFoundError(
            "No files from metadata found in %s" % remote_folder
        )
    stderr.print("[blue]Finished validating files based on metadata")
    return sample_files_dict, local_meta_file
if any(file.endswith(tuple(self.allowed_file_ext)) for file in files_to_remove): + if skip_seqs is True: + log_text = f"Folder {remote_folder} has sequencing files. Not removed." + self.include_warning(log_text) + return + for file in files_to_remove: + try: + self.relecov_sftp.remove_file( + os.path.join(remote_folder, os.path.basename(file)) + ) + log.info("%s Deleted from remote server", file) + except (IOError, PermissionError) as e: + self.include_warning(f"Could not delete remote file {file}: {e}") + stderr.print(f"Could not delete remote file {file}. Error: {e}") + return + + def delete_remote_folder(self, remote_folder): + """Delete a folder from remote sftp, check if it is empty or not first. + + Args: + remote_folder (str): path to folder in remote repository + """ + + def remove_client_dir(remote_folder): + # Never remove a folder in the top level + if len(remote_folder.replace("./", "").split("/")) >= 2: + log.info("Trying to remove %s", remote_folder) + try: + self.relecov_sftp.remove_dir(remote_folder) + log.info("Successfully removed %s", remote_folder) + except (OSError, PermissionError) as e: + log_text = f"Could not delete remote {remote_folder}. Error: {e}" + self.include_warning(log_text) + stderr.print(log_text) + else: + log.info("%s is a top-level folder. Not removed", remote_folder) + + remote_folder_files = self.relecov_sftp.get_file_list(remote_folder) + if remote_folder_files: + log_text = f"Remote folder {remote_folder} not empty. 
Not removed" + self.include_warning(log_text) + else: + remove_client_dir(remote_folder) + return + + def move_processing_fastqs(self, folders_with_metadata): + """Gather all the files from any subfolder into a processing folder + + Args: + folders_with_metadata (dict(str:list)): Dictionary updated from merge_md5sums() + + Returns: + folders_with_metadata (dict(str:list)): Same dict updated with files successfully moved + """ + log.info("Moving remote files to each temporal processing folder") + stderr.print("[blue]Moving remote files to each temporal processing folder") + for folder, files in folders_with_metadata.items(): + self.current_folder = folder.split("/")[0] + successful_files = [] + for file in files: + if not file.endswith(tuple(self.allowed_file_ext)): + continue + file_dest = os.path.join(folder, os.path.basename(file)) + try: + # Paramiko.SSHClient.sftp_open does not have a method to copy files + self.relecov_sftp.rename_file(file, file_dest) + successful_files.append(file_dest) + except OSError: + if file in folders_with_metadata[folder]: + error_text = "File named %s already in %s. Skipped" + self.include_warning(error_text % (file, self.current_folder)) + else: + error_text = "Error while moving file %s" + self.include_error(error_text % file) + folders_with_metadata[folder] = successful_files + return folders_with_metadata + + def merge_md5sums(self, folders_with_metadata): + """Download the md5sums for each folder, merge them into a single one, + upload them to the remote processing folder. + + Args: + folders_with_metadata (dict(str:list)): Dictionary with remote folders + and their files. All subfolder filenames are merged into a single key. 
+ + Raises: + FileNotFoundError: If no md5sum file is found in the folder + + Returns: + folders_with_metadata: Same dict updated with the merged md5sum file + """ + output_location = self.platform_storage_folder + + # TODO: Include this function in relecov_tools.utils + def md5_merger(md5_filelist, avoid_chars=None): + """Merge all md5 files from a given list into a single multi-line md5sum""" + md5dict_list = [] + for md5sum in md5_filelist: + hash_dict = relecov_tools.utils.read_md5_checksum(md5sum, avoid_chars) + if hash_dict: + md5dict_list.append(hash_dict) + # Sort hashes and files back to the original order. + merged_md5 = { + md5: file for mdict in md5dict_list for file, md5 in mdict.items() + } + return merged_md5 + + def md5_handler(md5sumlist, output_location): + """Download all the remote md5sum files in a list, merge them + into a single md5checksum and upload it back to sftp""" + downloaded_md5files = [] + for md5sum in md5sumlist: + md5_name = "_".join([token_hex(nbytes=12), "md5_temp.md5"]) + fetched_md5 = os.path.join(output_location, md5_name) + if self.relecov_sftp.get_from_sftp( + file=md5sum, destination=fetched_md5 + ): + downloaded_md5files.append(fetched_md5) + merged_md5 = md5_merger(downloaded_md5files, self.avoidable_characters) + if merged_md5: + merged_name = "_".join([folder.split("/")[0], "md5sum.md5"]) + merged_md5_path = os.path.join(output_location, merged_name) + with open(merged_md5_path, "w") as md5out: + write_md5 = csv_writer(md5out, delimiter="\t") + write_md5.writerows(merged_md5.items()) + md5_dest = os.path.join(folder, os.path.basename(merged_md5_path)) + self.relecov_sftp.upload_file(merged_md5_path, md5_dest) + # Remove local files once merged and uploaded + os.remove(merged_md5_path) + [os.remove(md5_file) for md5_file in downloaded_md5files] + return md5_dest + else: + error_text = "No md5sum could be processed in remote folder" + raise FileNotFoundError(error_text) + + for folder, files in 
def merge_metadata(self, meta_sheet=None, *metadata_tables):
    """Merge a variable number of metadata tables into the first one. Merge them
    only into a certain sheet from a multi-sheet excel file if sheetname is given.

    Args:
        meta_sheet (str): Name of the sheet containing metadata in excel file.
            When given, every table is expected to be a mapping
            {sheet_name: DataFrame}; otherwise every table is a DataFrame.
        *metadata_tables (list(pandas.DataFrame)): Dataframes to be merged

    Raises:
        ValueError: If no table is given

    Returns:
        merged_df (pandas.DataFrame): A merged dataframe from the given tables.
        With ``meta_sheet`` it is a shallow copy of the first mapping whose
        ``meta_sheet`` entry holds the rows of every table, so the first
        argument is no longer modified in place.
    """
    import copy  # local import, only needed by this method

    if not metadata_tables:
        raise ValueError("merge_metadata needs at least one metadata table")
    first_table = metadata_tables[0]
    if len(metadata_tables) == 1:
        return first_table
    if meta_sheet:
        merged_df = copy.copy(first_table)
        # A single concat avoids re-copying the accumulated rows for each table
        merged_df[meta_sheet] = concat(
            [table[meta_sheet] for table in metadata_tables], ignore_index=True
        )
        return merged_df
    return concat(list(metadata_tables), ignore_index=True)
def excel_to_df(self, excel_file, metadata_sheet, header_flag):
    """Read an excel file, return a dict with a dataframe for each sheet in it.
    Process the given sheet with metadata, removing all rows until header is found

    Args:
        excel_file (str): Path to the local excel file with metadata
        metadata_sheet (str): Name of the sheet containing metadata in excel file
        header_flag (str): Name of one of the columns from the metadata header

    Raises:
        KeyError: If metadata_sheet is not a sheet of the excel file
        MetadataError: If no header could be found matching header flag

    Returns:
        excel_df (dict(str:pandas.DataFrame)): Dict {name_of_excel_sheet:DataFrame}
        containing all sheets in the excel file as pandas dataframes.
    """
    # Get every sheet from the first excel file
    excel_df = read_excel(excel_file, dtype=str, sheet_name=None)
    meta_df = excel_df[metadata_sheet]
    if header_flag in meta_df.columns:
        # Header is already the first row: nothing to trim
        return excel_df
    # Rows holding the flag in any of their cells (positional order)
    has_flag = meta_df.eq(header_flag).any(axis=1)
    flagged_rows = [pos for pos, found in enumerate(has_flag) if found]
    if not flagged_rows:
        # Previously header_row stayed None and the code below crashed with an
        # exception type that callers of this method do not handle.
        # NOTE(review): assumes MetadataError accepts a message — confirm
        raise MetadataError(
            f"Header flag {header_flag} not found in sheet {metadata_sheet}"
        )
    # Same choice as before: the last row containing the flag is the header
    header_row = flagged_rows[-1]
    meta_df.columns = meta_df.iloc[header_row]
    trimmed_df = meta_df.drop(meta_df.index[: (header_row + 1)])
    excel_df[metadata_sheet] = trimmed_df.reset_index(drop=True)
    return excel_df
def merge_subfolders(self, target_folders):
    """For each first-level folder in the sftp, merge all the subfolders within
    it in a single one called '*_tmp_processing' by moving all the fastq files from
    them. Merge the metadata excel and md5 files from each subfolder too.

    Args:
        target_folders (dict(str:list)): Dictionary with folders and their files

    Returns:
        tuple: (clean_target_folders, processed_folders)
            clean_target_folders (dict(str:list)): Dict with '*_tmp_processing'
            folders and their content. All subfolder filenames are merged into
            a single key.
            processed_folders (list(str)): Original remote folders whose
            metadata and files were merged
    """
    metadata_ws = self.metadata_processing.get("excel_sheet")
    header_flag = self.metadata_processing.get("header_flag")
    output_location = self.platform_storage_folder
    # "%-H" (hour without padding) is a glibc-only extension: it is rejected on
    # other platforms and gives variable-length stamps. Use the portable "%H".
    date_and_time = datetime.today().strftime("%Y%m%d%H%M%S")
    exts = tuple(self.allowed_file_ext)
    excel_errors = (ParserError, EmptyDataError, MetadataError, KeyError)

    def upload_merged_df(merged_excel_path, last_main_folder, merged_df):
        """Upload metadata dataframe merged from all subfolders back to sftp"""
        self.relecov_sftp.make_dir(last_main_folder)
        with ExcelWriter(merged_excel_path, engine="xlsxwriter") as pd_writer:
            for sheet in merged_df.keys():
                format_sheet = merged_df[sheet].astype(str)
                format_sheet.replace("nan", None, inplace=True)
                format_sheet.to_excel(pd_writer, sheet_name=sheet, index=False)
        dest = os.path.join(last_main_folder, os.path.basename(merged_excel_path))
        self.relecov_sftp.upload_file(merged_excel_path, dest)
        os.remove(merged_excel_path)
        return

    def flush_merged_metadata(excel_path, remote_folder, sheets, uploaded_name):
        """Upload a finished merged excel, dropping its folder if upload fails"""
        try:
            upload_merged_df(excel_path, remote_folder, sheets)
            folders_with_metadata[remote_folder].append(uploaded_name)
        except OSError:
            error_text = "Error uploading merged metadata back to sftp: %s"
            self.include_error(error_text % remote_folder)
            del folders_with_metadata[remote_folder]

    def pre_validate_folder(folder, folder_files):
        """Check if remote folder has sequencing files and a valid metadata file"""
        if not any(file.endswith(exts) for file in folder_files):
            error_text = "Remote folder %s skipped. No sequencing files found."
            self.include_error(error_text % folder)
            return
        try:
            downloaded_metadata = self.get_metadata_file(folder, output_location)
        except (FileNotFoundError, OSError, PermissionError, MetadataError) as err:
            error_text = "Remote folder %s skipped. Reason: %s"
            self.include_error(error_text % (folder, err))
            return
        try:
            self.read_metadata_file(downloaded_metadata, return_data=False)
        except (MetadataError, KeyError) as excel_error:
            error_text = f"Folder {self.current_folder} skipped: %s"
            os.remove(downloaded_metadata)
            self.include_error(error_text % excel_error)
            return
        return downloaded_metadata

    folders_with_metadata = {}
    processed_folders = []
    merged_df = merged_excel_path = last_main_folder = excel_name = None
    log.info("Setting %s remote folders...", str(len(target_folders.keys())))
    stderr.print(f"[blue]Setting {len(target_folders.keys())} remote folders...")
    for folder in sorted(target_folders.keys()):
        self.current_folder = folder
        # Include the folder in the final process log summary
        self.include_new_key()
        downloaded_metadata = pre_validate_folder(folder, target_folders[folder])
        if not downloaded_metadata:
            continue
        # Create a temporal name to avoid duplicated filenames
        meta_filename = "_".join([folder.split("/")[-1], "metadata_temp.xlsx"])
        local_meta = os.path.join(output_location, meta_filename)
        os.rename(downloaded_metadata, local_meta)

        # Taking the main folder for each lab as reference for merge and logs
        main_folder = folder.split("/")[0]
        self.current_folder = main_folder
        temporal_foldername = "_".join([date_and_time, "tmp_processing"])
        temp_folder = os.path.join(main_folder, temporal_foldername)
        # Get every file except the excel ones as they are going to be merged
        filelist = [fi for fi in target_folders[folder] if not fi.endswith(".xlsx")]
        if not folders_with_metadata.get(temp_folder):
            log_text = "Trying to merge metadata from %s in %s"
            log.info(log_text % (main_folder, temp_folder))
            stderr.print(f"[blue]{log_text % (main_folder, temp_folder)}")
            if merged_df:
                # Write the previous merged metadata df before overriding it
                flush_merged_metadata(
                    merged_excel_path, last_main_folder, merged_df, excel_name
                )
                # Forget it once handled: if the excel below cannot be parsed,
                # the next folder must not upload the same sheet a second time
                merged_df = None
            try:
                merged_df = self.excel_to_df(local_meta, metadata_ws, header_flag)
            except excel_errors as e:
                meta_name = os.path.basename(downloaded_metadata)
                error_text = "%s skipped. Error while processing excel %s: %s"
                self.include_error(error_text % (main_folder, meta_name, str(e)))
                os.remove(local_meta)
                continue
            folders_with_metadata[temp_folder] = []
            folders_with_metadata[temp_folder].extend(filelist)
            # rename metadata file to avoid filename duplications
            excel_name = "_".join([folder.split("/")[0], "merged_metadata.xlsx"])
            merged_excel_path = os.path.join(output_location, excel_name)
            os.rename(local_meta, merged_excel_path)
            # Keep a track of the main_folder for next iteration
            last_main_folder = temp_folder
        else:
            # If temp_folder has subfolders in it, merge everything
            try:
                new_df = self.excel_to_df(local_meta, metadata_ws, header_flag)
            except excel_errors as e:
                # Same handling as the first subfolder: skip instead of aborting
                meta_name = os.path.basename(downloaded_metadata)
                error_text = "%s skipped. Error while processing excel %s: %s"
                self.include_error(error_text % (folder, meta_name, str(e)))
                os.remove(local_meta)
                continue
            folders_with_metadata[temp_folder].extend(filelist)
            merged_df = self.merge_metadata(metadata_ws, merged_df, new_df)
            os.remove(local_meta)
        processed_folders.append(folder)
    # End of loop

    # Write last dataframe to file once loop is finished
    if folders_with_metadata.get(last_main_folder):
        if excel_name not in folders_with_metadata[last_main_folder]:
            flush_merged_metadata(
                merged_excel_path, last_main_folder, merged_df, excel_name
            )

    # Merge md5files and upload them to tmp_processing folder
    merged_md5_folders = self.merge_md5sums(folders_with_metadata)
    # Move all the files from each subfolder into its tmp_processing folder
    clean_target_folders = self.move_processing_fastqs(merged_md5_folders)
    log_text = "Remote folders merged into %s folders. Proceed with processing"
    log.info(log_text % len(clean_target_folders.keys()))
    stderr.print(f"[green]{log_text % len(clean_target_folders.keys())}")
    return clean_target_folders, processed_folders
def select_target_folders(self):
    """Find the selected folders in remote if given, else select every folder

    The process exits with code 1 when the remote cannot be listed or when no
    remote folder matches the selection, and with code 0 (after closing the
    sftp connection) when every selected folder is empty.

    Returns:
        folders_to_process (dict(str:list)): Dictionary with folders and their files
    """
    root_directory_list = self.relecov_sftp.list_remote_folders(".", recursive=True)
    # Check the listing before iterating it: a failed listing must abort here
    if not root_directory_list:
        log.error("Error while listing folders in remote. Aborting")
        sys.exit(1)
    clean_root_list = [folder.replace("./", "") for folder in root_directory_list]
    if self.target_folders is None:
        target_folders = clean_root_list
    elif self.target_folders and self.target_folders[0] == "ALL":
        log.info("Showing folders from remote SFTP for user selection")
        target_folders = relecov_tools.utils.prompt_checkbox(
            msg="Select the folders that will be targeted",
            choices=sorted(clean_root_list),
        )
    else:
        # An empty selection ends up here and is reported below as no match
        target_folders = [tf for tf in self.target_folders if tf in clean_root_list]
    if not target_folders:
        log.error("No remote folders matching selection %s", self.target_folders)
        stderr.print("Found no remote folders matching selection")
        stderr.print(f"List of remote folders: {str(clean_root_list)}")
        sys.exit(1)
    folders_to_process = {}
    for targeted_folder in target_folders:
        try:
            full_folders = self.relecov_sftp.list_remote_folders(
                targeted_folder, recursive=True
            )
        except (FileNotFoundError, OSError) as e:
            # The exception used to be passed as a logging argument without a
            # placeholder, which made logging fail to format the record
            log.error("Error during sftp listing. %s skipped: %s", targeted_folder, e)
            continue
        for folder in full_folders:
            list_files = self.relecov_sftp.get_file_list(folder)
            if list_files:
                folders_to_process[folder] = list_files
            else:
                log.info("%s is empty", folder)
    if not folders_to_process:
        log.info("Exiting process, folders were empty.")
        log.error("There are no files in the selected folders.")
        self.relecov_sftp.close_connection()
        sys.exit(0)
    return folders_to_process
def compress_and_update(self, fetched_files, files_to_compress, local_folder):
    """compress the given list of files_to_compress and update files_list

    Args:
        fetched_files (list(str)): list of all downloaded files
        files_to_compress (list(str)): list of files that are uncompressed
        local_folder (str): local folder where the files were downloaded

    Returns:
        fetched_files(list(str)): files list including the new compressed files
    """
    # A set keeps the final renaming linear instead of scanning a list per file
    compressed_files = set()
    for file in files_to_compress:
        f_path = os.path.join(local_folder, file)
        compressed = relecov_tools.utils.compress_file(f_path)
        if not compressed:
            error_text = "Could not compress file %s, file not found" % str(file)
            # NOTE(review): f_path lands in include_error's `sample` parameter;
            # kept as it was — confirm this is the intended log key
            self.include_error(error_text, f_path)
            continue
        compressed_files.add(file)
        # Remove file after compression is completed
        try:
            os.remove(f_path)
        except (FileNotFoundError, PermissionError) as e:
            log.warning(f"Could not delete file: {e}")
    return [(fi + ".gz" if fi in compressed_files else fi) for fi in fetched_files]
def process_filedict(self, valid_filedict, clean_fetchlist, corrupted=None, md5miss=None):
    """Process the dictionary from validate_remote_files() to update filenames
    and remove samples that failed any validation process.

    Args:
        valid_filedict (dict{str:str}): same structure as validate_remote_files()
        clean_fetchlist (list(str)): List of files that passed validation process
        corrupted (list(str), optional): Files that failed md5 verification.
            Each one found in a sample is logged as an error. Defaults to None.
        md5miss (list(str), optional): Files with no entry in the md5sum file.
            Each one found in a sample is logged as a warning. Defaults to None.

    Returns:
        processed(dict{str:str}): Updated valid_filedict
    """
    # None instead of shared mutable defaults
    corrupted = [] if corrupted is None else corrupted
    md5miss = [] if md5miss is None else md5miss
    mismatch_text = "md5 mismatch for %s"
    warning_text = "File %s not found in md5sum. Creating hash"
    missing_text = "Sample %s skipped: missing files in sftp"
    available_files = set(clean_fetchlist)
    processed_dict = {}
    for sample, vals in valid_filedict.items():
        sample_files = {}
        for key, val in vals.items():
            if val in corrupted:
                self.include_error(mismatch_text % val, sample=sample)
            if val in md5miss:
                self.include_warning(warning_text % val, sample=sample)
            # Substring match so a name still matches after ".gz" was appended;
            # as before, the last matching file wins
            matches = [file for file in clean_fetchlist if val in file]
            sample_files[key] = matches[-1] if matches else None
        # remove sample if it has missing files
        if all(file in available_files for file in sample_files.values()):
            processed_dict[sample] = sample_files
        elif not corrupted:
            # With corrupted files the mismatch was already reported above
            self.include_error(missing_text % sample, sample=sample)
            log.error(missing_text % sample)
    return processed_dict
def download(self, target_folders, option="download"):
    """Manages all the different functions to download files, verify their
    integrity and create initial json with filepaths and md5 hashes

    For every remote folder: validate its files against the metadata, download
    them, verify them with the remote md5sum when there is one (retrying the
    mismatches once), compress uncompressed files, collect an md5 for each
    file and write the files-with-metadata info. Folders that fail validation
    or download are logged and skipped.

    Args:
        target_folders (dict): dictionary with remote folders as keys
        option (str, optional): Download option. Defaults to "download".
            With "clean" the remote folder content is deleted after download.
    """
    log.info("Initiating download process")
    main_folder = self.platform_storage_folder
    try:
        os.makedirs(main_folder, exist_ok=True)
    except OSError as e:
        log.error("You do not have permissions to create folder %s", e)
        sys.exit(1)
    folders_to_download = target_folders
    allowed_exts = tuple(self.allowed_file_ext)
    for folder in folders_to_download.keys():
        self.current_folder = folder.split("/")[0]
        # Close previously open connection to avoid timeouts
        try:
            self.relecov_sftp.close_connection()
        except paramiko.ssh_exception.NoValidConnectionsError:
            pass
        # Check if the connection has been closed due to time limit
        self.relecov_sftp.open_connection()
        log.info("Processing folder %s", folder)
        stderr.print("[blue]Processing folder " + folder)
        # Validate that the files are the ones described in metadata.

        local_folder = self.create_local_folder(folder)
        try:
            valid_filedict, meta_file = self.validate_remote_files(
                folder, local_folder
            )
        except (FileNotFoundError, IOError, PermissionError, MetadataError) as fail:
            log.error(fail)
            stderr.print(f"[red]{fail}, skipped")
            # Log entries are text everywhere else: store the message
            self.include_error(str(fail))
            continue
        # Get the files in each folder
        files_to_download = [
            fi for vals in valid_filedict.values() for fi in vals.values()
        ]
        fetched_files = self.get_remote_folder_files(
            folder, local_folder, files_to_download
        )
        if not fetched_files:
            error_text = "No files could be downloaded in folder %s" % str(folder)
            stderr.print(f"{error_text}")
            self.include_error(error_text)
            continue
        log.info("Finished download for folder: %s", folder)
        stderr.print(f"Finished download for folder {folder}")
        remote_md5sum = self.find_remote_md5sum(folder)
        if remote_md5sum:
            # Get the md5checksum to validate integrity of files after download
            fetched_md5 = os.path.join(
                local_folder, os.path.basename(remote_md5sum)
            )
            self.relecov_sftp.get_from_sftp(
                file=remote_md5sum, destination=fetched_md5
            )
            successful_files, corrupted = self.verify_md5_checksum(
                local_folder, fetched_files, fetched_md5
            )
            # try to download the files again to discard errors during download
            if corrupted:
                stderr.print("[gold1]Found md5 mismatches, downloading again...")
                self.get_remote_folder_files(folder, local_folder, corrupted)
                saved_files, corrupted = self.verify_md5_checksum(
                    local_folder, corrupted, fetched_md5
                )
                if saved_files:
                    successful_files.extend(saved_files)
                if corrupted:
                    corr_fold = os.path.join(local_folder, "corrupted")
                    # Do not fail when the folder is already there
                    os.makedirs(corr_fold, exist_ok=True)
                    error_text = "Found corrupted files: %s. Moved to: %s"
                    stderr.print(f"[red]{error_text % (str(corrupted), corr_fold)}")
                    self.include_warning(error_text % (str(corrupted), corr_fold))
                    for corr_file in corrupted:
                        path = os.path.join(local_folder, corr_file)
                        try:
                            os.rename(path, os.path.join(corr_fold, corr_file))
                        except (FileNotFoundError, PermissionError, OSError) as e:
                            error_text = (
                                "Could not move corrupted file %s to %s: %s"
                            )
                            log.error(error_text % (path, corr_fold, e))
                            stderr.print(
                                f"[red]{error_text % (path, corr_fold, e)}"
                            )
                    if self.abort_if_md5_mismatch:
                        error_text = "Stop processing %s due to corrupted files."
                        stderr.print(f"[red]{error_text % folder}")
                        # The literal word "folder" used to be logged here
                        self.include_error(error_text % folder)
                        relecov_tools.utils.delete_local_folder(local_folder)
                        continue
            hash_dict = relecov_tools.utils.read_md5_checksum(
                fetched_md5, self.avoidable_characters
            )
            log.info("Finished md5 check for folder: %s", folder)
            stderr.print(f"[blue]Finished md5 verification for folder {folder}")
        else:
            corrupted = []
            error_text = "No single md5sum file could be found in %s" % folder
            stderr.print(f"[red]{error_text}")
            self.include_warning(error_text)

        clean_fetchlist = [fi for fi in fetched_files if fi.endswith(allowed_exts)]

        clean_fetchlist = [fi for fi in clean_fetchlist if fi not in corrupted]
        # Checking for uncompressed files
        files_to_compress = [fi for fi in clean_fetchlist if not fi.endswith(".gz")]
        if files_to_compress:
            comp_files = str(len(files_to_compress))
            log.info("Found %s uncompressed files, compressing...", comp_files)
            stderr.print(f"Found {comp_files} uncompressed files, compressing...")
            clean_fetchlist = self.compress_and_update(
                clean_fetchlist, files_to_compress, local_folder
            )
        clean_pathlist = [os.path.join(local_folder, fi) for fi in clean_fetchlist]
        not_md5sum = []
        if remote_md5sum:
            # Get hashes from provided md5sum, create them for those not provided
            files_md5_dict = {}
            for path in clean_pathlist:
                f_name = os.path.basename(path)
                if f_name in successful_files:
                    files_md5_dict[f_name] = hash_dict[f_name]
                elif f_name in corrupted:
                    clean_fetchlist.remove(f_name)
                else:
                    # Drop the ".gz" suffix only: rstrip(".gz") strips any
                    # trailing ".", "g" or "z" characters and can eat the name
                    if f_name.endswith(".gz"):
                        uncompressed_name = f_name[: -len(".gz")]
                    else:
                        uncompressed_name = f_name
                    if uncompressed_name not in files_to_compress:
                        error_text = "File %s not found in md5sum. Creating hash"
                        log.warning(error_text % f_name)
                        not_md5sum.append(f_name)
                    else:
                        log.info("File %s was compressed, creating md5hash", f_name)
                    files_md5_dict[f_name] = relecov_tools.utils.calculate_md5(path)
        else:
            md5_hashes = [
                relecov_tools.utils.calculate_md5(path) for path in clean_pathlist
            ]
            files_md5_dict = dict(zip(clean_fetchlist, md5_hashes))

        processed_filedict = self.process_filedict(
            valid_filedict, clean_fetchlist, corrupted=corrupted, md5miss=not_md5sum
        )
        self.create_files_with_metadata_info(
            local_folder, processed_filedict, files_md5_dict, meta_file
        )
        # If download_option is "download_clean", remove
        # sftp folder content after download is finished
        if option == "clean":
            self.delete_remote_files(folder, files=files_to_download)
            self.delete_remote_files(folder, skip_seqs=True)
            self.delete_remote_folder(folder)
            stderr.print(f"Delete process finished in remote {folder}")
        stderr.print(f"[green]Finished processing {folder}")
    return
def include_new_key(self, sample=None):
    """Feed the current folder, and optionally a sample, as a key of the log summary

    Args:
        sample (str, optional): Sample passed along with the key. Defaults to None.
    """
    folder_key = self.current_folder
    self.logsum.feed_key(key=folder_key, sample=sample)


def include_error(self, entry, sample=None):
    """Add an error entry to the log summary under the current folder

    Args:
        entry (str): Error text to record
        sample (str, optional): Sample the error refers to. Defaults to None.
    """
    folder_key = self.current_folder
    self.logsum.add_error(key=folder_key, entry=entry, sample=sample)


def include_warning(self, entry, sample=None):
    """Add a warning entry to the log summary under the current folder

    Args:
        entry (str): Warning text to record
        sample (str, optional): Sample the warning refers to. Defaults to None.
    """
    folder_key = self.current_folder
    self.logsum.add_warning(key=folder_key, entry=entry, sample=sample)
def execute_process(self):
    """Executes different processes depending on the download_option

    "delete_only" removes the files and folders selected in remote. Any other
    option first merges the subfolders of each remote folder; "download_only"
    then downloads them and "download_clean" downloads them and deletes the
    processed remote folders. The process exits with code 1 if the sftp
    connection cannot be opened.
    """
    if not self.relecov_sftp.open_connection():
        log.error("Unable to establish connection towards sftp server")
        stderr.print("[red]Unable to establish sftp connection")
        sys.exit(1)
    target_folders = self.select_target_folders()
    if self.download_option == "delete_only":
        log.info("Initiating delete_only process")
        # A list, so the final summary does not print a dict_keys(...) repr
        processed_folders = list(target_folders)
        for folder in processed_folders:
            self.current_folder = folder
            self.delete_remote_files(folder)
            self.delete_remote_folder(folder)
            stderr.print(f"Delete process finished in {folder}")
    else:
        target_folders, processed_folders = self.merge_subfolders(target_folders)
    if self.download_option == "download_only":
        self.download(target_folders, option="download")
    elif self.download_option == "download_clean":
        self.download(target_folders, option="clean")
        for folder in processed_folders:
            self.current_folder = folder
            self.delete_remote_files(folder, skip_seqs=True)
            self.delete_remote_folder(folder)
            stderr.print(f"Delete process finished in {folder}")

    self.relecov_sftp.close_connection()
    stderr.print(f"Processed {len(processed_folders)} folders: {processed_folders}")
    if self.logsum.logs:
        log.info("Printing process summary to %s", self.platform_storage_folder)
        self.logsum.create_error_summary(called_module="download")
    else:
        log.info("Process log summary was empty. Not generated.")
    stderr.print("Finished execution")
    return
downloaded module +-o output folder where the json file will be generated + +The output of this module is processed_metadata_lab.json + diff --git a/relecov_tools/example_data/1.read_lab_metadata_module/converted_metadata_lab.xlsx b/relecov_tools/example_data/1.read_lab_metadata_module/converted_metadata_lab.xlsx new file mode 100755 index 00000000..be04ecae Binary files /dev/null and b/relecov_tools/example_data/1.read_lab_metadata_module/converted_metadata_lab.xlsx differ diff --git a/relecov_tools/example_data/1.read_lab_metadata_module/sample_data.json b/relecov_tools/example_data/1.read_lab_metadata_module/sample_data.json new file mode 100644 index 00000000..79981d15 --- /dev/null +++ b/relecov_tools/example_data/1.read_lab_metadata_module/sample_data.json @@ -0,0 +1,26 @@ +{ + "210141": { + "fastq_r1_md5": "eab8b05ef27f4f5cba5cddf6ad627de2", + "fastq_r2_md5": "d82a37aa970df2b8bf8f547ca7c18ac8", + "r1_fastq_filepath": "/media/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "r2_fastq_filepath": "/media/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "sequence_file_R1_fastq": "210141_R1.fastq.gz", + "sequence_file_R2_fastq": "210141_R2.fastq.gz" + }, + "2018086": { + "fastq_r1_md5": "b5242d60471e5a5a97b35531dbbe8c30", + "fastq_r2_md5": "57525c5a1ec992098e652aa01b366d69", + "r1_fastq_filepath": "/media/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "r2_fastq_filepath": "/media/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "sequence_file_R1_fastq": "2018086_R1.fastq.gz", + "sequence_file_R2_fastq": "2018086_R2.fastq.gz" + }, + "2018102": { + "fastq_r1_md5": "b5242d60471e5a5a97b35531dbbe8c30", + "fastq_r2_md5": "57525c5a1ec992098e652aa01b366d69", + "r1_fastq_filepath": "/media/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "r2_fastq_filepath": 
"/media/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "sequence_file_R1_fastq": "2018102_R1.fastq.gz", + "sequence_file_R2_fastq": "2018102_R2.fastq.gz" + } +} diff --git a/relecov_tools/example_data/2.validate_module/README b/relecov_tools/example_data/2.validate_module/README new file mode 100644 index 00000000..e38dc34f --- /dev/null +++ b/relecov_tools/example_data/2.validate_module/README @@ -0,0 +1,7 @@ +This module will validate the processed_metadata_lab_XXXX.json (output of the read_lab_metadata) against the metadata_lab_XXXXX.xlsx +For this module the input is: +- processed_metadata_lab_XXXX.json +- metadata_lab_XXXXX_xlsx +- output folder where the validation errors will be created + +The output of this module will be a series of error in case something is wrong. If there are no mistakes no output is generated. diff --git a/relecov_tools/example_data/2.validate_module/metadata_lab.xlsx b/relecov_tools/example_data/2.validate_module/metadata_lab.xlsx new file mode 100755 index 00000000..be04ecae Binary files /dev/null and b/relecov_tools/example_data/2.validate_module/metadata_lab.xlsx differ diff --git a/relecov_tools/example_data/2.validate_module/processed_metadata_lab.json b/relecov_tools/example_data/2.validate_module/processed_metadata_lab.json new file mode 100644 index 00000000..5b11ba10 --- /dev/null +++ b/relecov_tools/example_data/2.validate_module/processed_metadata_lab.json @@ -0,0 +1,242 @@ +[ + { + "amplicon_protocol": "ARTIC", + "amplicon_version": "", + "analysis_authors": "", + "anatomical_material": "Nasopharyngeal exudate", + "author_submitter": "", + "authors": "Iglesias-Caballero, M. Camarero, S. Molinero Calamita, M. González-Esguevillas, M. Pozo, F. Casas, I. Jiménez, P. Jiménez, M. Zaballos, A. Monzón, S. Varona, S. Juliá, M. 
Cuesta, I, Moreno, A.", + "biosample_accession_ENA": "", + "collecting_institution": "Hospital Clínico Universitario Virgen de la Arrixaca", + "collecting_institution_address": "Ctra. Madrid-Cartagena, s/n, El Palmar", + "collecting_institution_email": "", + "collecting_lab_sample_id": "16065902", + "collection_device": "", + "collector_name": "unknown", + "common_name": "Severe acute respiratory syndrome", + "design_description": "Design Description", + "diagnostic_pcr_Ct_value_1": "18", + "diagnostic_pcr_Ct_value_2": "", + "enrichment_protocol": "Amplicon", + "environmental_material": "", + "environmental_site": "Swab", + "experiment_alias": "2018086_R1.fastq.gz_2018086_R2.fastq.gz", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1": "2018086_R1.fastq.gz", + "fastq_r1_md5": "eab8b05ef27f4f5cba5cddf6ad627de2", + "fastq_r2": "2018086_R2.fastq.gz", + "fastq_r2_md5": "d82a37aa970df2b8bf8f547ca7c18ac8", + "flowcell_kit": "", + "gene_name_1": "ORF E", + "gene_name_2": "", + "geo_loc_city": "Murcia", + "geo_loc_country": "Spain", + "geo_loc_latitude": "37.9861", + "geo_loc_longitude": "-1.1303", + "geo_loc_state": "Murcia", + "gisaid_id": "EPI_ISL_862545", + "host_age": "26", + "host_common_name": "Human", + "host_disease": "COVID-19", + "host_gender": "Male", + "host_scientific_name": "Homo Sapiens", + "host_subject_id": "2018086", + "isolate_sample_id": "2018086", + "library_kit": "", + "library_layout": "Paired", + "library_preparation_kit": "Illumina DNA Prep", + "library_selection": "PCR", + "library_source": "Viral RNA", + "library_strategy": "Amplicon", + "microbiology_lab_sample_id": "2018086", + "number_of_samples_in_run": "", + "organism": "Severe acute respiratory syndrome coronavirus 2", + "purpose_sampling": "Surveillance", + "r1_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "r2_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + 
"rna_extraction_protocol": "RT-PCR", + "runID": "MiSeaq_GN_195", + "run_alias": "2018086_R1.fastq.gz_2018086_R2.fastq.gz", + "sample_collection_date": "2020-12-20", + "sample_description": "Sample for surveillance", + "sample_received_date": "2020-12-22", + "sample_storage_conditions": "-80 C", + "sequence_file_R1_fastq": "2018086_R1.fastq.gz", + "sequence_file_R2_fastq": "2018086_R2.fastq.gz", + "sequencing_date": "2021-01-13", + "sequencing_instrument_model": "Illumina MiSeq", + "sequencing_instrument_platform": "Illumina", + "sequencing_sample_id": "2018086", + "study_alias": "RELECOV", + "study_id": "ERP137164", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N,", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "202074288", + "tax_id": "2697049", + "type": "betacoronavirus", + "virus_name": "" + }, + { + "amplicon_protocol": "ARTIC", + "amplicon_version": "", + "analysis_authors": "", + "anatomical_material": "Nasopharyngeal exudate", + "author_submitter": "", + "authors": "Iglesias-Caballero, M. Camarero, S. Molinero Calamita, M. González-Esguevillas, M. Pozo, F. Casas, I. Jiménez, P. Jiménez, M. Zaballos, A. Monzón, S. Varona, S. Juliá, M. Cuesta, I, Moreno, A.", + "biosample_accession_ENA": "", + "collecting_institution": "Hospital Clínico Universitario Virgen de la Arrixaca", + "collecting_institution_address": "Ctra. 
Madrid-Cartagena, s/n, El Palmar", + "collecting_institution_email": "", + "collecting_lab_sample_id": "11522092", + "collection_device": "", + "collector_name": "unknown", + "common_name": "Severe acute respiratory syndrome", + "design_description": "Design Description", + "diagnostic_pcr_Ct_value_1": "18", + "diagnostic_pcr_Ct_value_2": "", + "enrichment_protocol": "Amplicon", + "environmental_material": "", + "environmental_site": "Swab", + "experiment_alias": "2018102_R1.fastq.gz_2018102_R2.fastq.gz", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1": "2018102_R1.fastq.gz", + "fastq_r1_md5": "b5242d60471e5a5a97b35531dbbe8c30", + "fastq_r2": "2018102_R2.fastq.gz", + "fastq_r2_md5": "57525c5a1ec992098e652aa01b366d69", + "flowcell_kit": "", + "gene_name_1": "ORF E", + "gene_name_2": "", + "geo_loc_city": "Murcia", + "geo_loc_country": "Spain", + "geo_loc_latitude": "37.9861", + "geo_loc_longitude": "-1.1303", + "geo_loc_state": "Murcia", + "gisaid_id": "EPI_ISL_862568", + "host_age": "36", + "host_common_name": "Human", + "host_disease": "COVID-19", + "host_gender": "Male", + "host_scientific_name": "Homo Sapiens", + "host_subject_id": "2018102", + "isolate_sample_id": "2018102", + "library_kit": "", + "library_layout": "Paired", + "library_preparation_kit": "Illumina DNA Prep", + "library_selection": "PCR", + "library_source": "Viral RNA", + "library_strategy": "Amplicon", + "microbiology_lab_sample_id": "2018102", + "number_of_samples_in_run": "", + "organism": "Severe acute respiratory syndrome coronavirus 2", + "purpose_sampling": "Surveillance", + "r1_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "r2_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "rna_extraction_protocol": "RT-PCR", + "runID": "MiSeaq_GN_195", + "run_alias": "2018102_R1.fastq.gz_2018102_R2.fastq.gz", + "sample_collection_date": "2020-12-22", + 
"sample_description": "Sample for surveillance", + "sample_received_date": "2020-12-23", + "sample_storage_conditions": "-80 C", + "sequence_file_R1_fastq": "2018102_R1.fastq.gz", + "sequence_file_R2_fastq": "2018102_R2.fastq.gz", + "sequencing_date": "2021-01-13", + "sequencing_instrument_model": "Illumina MiSeq", + "sequencing_instrument_platform": "Illumina", + "sequencing_sample_id": "2018102", + "study_alias": "RELECOV", + "study_id": "ERP137164", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N,", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "202074526", + "tax_id": "2697049", + "type": "betacoronavirus", + "virus_name": "" + }, + { + "amplicon_protocol": "ARTIC", + "amplicon_version": "", + "analysis_authors": "", + "anatomical_material": "Nasopharyngeal exudate", + "author_submitter": "", + "authors": "Iglesias-Caballero, M. Camarero, S. Molinero Calamita, M. González-Esguevillas, M. Pozo, F. Casas, I. Jiménez, P. Jiménez, M. Zaballos, A. Monzón, S. Varona, S. Juliá, M. Cuesta, I. Gutiérrez, G.", + "biosample_accession_ENA": "", + "collecting_institution": "Consejería de Sanidad", + "collecting_institution_address": "Avda. 
de Francia, 4.", + "collecting_institution_email": "", + "collecting_lab_sample_id": "558955", + "collection_device": "", + "collector_name": "unknown", + "common_name": "Severe acute respiratory syndrome", + "design_description": "Design Description", + "diagnostic_pcr_Ct_value_1": "19", + "diagnostic_pcr_Ct_value_2": "", + "enrichment_protocol": "Amplicon", + "environmental_material": "", + "environmental_site": "Swab", + "experiment_alias": "2018109_R1.fastq.gz_2018109_R2.fastq.gz", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1": "2018109_R1.fastq.gz", + "fastq_r1_md5": "a9dc47d987aeac8896481029c774a8df", + "fastq_r2": "2018109_R2.fastq.gz", + "fastq_r2_md5": "ca9d99833ff7774f058fb66fe0ca2ab3", + "flowcell_kit": "", + "gene_name_1": "ORF E", + "gene_name_2": "", + "geo_loc_city": "Toledo", + "geo_loc_country": "Spain", + "geo_loc_latitude": "39.8567", + "geo_loc_longitude": "-4.0244", + "geo_loc_state": "Castilla la Mancha", + "gisaid_id": "EPI_ISL_862546", + "host_age": "39", + "host_common_name": "Human", + "host_disease": "COVID-19", + "host_gender": "Female", + "host_scientific_name": "Homo Sapiens", + "host_subject_id": "2018109", + "isolate_sample_id": "2018109", + "library_kit": "", + "library_layout": "Paired", + "library_preparation_kit": "Illumina DNA Prep", + "library_selection": "PCR", + "library_source": "Viral RNA", + "library_strategy": "Amplicon", + "microbiology_lab_sample_id": "2018109", + "number_of_samples_in_run": "", + "organism": "Severe acute respiratory syndrome coronavirus 2", + "purpose_sampling": "Surveillance", + "r1_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "r2_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "rna_extraction_protocol": "RT-PCR", + "runID": "MiSeaq_GN_195", + "run_alias": "2018109_R1.fastq.gz_2018109_R2.fastq.gz", + "sample_collection_date": "2020-12-21", + "sample_description": 
"Sample for surveillance", + "sample_received_date": "2020-12-23", + "sample_storage_conditions": "-80 C", + "sequence_file_R1_fastq": "2018109_R1.fastq.gz", + "sequence_file_R2_fastq": "2018109_R2.fastq.gz", + "sequencing_date": "2021-01-13", + "sequencing_instrument_model": "Illumina MiSeq", + "sequencing_instrument_platform": "Illumina", + "sequencing_sample_id": "2018109", + "study_alias": "RELECOV", + "study_id": "ERP137164", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N,", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "202074508", + "tax_id": "2697049", + "type": "betacoronavirus", + "virus_name": "" + } +] diff --git a/relecov_tools/example_data/3.map_module/README b/relecov_tools/example_data/3.map_module/README new file mode 100644 index 00000000..a6b8a63a --- /dev/null +++ b/relecov_tools/example_data/3.map_module/README @@ -0,0 +1,7 @@ +This module will map the metadata_lab.json to the selected DB (ENA or GISAID) +This module input is: +-j processed_metadata_lab.json +-d ENA or GISAID +-o output folder where json file will be generated + +The output of this module is processed_metadata_lab_ENA_mapped.json diff --git a/relecov_tools/example_data/3.map_module/processed_metadata_lab.json b/relecov_tools/example_data/3.map_module/processed_metadata_lab.json new file mode 100644 index 00000000..5b11ba10 --- /dev/null +++ b/relecov_tools/example_data/3.map_module/processed_metadata_lab.json @@ -0,0 +1,242 @@ +[ + { + "amplicon_protocol": "ARTIC", + "amplicon_version": "", + "analysis_authors": "", + "anatomical_material": "Nasopharyngeal exudate", + "author_submitter": "", + "authors": "Iglesias-Caballero, M. Camarero, S. Molinero Calamita, M. González-Esguevillas, M. Pozo, F. Casas, I. Jiménez, P. Jiménez, M. Zaballos, A. Monzón, S. Varona, S. Juliá, M. 
Cuesta, I, Moreno, A.", + "biosample_accession_ENA": "", + "collecting_institution": "Hospital Clínico Universitario Virgen de la Arrixaca", + "collecting_institution_address": "Ctra. Madrid-Cartagena, s/n, El Palmar", + "collecting_institution_email": "", + "collecting_lab_sample_id": "16065902", + "collection_device": "", + "collector_name": "unknown", + "common_name": "Severe acute respiratory syndrome", + "design_description": "Design Description", + "diagnostic_pcr_Ct_value_1": "18", + "diagnostic_pcr_Ct_value_2": "", + "enrichment_protocol": "Amplicon", + "environmental_material": "", + "environmental_site": "Swab", + "experiment_alias": "2018086_R1.fastq.gz_2018086_R2.fastq.gz", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1": "2018086_R1.fastq.gz", + "fastq_r1_md5": "eab8b05ef27f4f5cba5cddf6ad627de2", + "fastq_r2": "2018086_R2.fastq.gz", + "fastq_r2_md5": "d82a37aa970df2b8bf8f547ca7c18ac8", + "flowcell_kit": "", + "gene_name_1": "ORF E", + "gene_name_2": "", + "geo_loc_city": "Murcia", + "geo_loc_country": "Spain", + "geo_loc_latitude": "37.9861", + "geo_loc_longitude": "-1.1303", + "geo_loc_state": "Murcia", + "gisaid_id": "EPI_ISL_862545", + "host_age": "26", + "host_common_name": "Human", + "host_disease": "COVID-19", + "host_gender": "Male", + "host_scientific_name": "Homo Sapiens", + "host_subject_id": "2018086", + "isolate_sample_id": "2018086", + "library_kit": "", + "library_layout": "Paired", + "library_preparation_kit": "Illumina DNA Prep", + "library_selection": "PCR", + "library_source": "Viral RNA", + "library_strategy": "Amplicon", + "microbiology_lab_sample_id": "2018086", + "number_of_samples_in_run": "", + "organism": "Severe acute respiratory syndrome coronavirus 2", + "purpose_sampling": "Surveillance", + "r1_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "r2_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + 
"rna_extraction_protocol": "RT-PCR", + "runID": "MiSeaq_GN_195", + "run_alias": "2018086_R1.fastq.gz_2018086_R2.fastq.gz", + "sample_collection_date": "2020-12-20", + "sample_description": "Sample for surveillance", + "sample_received_date": "2020-12-22", + "sample_storage_conditions": "-80 C", + "sequence_file_R1_fastq": "2018086_R1.fastq.gz", + "sequence_file_R2_fastq": "2018086_R2.fastq.gz", + "sequencing_date": "2021-01-13", + "sequencing_instrument_model": "Illumina MiSeq", + "sequencing_instrument_platform": "Illumina", + "sequencing_sample_id": "2018086", + "study_alias": "RELECOV", + "study_id": "ERP137164", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N,", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "202074288", + "tax_id": "2697049", + "type": "betacoronavirus", + "virus_name": "" + }, + { + "amplicon_protocol": "ARTIC", + "amplicon_version": "", + "analysis_authors": "", + "anatomical_material": "Nasopharyngeal exudate", + "author_submitter": "", + "authors": "Iglesias-Caballero, M. Camarero, S. Molinero Calamita, M. González-Esguevillas, M. Pozo, F. Casas, I. Jiménez, P. Jiménez, M. Zaballos, A. Monzón, S. Varona, S. Juliá, M. Cuesta, I, Moreno, A.", + "biosample_accession_ENA": "", + "collecting_institution": "Hospital Clínico Universitario Virgen de la Arrixaca", + "collecting_institution_address": "Ctra. 
Madrid-Cartagena, s/n, El Palmar", + "collecting_institution_email": "", + "collecting_lab_sample_id": "11522092", + "collection_device": "", + "collector_name": "unknown", + "common_name": "Severe acute respiratory syndrome", + "design_description": "Design Description", + "diagnostic_pcr_Ct_value_1": "18", + "diagnostic_pcr_Ct_value_2": "", + "enrichment_protocol": "Amplicon", + "environmental_material": "", + "environmental_site": "Swab", + "experiment_alias": "2018102_R1.fastq.gz_2018102_R2.fastq.gz", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1": "2018102_R1.fastq.gz", + "fastq_r1_md5": "b5242d60471e5a5a97b35531dbbe8c30", + "fastq_r2": "2018102_R2.fastq.gz", + "fastq_r2_md5": "57525c5a1ec992098e652aa01b366d69", + "flowcell_kit": "", + "gene_name_1": "ORF E", + "gene_name_2": "", + "geo_loc_city": "Murcia", + "geo_loc_country": "Spain", + "geo_loc_latitude": "37.9861", + "geo_loc_longitude": "-1.1303", + "geo_loc_state": "Murcia", + "gisaid_id": "EPI_ISL_862568", + "host_age": "36", + "host_common_name": "Human", + "host_disease": "COVID-19", + "host_gender": "Male", + "host_scientific_name": "Homo Sapiens", + "host_subject_id": "2018102", + "isolate_sample_id": "2018102", + "library_kit": "", + "library_layout": "Paired", + "library_preparation_kit": "Illumina DNA Prep", + "library_selection": "PCR", + "library_source": "Viral RNA", + "library_strategy": "Amplicon", + "microbiology_lab_sample_id": "2018102", + "number_of_samples_in_run": "", + "organism": "Severe acute respiratory syndrome coronavirus 2", + "purpose_sampling": "Surveillance", + "r1_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "r2_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "rna_extraction_protocol": "RT-PCR", + "runID": "MiSeaq_GN_195", + "run_alias": "2018102_R1.fastq.gz_2018102_R2.fastq.gz", + "sample_collection_date": "2020-12-22", + 
"sample_description": "Sample for surveillance", + "sample_received_date": "2020-12-23", + "sample_storage_conditions": "-80 C", + "sequence_file_R1_fastq": "2018102_R1.fastq.gz", + "sequence_file_R2_fastq": "2018102_R2.fastq.gz", + "sequencing_date": "2021-01-13", + "sequencing_instrument_model": "Illumina MiSeq", + "sequencing_instrument_platform": "Illumina", + "sequencing_sample_id": "2018102", + "study_alias": "RELECOV", + "study_id": "ERP137164", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N,", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "202074526", + "tax_id": "2697049", + "type": "betacoronavirus", + "virus_name": "" + }, + { + "amplicon_protocol": "ARTIC", + "amplicon_version": "", + "analysis_authors": "", + "anatomical_material": "Nasopharyngeal exudate", + "author_submitter": "", + "authors": "Iglesias-Caballero, M. Camarero, S. Molinero Calamita, M. González-Esguevillas, M. Pozo, F. Casas, I. Jiménez, P. Jiménez, M. Zaballos, A. Monzón, S. Varona, S. Juliá, M. Cuesta, I. Gutiérrez, G.", + "biosample_accession_ENA": "", + "collecting_institution": "Consejería de Sanidad", + "collecting_institution_address": "Avda. 
de Francia, 4.", + "collecting_institution_email": "", + "collecting_lab_sample_id": "558955", + "collection_device": "", + "collector_name": "unknown", + "common_name": "Severe acute respiratory syndrome", + "design_description": "Design Description", + "diagnostic_pcr_Ct_value_1": "19", + "diagnostic_pcr_Ct_value_2": "", + "enrichment_protocol": "Amplicon", + "environmental_material": "", + "environmental_site": "Swab", + "experiment_alias": "2018109_R1.fastq.gz_2018109_R2.fastq.gz", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1": "2018109_R1.fastq.gz", + "fastq_r1_md5": "a9dc47d987aeac8896481029c774a8df", + "fastq_r2": "2018109_R2.fastq.gz", + "fastq_r2_md5": "ca9d99833ff7774f058fb66fe0ca2ab3", + "flowcell_kit": "", + "gene_name_1": "ORF E", + "gene_name_2": "", + "geo_loc_city": "Toledo", + "geo_loc_country": "Spain", + "geo_loc_latitude": "39.8567", + "geo_loc_longitude": "-4.0244", + "geo_loc_state": "Castilla la Mancha", + "gisaid_id": "EPI_ISL_862546", + "host_age": "39", + "host_common_name": "Human", + "host_disease": "COVID-19", + "host_gender": "Female", + "host_scientific_name": "Homo Sapiens", + "host_subject_id": "2018109", + "isolate_sample_id": "2018109", + "library_kit": "", + "library_layout": "Paired", + "library_preparation_kit": "Illumina DNA Prep", + "library_selection": "PCR", + "library_source": "Viral RNA", + "library_strategy": "Amplicon", + "microbiology_lab_sample_id": "2018109", + "number_of_samples_in_run": "", + "organism": "Severe acute respiratory syndrome coronavirus 2", + "purpose_sampling": "Surveillance", + "r1_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "r2_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "rna_extraction_protocol": "RT-PCR", + "runID": "MiSeaq_GN_195", + "run_alias": "2018109_R1.fastq.gz_2018109_R2.fastq.gz", + "sample_collection_date": "2020-12-21", + "sample_description": 
"Sample for surveillance", + "sample_received_date": "2020-12-23", + "sample_storage_conditions": "-80 C", + "sequence_file_R1_fastq": "2018109_R1.fastq.gz", + "sequence_file_R2_fastq": "2018109_R2.fastq.gz", + "sequencing_date": "2021-01-13", + "sequencing_instrument_model": "Illumina MiSeq", + "sequencing_instrument_platform": "Illumina", + "sequencing_sample_id": "2018109", + "study_alias": "RELECOV", + "study_id": "ERP137164", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N,", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "202074508", + "tax_id": "2697049", + "type": "betacoronavirus", + "virus_name": "" + } +] diff --git a/relecov_tools/example_data/4.ena_upload_module/README b/relecov_tools/example_data/4.ena_upload_module/README new file mode 100644 index 00000000..bf9201b4 --- /dev/null +++ b/relecov_tools/example_data/4.ena_upload_module/README @@ -0,0 +1,8 @@ +This module will upload fastq.gz and metadata associated to ENA +This module input: +- e processed_metadata_mapped_ENA.json +- u ENA Webin user +- p ENA Webin password (if special characters add % in front of the special character) +- c center name (e.g Instituto de Salud Carlos III) +- o output folder where accession xml files will be generated +--dev (This option for development server TESTS) diff --git a/relecov_tools/example_data/4.ena_upload_module/processed_METADATA_LAB_TEST_2022_04_25_00_03_GISAID_mapped.json b/relecov_tools/example_data/4.ena_upload_module/processed_METADATA_LAB_TEST_2022_04_25_00_03_GISAID_mapped.json new file mode 100644 index 00000000..edf69533 --- /dev/null +++ b/relecov_tools/example_data/4.ena_upload_module/processed_METADATA_LAB_TEST_2022_04_25_00_03_GISAID_mapped.json @@ -0,0 +1,152 @@ +[ + { + "authors": "Inmaculada Casas,Maria de la Montaña", + 
"collecting_address": "P.º Isabel la Católica, 1-3", + "collection_date": "2021/04/13", + "gender": "Mujer", + "host": "Homo Sapiens", + "location": "Spain", + "patient_age": 55, + "sequencing_technology": "Illumina MiSeq", + "specimen_source": "Nasopharynx aspirate", + "submitter": "EPI_ISL_11222687", + "submitting_lab": "Instituto de Salud Carlos III ", + "type": "betacoronavirus", + "virus_name": "hCoV-19/Spain/CT-HUJT-RB31776/2022" + }, + { + "authors": "Inmaculada Casas,Maria de la Montaña", + "collecting_address": "P.º Isabel la Católica, 1-3", + "collection_date": "2021/03/31", + "gender": "Hombre", + "host": "Homo Sapiens", + "location": "Spain", + "patient_age": 59, + "sequencing_technology": "Illumina MiSeq", + "specimen_source": "Nasopharynx aspirate", + "submitter": "EPI_ISL_11222688", + "submitting_lab": "Instituto de Salud Carlos III ", + "type": "betacoronavirus", + "virus_name": "hCoV-19/Spain/CT-HUJT-RB31776/2023" + }, + { + "authors": "Inmaculada Casas,Maria de la Montaña", + "collecting_address": "P.º Isabel la Católica, 1-3", + "collection_date": "2022/01/13", + "gender": "Mujer", + "host": "Homo Sapiens", + "location": "Spain", + "patient_age": 8, + "sequencing_technology": "Illumina MiSeq", + "specimen_source": "Nasopharynx aspirate", + "submitter": "EPI_ISL_11222689", + "submitting_lab": "Instituto de Salud Carlos III ", + "type": "betacoronavirus", + "virus_name": "hCoV-19/Spain/CT-HUJT-RB31776/2024" + }, + { + "authors": "Inmaculada Casas,Maria de la Montaña", + "collecting_address": "P.º Isabel la Católica, 1-3", + "collection_date": "2022/01/11", + "gender": "Mujer", + "host": "Homo Sapiens", + "location": "Spain", + "patient_age": 9, + "sequencing_technology": "Illumina MiSeq", + "specimen_source": "Nasopharynx aspirate", + "submitter": "EPI_ISL_11222690", + "submitting_lab": "Instituto de Salud Carlos III ", + "type": "betacoronavirus", + "virus_name": "hCoV-19/Spain/CT-HUJT-RB31776/2025" + }, + { + "authors": "Inmaculada Casas,Maria de 
la Montaña", + "collecting_address": "P.º Isabel la Católica, 1-3", + "collection_date": "2022/01/10", + "gender": "Hombre", + "host": "Homo Sapiens", + "location": "Spain", + "patient_age": 31, + "sequencing_technology": "Illumina MiSeq", + "specimen_source": "Nasopharynx aspirate", + "submitter": "EPI_ISL_11222691", + "submitting_lab": "Instituto de Salud Carlos III ", + "type": "betacoronavirus", + "virus_name": "hCoV-19/Spain/CT-HUJT-RB31776/2026" + }, + { + "authors": "Inmaculada Casas,Maria de la Montaña", + "collecting_address": "P.º Isabel la Católica, 1-3", + "collection_date": "2022/01/12", + "gender": "Mujer", + "host": "Homo Sapiens", + "location": "Spain", + "patient_age": 27, + "sequencing_technology": "Illumina MiSeq", + "specimen_source": "Nasopharynx aspirate", + "submitter": "EPI_ISL_11222692", + "submitting_lab": "Instituto de Salud Carlos III ", + "type": "betacoronavirus", + "virus_name": "hCoV-19/Spain/CT-HUJT-RB31776/2027" + }, + { + "authors": "Inmaculada Casas,Maria de la Montaña", + "collecting_address": "S. Juan Bosco, 15", + "collection_date": "2022/01/02", + "gender": "Mujer", + "host": "Homo Sapiens", + "location": "Spain", + "patient_age": 42, + "sequencing_technology": "Illumina MiSeq", + "specimen_source": "Nasopharynx aspirate", + "submitter": "EPI_ISL_11222693", + "submitting_lab": "Instituto de Salud Carlos III ", + "type": "betacoronavirus", + "virus_name": "hCoV-19/Spain/CT-HUJT-RB31776/2028" + }, + { + "authors": "Inmaculada Casas,Maria de la Montaña", + "collecting_address": "S. 
Juan Bosco, 15", + "collection_date": "2022/01/23", + "gender": "Hombre", + "host": "Homo Sapiens", + "location": "Spain", + "patient_age": 40, + "sequencing_technology": "Illumina MiSeq", + "specimen_source": "Nasopharynx aspirate", + "submitter": "EPI_ISL_11222694", + "submitting_lab": "Instituto de Salud Carlos III ", + "type": "betacoronavirus", + "virus_name": "hCoV-19/Spain/CT-HUJT-RB31776/2029" + }, + { + "authors": "Inmaculada Casas,Maria de la Montaña", + "collecting_address": "S. Juan Bosco, 15", + "collection_date": "2022/01/18", + "gender": "Hombre", + "host": "Homo Sapiens", + "location": "Spain", + "patient_age": 53, + "sequencing_technology": "Illumina MiSeq", + "specimen_source": "Nasopharynx aspirate", + "submitter": "EPI_ISL_11222695", + "submitting_lab": "Instituto de Salud Carlos III ", + "type": "betacoronavirus", + "virus_name": "hCoV-19/Spain/CT-HUJT-RB31776/2030" + }, + { + "authors": "Inmaculada Casas,Maria de la Montaña", + "collecting_address": "S. Juan Bosco, 15", + "collection_date": "2022/01/19", + "gender": "Male", + "host": "Homo Sapiens", + "location": "Spain", + "patient_age": 96, + "sequencing_technology": "Illumina MiSeq", + "specimen_source": "Nasopharynx aspirate", + "submitter": "EPI_ISL_11222696", + "submitting_lab": "Instituto de Salud Carlos III ", + "type": "betacoronavirus", + "virus_name": "hCoV-19/Spain/CT-HUJT-RB31776/2031" + } +] \ No newline at end of file diff --git a/relecov_tools/example_data/4.ena_upload_module/processed_METADATA_LAB_TEST_2022_04_28_21_05_ENA_mapped.json b/relecov_tools/example_data/4.ena_upload_module/processed_METADATA_LAB_TEST_2022_04_28_21_05_ENA_mapped.json new file mode 100644 index 00000000..684be7bc --- /dev/null +++ b/relecov_tools/example_data/4.ena_upload_module/processed_METADATA_LAB_TEST_2022_04_28_21_05_ENA_mapped.json @@ -0,0 +1,452 @@ +[ + { + "collecting_institution": "HU Virgen de la Arrixaca", + "collection_date": "2021-04-13", + "collector_name": "Maria de la Montaña", + 
"common_name": "PEPITO", + "design_description": "Design Description", + "experiment_alias": "", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1_md5": "f2facbd1d6c7e0972074ced31ccbfd8e", + "fastq_r2_md5": "fd9948be8da7a38b0f80659dfc21aeea", + "file_type": "fastq", + "geographic location (latitude)": "37.9861", + "geographic location (longitude)": "-1.1303", + "geographic_location_(country_and/or_sea)": "Spain", + "host_common_name": "Human", + "host_scientific_name": "Homo Sapiens", + "host_sex": "Female", + "instrument_model": "Illumina MiSeq", + "isolate": "1197860", + "isolation source host-associated": "Nasopharynx aspirate", + "isolation source non-host-associated": "Other", + "library_layout": "SINGLE", + "library_name": "NEBNext® Fast DNA Library Prep Set for Ion Torrent™", + "library_selection": "RT-PCR", + "library_source": "RNA", + "library_strategy": "WGS", + "platform": "Illumina", + "purpose_sampling": "Whole Genome Sequencing", + "r1_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R1_001.fastq.gz", + "r2_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R2_001.fastq.gz", + "receipt_date": "2021-05-13", + "run_alias": "", + "sample_description": "Sample for surveillance", + "sample_name": "1197860", + "sample_storage_conditions": " -80ºC", + "sample_title": "1197860", + "scientific_name": "Severe acute respiratory syndrome coronavirus 2", + "sequence_file_R1_fastq": "ABC123_S1_L001_R1_001.fastq.gz", + "sequence_file_R2_fastq": "ABC123_S1_L001_R2_001.fastq.gz", + "study_abstract": "RELECOV Spanish Network for genomics surveillance", + "study_alias": "RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "Whole Genome Sequencing", + "taxon_id": "2697049" + }, + { + "collecting_institution": "HU Virgen de la Arrixaca", + "collection_date": "2021-03-31", + "collector_name": "Maria de la Montaña", + "common_name": "PEPITO", + "design_description": 
"Design Description", + "experiment_alias": "", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1_md5": "9a460a812086516536c1a1023ab5f7fe", + "fastq_r2_md5": "41cd79b8a167d85eb06309207fd120cf", + "file_type": "fastq", + "geographic location (latitude)": "37.9861", + "geographic location (longitude)": "-1.1303", + "geographic_location_(country_and/or_sea)": "Spain", + "host_common_name": "Human", + "host_scientific_name": "Homo Sapiens", + "host_sex": "Male", + "instrument_model": "Illumina MiSeq", + "isolate": "1197824", + "isolation source host-associated": "Nasopharynx aspirate", + "isolation source non-host-associated": "Other", + "library_layout": "SINGLE", + "library_name": "NEBNext® Fast DNA Library Prep Set for Ion Torrent™", + "library_selection": "RT-PCR", + "library_source": "RNA", + "library_strategy": "WGS", + "platform": "Illumina", + "purpose_sampling": "Whole Genome Sequencing", + "r1_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R1_002.fastq.gz", + "r2_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R2_002.fastq.gz", + "receipt_date": "2021-05-13", + "run_alias": "", + "sample_description": "Sample for surveillance", + "sample_name": "1197824", + "sample_storage_conditions": " -80ºC", + "sample_title": "1197824", + "scientific_name": "Severe acute respiratory syndrome coronavirus 2", + "sequence_file_R1_fastq": "ABC123_S1_L001_R1_002.fastq.gz", + "sequence_file_R2_fastq": "ABC123_S1_L001_R2_002.fastq.gz", + "study_abstract": "RELECOV Spanish Network for genomics surveillance", + "study_alias": "RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "Whole Genome Sequencing", + "taxon_id": "2697049" + }, + { + "collecting_institution": "HU Virgen de la Arrixaca", + "collection_date": "2022-01-13", + "collector_name": "Maria de la Montaña", + "common_name": "PEPITO", + "design_description": "Design Description", + "experiment_alias": "", + 
"experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1_md5": "c78fcb585a0f44734ce8c2aebf3eff33", + "fastq_r2_md5": "8361d5a6b50279fbc8c93226ecc3568a", + "file_type": "fastq", + "geographic location (latitude)": "37.9861", + "geographic location (longitude)": "-1.1303", + "geographic_location_(country_and/or_sea)": "Spain", + "host_common_name": "Human", + "host_scientific_name": "Homo Sapiens", + "host_sex": "Female", + "instrument_model": "Illumina MiSeq", + "isolate": "1197767", + "isolation source host-associated": "Nasopharynx aspirate", + "isolation source non-host-associated": "Other", + "library_layout": "SINGLE", + "library_name": "NEBNext® Fast DNA Library Prep Set for Ion Torrent™", + "library_selection": "RT-PCR", + "library_source": "RNA", + "library_strategy": "WGS", + "platform": "Illumina", + "purpose_sampling": "Whole Genome Sequencing", + "r1_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R1_003.fastq.gz", + "r2_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R2_003.fastq.gz", + "receipt_date": "2022-01-18", + "run_alias": "", + "sample_description": "Sample for surveillance", + "sample_name": "1197767", + "sample_storage_conditions": " -80ºC", + "sample_title": "1197767", + "scientific_name": "Severe acute respiratory syndrome coronavirus 2", + "sequence_file_R1_fastq": "ABC123_S1_L001_R1_003.fastq.gz", + "sequence_file_R2_fastq": "ABC123_S1_L001_R2_003.fastq.gz", + "study_abstract": "RELECOV Spanish Network for genomics surveillance", + "study_alias": "RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "Whole Genome Sequencing", + "taxon_id": "2697049" + }, + { + "collecting_institution": "HU Virgen de la Arrixaca", + "collection_date": "2022-01-11", + "collector_name": "Maria de la Montaña", + "common_name": "PEPITO", + "design_description": "Design Description", + "experiment_alias": "", + "experiment_title": "Example project for ENA 
submission RELECOV", + "fastq_r1_md5": "aa557fc40d3a9e16f470ac595fb5eb3e", + "fastq_r2_md5": "a2dd5e08b75815dab892e392cb8468fc", + "file_type": "fastq", + "geographic location (latitude)": "37.9861", + "geographic location (longitude)": "-1.1303", + "geographic_location_(country_and/or_sea)": "Spain", + "host_common_name": "Human", + "host_scientific_name": "Homo Sapiens", + "host_sex": "Male", + "instrument_model": "Illumina MiSeq", + "isolate": "8328199", + "isolation source host-associated": "Nasopharynx aspirate", + "isolation source non-host-associated": "Other", + "library_layout": "SINGLE", + "library_name": "NEBNext® Fast DNA Library Prep Set for Ion Torrent™", + "library_selection": "RT-PCR", + "library_source": "RNA", + "library_strategy": "WGS", + "platform": "Illumina", + "purpose_sampling": "Whole Genome Sequencing", + "r1_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R1_004.fastq.gz", + "r2_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R2_004.fastq.gz", + "receipt_date": "2022-01-20", + "run_alias": "", + "sample_description": "Sample for surveillance", + "sample_name": "8328199", + "sample_storage_conditions": " -80ºC", + "sample_title": "8328199", + "scientific_name": "Severe acute respiratory syndrome coronavirus 2", + "sequence_file_R1_fastq": "ABC123_S1_L001_R1_004.fastq.gz", + "sequence_file_R2_fastq": "ABC123_S1_L001_R2_004.fastq.gz", + "study_abstract": "RELECOV Spanish Network for genomics surveillance", + "study_alias": "RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "Whole Genome Sequencing", + "taxon_id": "2697049" + }, + { + "collecting_institution": "HU de Donostia", + "collection_date": "2022-01-10", + "collector_name": "Maria de la Montaña", + "common_name": "PEPITO", + "design_description": "Design Description", + "experiment_alias": "", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1_md5": 
"b0a13671031ef5aa7e0e6559a925a688", + "fastq_r2_md5": "8c420a603c81823e68436003825b1e1f", + "file_type": "fastq", + "geographic location (latitude)": "43.3200", + "geographic location (longitude)": "-1.9800", + "geographic_location_(country_and/or_sea)": "Spain", + "host_common_name": "Human", + "host_scientific_name": "Homo Sapiens", + "host_sex": "Male", + "instrument_model": "Illumina MiSeq", + "isolate": "1197677", + "isolation source host-associated": "Nasopharynx aspirate", + "isolation source non-host-associated": "Other", + "library_layout": "SINGLE", + "library_name": "NEBNext® Fast DNA Library Prep Set for Ion Torrent™", + "library_selection": "RT-PCR", + "library_source": "RNA", + "library_strategy": "WGS", + "platform": "Illumina", + "purpose_sampling": "Whole Genome Sequencing", + "r1_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R1_005.fastq.gz", + "r2_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R2_005.fastq.gz", + "receipt_date": "2022-01-20", + "run_alias": "", + "sample_description": "Sample for surveillance", + "sample_name": "1197677", + "sample_storage_conditions": " -80ºC", + "sample_title": "1197677", + "scientific_name": "Severe acute respiratory syndrome coronavirus 2", + "sequence_file_R1_fastq": "ABC123_S1_L001_R1_005.fastq.gz", + "sequence_file_R2_fastq": "ABC123_S1_L001_R2_005.fastq.gz", + "study_abstract": "RELECOV Spanish Network for genomics surveillance", + "study_alias": "RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "Whole Genome Sequencing", + "taxon_id": "2697049" + }, + { + "collecting_institution": "HU de Donostia", + "collection_date": "2022-01-12", + "collector_name": "Maria de la Montaña", + "common_name": "PEPITO", + "design_description": "Design Description", + "experiment_alias": "", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1_md5": "13e01498a0526303f5e32f3eef077b5a", + "fastq_r2_md5": 
"61d1f52e84237320d97fd7fc2da56116", + "file_type": "fastq", + "geographic location (latitude)": "43.3200", + "geographic location (longitude)": "-1.9800", + "geographic_location_(country_and/or_sea)": "Spain", + "host_common_name": "Human", + "host_scientific_name": "Homo Sapiens", + "host_sex": "Male", + "instrument_model": "Illumina MiSeq", + "isolate": "1197678", + "isolation source host-associated": "Nasopharynx aspirate", + "isolation source non-host-associated": "Other", + "library_layout": "SINGLE", + "library_name": "NEBNext® Fast DNA Library Prep Set for Ion Torrent™", + "library_selection": "RT-PCR", + "library_source": "RNA", + "library_strategy": "WGS", + "platform": "Illumina", + "purpose_sampling": "Whole Genome Sequencing", + "r1_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R1_006.fastq.gz", + "r2_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R2_006.fastq.gz", + "receipt_date": "2022-01-20", + "run_alias": "", + "sample_description": "Sample for surveillance", + "sample_name": "1197678", + "sample_storage_conditions": " -80ºC", + "sample_title": "1197678", + "scientific_name": "Severe acute respiratory syndrome coronavirus 2", + "sequence_file_R1_fastq": "ABC123_S1_L001_R1_006.fastq.gz", + "sequence_file_R2_fastq": "ABC123_S1_L001_R2_006.fastq.gz", + "study_abstract": "RELECOV Spanish Network for genomics surveillance", + "study_alias": "RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "Whole Genome Sequencing", + "taxon_id": "2697049" + }, + { + "collecting_institution": "HU de Donostia", + "collection_date": "2022-01-02", + "collector_name": "Maria de la Montaña", + "common_name": "PEPITO", + "design_description": "Design Description", + "experiment_alias": "", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1_md5": "f5b60a93b39a0b98781efb434d577a57", + "fastq_r2_md5": "e19f883b4e05045895f990c2296009a8", + "file_type": "fastq", + 
"geographic location (latitude)": "43.3200", + "geographic location (longitude)": "-1.9800", + "geographic_location_(country_and/or_sea)": "Spain", + "host_common_name": "Human", + "host_scientific_name": "Homo Sapiens", + "host_sex": "Male", + "instrument_model": "Illumina MiSeq", + "isolate": "1197737", + "isolation source host-associated": "Nasopharynx aspirate", + "isolation source non-host-associated": "Other", + "library_layout": "SINGLE", + "library_name": "NEBNext® Fast DNA Library Prep Set for Ion Torrent™", + "library_selection": "RT-PCR", + "library_source": "RNA", + "library_strategy": "WGS", + "platform": "Illumina", + "purpose_sampling": "Whole Genome Sequencing", + "r1_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R1_007.fastq.gz", + "r2_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R2_007.fastq.gz", + "receipt_date": "2022-01-20", + "run_alias": "", + "sample_description": "Sample for surveillance", + "sample_name": "1197737", + "sample_storage_conditions": " -80ºC", + "sample_title": "1197737", + "scientific_name": "Severe acute respiratory syndrome coronavirus 2", + "sequence_file_R1_fastq": "ABC123_S1_L001_R1_007.fastq.gz", + "sequence_file_R2_fastq": "ABC123_S1_L001_R2_007.fastq.gz", + "study_abstract": "RELECOV Spanish Network for genomics surveillance", + "study_alias": "RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "Whole Genome Sequencing", + "taxon_id": "2697049" + }, + { + "collecting_institution": "HU de Donostia", + "collection_date": "2022-01-23", + "collector_name": "Maria de la Montaña", + "common_name": "PEPITO", + "design_description": "Design Description", + "experiment_alias": "", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1_md5": "83043c429b0ab94375236cf7f51860f0", + "fastq_r2_md5": "c00859a1f83e2364cca953fb25da3cf2", + "file_type": "fastq", + "geographic location (latitude)": "43.3200", + "geographic 
location (longitude)": "-1.9800", + "geographic_location_(country_and/or_sea)": "Spain", + "host_common_name": "Human", + "host_scientific_name": "Homo Sapiens", + "host_sex": "Female", + "instrument_model": "Illumina MiSeq", + "isolate": "1197663", + "isolation source host-associated": "Nasopharynx aspirate", + "isolation source non-host-associated": "Other", + "library_layout": "SINGLE", + "library_name": "NEBNext® Fast DNA Library Prep Set for Ion Torrent™", + "library_selection": "RT-PCR", + "library_source": "RNA", + "library_strategy": "WGS", + "platform": "Illumina", + "purpose_sampling": "Whole Genome Sequencing", + "r1_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R1_008.fastq.gz", + "r2_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R2_008.fastq.gz", + "receipt_date": "2022-01-25", + "run_alias": "", + "sample_description": "Sample for surveillance", + "sample_name": "1197663", + "sample_storage_conditions": " -80ºC", + "sample_title": "1197663", + "scientific_name": "Severe acute respiratory syndrome coronavirus 2", + "sequence_file_R1_fastq": "ABC123_S1_L001_R1_008.fastq.gz", + "sequence_file_R2_fastq": "ABC123_S1_L001_R2_008.fastq.gz", + "study_abstract": "RELECOV Spanish Network for genomics surveillance", + "study_alias": "RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "Whole Genome Sequencing", + "taxon_id": "2697049" + }, + { + "collecting_institution": "HU de Donostia", + "collection_date": "2022-01-18", + "collector_name": "Maria de la Montaña", + "common_name": "PEPITO", + "design_description": "Design Description", + "experiment_alias": "", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1_md5": "bc720a024263a518f6a6cd9301b308ac", + "fastq_r2_md5": "6bf6d95b59c950691effcaf8a0e83068", + "file_type": "fastq", + "geographic location (latitude)": "43.3200", + "geographic location (longitude)": "-1.9800", + 
"geographic_location_(country_and/or_sea)": "Spain", + "host_common_name": "Human", + "host_scientific_name": "Homo Sapiens", + "host_sex": "Male", + "instrument_model": "Illumina MiSeq", + "isolate": "1197689", + "isolation source host-associated": "Nasopharynx aspirate", + "isolation source non-host-associated": "Other", + "library_layout": "SINGLE", + "library_name": "NEBNext® Fast DNA Library Prep Set for Ion Torrent™", + "library_selection": "RT-PCR", + "library_source": "RNA", + "library_strategy": "WGS", + "platform": "Illumina", + "purpose_sampling": "Whole Genome Sequencing", + "r1_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R1_009.fastq.gz", + "r2_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R2_009.fastq.gz", + "receipt_date": "2022-01-26", + "run_alias": "", + "sample_description": "Sample for surveillance", + "sample_name": "1197689", + "sample_storage_conditions": " -80ºC", + "sample_title": "1197689", + "scientific_name": "Severe acute respiratory syndrome coronavirus 2", + "sequence_file_R1_fastq": "ABC123_S1_L001_R1_009.fastq.gz", + "sequence_file_R2_fastq": "ABC123_S1_L001_R2_009.fastq.gz", + "study_abstract": "RELECOV Spanish Network for genomics surveillance", + "study_alias": "RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "Whole Genome Sequencing", + "taxon_id": "2697049" + }, + { + "collecting_institution": "HU de Donostia", + "collection_date": "2022-01-19", + "collector_name": "Maria de la Montaña", + "common_name": "PEPITO", + "design_description": "Design Description", + "experiment_alias": "", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1_md5": "a86f4fa0c92d76df34fd000a29d1cdd3", + "fastq_r2_md5": "8c4309dc00d725496c48bee6da0e4bbf", + "file_type": "fastq", + "geographic location (latitude)": "43.3200", + "geographic location (longitude)": "-1.9800", + "geographic_location_(country_and/or_sea)": "Spain", + 
"host_common_name": "Human", + "host_scientific_name": "Homo Sapiens", + "host_sex": "Female", + "instrument_model": "Illumina MiSeq", + "isolate": "1197685", + "isolation source host-associated": "Nasopharynx aspirate", + "isolation source non-host-associated": "Other", + "library_layout": "SINGLE", + "library_name": "NEBNext® Fast DNA Library Prep Set for Ion Torrent™", + "library_selection": "RT-PCR", + "library_source": "RNA", + "library_strategy": "WGS", + "platform": "Illumina", + "purpose_sampling": "Whole Genome Sequencing", + "r1_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R1_0010.fastq.gz", + "r2_fastq_filepath": "/tmp/relecov/COD_test_01/20220422ABC123_S1_L001_R2_0010.fastq.gz", + "receipt_date": "2022-01-26", + "run_alias": "", + "sample_description": "Sample for surveillance", + "sample_name": "1197685", + "sample_storage_conditions": " -80ºC", + "sample_title": "1197685", + "scientific_name": "Severe acute respiratory syndrome coronavirus 2", + "sequence_file_R1_fastq": "ABC123_S1_L001_R1_0010.fastq.gz", + "sequence_file_R2_fastq": "ABC123_S1_L001_R2_0010.fastq.gz", + "study_abstract": "RELECOV Spanish Network for genomics surveillance", + "study_alias": "RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "Whole Genome Sequencing", + "taxon_id": "2697049" + } +] \ No newline at end of file diff --git a/relecov_tools/example_data/5.read_bioinfo_metadata/README b/relecov_tools/example_data/5.read_bioinfo_metadata/README new file mode 100644 index 00000000..6f19d7d5 --- /dev/null +++ b/relecov_tools/example_data/5.read_bioinfo_metadata/README @@ -0,0 +1,16 @@ + +This module will generate a file named bioinfo_metadata.json with information to be uploaded to RELECOV DB +The inputs for this module are the following: +-m metadata_lab.xlsx +-i Input folder containing all the fastq.gz, .md5, .vcf.gz, .consensus.fa and pangolin.csv files for each R1 and R2 +-o Output folder where the bioinfo_metadata.json
will be generated +-p Exact name of the mapping_illumina.csv file (It should include the date at the end of the file name as follows mapping_illumina_20220624.csv) + +The input folder must include the following files named as stated here: +- summary_variants_metrics_mqc.csv +- mapping_illumina_XXXXXXXX.csv +- variants_long_table.csv +- consensus_genome_length.csv +- software_versions.yml +- pangolin_version.csv +- md5_check_file.csv diff --git a/relecov_tools/example_data/5.read_bioinfo_metadata/consensus_genome_length.csv b/relecov_tools/example_data/5.read_bioinfo_metadata/consensus_genome_length.csv new file mode 100755 index 00000000..7b348648 --- /dev/null +++ b/relecov_tools/example_data/5.read_bioinfo_metadata/consensus_genome_length.csv @@ -0,0 +1,91 @@ +30416 +30416 +30422 +30422 +30422 +30422 +30422 +30422 +30422 +30422 +30422 +30419 +30422 +30422 +30422 +30397 +30407 +30422 +30422 +30416 +30407 +30397 +30407 +30407 +30422 +30413 +30407 +30407 +30407 +30407 +30407 +30404 +30407 +30416 +30407 +30407 +30407 +30407 +30422 +30394 +30407 +30407 +30407 +30413 +30407 +30407 +30407 +30415 +30407 +30407 +30407 +30407 +30407 +30413 +30407 +30407 +30415 +30407 +30407 +30407 +30407 +30407 +30407 +30407 +30407 +30413 +30407 +30407 +30416 +30416 +30397 +30407 +30407 +30407 +30413 +30407 +30407 +30409 +30407 +30413 +30407 +30406 +30407 +30401 +30416 +30407 +30407 +30407 +30421 +30407 +30407 diff --git a/relecov_tools/example_data/5.read_bioinfo_metadata/lab_metadata_20220208.xlsx b/relecov_tools/example_data/5.read_bioinfo_metadata/lab_metadata_20220208.xlsx new file mode 100755 index 00000000..7725cae7 Binary files /dev/null and b/relecov_tools/example_data/5.read_bioinfo_metadata/lab_metadata_20220208.xlsx differ diff --git a/relecov_tools/example_data/5.read_bioinfo_metadata/mapping_illumina.tab b/relecov_tools/example_data/5.read_bioinfo_metadata/mapping_illumina.tab new file mode 100755 index 00000000..29efa472 --- /dev/null +++ 
b/relecov_tools/example_data/5.read_bioinfo_metadata/mapping_illumina.tab @@ -0,0 +1,97 @@ +run user host Virussequence sample totalreads readshostR1 readshost %readshost readsvirus %readsvirus unmappedreads %unmapedreads medianDPcoveragevirus Coverage>10x(%) Variantsinconsensusx10 MissenseVariants %Ns10x Lineage +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214821 285604 1545 3090 1,08 281428 98,54 1086 0,380247 287 92,0 19 9 8,44 B.1.177 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214822 306868 97177 194354 63,33 61642 20,09 50872 16,5778 6 41,0 8 3 58,83 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214823 196898 57130 114260 58,03 204 0,1 82434 41,8663 NA NA 0 0 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214824 248 41 82 33,06 138 55,65 28 11,2903 NA NA 0 0 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214825 277306 747 1494 0,54 271052 97,74 4760 1,71652 217 91,0 17 9 9,10 B.1.177 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214826 337052 415 830 0,25 334627 99,28 1595 0,473221 239 92,0 17 9 7,87 B.1.177 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214827 246530 27978 55956 22,70 79361 32,19 111213 45,1113 10 50,0 15 7 49,72 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214828 312802 28396 56792 18,16 218183 69,75 37827 12,093 33 68,0 14 7 32,31 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214829 379598 3419 6838 1,80 370149 97,51 2611 0,687833 249 89,0 20 8 10,65 B.1.177 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214830 354342 25175 50350 14,21 300202 84,72 3790 1,06959 169 88,0 22 10 12,15 B.1.177 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214831 253440 107619 215238 84,93 11221 4,43 26981 10,6459 NA 22,0 3 2 77,51 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214832 5052 1844 3688 73,00 364 7,21 1000 19,7941 NA NA 0 0 
+MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214833 326700 61771 123542 37,82 88067 26,96 115091 35,2283 16 55,00000000000001 12 5 44,94 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214834 384218 36429 72858 18,96 296545 77,18 14815 3,85588 85 83,0 17 8 17,29 B.1.177 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214835 347078 42041 84082 24,23 162912 46,94 100084 28,8362 32 67,0 17 7 32,56 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214836 362840 74483 148966 41,06 115115 31,73 98759 27,2183 9 49,0 8 4 50,53 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214837 400430 56534 113068 28,24 223768 55,88 63594 15,8814 52 75,0 17 7 25,35 B.1.177 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 214838 113674 25661 51322 45,15 6240 5,49 56112 49,3622 1 27,0 5 3 73,34 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220338 214610 37082 74164 34,56 106457 49,6 33989 15,8376 28 68,0 35 23 31,88 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220339 310682 6731 13462 4,33 295685 95,17 1535 0,494074 401 91,0 49 35 8,77 BA.1.17 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220407 188574 72874 145748 77,29 4001 2,12 38825 20,5887 NA 20,0 9 7 80,18 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220433 69680 7033 14066 20,19 16261 23,34 39353 56,4768 3 36,0 16 13 64,03 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220518 87796 15067 30134 34,32 27222 31,01 30440 34,6713 2 37,0 18 13 63,28 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220529 243588 24320 48640 19,97 192650 79,09 2298 0,943396 86 79,0 42 30 21,15 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220530 258694 20191 40382 15,61 164599 63,63 53713 20,7631 27 67,0 38 27 32,76 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis 
NC_045512.2 220531 322478 7510 15020 4,66 306896 95,17 562 0,174275 317 90,0 57 44 9,95 BA.1.18 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220532 240540 44711 89422 37,18 145549 60,51 5569 2,31521 29 68,0 35 25 31,52 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220533 146454 67492 134984 92,17 7609 5,2 3861 2,63632 NA 15,0 7 5 85,29 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220534 383046 9624 19248 5,02 361544 94,39 2254 0,588441 505 91,0 58 43 8,91 BA.1.17 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220535 372112 59321 118642 31,88 247472 66,5 5998 1,61188 39 72,0 36 25 28,36 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220536 342222 69820 139640 40,80 196456 57,41 6126 1,79007 27 66,0 35 23 33,56 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220537 296640 7817 15634 5,27 278999 94,05 2007 0,676578 171 85,0 51 36 14,74 BA.1.17 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220538 316128 28736 57472 18,18 256420 81,11 2236 0,707308 154 84,0 52 38 16,06 BA.1.18 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220539 350584 24002 48004 13,69 295145 84,19 7435 2,12075 50 73,0 36 24 26,76 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220540 349706 131768 263536 75,36 73909 21,13 12261 3,50609 3 33,0 18 13 67,33 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220541 314886 34471 68942 21,89 243456 77,32 2488 0,790127 131 82,0 44 31 18,38 BA.1.1.1 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220542 290650 24450 48900 16,82 231509 79,65 10241 3,52348 17 56,00000000000001 29 21 43,64 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220543 393188 3174 6348 1,61 386045 98,18 795 0,202193 340 89,0 53 40 11,10 BA.1.1.1 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220544 301018 16814 33628 11,17 263474 
87,53 3916 1,30092 44 72,0 43 33 28,08 BA.1.1.1 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220545 265640 6396 12792 4,82 240982 90,72 11866 4,46695 50 76,0 38 25 24,27 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220546 257054 17714 35428 13,78 219613 85,43 2013 0,783104 127 81,0 47 33 19,22 BA.1.1.1 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220599 189680 74903 149806 78,98 23086 12,17 16788 8,8507 6 43,0 19 17 56,97 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220600 245316 110739 221478 90,28 73 0,03 23765 9,68751 NA NA 0 0 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220601 296392 13505 27010 9,11 268212 90,49 1170 0,394747 798 97,0 59 42 2,97 BA.1.1 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220603 132 9 18 13,64 102 77,27 12 9,09091 NA NA 0 0 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220624 295422 43999 87998 29,79 180473 61,09 26951 9,12288 25 67,0 35 25 33,26 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220625 280118 42654 85308 30,45 183443 65,49 11367 4,05793 19 59,0 30 21 41,39 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220626 284748 4918 9836 3,45 272621 95,74 2291 0,804571 62 76,0 35 22 24,47 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220627 346396 2670 5340 1,54 340102 98,18 954 0,275407 303 90,0 49 33 10,41 BA.1 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220628 322910 4681 9362 2,90 312419 96,75 1129 0,349633 270 89,0 55 40 11,18 BA.1.15.1 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220629 415182 46596 93192 22,45 306635 73,86 15355 3,69838 25 65,0 34 25 34,69 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220630 295142 60733 121466 41,16 170297 57,7 3379 1,14487 81 72,0 34 23 28,01 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220631 274184 
36346 72692 26,51 199758 72,86 1734 0,632422 122 81,0 47 31 18,92 AY.127 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220633 440478 7179 14358 3,26 422987 96,03 3133 0,711273 168 83,0 43 31 17,17 BA.1.17 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220634 373722 60717 121434 32,49 228069 61,03 24219 6,48049 28 65,0 33 23 34,87 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220635 333686 33839 67678 20,28 247874 74,28 18134 5,43445 36 70,0 40 29 29,84 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220636 329688 8956 17912 5,43 246754 74,84 65022 19,7223 29 67,0 37 27 33,48 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220637 412358 5079 10158 2,46 396982 96,27 5218 1,26541 245 86,0 46 32 13,83 BA.1.17 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220638 280140 3190 6380 2,28 273014 97,46 746 0,266295 195 85,0 56 38 14,94 BA.1.17 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220639 293692 15694 31388 10,69 243186 82,8 19118 6,50954 22 64,0 31 22 35,73 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220640 283734 15102 30204 10,65 242119 85,33 11411 4,02172 41 71,0 35 23 28,83 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220642 287392 53325 106650 37,11 167518 58,29 13224 4,60138 15 57,99999999999999 29 17 42,35 B.1.617.2 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220644 329228 29537 59074 17,94 258356 78,47 11798 3,58353 31 67,0 34 25 33,42 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220646 372318 21905 43810 11,77 322145 86,52 6363 1,70902 64 78,0 38 26 22,36 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220647 433376 8059 16118 3,72 396231 91,43 21027 4,85191 170 86,0 51 36 14,41 BA.1 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220648 343850 86366 172732 50,23 148232 43,11 22886 6,65581 13 
54,0 27 19 45,72 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220649 357146 25733 51466 14,41 298948 83,7 6732 1,88494 31 68,0 39 28 32,12 BA.1 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220650 345044 10516 21032 6,10 320092 92,77 3920 1,13609 66 73,0 36 26 27,50 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220651 375890 45387 90774 24,15 270823 72,05 14293 3,80244 24 65,0 33 24 34,80 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220652 324144 12938 25876 7,98 293776 90,63 4492 1,3858 44 72,0 39 27 27,86 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220653 281982 61894 123788 43,90 120219 42,63 37975 13,4672 8 48,0 25 19 52,46 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220654 264482 9708 19416 7,34 241570 91,34 3496 1,32183 57 76,0 39 27 23,71 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220655 342522 4194 8388 2,45 332968 97,21 1166 0,340416 144 84,0 50 36 15,92 BA.1.17 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220656 342082 71678 143356 41,91 186800 54,61 11926 3,4863 7 45,0 24 17 55,26 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220657 289618 24404 48808 16,85 233722 80,7 7088 2,44736 18 59,0 28 21 41,14 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220658 288798 32526 65052 22,53 217510 75,32 6236 2,15929 26 68,0 36 26 32,35 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220659 325092 7722 15444 4,75 307924 94,72 1724 0,530311 127 81,0 43 30 18,81 BA.1.17 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220660 314206 13118 26236 8,35 283672 90,28 4298 1,36789 129 84,0 43 28 16,46 BA.1.1 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220661 377508 8713 17426 4,62 357815 94,78 2267 0,600517 146 81,0 47 32 18,59 BA.1.1 +MiSeq_GEN_267_20220208_ICasas virology 
Analysis NC_045512.2 220663 367268 113353 226706 61,73 104983 28,58 35579 9,68748 4 41,0 28 23 58,98 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220664 310374 3372 6744 2,17 302546 97,48 1084 0,349256 230 87,0 51 38 13,50 BA.1.1.1 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220665 335192 11738 23476 7,00 290215 86,58 21501 6,41453 47 72,0 38 26 28,33 BA.1.17 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220666 240438 67516 135032 56,16 92624 38,52 12782 5,31613 4 40,0 21 14 59,96 B.1.617.2 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220667 310152 3164 6328 2,04 302582 97,56 1242 0,400449 127 81,0 49 31 18,87 BA.1.17 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220668 259344 33241 66482 25,63 177798 68,56 15064 5,8085 15 57,99999999999999 33 23 42,41 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220669 310808 25247 50494 16,25 254109 81,76 6205 1,99641 32 68,0 36 25 32,00 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220670 410196 21372 42744 10,42 362154 88,29 5298 1,29158 90 78,0 43 31 21,51 BA.1.17 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220671 281328 4791 9582 3,41 270074 96,0 1672 0,594324 161 83,0 45 33 17,22 BA.1.17 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220672 286140 16646 33292 11,63 240636 84,1 12212 4,26784 35 70,0 37 23 30,19 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220675 310914 51986 103972 33,44 187328 60,25 19614 6,3085 12 54,0 30 22 46,41 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220677 305936 18288 36576 11,96 265405 86,75 3955 1,29275 47 71,0 38 26 29,04 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220678 346738 9926 19852 5,73 323905 93,41 2981 0,859727 98 81,0 42 30 19,02 BA.1.1.1 +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220679 276880 16703 33406 12,07 
233349 84,28 10125 3,65682 63 75,0 40 27 24,71 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220680 363996 102852 205704 56,51 138444 38,03 19848 5,45281 11 51,0 26 21 49,07 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220684 327920 15093 30186 9,21 292768 89,28 4966 1,51439 63 75,0 35 25 25,05 Unassigned +MiSeq_GEN_267_20220208_ICasas virology Analysis NC_045512.2 220685 276336 18454 36908 13,36 233558 84,52 5870 2,12423 56 75,0 41 29 25,49 BA.1.1.1 diff --git a/relecov_tools/example_data/5.read_bioinfo_metadata/md5sum_MiSeq_GEN_267_20220208_ICasas.md5 b/relecov_tools/example_data/5.read_bioinfo_metadata/md5sum_MiSeq_GEN_267_20220208_ICasas.md5 new file mode 100755 index 00000000..c8d6d0fc --- /dev/null +++ b/relecov_tools/example_data/5.read_bioinfo_metadata/md5sum_MiSeq_GEN_267_20220208_ICasas.md5 @@ -0,0 +1,192 @@ +dbcc703ccb7da3002fee6c0486199009 214821_S12_R1_001.fastq.gz +b76fba963664b532004c4ce7153ae14f 214821_S12_R2_001.fastq.gz +4837ac15de51cfeda4596b09f5876ac8 214822_S13_R1_001.fastq.gz +f0bf2700ae3eadc9a3f01c515546bf4a 214822_S13_R2_001.fastq.gz +213a150461737cb0af48fddd370f4ceb 214823_S1_R1_001.fastq.gz +1b58f58f861c81ed94401a319c7ae9ce 214823_S1_R2_001.fastq.gz +cf00a386d54734ee7a60932f487d946c 214824_S14_R1_001.fastq.gz +eebdabee75e327f49d9e1741d22c40c3 214824_S14_R2_001.fastq.gz +c4b42c9ca78676b244f6fd3356a39bbd 214825_S15_R1_001.fastq.gz +7356129837732007e08e94a7699c5f7e 214825_S15_R2_001.fastq.gz +19c93d83eb76748429c4f466ee094314 214826_S16_R1_001.fastq.gz +3c245df1ecc9456ab121337847a8282d 214826_S16_R2_001.fastq.gz +43205fbe2b44e0bb2aaa3c67a4825c6b 214827_S17_R1_001.fastq.gz +b95ddabc2cb0337a225c7998ec3a11fd 214827_S17_R2_001.fastq.gz +7523b3b8f6129259f6004ef85b479926 214828_S18_R1_001.fastq.gz +fa80eea72d21d5d686af19b779e00cd7 214828_S18_R2_001.fastq.gz +ccb040c582e6c68c9efe84fca0e51c95 214829_S19_R1_001.fastq.gz +047dd05f8008dc7fa0c9f2c5d7f51677 214829_S19_R2_001.fastq.gz 
+f440d83caf0456f3af2bba389dbe9663 214830_S20_R1_001.fastq.gz +345fadf48d0f1b4e14951d82145f985a 214830_S20_R2_001.fastq.gz +8b674c46bded0f8e5e9b89cff4a7c1de 214831_S21_R1_001.fastq.gz +5414313cf3edb5bc6ab9c74a81237277 214831_S21_R2_001.fastq.gz +a118e8522bf3b2cf34020fe4f44536a0 214832_S22_R1_001.fastq.gz +7e8ee7de84d4a0c83da2fa4d86744795 214832_S22_R2_001.fastq.gz +13a584fb7800fa9d900d848555bec36f 214833_S23_R1_001.fastq.gz +634d553b612db4a272537b445827c3ae 214833_S23_R2_001.fastq.gz +a93fc6c69a31da28cbbde419f704d2e3 214834_S24_R1_001.fastq.gz +71b565731008044ca6115528230b1bcc 214834_S24_R2_001.fastq.gz +548177db795b4cb098bd27d7e8182ce5 214835_S25_R1_001.fastq.gz +8208d8f05c4dd2dedc7f8b017d1f00b4 214835_S25_R2_001.fastq.gz +e6c4dd1c4cb471b3c4dd09ee40f10591 214836_S26_R1_001.fastq.gz +5addc2c98ff7d6458a4b319e25731654 214836_S26_R2_001.fastq.gz +c1765535970d038196f72ece4a9b8624 214837_S27_R1_001.fastq.gz +17670b749961800dfdc995cf3c4c6faf 214837_S27_R2_001.fastq.gz +72480d9c23afea096a89ed456c9efdb9 214838_S2_R1_001.fastq.gz +6cddef3197226f00bd7eb8c437e94900 214838_S2_R2_001.fastq.gz +c9f6715f0fe6933126aa9a36d8190102 220338_S3_R1_001.fastq.gz +6ab5c2b6fa4005479da188e7bb7d584b 220338_S3_R2_001.fastq.gz +eea39c299ca945536c2d0583bc358d41 220339_S4_R1_001.fastq.gz +391b790d0d105fd9e8c6c037df8a18d4 220339_S4_R2_001.fastq.gz +ed9a7efa18f6a228c246953e1cd2aa64 220407_S5_R1_001.fastq.gz +1400d0c52ad2dc2231c1fe87e0bd1dde 220407_S5_R2_001.fastq.gz +1044ad9179937f00d76aa07b7c3f8ddc 220433_S6_R1_001.fastq.gz +3d39eeffbd2983251f7153b1792cb865 220433_S6_R2_001.fastq.gz +52d22b2aa4ad47f110cdd5527cc46e58 220518_S7_R1_001.fastq.gz +9884025a080d74f187017475c2e140f7 220518_S7_R2_001.fastq.gz +2b7815a5748ed76de208cac55e9265da 220529_S28_R1_001.fastq.gz +38ce949c2a98a1bf4dbcd444483860e1 220529_S28_R2_001.fastq.gz +d9337ab6c46b757b0cf76e725b1fed45 220530_S29_R1_001.fastq.gz +f474a72ba984af48d96b95ee3d9201a6 220530_S29_R2_001.fastq.gz +ffe0db11dd065f384e13c6be26599e84 
220531_S30_R1_001.fastq.gz +e0b1d1d6557f03cfca04005721244270 220531_S30_R2_001.fastq.gz +98c6011d1e0537c3f02bec11cd3c2aef 220532_S31_R1_001.fastq.gz +fc770dc2431972bf859d64b3fb7b24cd 220532_S31_R2_001.fastq.gz +6dd04393363653aa94aadb887dcfbb5a 220533_S32_R1_001.fastq.gz +e752ffb157d139701ecf2e765ce4642f 220533_S32_R2_001.fastq.gz +003a1179f4afd449ba2f04f65d5bc071 220534_S33_R1_001.fastq.gz +632355ba64f7de8c471d98028394a792 220534_S33_R2_001.fastq.gz +abd5dea7f0ebb92d4dc44b6777aa14df 220535_S34_R1_001.fastq.gz +38ad2aaf11d71d43e8c9e2996419c9c7 220535_S34_R2_001.fastq.gz +943d099043ef8d374519526972cb1e10 220536_S35_R1_001.fastq.gz +95540b3c187f2be5b390e028a53744f0 220536_S35_R2_001.fastq.gz +4f8c1abacae760cc659902491acf01be 220537_S36_R1_001.fastq.gz +ba90ceea113af1b1154cfc88791130f4 220537_S36_R2_001.fastq.gz +262b2daea97f27716b45e4e42769fac2 220538_S37_R1_001.fastq.gz +001cde855df87b0bd1a53ee2f2b6b3ef 220538_S37_R2_001.fastq.gz +0da2a117841ac93bf98b592f80d0701b 220539_S38_R1_001.fastq.gz +3b9d109620122fe4f8d0395474cffa73 220539_S38_R2_001.fastq.gz +b7b0f6e63aa5f35bfed0c8e0af929346 220540_S39_R1_001.fastq.gz +ad64eff1e7994f31758f3539a68ee8f4 220540_S39_R2_001.fastq.gz +07d0690f4a552688557f5dff824ee164 220541_S40_R1_001.fastq.gz +79ec4ae16bfedc05b0ed0d2ab1cf5dc3 220541_S40_R2_001.fastq.gz +9df1b3c139ed67e0b47ae0bc2055f3d2 220542_S41_R1_001.fastq.gz +11e101b892f6429d4762f7e8acb8f8fa 220542_S41_R2_001.fastq.gz +781a331bd3c3083fc068928fa9e62b61 220543_S42_R1_001.fastq.gz +761f5cf6ec9e70a3c6052e6717181987 220543_S42_R2_001.fastq.gz +3a7d06d1d0eeb293377fbc40b8331a78 220544_S43_R1_001.fastq.gz +354eb8ebd7da78d0e4061ff45510b247 220544_S43_R2_001.fastq.gz +58cbb67f568d4f59d0a11d4df132412c 220545_S44_R1_001.fastq.gz +45f3f30a76c6c9099cc10a54a55326ac 220545_S44_R2_001.fastq.gz +54dfda3d4b9ec99067f1b57166c733f0 220546_S45_R1_001.fastq.gz +e03a1e702b7b0ff54bb57f95e47e99a2 220546_S45_R2_001.fastq.gz +5a476df56ca10e519da0937245b5f843 220599_S8_R1_001.fastq.gz 
+b0bf94bbd7290a0b2193025cda14dc02 220599_S8_R2_001.fastq.gz +721c349a644be2b184dc599c70b9d286 220600_S9_R1_001.fastq.gz +ba22af6782ec196a43e76cec80a36741 220600_S9_R2_001.fastq.gz +d6bd2082fb4fe550b1627c5b534776ec 220601_S10_R1_001.fastq.gz +f395c916a68376183577b7810af918bc 220601_S10_R2_001.fastq.gz +a997404b1926f1bd2330d95e4092bf06 220603_S11_R1_001.fastq.gz +aed63e4ee67de18ce6ff50c0a2fa3674 220603_S11_R2_001.fastq.gz +f867c8e39ff543445b4bbf1ca774e1ac 220624_S46_R1_001.fastq.gz +d0cd564bec34963ef961abfc7e9206fa 220624_S46_R2_001.fastq.gz +e71e817a639c9a688ee281aff5d958e2 220625_S47_R1_001.fastq.gz +a2f2cd43085caa4b198b5b7ffa0b0a09 220625_S47_R2_001.fastq.gz +6f8e8886752788c05b779ab4c2dae7f7 220626_S48_R1_001.fastq.gz +1c57c19841c6d25daa917135696dfabb 220626_S48_R2_001.fastq.gz +8b56a2a1d181eefc5469790c16f5d175 220627_S49_R1_001.fastq.gz +e4fbdcea92c7e7e762c494bdec1216dc 220627_S49_R2_001.fastq.gz +4c712e5ae5a5097aef16ab14123bff84 220628_S50_R1_001.fastq.gz +5ecbf9332f8f085bca8ff71095161d27 220628_S50_R2_001.fastq.gz +5f7f51e074fc61cc678ef706634dc2f2 220629_S51_R1_001.fastq.gz +c243d5d71e4dab63d4b5e5399c9336ae 220629_S51_R2_001.fastq.gz +bfaa0e4aef054c0bb369f935278e8364 220630_S52_R1_001.fastq.gz +df070c9a8e583211f198bc67bb16b3bc 220630_S52_R2_001.fastq.gz +3286b5c58719476e8832de107c366bc4 220631_S53_R1_001.fastq.gz +ca6a1e4503d3892e60f86b24ac26e7c1 220631_S53_R2_001.fastq.gz +893f9aa89c1cb03e02b7caf66c71ca80 220633_S54_R1_001.fastq.gz +33c3e32357450b511a5657b0c22aa9fb 220633_S54_R2_001.fastq.gz +53382ddf2ccadc39d6022e073a293c61 220634_S55_R1_001.fastq.gz +a49c009e9eeb68c46b8dafd0da05eadb 220634_S55_R2_001.fastq.gz +211658e701aaf7fb483a9a2b64a0f917 220635_S56_R1_001.fastq.gz +51bf5c329fc240c8e8f1baefc37d5c1b 220635_S56_R2_001.fastq.gz +1284ba20e1bee6a584d3f10042ffc38b 220636_S57_R1_001.fastq.gz +72334caba85e114c076aef467e6987e1 220636_S57_R2_001.fastq.gz +a36aa48dd53b5938a160ac094fd753cb 220637_S58_R1_001.fastq.gz +1c7c1bb51da20c0e8c9661436f598cc2 
220637_S58_R2_001.fastq.gz +b6ecf898c0b6e15c7cded228718da4f8 220638_S59_R1_001.fastq.gz +d4d3b6325ef1ca17f6d36953b050f532 220638_S59_R2_001.fastq.gz +fa2af2a995a99809496fd34602a16542 220639_S60_R1_001.fastq.gz +a89de5a74985de324670f41ef730b29b 220639_S60_R2_001.fastq.gz +55f36a57fd7e621483b6653a84e6edad 220640_S61_R1_001.fastq.gz +40da6eac77dcd3c82f9531e3bc5f4e9c 220640_S61_R2_001.fastq.gz +424e46a2427c6c8f7c80aae322f22bc3 220642_S62_R1_001.fastq.gz +cd7a5f03488794eeec2b42e3c5dd73dd 220642_S62_R2_001.fastq.gz +727dcb0ceeef62ce68f9482188180ed8 220644_S63_R1_001.fastq.gz +68173f24d15e48a74b7b46a0937a5436 220644_S63_R2_001.fastq.gz +ea407b7488895db5d210dcd5fa2e8e07 220646_S64_R1_001.fastq.gz +425f1969ebcdd626729c94668f6c9985 220646_S64_R2_001.fastq.gz +93db59751dfb1685e149bfea5d1fa40d 220647_S65_R1_001.fastq.gz +dc234549b290f2649e7ae285aee6ff5b 220647_S65_R2_001.fastq.gz +86dc347a67cbc91692e8a44be43a1c49 220648_S66_R1_001.fastq.gz +ca841d76f56036e00b663be25fe32313 220648_S66_R2_001.fastq.gz +d26d54d5f3771b92405cd0bed11e22f4 220649_S67_R1_001.fastq.gz +d820aca88a3637fb44bb648d9a6a03c1 220649_S67_R2_001.fastq.gz +bfda7de91c9f4a1b938e2fd8809cb37b 220650_S68_R1_001.fastq.gz +2b596c41ad86057f9b45c60e1e551139 220650_S68_R2_001.fastq.gz +762264514323d8ba65ba3cf1b9ad7947 220651_S69_R1_001.fastq.gz +f9f7118d4eacdcff059f92a352e8f832 220651_S69_R2_001.fastq.gz +73e730f342ec9788d715639c982d34be 220652_S70_R1_001.fastq.gz +aa16ec3d36d7b457696f81ddd489340b 220652_S70_R2_001.fastq.gz +899d9eedeb862ed2b766b2ef38cdef89 220653_S71_R1_001.fastq.gz +d0613e93a129a7f935f44e9f8fe6dad7 220653_S71_R2_001.fastq.gz +1cd9636649fce7e6f8172fc4696362b6 220654_S72_R1_001.fastq.gz +43eabe0ccbc64a6762f07691b6cb9b78 220654_S72_R2_001.fastq.gz +cb1756e84c0c854814aa8ccfa6ef9965 220655_S73_R1_001.fastq.gz +c1c5d439d06d28839122cee8ec2a8a26 220655_S73_R2_001.fastq.gz +c582439856aaa10e58cde21b07323dbe 220656_S74_R1_001.fastq.gz +d4645048ab66627ec855d1231b4207a6 220656_S74_R2_001.fastq.gz 
+2cd60ea9dc7358578f2b11f3e2712898 220657_S75_R1_001.fastq.gz +4de125f0f89a02caf060259ff7ba97fd 220657_S75_R2_001.fastq.gz +b3c9abf04e41fb878ebde52383f27cd1 220658_S76_R1_001.fastq.gz +b8d3fb213057c4d1f909e187789ad8f1 220658_S76_R2_001.fastq.gz +efbc2790b3cf0ff305a2c6c949c3101c 220659_S77_R1_001.fastq.gz +12d26a620109b24172e517a03edca796 220659_S77_R2_001.fastq.gz +5da90c106b6c5791dc1058da6fb88d58 220660_S78_R1_001.fastq.gz +fae8c6af23e3fac5a9a448712c4613b6 220660_S78_R2_001.fastq.gz +507dba14ebad10c2c2e697bd506b3b17 220661_S79_R1_001.fastq.gz +00762bb19883db46289ef9047e9aa662 220661_S79_R2_001.fastq.gz +b2f14cba5c1a9c8b2ed589afb69238f3 220663_S80_R1_001.fastq.gz +8325daa2cb930e554c1a576ae529555b 220663_S80_R2_001.fastq.gz +2b7fca4ad9e1cf5b4d719337538b2ece 220664_S81_R1_001.fastq.gz +43d5cb91ffc5575a15a276c22eaf8db8 220664_S81_R2_001.fastq.gz +c79e735d65c1122bcb54e63adf003e98 220665_S82_R1_001.fastq.gz +a4cf0e9a3dbdefd46a928cf18acf5f08 220665_S82_R2_001.fastq.gz +9a60ceb8c32a4d67947c1ffcb3334223 220666_S83_R1_001.fastq.gz +f11417d562a870b154d8034c60cb997f 220666_S83_R2_001.fastq.gz +2791ff16b7e94f806b2c00442890112c 220667_S84_R1_001.fastq.gz +fe3a55379689e22cf111e1287c32f45f 220667_S84_R2_001.fastq.gz +87479e96b34106748b1cb342dce3d24e 220668_S85_R1_001.fastq.gz +01fc8610639039fae3a5039aa0f8ce3e 220668_S85_R2_001.fastq.gz +0374abb4d11b9bc701faf6e20e0b39b9 220669_S86_R1_001.fastq.gz +866f493a5fca69d1219839e25a2c59c1 220669_S86_R2_001.fastq.gz +ce29b2a53257a6682ca4d29590fb1397 220670_S87_R1_001.fastq.gz +2cb2bfa6bdd8d37e073ce386a51eb222 220670_S87_R2_001.fastq.gz +1901c8ee421c8ff3a461e73b9990548d 220671_S88_R1_001.fastq.gz +63075cf1ff8543ed1f685c34f145af53 220671_S88_R2_001.fastq.gz +f853f727c6c77062f9f0eb388b975cac 220672_S89_R1_001.fastq.gz +9508309f8b2d201cb8fcd9a96e4592b2 220672_S89_R2_001.fastq.gz +21c8ef96c84e40fecfbb3749e28b759c 220675_S90_R1_001.fastq.gz +dad21b92354af748bacf678f6ad1c6a6 220675_S90_R2_001.fastq.gz +9a56b40a023af7395a3610284425d3d2 
220677_S91_R1_001.fastq.gz +0bcc1af3d0663e0cfa0edfedd533155e 220677_S91_R2_001.fastq.gz +bf9a3a7ec821fcf98a3e7383150175ed 220678_S92_R1_001.fastq.gz +a2bd2aafc26b8ff21e95dfea74c4e38f 220678_S92_R2_001.fastq.gz +4f253c4b5432f2ab3e446190703da03b 220679_S93_R1_001.fastq.gz +f4b182052d1fb7f0690719572d5e37d3 220679_S93_R2_001.fastq.gz +10528a48df6f6310d5b53fa402a9446f 220680_S94_R1_001.fastq.gz +994e273a5f941151c52727fa27732e03 220680_S94_R2_001.fastq.gz +87a281cb1cc7a5f246aeaf5ed33b2d3f 220684_S95_R1_001.fastq.gz +be0059af911542a8329249cc6fd4b50e 220684_S95_R2_001.fastq.gz +ce9c74bb150e98d62e1fa63912202fc8 220685_S96_R1_001.fastq.gz +0fe9bf77de1a6643fc77bba4bfef1a7b 220685_S96_R2_001.fastq.gz diff --git a/relecov_tools/example_data/5.read_bioinfo_metadata/software_versions.yml b/relecov_tools/example_data/5.read_bioinfo_metadata/software_versions.yml new file mode 100755 index 00000000..056108ff --- /dev/null +++ b/relecov_tools/example_data/5.read_bioinfo_metadata/software_versions.yml @@ -0,0 +1,89 @@ +ASCIIGENOME: + asciigenome: 1.16.0 + bedtools: 2.30.0 +BCFTOOLS_CONSENSUS: + bcftools: '1.14' +BCFTOOLS_FILTER: + bcftools: '1.14' +BCFTOOLS_QUERY: + bcftools: '1.14' +BCFTOOLS_STATS: + bcftools: '1.14' +BEDTOOLS_MASKFASTA: + bedtools: 2.30.0 +BEDTOOLS_MERGE: + bedtools: 2.30.0 +BOWTIE2_ALIGN: + bowtie2: 2.4.4 + pigz: '2.6' + samtools: '1.14' +BOWTIE2_BUILD: + bowtie2: 2.4.4 +COLLAPSE_PRIMERS: + python: 3.9.5 +CUSTOM_DUMPSOFTWAREVERSIONS: + python: 3.9.5 + yaml: 5.4.1 +CUSTOM_GETCHROMSIZES: + custom: '1.14' +FASTP: + fastp: 0.23.2 +FASTQC_RAW: + fastqc: 0.11.9 +FASTQC_TRIM: + fastqc: 0.11.9 +IVAR_TRIM: + ivar: 1.3.1 +IVAR_VARIANTS: + ivar: 1.3.1 +IVAR_VARIANTS_TO_VCF: + python: 3.9.9 +KRAKEN2_KRAKEN2: + kraken2: 2.1.2 + pigz: '2.6' +MAKE_BED_MASK: + python: 3.9.5 + samtools: '1.14' +MAKE_VARIANTS_LONG_TABLE: + python: 3.9.9 +MOSDEPTH_AMPLICON: + mosdepth: 0.3.3 +MOSDEPTH_GENOME: + mosdepth: 0.3.3 +PICARD_COLLECTMULTIPLEMETRICS: + picard: 2.26.10 +PLOT_BASE_DENSITY: + 
r-base: 4.0.3 +PLOT_MOSDEPTH_REGIONS_AMPLICON: + r-base: 4.0.3 +PLOT_MOSDEPTH_REGIONS_GENOME: + r-base: 4.0.3 +RENAME_FASTA_HEADER: + sed: '4.7' +SAMPLESHEET_CHECK: + python: 3.9.5 +SAMTOOLS_FLAGSTAT: + samtools: '1.14' +SAMTOOLS_IDXSTATS: + samtools: '1.14' +SAMTOOLS_INDEX: + samtools: '1.14' +SAMTOOLS_SORT: + samtools: '1.14' +SAMTOOLS_STATS: + samtools: '1.14' +SNPEFF_ANN: + snpeff: 5.0e +SNPEFF_BUILD: + snpeff: 5.0e +SNPSIFT_EXTRACTFIELDS: + snpsift: '4.3' +TABIX_BGZIP: + tabix: '1.12' +TABIX_TABIX: + tabix: '1.12' +UNTAR_KRAKEN2_DB: + untar: '1.30' +Workflow: + Nextflow: 21.10.6 + nf-core/viralrecon: 2.4.1 diff --git a/relecov_tools/example_data/5.read_bioinfo_metadata/summary_variants_metrics_mqc.csv b/relecov_tools/example_data/5.read_bioinfo_metadata/summary_variants_metrics_mqc.csv new file mode 100755 index 00000000..2edeb8d9 --- /dev/null +++ b/relecov_tools/example_data/5.read_bioinfo_metadata/summary_variants_metrics_mqc.csv @@ -0,0 +1,97 @@ +Sample,# Input reads,# Trimmed reads (fastp),% Non-host reads (Kraken 2),% Mapped reads,# Mapped reads,# Trimmed reads (iVar),Coverage median,% Coverage > 1x,% Coverage > 10x,# SNPs,# INDELs,# Missense variants,# Ns per 100kb consensus,Pangolin lineage,Nextclade clade +214821,316942,285604,98.9180823797986,98.54,281428,281038,287,98.0,92.0,20,1,12,8442.32,NA,NA +214822,382722,306868,36.6652762751411,20.09,61642,61447,6,67.0,41.0,16,2,12,58831.99,NA,NA +214823,298848,196898,41.969953986327944,0.1,204,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +214824,364,248,66.93548387096774,55.65,138,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +214825,306712,277306,99.46124497847144,97.74,271052,270695,217,98.0,91.0,18,NA,11,9102.77,NA,NA +214826,380062,337052,99.75374719627833,99.28,334627,334101,239,99.0,92.0,18,NA,11,7868.78,NA,NA +214827,493956,246530,77.30255952622399,32.19,79361,77172,10,70.0,50.0,21,NA,12,49724.11,NA,NA +214828,386996,312802,81.84410585610067,69.75,218183,217770,33,89.0,68.0,17,NA,9,32314.48,NA,NA 
+214829,432110,379598,98.19862064605186,97.51,370149,369582,249,98.0,89.0,22,NA,9,10654.45,NA,NA +214830,399552,354342,85.79056391847423,84.72,300202,299799,169,97.0,88.0,22,NA,10,12145.94,NA,NA +214831,319564,253440,15.07339015151515,4.43,11221,11153,NA,49.0,22.0,18,NA,8,77513.96,NA,NA +214832,12036,5052,26.999208234362627,7.21,364,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +214833,450792,326700,62.184879093970004,26.96,88067,87525,16,82.0,55.00000000000001,19,NA,9,44935.29,NA,NA +214834,443852,384218,81.03732776704892,77.18,296545,296058,85,96.0,83.0,18,1,9,17289.24,NA,NA +214835,482676,347078,75.77432162222901,46.94,162912,162195,32,86.0,67.0,25,3,13,32555.18,NA,NA +214836,479856,362840,58.94443831992062,31.73,115115,114688,9,75.0,49.0,16,NA,9,50530.05,NA,NA +214837,469580,400430,71.76335439402642,55.88,223768,223377,52,94.0,75.0,25,1,12,25351.97,NA,NA +214838,156098,113674,54.851593152347945,5.49,6240,6198,1,56.99999999999999,27.0,9,1,3,73340.47,NA,NA +220338,265944,214610,65.44243045524439,49.6,106457,106295,28,90.0,68.0,31,3,22,31881.92,NA,NA +220339,346512,310682,95.66695206030603,95.17,295685,295275,401,100.0,91.0,47,3,33,8766.06,NA,NA +220407,233538,188574,22.710447887831833,2.12,4001,3966,NA,44.0,20.0,8,NA,6,80175.9,NA,NA +220433,179434,69680,79.81343283582089,23.34,16261,16188,3,63.0,36.0,19,NA,16,64030.36,NA,NA +220518,163942,87796,65.67725181101645,31.01,27222,27130,2,61.0,37.0,17,3,13,63283.94,NA,NA +220529,278216,243588,80.03185707013482,79.09,192650,192328,86,93.0,79.0,39,3,29,21148.96,NA,NA +220530,302760,258694,84.39005156671588,63.63,164599,164351,27,89.0,67.0,39,3,30,32758.79,NA,NA +220531,361270,322478,95.34231792556392,95.17,306896,306433,317,98.0,90.0,54,3,43,9950.48,NA,NA +220532,282166,240540,62.82447825725451,60.51,145549,145279,29,86.0,68.0,35,5,27,31517.67,NA,NA +220533,172350,146454,7.83181067092739,5.2,7609,7589,NA,41.0,15.0,6,NA,4,85292.45,NA,NA +220534,424878,383046,94.97501605551292,94.39,361544,361008,505,98.0,91.0,53,5,42,8908.14,NA,NA 
+220535,431498,372112,68.11658855398375,66.5,247472,247043,39,88.0,72.0,38,3,26,28359.21,NA,NA +220536,400400,342222,59.196077400050264,57.41,196456,196125,27,88.0,66.0,33,3,23,33555.27,NA,NA +220537,336448,296640,94.72963861920172,94.05,278999,278467,171,96.0,85.0,48,3,35,14735.01,NA,NA +220538,357378,316128,81.82002226946047,81.11,256420,255981,154,95.0,84.0,47,3,35,16056.61,NA,NA +220539,404678,350584,86.30741847888095,84.19,295145,294673,50,92.0,73.0,34,3,23,26759.9,NA,NA +220540,412964,349706,24.64069818647664,21.13,73909,73754,3,64.0,33.0,17,2,14,67334.78,NA,NA +220541,379132,314886,78.10572715204867,77.32,243456,242998,131,96.0,82.0,43,3,32,18378.61,NA,NA +220542,332120,290650,83.17564080509203,79.65,231509,231185,17,81.0,56.00000000000001,28,2,21,43636.49,NA,NA +220543,439190,393188,98.38550515275135,98.18,386045,385462,340,97.0,89.0,52,3,41,11098.1,NA,NA +220544,344446,301018,88.82857503537994,87.53,263474,263058,44,89.0,72.0,44,3,35,28081.5,NA,NA +220545,301966,265640,95.18446017166089,90.72,240982,240678,50,93.0,76.0,37,3,26,24270.61,NA,NA +220546,291190,257054,86.21768188785236,85.43,219613,219266,127,95.0,81.0,47,3,33,19218.42,NA,NA +220599,227788,189680,21.021720792914383,12.17,23086,23047,6,75.0,43.0,20,NA,18,56967.53,NA,NA +220600,296648,245316,9.717262632686005,0.03,73,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +220601,344828,296392,90.88706847688198,90.49,268212,267704,798,100.0,97.0,52,7,40,2972.29,NA,NA +220603,300,132,86.36363636363636,77.27,102,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +220624,350480,295422,70.21278036165215,61.09,180473,180168,25,91.0,67.0,35,3,26,33264.19,NA,NA +220625,325138,280118,69.54569145859959,65.49,183443,183135,19,83.0,59.0,28,3,21,41391.19,NA,NA +220626,322846,284748,96.54571761698062,95.74,272621,271976,62,93.0,76.0,34,4,22,24468.01,NA,NA +220627,390592,346396,98.45841175995103,98.18,340102,339471,303,99.0,90.0,44,5,31,10406.77,NA,NA +220628,368766,322910,97.10074014431265,96.75,312419,311876,270,98.0,89.0,56,3,41,11175.05,NA,NA 
+220629,486080,415182,77.55394019971963,73.86,306635,306018,25,85.0,65.0,37,5,27,34692.85,NA,NA +220630,336320,295142,58.844894999695065,57.7,170297,170017,81,92.0,72.0,35,3,25,28007.9,NA,NA +220631,309884,274184,73.4878767542964,72.86,199758,199489,122,96.0,81.0,46,2,31,18918.92,NA,NA +220633,496616,440478,96.74035933690219,96.03,422987,422312,168,96.0,83.0,39,3,29,17170.77,NA,NA +220634,436114,373722,67.5068633904346,61.03,228069,227720,28,86.0,65.0,31,3,23,34870.18,NA,NA +220635,389300,333686,79.7180582943246,74.28,247874,247463,36,90.0,70.0,39,4,28,29841.41,NA,NA +220636,383082,329688,94.56698454296183,74.84,246754,246352,29,87.0,67.0,42,4,30,33481.66,NA,NA +220637,465292,412358,97.53660654091833,96.27,396982,396323,245,98.0,86.0,43,3,30,13828.29,NA,NA +220638,315490,280140,97.72256728778468,97.46,273014,272569,195,97.0,85.0,53,5,38,14942.8,NA,NA +220639,342404,293692,89.31261321384308,82.8,243186,242722,22,85.0,64.0,30,4,23,35726.71,NA,NA +220640,327148,283734,89.35481824525789,85.33,242119,241777,41,93.0,71.0,35,3,22,28827.62,NA,NA +220642,337862,287392,62.89040752700144,58.29,167518,167292,15,83.0,57.99999999999999,30,3,20,42353.49,NA,NA +220644,385638,329228,82.05681169280862,78.47,258356,257961,31,87.0,67.0,35,6,25,33424.79,NA,NA +220646,427090,372318,88.2331770153471,86.52,322145,321629,64,92.0,78.0,36,3,26,22356.8,NA,NA +220647,494176,433376,96.28082773388466,91.43,396231,395606,170,95.0,86.0,51,3,38,14413.81,NA,NA +220648,414028,343850,49.765304638650576,43.11,148232,147958,13,75.0,54.0,26,4,19,45717.34,NA,NA +220649,422718,357146,85.58964681110807,83.7,298948,298393,31,90.0,68.0,40,4,28,32119.91,NA,NA +220650,398930,345044,93.90454550723966,92.77,320092,319599,66,93.0,73.0,36,3,25,27495.99,NA,NA +220651,457958,375890,75.85091383117401,72.05,270823,270288,24,86.0,65.0,38,3,28,34803.27,NA,NA +220652,375618,324144,92.01712818994028,90.63,293776,293298,44,92.0,72.0,41,3,27,27857.33,NA,NA 
+220653,332128,281982,56.100744019121784,42.63,120219,120006,8,74.0,48.0,28,3,21,52462.03,NA,NA +220654,300436,264482,92.65885769163876,91.34,241570,241203,57,92.0,76.0,38,3,26,23705.17,NA,NA +220655,393176,342522,97.55110620631667,97.21,332968,332396,144,96.0,84.0,50,3,35,15919.43,NA,NA +220656,409114,342082,58.093088791576285,54.61,186800,186038,7,74.0,45.0,26,2,19,55263.07,NA,NA +220657,337772,289618,83.14745630451145,80.7,233722,233362,18,83.0,59.0,28,2,22,41141.25,NA,NA +220658,334450,288798,77.47491326117218,75.32,217510,217129,26,87.0,68.0,34,3,27,32347.13,NA,NA +220659,375278,325092,95.24934480085638,94.72,307924,307410,127,94.0,81.0,42,3,29,18810.22,NA,NA +220660,355660,314206,91.65006397077077,90.28,283672,283214,129,94.0,84.0,45,3,29,16464.8,NA,NA +220661,435370,377508,95.38393888341439,94.78,357815,357204,146,96.0,81.0,45,4,31,18592.75,NA,NA +220663,448948,367268,38.27232429724343,28.58,104983,104809,4,65.0,41.0,29,1,23,58981.74,NA,NA +220664,354478,310374,97.8271375824006,97.48,302546,302086,230,97.0,87.0,49,3,38,13497.06,NA,NA +220665,393704,335192,92.99625289386381,86.58,290215,289736,47,91.0,72.0,41,4,27,28329.1,NA,NA +220666,298722,240438,43.83916019930294,38.52,92624,92465,4,68.0,40.0,22,4,17,59956.51,NA,NA +220667,356530,310152,97.95971007763936,97.56,302582,301989,127,94.0,81.0,46,3,29,18867.1,NA,NA +220668,322372,259344,74.36532173483867,68.56,177798,177421,15,80.0,57.99999999999999,34,3,22,42413.19,NA,NA +220669,377304,310808,83.75395742709325,81.76,254109,253502,32,89.0,68.0,37,3,26,32002.81,NA,NA +220670,466540,410196,89.57961559839687,88.29,362154,361638,90,94.0,78.0,39,4,30,21514.37,NA,NA +220671,335280,281328,96.59401126087698,96.0,270074,269588,161,97.0,83.0,43,3,31,17224.3,NA,NA +220672,344566,286140,88.36513594743832,84.1,240636,240096,35,87.0,70.0,36,4,23,30192.09,NA,NA +220675,378180,310914,66.55924146226931,60.25,187328,186944,12,78.0,54.0,30,2,22,46409.34,NA,NA 
+220677,357032,305936,88.044558338999,86.75,265405,264932,47,91.0,71.0,38,3,28,29038.41,NA,NA +220678,407204,346738,94.27463964145839,93.41,323905,323270,98,95.0,81.0,38,3,28,19021.01,NA,NA +220679,316188,276880,87.93484542039873,84.28,233349,232957,63,91.0,75.0,40,3,27,24712.26,NA,NA +220680,444904,363996,43.487291069132624,38.03,138444,138242,11,78.0,51.0,28,3,24,49070.3,NA,NA +220684,378006,327920,90.79470602585997,89.28,292768,292274,63,93.0,75.0,33,3,25,25046.84,NA,NA +220685,319610,276336,86.64379595854322,84.52,233558,233170,56,91.0,75.0,39,3,29,25488.49,NA,NA diff --git a/relecov_tools/example_data/5.read_bioinfo_metadata/variants_long_table.csv b/relecov_tools/example_data/5.read_bioinfo_metadata/variants_long_table.csv new file mode 100755 index 00000000..90148433 --- /dev/null +++ b/relecov_tools/example_data/5.read_bioinfo_metadata/variants_long_table.csv @@ -0,0 +1,3341 @@ +SAMPLE,CHROM,POS,REF,ALT,FILTER,DP,REF_DP,ALT_DP,AF,GENE,EFFECT,HGVS_C,HGVS_P,HGVS_P_1LETTER,CALLER,LINEAGE +214821,NC_045512.2,11132,G,T,PASS,366,0,363,0.99,orf1ab,missense_variant,c.10867G>T,p.Ala3623Ser,p.A3623S,ivar,B.1.177 +214821,NC_045512.2,14408,C,T,PASS,333,2,331,0.99,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,B.1.177 +214821,NC_045512.2,16726,C,T,PASS,697,1,695,1.0,orf1ab,synonymous_variant,c.16461C>T,p.Tyr5487Tyr,p.Y5487Y,ivar,B.1.177 +214821,NC_045512.2,17658,G,T,PASS,3710,6,3690,0.99,orf1ab,missense_variant,c.17393G>T,p.Cys5798Phe,p.C5798F,ivar,B.1.177 +214821,NC_045512.2,20132,C,T,PASS,83,0,83,1.0,orf1ab,missense_variant,c.19867C>T,p.Pro6623Ser,p.P6623S,ivar,B.1.177 +214821,NC_045512.2,21255,G,C,PASS,78,0,78,1.0,orf1ab,missense_variant,c.20990G>C,p.Arg6997Pro,p.R6997P,ivar,B.1.177 +214821,NC_045512.2,21786,G,T,PASS,276,0,275,1.0,S,missense_variant,c.224G>T,p.Gly75Val,p.G75V,ivar,B.1.177 +214821,NC_045512.2,23403,A,G,PASS,2590,2,2588,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,B.1.177 
+214821,NC_045512.2,24138,C,T,PASS,1594,913,679,0.43,S,missense_variant,c.2576C>T,p.Thr859Ile,p.T859I,ivar,B.1.177 +214821,NC_045512.2,26801,C,G,PASS,896,0,892,1.0,M,synonymous_variant,c.279C>G,p.Leu93Leu,p.L93L,ivar,B.1.177 +214821,NC_045512.2,28932,C,T,PASS,242,5,236,0.98,N,missense_variant,c.659C>T,p.Ala220Val,p.A220V,ivar,B.1.177 +214821,NC_045512.2,29645,G,T,PASS,84,0,82,0.98,ORF10,missense_variant,c.88G>T,p.Val30Leu,p.V30L,ivar,B.1.177 +214821,NC_045512.2,3037,C,T,PASS,330,0,330,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,B.1.177 +214821,NC_045512.2,4230,C,T,PASS,57,0,57,1.0,orf1ab,missense_variant,c.3965C>T,p.Thr1322Ile,p.T1322I,ivar,B.1.177 +214821,NC_045512.2,445,T,C,PASS,8440,24,8415,1.0,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,B.1.177 +214821,NC_045512.2,504,CTCATGG,C,PASS,7769,7715,6191,0.8,orf1ab,disruptive_inframe_deletion,c.245_250delGTCATG,p.Gly82_His83del,p.G82_H83del,ivar,B.1.177 +214821,NC_045512.2,541,C,T,PASS,7835,36,7791,0.99,orf1ab,synonymous_variant,c.276C>T,p.Leu92Leu,p.L92L,ivar,B.1.177 +214821,NC_045512.2,5548,C,T,PASS,28,0,27,0.96,orf1ab,synonymous_variant,c.5283C>T,p.Thr1761Thr,p.T1761T,ivar,B.1.177 +214821,NC_045512.2,6842,T,C,ft,13,9,4,0.31,orf1ab,missense_variant,c.6577T>C,p.Ser2193Pro,p.S2193P,ivar,B.1.177 +214821,NC_045512.2,8125,T,G,PASS,3054,5,3048,1.0,orf1ab,synonymous_variant,c.7860T>G,p.Leu2620Leu,p.L2620L,ivar,B.1.177 +214822,NC_045512.2,1199,TG,T,PASS,364,361,97,0.27,orf1ab,frameshift_variant,c.935delG,p.Cys312fs,p.C312fs,ivar,Unassigned +214822,NC_045512.2,13179,G,T,PASS,735,546,189,0.26,orf1ab,missense_variant,c.12914G>T,p.Gly4305Val,p.G4305V,ivar,Unassigned +214822,NC_045512.2,17658,G,T,PASS,2579,0,2575,1.0,orf1ab,missense_variant,c.17393G>T,p.Cys5798Phe,p.C5798F,ivar,Unassigned +214822,NC_045512.2,20132,C,T,PASS,10,0,10,1.0,orf1ab,missense_variant,c.19867C>T,p.Pro6623Ser,p.P6623S,ivar,Unassigned 
+214822,NC_045512.2,20473,A,C,PASS,423,175,247,0.58,orf1ab,missense_variant,c.20208A>C,p.Lys6736Asn,p.K6736N,ivar,Unassigned +214822,NC_045512.2,2076,G,T,PASS,80,47,33,0.41,orf1ab,missense_variant,c.1811G>T,p.Gly604Val,p.G604V,ivar,Unassigned +214822,NC_045512.2,23403,A,G,PASS,44,0,44,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +214822,NC_045512.2,25217,G,T,PASS,334,113,221,0.66,S,missense_variant,c.3655G>T,p.Gly1219Cys,p.G1219C,ivar,Unassigned +214822,NC_045512.2,25424,G,T,PASS,228,109,119,0.52,ORF3a,missense_variant,c.32G>T,p.Gly11Val,p.G11V,ivar,Unassigned +214822,NC_045512.2,25451,T,C,ft,12,8,4,0.33,ORF3a,missense_variant,c.59T>C,p.Ile20Thr,p.I20T,ivar,Unassigned +214822,NC_045512.2,26801,C,G,PASS,21,0,21,1.0,M,synonymous_variant,c.279C>G,p.Leu93Leu,p.L93L,ivar,Unassigned +214822,NC_045512.2,2748,A,G,PASS,852,534,315,0.37,orf1ab,missense_variant,c.2483A>G,p.Asp828Gly,p.D828G,ivar,Unassigned +214822,NC_045512.2,3514,G,T,PASS,59,40,19,0.32,orf1ab,missense_variant,c.3249G>T,p.Met1083Ile,p.M1083I,ivar,Unassigned +214822,NC_045512.2,445,T,C,PASS,7802,4,7798,1.0,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,Unassigned +214822,NC_045512.2,504,CTCATGG,C,PASS,6981,6922,5465,0.78,orf1ab,disruptive_inframe_deletion,c.245_250delGTCATG,p.Gly82_His83del,p.G82_H83del,ivar,Unassigned +214822,NC_045512.2,541,C,T,PASS,7271,38,7229,0.99,orf1ab,synonymous_variant,c.276C>T,p.Leu92Leu,p.L92L,ivar,Unassigned +214822,NC_045512.2,8125,T,G,PASS,13,0,13,1.0,orf1ab,synonymous_variant,c.7860T>G,p.Leu2620Leu,p.L2620L,ivar,Unassigned +214825,NC_045512.2,10845,T,C,ft,16,12,4,0.25,orf1ab,missense_variant,c.10580T>C,p.Met3527Thr,p.M3527T,ivar,B.1.177 +214825,NC_045512.2,11132,G,T,PASS,108,0,108,1.0,orf1ab,missense_variant,c.10867G>T,p.Ala3623Ser,p.A3623S,ivar,B.1.177 +214825,NC_045512.2,14408,C,T,PASS,256,0,256,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,B.1.177 
+214825,NC_045512.2,16726,C,T,PASS,500,3,496,0.99,orf1ab,synonymous_variant,c.16461C>T,p.Tyr5487Tyr,p.Y5487Y,ivar,B.1.177 +214825,NC_045512.2,17658,G,T,PASS,4879,1,4849,0.99,orf1ab,missense_variant,c.17393G>T,p.Cys5798Phe,p.C5798F,ivar,B.1.177 +214825,NC_045512.2,20132,C,T,PASS,62,0,62,1.0,orf1ab,missense_variant,c.19867C>T,p.Pro6623Ser,p.P6623S,ivar,B.1.177 +214825,NC_045512.2,21255,G,C,PASS,57,0,57,1.0,orf1ab,missense_variant,c.20990G>C,p.Arg6997Pro,p.R6997P,ivar,B.1.177 +214825,NC_045512.2,21786,G,T,PASS,247,0,247,1.0,S,missense_variant,c.224G>T,p.Gly75Val,p.G75V,ivar,B.1.177 +214825,NC_045512.2,22227,C,T,PASS,17,0,17,1.0,S,missense_variant,c.665C>T,p.Ala222Val,p.A222V,ivar,B.1.177 +214825,NC_045512.2,23403,A,G,PASS,2049,10,2039,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,B.1.177 +214825,NC_045512.2,26801,C,G,PASS,241,0,241,1.0,M,synonymous_variant,c.279C>G,p.Leu93Leu,p.L93L,ivar,B.1.177 +214825,NC_045512.2,28932,C,T,PASS,173,0,172,0.99,N,missense_variant,c.659C>T,p.Ala220Val,p.A220V,ivar,B.1.177 +214825,NC_045512.2,29645,G,T,PASS,21,0,21,1.0,ORF10,missense_variant,c.88G>T,p.Val30Leu,p.V30L,ivar,B.1.177 +214825,NC_045512.2,3037,C,T,PASS,243,3,239,0.98,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,B.1.177 +214825,NC_045512.2,445,T,C,PASS,10199,19,10180,1.0,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,B.1.177 +214825,NC_045512.2,541,C,T,PASS,9612,41,9559,0.99,orf1ab,synonymous_variant,c.276C>T,p.Leu92Leu,p.L92L,ivar,B.1.177 +214825,NC_045512.2,5548,C,T,PASS,20,0,20,1.0,orf1ab,synonymous_variant,c.5283C>T,p.Thr1761Thr,p.T1761T,ivar,B.1.177 +214826,NC_045512.2,11132,G,T,PASS,238,0,238,1.0,orf1ab,missense_variant,c.10867G>T,p.Ala3623Ser,p.A3623S,ivar,B.1.177 +214826,NC_045512.2,14408,C,T,PASS,428,2,426,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,B.1.177 +214826,NC_045512.2,16726,C,T,PASS,621,0,621,1.0,orf1ab,synonymous_variant,c.16461C>T,p.Tyr5487Tyr,p.Y5487Y,ivar,B.1.177 
+214826,NC_045512.2,17658,G,T,PASS,6934,3,6903,1.0,orf1ab,missense_variant,c.17393G>T,p.Cys5798Phe,p.C5798F,ivar,B.1.177 +214826,NC_045512.2,20132,C,T,PASS,101,0,101,1.0,orf1ab,missense_variant,c.19867C>T,p.Pro6623Ser,p.P6623S,ivar,B.1.177 +214826,NC_045512.2,21255,G,C,PASS,91,0,91,1.0,orf1ab,missense_variant,c.20990G>C,p.Arg6997Pro,p.R6997P,ivar,B.1.177 +214826,NC_045512.2,21786,G,T,PASS,464,4,460,0.99,S,missense_variant,c.224G>T,p.Gly75Val,p.G75V,ivar,B.1.177 +214826,NC_045512.2,22227,C,T,PASS,31,0,31,1.0,S,missense_variant,c.665C>T,p.Ala222Val,p.A222V,ivar,B.1.177 +214826,NC_045512.2,23403,A,G,PASS,1826,2,1824,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,B.1.177 +214826,NC_045512.2,26801,C,G,PASS,363,0,362,1.0,M,synonymous_variant,c.279C>G,p.Leu93Leu,p.L93L,ivar,B.1.177 +214826,NC_045512.2,28932,C,T,PASS,196,2,193,0.98,N,missense_variant,c.659C>T,p.Ala220Val,p.A220V,ivar,B.1.177 +214826,NC_045512.2,29645,G,T,PASS,46,0,46,1.0,ORF10,missense_variant,c.88G>T,p.Val30Leu,p.V30L,ivar,B.1.177 +214826,NC_045512.2,3037,C,T,PASS,154,0,154,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,B.1.177 +214826,NC_045512.2,4158,C,T,PASS,112,81,31,0.28,orf1ab,missense_variant,c.3893C>T,p.Ala1298Val,p.A1298V,ivar,B.1.177 +214826,NC_045512.2,445,T,C,PASS,10150,5,10145,1.0,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,B.1.177 +214826,NC_045512.2,541,C,T,PASS,9501,38,9460,1.0,orf1ab,synonymous_variant,c.276C>T,p.Leu92Leu,p.L92L,ivar,B.1.177 +214826,NC_045512.2,5548,C,T,PASS,20,0,20,1.0,orf1ab,synonymous_variant,c.5283C>T,p.Thr1761Thr,p.T1761T,ivar,B.1.177 +214827,NC_045512.2,11132,G,T,PASS,36,0,36,1.0,orf1ab,missense_variant,c.10867G>T,p.Ala3623Ser,p.A3623S,ivar,Unassigned +214827,NC_045512.2,1123,A,T,PASS,161,3,158,0.98,orf1ab,synonymous_variant,c.858A>T,p.Pro286Pro,p.P286P,ivar,Unassigned +214827,NC_045512.2,14408,C,T,PASS,28,0,28,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned 
+214827,NC_045512.2,16726,C,T,PASS,47,0,47,1.0,orf1ab,synonymous_variant,c.16461C>T,p.Tyr5487Tyr,p.Y5487Y,ivar,Unassigned +214827,NC_045512.2,17658,G,T,PASS,6726,0,6694,1.0,orf1ab,missense_variant,c.17393G>T,p.Cys5798Phe,p.C5798F,ivar,Unassigned +214827,NC_045512.2,21786,G,T,PASS,49,0,49,1.0,S,missense_variant,c.224G>T,p.Gly75Val,p.G75V,ivar,Unassigned +214827,NC_045512.2,23403,A,G,PASS,336,0,336,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +214827,NC_045512.2,24368,G,C,PASS,73,23,49,0.67,S,missense_variant,c.2806G>C,p.Asp936His,p.D936H,ivar,Unassigned +214827,NC_045512.2,25112,G,T,PASS,90,10,80,0.89,S,missense_variant,c.3550G>T,p.Asp1184Tyr,p.D1184Y,ivar,Unassigned +214827,NC_045512.2,26297,T,C,PASS,652,476,176,0.27,E,missense_variant,c.53T>C,p.Leu18Pro,p.L18P,ivar,Unassigned +214827,NC_045512.2,26410,T,C,ft,10,6,4,0.4,E,missense_variant,c.166T>C,p.Phe56Leu,p.F56L,ivar,Unassigned +214827,NC_045512.2,26801,C,G,PASS,12,1,11,0.92,M,synonymous_variant,c.279C>G,p.Leu93Leu,p.L93L,ivar,Unassigned +214827,NC_045512.2,28932,C,T,PASS,80,0,80,1.0,N,missense_variant,c.659C>T,p.Ala220Val,p.A220V,ivar,Unassigned +214827,NC_045512.2,3037,C,T,PASS,48,0,48,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +214827,NC_045512.2,445,T,C,PASS,3394,8,3386,1.0,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,Unassigned +214827,NC_045512.2,541,C,T,PASS,3216,10,3206,1.0,orf1ab,synonymous_variant,c.276C>T,p.Leu92Leu,p.L92L,ivar,Unassigned +214827,NC_045512.2,7521,C,T,PASS,29,3,26,0.9,orf1ab,missense_variant,c.7256C>T,p.Thr2419Ile,p.T2419I,ivar,Unassigned +214827,NC_045512.2,76,T,A,ft,14,10,4,0.29,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +214827,NC_045512.2,78,T,G,ft,15,11,4,0.27,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,Unassigned +214827,NC_045512.2,8302,A,T,PASS,28,17,10,0.36,orf1ab,missense_variant,c.8037A>T,p.Lys2679Asn,p.K2679N,ivar,Unassigned 
+214828,NC_045512.2,11132,G,T,PASS,47,0,47,1.0,orf1ab,missense_variant,c.10867G>T,p.Ala3623Ser,p.A3623S,ivar,Unassigned +214828,NC_045512.2,14408,C,T,PASS,26,0,26,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +214828,NC_045512.2,14646,T,A,PASS,53,25,28,0.53,orf1ab,missense_variant,c.14381T>A,p.Leu4794Gln,p.L4794Q,ivar,Unassigned +214828,NC_045512.2,16002,T,G,PASS,34,21,13,0.38,orf1ab,stop_gained,c.15737T>G,p.Leu5246*,p.L5246*,ivar,Unassigned +214828,NC_045512.2,16726,C,T,PASS,38,0,38,1.0,orf1ab,synonymous_variant,c.16461C>T,p.Tyr5487Tyr,p.Y5487Y,ivar,Unassigned +214828,NC_045512.2,17658,G,T,PASS,12609,1,12562,1.0,orf1ab,missense_variant,c.17393G>T,p.Cys5798Phe,p.C5798F,ivar,Unassigned +214828,NC_045512.2,19378,T,C,PASS,42,17,25,0.6,orf1ab,synonymous_variant,c.19113T>C,p.Ile6371Ile,p.I6371I,ivar,Unassigned +214828,NC_045512.2,20132,C,T,PASS,23,0,23,1.0,orf1ab,missense_variant,c.19867C>T,p.Pro6623Ser,p.P6623S,ivar,Unassigned +214828,NC_045512.2,21786,G,T,PASS,24,0,24,1.0,S,missense_variant,c.224G>T,p.Gly75Val,p.G75V,ivar,Unassigned +214828,NC_045512.2,23403,A,G,PASS,611,0,611,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +214828,NC_045512.2,26801,C,G,PASS,178,0,178,1.0,M,synonymous_variant,c.279C>G,p.Leu93Leu,p.L93L,ivar,Unassigned +214828,NC_045512.2,27856,T,A,PASS,53,13,40,0.75,ORF7b,missense_variant,c.101T>A,p.Leu34Gln,p.L34Q,ivar,Unassigned +214828,NC_045512.2,28932,C,T,PASS,210,0,210,1.0,N,missense_variant,c.659C>T,p.Ala220Val,p.A220V,ivar,Unassigned +214828,NC_045512.2,3037,C,T,PASS,30,0,30,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +214828,NC_045512.2,445,T,C,PASS,14026,6,14017,1.0,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,Unassigned +214828,NC_045512.2,541,C,T,PASS,13166,68,13075,0.99,orf1ab,synonymous_variant,c.276C>T,p.Leu92Leu,p.L92L,ivar,Unassigned 
+214829,NC_045512.2,10833,C,T,PASS,17,0,17,1.0,orf1ab,missense_variant,c.10568C>T,p.Ala3523Val,p.A3523V,ivar,B.1.177 +214829,NC_045512.2,11824,C,T,PASS,73,0,73,1.0,orf1ab,synonymous_variant,c.11559C>T,p.Ile3853Ile,p.I3853I,ivar,B.1.177 +214829,NC_045512.2,14408,C,T,PASS,148,0,148,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,B.1.177 +214829,NC_045512.2,16308,C,T,PASS,433,3,428,0.99,orf1ab,missense_variant,c.16043C>T,p.Ser5348Phe,p.S5348F,ivar,B.1.177 +214829,NC_045512.2,21255,G,C,PASS,73,0,73,1.0,orf1ab,missense_variant,c.20990G>C,p.Arg6997Pro,p.R6997P,ivar,B.1.177 +214829,NC_045512.2,222,C,T,PASS,158,1,157,0.99,orf1ab,upstream_gene_variant,c.-44C>T,.,.,ivar,B.1.177 +214829,NC_045512.2,22482,C,T,PASS,39,20,19,0.49,S,missense_variant,c.920C>T,p.Thr307Ile,p.T307I,ivar,B.1.177 +214829,NC_045512.2,23403,A,G,PASS,2780,6,2774,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,B.1.177 +214829,NC_045512.2,24370,C,T,PASS,443,4,439,0.99,S,synonymous_variant,c.2808C>T,p.Asp936Asp,p.D936D,ivar,B.1.177 +214829,NC_045512.2,26801,C,G,PASS,694,0,694,1.0,M,synonymous_variant,c.279C>G,p.Leu93Leu,p.L93L,ivar,B.1.177 +214829,NC_045512.2,28093,C,T,PASS,592,1,590,1.0,ORF8,missense_variant,c.200C>T,p.Ser67Phe,p.S67F,ivar,B.1.177 +214829,NC_045512.2,28486,C,T,PASS,493,2,491,1.0,N,synonymous_variant,c.213C>T,p.Gly71Gly,p.G71G,ivar,B.1.177 +214829,NC_045512.2,28932,C,T,PASS,556,1,554,1.0,N,missense_variant,c.659C>T,p.Ala220Val,p.A220V,ivar,B.1.177 +214829,NC_045512.2,29402,G,T,PASS,5976,2,5958,1.0,N,missense_variant,c.1129G>T,p.Asp377Tyr,p.D377Y,ivar,B.1.177 +214829,NC_045512.2,29578,C,T,PASS,99,5,94,0.95,ORF10,synonymous_variant,c.21C>T,p.Phe7Phe,p.F7F,ivar,B.1.177 +214829,NC_045512.2,29645,G,T,PASS,117,0,116,0.99,ORF10,missense_variant,c.88G>T,p.Val30Leu,p.V30L,ivar,B.1.177 +214829,NC_045512.2,3037,C,T,PASS,260,0,260,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,B.1.177 
+214829,NC_045512.2,445,T,C,PASS,13196,11,13183,1.0,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,B.1.177 +214829,NC_045512.2,527,C,T,PASS,11571,51,11487,0.99,orf1ab,synonymous_variant,c.262C>T,p.Leu88Leu,p.L88L,ivar,B.1.177 +214829,NC_045512.2,5869,C,T,PASS,192,0,192,1.0,orf1ab,synonymous_variant,c.5604C>T,p.Tyr1868Tyr,p.Y1868Y,ivar,B.1.177 +214829,NC_045512.2,8326,C,T,PASS,2135,898,1233,0.58,orf1ab,synonymous_variant,c.8061C>T,p.Asp2687Asp,p.D2687D,ivar,B.1.177 +214830,NC_045512.2,10833,C,T,PASS,17,0,17,1.0,orf1ab,missense_variant,c.10568C>T,p.Ala3523Val,p.A3523V,ivar,B.1.177 +214830,NC_045512.2,11824,C,T,PASS,54,0,54,1.0,orf1ab,synonymous_variant,c.11559C>T,p.Ile3853Ile,p.I3853I,ivar,B.1.177 +214830,NC_045512.2,14408,C,T,PASS,160,2,158,0.99,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,B.1.177 +214830,NC_045512.2,16308,C,T,PASS,487,3,484,0.99,orf1ab,missense_variant,c.16043C>T,p.Ser5348Phe,p.S5348F,ivar,B.1.177 +214830,NC_045512.2,21255,G,C,PASS,56,0,56,1.0,orf1ab,missense_variant,c.20990G>C,p.Arg6997Pro,p.R6997P,ivar,B.1.177 +214830,NC_045512.2,222,C,T,PASS,52,0,52,1.0,orf1ab,upstream_gene_variant,c.-44C>T,.,.,ivar,B.1.177 +214830,NC_045512.2,22482,C,T,PASS,31,0,31,1.0,S,missense_variant,c.920C>T,p.Thr307Ile,p.T307I,ivar,B.1.177 +214830,NC_045512.2,23403,A,G,PASS,2950,2,2944,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,B.1.177 +214830,NC_045512.2,24370,C,T,PASS,234,1,233,1.0,S,synonymous_variant,c.2808C>T,p.Asp936Asp,p.D936D,ivar,B.1.177 +214830,NC_045512.2,26801,C,G,PASS,442,0,442,1.0,M,synonymous_variant,c.279C>G,p.Leu93Leu,p.L93L,ivar,B.1.177 +214830,NC_045512.2,28093,C,T,PASS,632,8,622,0.98,ORF8,missense_variant,c.200C>T,p.Ser67Phe,p.S67F,ivar,B.1.177 +214830,NC_045512.2,28486,C,T,PASS,414,2,410,0.99,N,synonymous_variant,c.213C>T,p.Gly71Gly,p.G71G,ivar,B.1.177 +214830,NC_045512.2,28932,C,T,PASS,811,1,809,1.0,N,missense_variant,c.659C>T,p.Ala220Val,p.A220V,ivar,B.1.177 
+214830,NC_045512.2,29402,G,T,PASS,7244,1,7211,1.0,N,missense_variant,c.1129G>T,p.Asp377Tyr,p.D377Y,ivar,B.1.177 +214830,NC_045512.2,29578,C,T,PASS,101,0,101,1.0,ORF10,synonymous_variant,c.21C>T,p.Phe7Phe,p.F7F,ivar,B.1.177 +214830,NC_045512.2,29645,G,T,PASS,106,0,106,1.0,ORF10,missense_variant,c.88G>T,p.Val30Leu,p.V30L,ivar,B.1.177 +214830,NC_045512.2,3037,C,T,PASS,259,0,259,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,B.1.177 +214830,NC_045512.2,445,T,C,PASS,12135,22,12111,1.0,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,B.1.177 +214830,NC_045512.2,5274,C,T,PASS,12,0,12,1.0,orf1ab,missense_variant,c.5009C>T,p.Ala1670Val,p.A1670V,ivar,B.1.177 +214830,NC_045512.2,527,C,T,PASS,11747,79,11647,0.99,orf1ab,synonymous_variant,c.262C>T,p.Leu88Leu,p.L88L,ivar,B.1.177 +214830,NC_045512.2,5869,C,T,PASS,58,2,54,0.93,orf1ab,synonymous_variant,c.5604C>T,p.Tyr1868Tyr,p.Y1868Y,ivar,B.1.177 +214831,NC_045512.2,1009,C,A,ft,12,7,5,0.42,orf1ab,missense_variant,c.744C>A,p.Ser248Arg,p.S248R,ivar,Unassigned +214831,NC_045512.2,13408,T,C,ft,13,9,4,0.31,orf1ab,synonymous_variant,c.13143T>C,p.Cys4381Cys,p.C4381C,ivar,Unassigned +214831,NC_045512.2,14646,T,A,ft,15,9,6,0.4,orf1ab,missense_variant,c.14381T>A,p.Leu4794Gln,p.L4794Q,ivar,Unassigned +214831,NC_045512.2,14647,A,C,ft,15,9,6,0.4,orf1ab,synonymous_variant,c.14382A>C,p.Leu4794Leu,p.L4794L,ivar,Unassigned +214831,NC_045512.2,14661,T,G,PASS,13,7,6,0.46,orf1ab,missense_variant,c.14396T>G,p.Phe4799Cys,p.F4799C,ivar,Unassigned +214831,NC_045512.2,15604,A,T,ft,11,7,4,0.36,orf1ab,synonymous_variant,c.15339A>T,p.Ala5113Ala,p.A5113A,ivar,Unassigned +214831,NC_045512.2,17375,C,T,PASS,344,195,148,0.43,orf1ab,stop_gained,c.17110C>T,p.Gln5704*,p.Q5704*,ivar,Unassigned +214831,NC_045512.2,17469,G,T,PASS,472,337,127,0.27,orf1ab,missense_variant,c.17204G>T,p.Cys5735Phe,p.C5735F,ivar,Unassigned +214831,NC_045512.2,23333,T,C,PASS,54,30,24,0.44,S,missense_variant,c.1771T>C,p.Ser591Pro,p.S591P,ivar,Unassigned 
+214831,NC_045512.2,23403,A,G,PASS,59,0,59,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +214831,NC_045512.2,28301,C,A,ft,10,6,4,0.4,N,synonymous_variant,c.28C>A,p.Arg10Arg,p.R10R,ivar,Unassigned +214831,NC_045512.2,29402,G,T,PASS,143,2,141,0.99,N,missense_variant,c.1129G>T,p.Asp377Tyr,p.D377Y,ivar,Unassigned +214831,NC_045512.2,3154,T,A,ft,11,8,3,0.27,orf1ab,synonymous_variant,c.2889T>A,p.Gly963Gly,p.G963G,ivar,Unassigned +214831,NC_045512.2,3443,G,T,PASS,17,9,8,0.47,orf1ab,missense_variant,c.3178G>T,p.Ala1060Ser,p.A1060S,ivar,Unassigned +214831,NC_045512.2,3748,T,C,ft,12,8,4,0.33,orf1ab,synonymous_variant,c.3483T>C,p.Ser1161Ser,p.S1161S,ivar,Unassigned +214831,NC_045512.2,445,T,C,PASS,1048,6,1042,0.99,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,Unassigned +214831,NC_045512.2,527,C,T,PASS,951,313,636,0.67,orf1ab,synonymous_variant,c.262C>T,p.Leu88Leu,p.L88L,ivar,Unassigned +214833,NC_045512.2,14408,C,T,PASS,15,0,15,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +214833,NC_045512.2,14595,T,G,ft,23,17,6,0.26,orf1ab,missense_variant,c.14330T>G,p.Val4777Gly,p.V4777G,ivar,Unassigned +214833,NC_045512.2,16308,C,T,PASS,50,0,50,1.0,orf1ab,missense_variant,c.16043C>T,p.Ser5348Phe,p.S5348F,ivar,Unassigned +214833,NC_045512.2,17213,C,T,PASS,14,4,10,0.71,orf1ab,synonymous_variant,c.16948C>T,p.Leu5650Leu,p.L5650L,ivar,Unassigned +214833,NC_045512.2,21563,A,G,PASS,36,27,9,0.25,S,start_lost,c.1A>G,p.Met1?,p.M1?,ivar,Unassigned +214833,NC_045512.2,23403,A,G,PASS,171,0,171,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +214833,NC_045512.2,24370,C,T,PASS,15,0,15,1.0,S,synonymous_variant,c.2808C>T,p.Asp936Asp,p.D936D,ivar,Unassigned +214833,NC_045512.2,26114,A,G,ft,15,11,4,0.27,ORF3a,missense_variant,c.722A>G,p.Glu241Gly,p.E241G,ivar,Unassigned +214833,NC_045512.2,26663,T,C,PASS,33,19,14,0.42,M,synonymous_variant,c.141T>C,p.Tyr47Tyr,p.Y47Y,ivar,Unassigned 
+214833,NC_045512.2,26801,C,G,PASS,50,0,50,1.0,M,synonymous_variant,c.279C>G,p.Leu93Leu,p.L93L,ivar,Unassigned +214833,NC_045512.2,28093,C,T,PASS,54,2,51,0.94,ORF8,missense_variant,c.200C>T,p.Ser67Phe,p.S67F,ivar,Unassigned +214833,NC_045512.2,28932,C,T,PASS,45,0,45,1.0,N,missense_variant,c.659C>T,p.Ala220Val,p.A220V,ivar,Unassigned +214833,NC_045512.2,29402,G,T,PASS,1250,2,1245,1.0,N,missense_variant,c.1129G>T,p.Asp377Tyr,p.D377Y,ivar,Unassigned +214833,NC_045512.2,2966,A,G,PASS,36,23,13,0.36,orf1ab,missense_variant,c.2701A>G,p.Ser901Gly,p.S901G,ivar,Unassigned +214833,NC_045512.2,3037,C,T,PASS,28,0,28,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +214833,NC_045512.2,445,T,C,PASS,5706,6,5700,1.0,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,Unassigned +214833,NC_045512.2,5869,C,T,ft,16,12,4,0.25,orf1ab,synonymous_variant,c.5604C>T,p.Tyr1868Tyr,p.Y1868Y,ivar,Unassigned +214833,NC_045512.2,7948,C,T,PASS,109,25,84,0.77,orf1ab,synonymous_variant,c.7683C>T,p.Tyr2561Tyr,p.Y2561Y,ivar,Unassigned +214834,NC_045512.2,11824,C,T,PASS,18,0,18,1.0,orf1ab,synonymous_variant,c.11559C>T,p.Ile3853Ile,p.I3853I,ivar,B.1.177 +214834,NC_045512.2,14408,C,T,PASS,137,0,137,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,B.1.177 +214834,NC_045512.2,16308,C,T,PASS,255,0,255,1.0,orf1ab,missense_variant,c.16043C>T,p.Ser5348Phe,p.S5348F,ivar,B.1.177 +214834,NC_045512.2,21255,G,C,PASS,44,0,42,0.95,orf1ab,missense_variant,c.20990G>C,p.Arg6997Pro,p.R6997P,ivar,B.1.177 +214834,NC_045512.2,222,C,T,PASS,17,0,17,1.0,orf1ab,upstream_gene_variant,c.-44C>T,.,.,ivar,B.1.177 +214834,NC_045512.2,23403,A,G,PASS,1324,1,1323,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,B.1.177 +214834,NC_045512.2,24370,C,T,PASS,87,1,86,0.99,S,synonymous_variant,c.2808C>T,p.Asp936Asp,p.D936D,ivar,B.1.177 +214834,NC_045512.2,26801,C,G,PASS,95,0,95,1.0,M,synonymous_variant,c.279C>G,p.Leu93Leu,p.L93L,ivar,B.1.177 
+214834,NC_045512.2,28093,C,T,PASS,232,3,229,0.99,ORF8,missense_variant,c.200C>T,p.Ser67Phe,p.S67F,ivar,B.1.177 +214834,NC_045512.2,28932,C,T,PASS,295,1,294,1.0,N,missense_variant,c.659C>T,p.Ala220Val,p.A220V,ivar,B.1.177 +214834,NC_045512.2,29402,G,T,PASS,4981,0,4965,1.0,N,missense_variant,c.1129G>T,p.Asp377Tyr,p.D377Y,ivar,B.1.177 +214834,NC_045512.2,29578,C,T,PASS,13,0,13,1.0,ORF10,synonymous_variant,c.21C>T,p.Phe7Phe,p.F7F,ivar,B.1.177 +214834,NC_045512.2,29645,G,T,PASS,17,0,17,1.0,ORF10,missense_variant,c.88G>T,p.Val30Leu,p.V30L,ivar,B.1.177 +214834,NC_045512.2,29681,TTA,T,PASS,20,20,12,0.6,S,downstream_gene_variant,c.*4298_*4299delTA,.,.,ivar,B.1.177 +214834,NC_045512.2,3037,C,T,PASS,72,1,71,0.99,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,B.1.177 +214834,NC_045512.2,3092,C,T,PASS,76,16,60,0.79,orf1ab,missense_variant,c.2827C>T,p.Pro943Ser,p.P943S,ivar,B.1.177 +214834,NC_045512.2,445,T,C,PASS,21304,21,21277,1.0,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,B.1.177 +214834,NC_045512.2,8807,G,A,ft,10,7,3,0.3,orf1ab,missense_variant,c.8542G>A,p.Asp2848Asn,p.D2848N,ivar,B.1.177 +214835,NC_045512.2,10741,CT,C,PASS,25,25,22,0.88,orf1ab,frameshift_variant,c.10479delT,p.Phe3493fs,p.F3493fs,ivar,Unassigned +214835,NC_045512.2,11808,G,C,ft,12,8,4,0.33,orf1ab,missense_variant,c.11543G>C,p.Gly3848Ala,p.G3848A,ivar,Unassigned +214835,NC_045512.2,11824,C,T,PASS,12,0,12,1.0,orf1ab,synonymous_variant,c.11559C>T,p.Ile3853Ile,p.I3853I,ivar,Unassigned +214835,NC_045512.2,14408,C,T,PASS,62,0,62,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +214835,NC_045512.2,16308,C,T,PASS,142,1,139,0.98,orf1ab,missense_variant,c.16043C>T,p.Ser5348Phe,p.S5348F,ivar,Unassigned +214835,NC_045512.2,16988,G,A,PASS,28,18,10,0.36,orf1ab,missense_variant,c.16723G>A,p.Ala5575Thr,p.A5575T,ivar,Unassigned +214835,NC_045512.2,19307,A,G,PASS,32,23,9,0.28,orf1ab,missense_variant,c.19042A>G,p.Asn6348Asp,p.N6348D,ivar,Unassigned 
+214835,NC_045512.2,21255,G,C,PASS,11,0,11,1.0,orf1ab,missense_variant,c.20990G>C,p.Arg6997Pro,p.R6997P,ivar,Unassigned +214835,NC_045512.2,23403,A,G,PASS,183,0,183,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +214835,NC_045512.2,23826,A,G,ft,15,11,4,0.27,S,missense_variant,c.2264A>G,p.Gln755Arg,p.Q755R,ivar,Unassigned +214835,NC_045512.2,24370,C,T,PASS,36,1,35,0.97,S,synonymous_variant,c.2808C>T,p.Asp936Asp,p.D936D,ivar,Unassigned +214835,NC_045512.2,26239,G,T,PASS,57,27,30,0.53,E,upstream_gene_variant,c.-6G>T,.,.,ivar,Unassigned +214835,NC_045512.2,26801,C,G,PASS,48,0,48,1.0,M,synonymous_variant,c.279C>G,p.Leu93Leu,p.L93L,ivar,Unassigned +214835,NC_045512.2,27518,G,A,PASS,84,60,24,0.29,ORF7a,missense_variant,c.125G>A,p.Gly42Asp,p.G42D,ivar,Unassigned +214835,NC_045512.2,28093,C,T,PASS,31,1,30,0.97,ORF8,missense_variant,c.200C>T,p.Ser67Phe,p.S67F,ivar,Unassigned +214835,NC_045512.2,28932,C,T,PASS,138,1,137,0.99,N,missense_variant,c.659C>T,p.Ala220Val,p.A220V,ivar,Unassigned +214835,NC_045512.2,29402,G,T,PASS,2177,0,2173,1.0,N,missense_variant,c.1129G>T,p.Asp377Tyr,p.D377Y,ivar,Unassigned +214835,NC_045512.2,29578,C,T,PASS,24,0,24,1.0,ORF10,synonymous_variant,c.21C>T,p.Phe7Phe,p.F7F,ivar,Unassigned +214835,NC_045512.2,29645,G,T,PASS,25,0,25,1.0,ORF10,missense_variant,c.88G>T,p.Val30Leu,p.V30L,ivar,Unassigned +214835,NC_045512.2,29681,TTA,T,PASS,22,22,13,0.59,S,downstream_gene_variant,c.*4298_*4299delTA,.,.,ivar,Unassigned +214835,NC_045512.2,3037,C,T,PASS,26,0,26,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +214835,NC_045512.2,3092,C,T,PASS,27,15,12,0.44,orf1ab,missense_variant,c.2827C>T,p.Pro943Ser,p.P943S,ivar,Unassigned +214835,NC_045512.2,4079,AGT,A,PASS,74,74,60,0.81,orf1ab,frameshift_variant,c.3816_3817delTG,p.Ser1272fs,p.S1272fs,ivar,Unassigned +214835,NC_045512.2,445,T,C,PASS,10789,17,10764,1.0,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,Unassigned 
+214835,NC_045512.2,5869,C,T,PASS,51,36,15,0.29,orf1ab,synonymous_variant,c.5604C>T,p.Tyr1868Tyr,p.Y1868Y,ivar,Unassigned +214835,NC_045512.2,7639,C,T,PASS,45,33,12,0.27,orf1ab,synonymous_variant,c.7374C>T,p.Phe2458Phe,p.F2458F,ivar,Unassigned +214835,NC_045512.2,8596,A,T,PASS,238,177,61,0.26,orf1ab,synonymous_variant,c.8331A>T,p.Thr2777Thr,p.T2777T,ivar,Unassigned +214836,NC_045512.2,14408,C,T,PASS,51,0,51,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +214836,NC_045512.2,17017,G,A,PASS,23,15,8,0.35,orf1ab,missense_variant,c.16752G>A,p.Met5584Ile,p.M5584I,ivar,Unassigned +214836,NC_045512.2,18774,A,G,PASS,37,27,10,0.27,orf1ab,missense_variant,c.18509A>G,p.Asn6170Ser,p.N6170S,ivar,Unassigned +214836,NC_045512.2,21747,T,G,PASS,35,23,12,0.34,S,missense_variant,c.185T>G,p.Val62Gly,p.V62G,ivar,Unassigned +214836,NC_045512.2,23403,A,G,PASS,149,0,149,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +214836,NC_045512.2,24370,C,T,PASS,74,1,73,0.99,S,synonymous_variant,c.2808C>T,p.Asp936Asp,p.D936D,ivar,Unassigned +214836,NC_045512.2,26263,G,C,PASS,14,4,10,0.71,E,missense_variant,c.19G>C,p.Glu7Gln,p.E7Q,ivar,Unassigned +214836,NC_045512.2,27767,T,C,ft,11,8,3,0.27,ORF7b,synonymous_variant,c.12T>C,p.Leu4Leu,p.L4L,ivar,Unassigned +214836,NC_045512.2,28093,C,T,PASS,80,0,80,1.0,ORF8,missense_variant,c.200C>T,p.Ser67Phe,p.S67F,ivar,Unassigned +214836,NC_045512.2,28932,C,T,PASS,43,1,42,0.98,N,missense_variant,c.659C>T,p.Ala220Val,p.A220V,ivar,Unassigned +214836,NC_045512.2,29402,G,T,PASS,4413,0,4393,1.0,N,missense_variant,c.1129G>T,p.Asp377Tyr,p.D377Y,ivar,Unassigned +214836,NC_045512.2,3037,C,T,PASS,18,0,18,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +214836,NC_045512.2,3092,C,T,ft,19,14,5,0.26,orf1ab,missense_variant,c.2827C>T,p.Pro943Ser,p.P943S,ivar,Unassigned +214836,NC_045512.2,445,T,C,PASS,3540,3,3537,1.0,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,Unassigned 
+214836,NC_045512.2,8317,A,G,PASS,45,33,12,0.27,orf1ab,synonymous_variant,c.8052A>G,p.Thr2684Thr,p.T2684T,ivar,Unassigned +214837,NC_045512.2,10257,T,C,ft,16,12,4,0.25,orf1ab,missense_variant,c.9992T>C,p.Val3331Ala,p.V3331A,ivar,B.1.177 +214837,NC_045512.2,14408,C,T,PASS,55,0,55,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,B.1.177 +214837,NC_045512.2,14946,T,TG,ft,14,14,4,0.29,orf1ab,frameshift_variant,c.14683dupG,p.Val4895fs,p.V4895fs,ivar,B.1.177 +214837,NC_045512.2,14958,T,A,ft,14,10,4,0.29,orf1ab,stop_gained,c.14693T>A,p.Leu4898*,p.L4898*,ivar,B.1.177 +214837,NC_045512.2,16308,C,T,PASS,75,0,75,1.0,orf1ab,missense_variant,c.16043C>T,p.Ser5348Phe,p.S5348F,ivar,B.1.177 +214837,NC_045512.2,21255,G,C,PASS,19,0,19,1.0,orf1ab,missense_variant,c.20990G>C,p.Arg6997Pro,p.R6997P,ivar,B.1.177 +214837,NC_045512.2,222,C,T,PASS,35,0,35,1.0,orf1ab,upstream_gene_variant,c.-44C>T,.,.,ivar,B.1.177 +214837,NC_045512.2,23403,A,G,PASS,728,0,728,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,B.1.177 +214837,NC_045512.2,24370,C,T,PASS,63,1,62,0.98,S,synonymous_variant,c.2808C>T,p.Asp936Asp,p.D936D,ivar,B.1.177 +214837,NC_045512.2,26801,C,G,PASS,163,0,163,1.0,M,synonymous_variant,c.279C>G,p.Leu93Leu,p.L93L,ivar,B.1.177 +214837,NC_045512.2,28093,C,T,PASS,85,0,85,1.0,ORF8,missense_variant,c.200C>T,p.Ser67Phe,p.S67F,ivar,B.1.177 +214837,NC_045512.2,28486,C,T,PASS,108,19,89,0.82,N,synonymous_variant,c.213C>T,p.Gly71Gly,p.G71G,ivar,B.1.177 +214837,NC_045512.2,28932,C,T,PASS,118,0,118,1.0,N,missense_variant,c.659C>T,p.Ala220Val,p.A220V,ivar,B.1.177 +214837,NC_045512.2,29402,G,T,PASS,1594,0,1584,0.99,N,missense_variant,c.1129G>T,p.Asp377Tyr,p.D377Y,ivar,B.1.177 +214837,NC_045512.2,29578,C,T,PASS,14,0,14,1.0,ORF10,synonymous_variant,c.21C>T,p.Phe7Phe,p.F7F,ivar,B.1.177 +214837,NC_045512.2,29645,G,T,PASS,17,0,17,1.0,ORF10,missense_variant,c.88G>T,p.Val30Leu,p.V30L,ivar,B.1.177 
+214837,NC_045512.2,3037,C,T,PASS,59,0,59,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,B.1.177 +214837,NC_045512.2,3958,A,C,PASS,178,56,122,0.69,orf1ab,missense_variant,c.3693A>C,p.Lys1231Asn,p.K1231N,ivar,B.1.177 +214837,NC_045512.2,3963,A,T,PASS,194,56,138,0.71,orf1ab,missense_variant,c.3698A>T,p.Lys1233Ile,p.K1233I,ivar,B.1.177 +214837,NC_045512.2,4082,G,A,PASS,39,16,23,0.59,orf1ab,missense_variant,c.3817G>A,p.Asp1273Asn,p.D1273N,ivar,B.1.177 +214837,NC_045512.2,445,T,C,PASS,19439,22,19412,1.0,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,B.1.177 +214837,NC_045512.2,527,C,T,PASS,17846,3108,14730,0.83,orf1ab,synonymous_variant,c.262C>T,p.Leu88Leu,p.L88L,ivar,B.1.177 +214837,NC_045512.2,5869,C,T,PASS,29,0,29,1.0,orf1ab,synonymous_variant,c.5604C>T,p.Tyr1868Tyr,p.Y1868Y,ivar,B.1.177 +214837,NC_045512.2,76,T,A,ft,14,10,4,0.29,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,B.1.177 +214837,NC_045512.2,78,T,G,ft,14,10,4,0.29,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,B.1.177 +214838,NC_045512.2,16276,T,C,ft,10,6,4,0.4,orf1ab,synonymous_variant,c.16011T>C,p.His5337His,p.H5337H,ivar,Unassigned +214838,NC_045512.2,23403,A,G,PASS,11,0,11,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +214838,NC_045512.2,28932,C,T,PASS,10,0,10,1.0,N,missense_variant,c.659C>T,p.Ala220Val,p.A220V,ivar,Unassigned +214838,NC_045512.2,28951,A,G,ft,12,9,3,0.25,N,synonymous_variant,c.678A>G,p.Arg226Arg,p.R226R,ivar,Unassigned +214838,NC_045512.2,29402,G,T,PASS,60,0,60,1.0,N,missense_variant,c.1129G>T,p.Asp377Tyr,p.D377Y,ivar,Unassigned +214838,NC_045512.2,445,T,C,PASS,256,3,253,0.99,orf1ab,synonymous_variant,c.180T>C,p.Val60Val,p.V60V,ivar,Unassigned +214838,NC_045512.2,527,C,T,PASS,197,18,179,0.91,orf1ab,synonymous_variant,c.262C>T,p.Leu88Leu,p.L88L,ivar,Unassigned +214838,NC_045512.2,7948,C,T,ft,10,7,3,0.3,orf1ab,synonymous_variant,c.7683C>T,p.Tyr2561Tyr,p.Y2561Y,ivar,Unassigned 
+214838,NC_045512.2,9711,CCA,C,PASS,40,40,14,0.35,orf1ab,frameshift_variant,c.9449_9450delCA,p.Thr3150fs,p.T3150fs,ivar,Unassigned +220338,NC_045512.2,10029,C,T,PASS,13,0,13,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,Unassigned +220338,NC_045512.2,10449,C,A,PASS,19,0,19,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220338,NC_045512.2,11282,AGTTTGTCTG,A,PASS,80,79,66,0.82,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220338,NC_045512.2,11537,A,G,PASS,39,0,39,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220338,NC_045512.2,13195,T,C,PASS,727,1,726,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220338,NC_045512.2,14408,C,T,PASS,10,0,10,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220338,NC_045512.2,16744,G,A,PASS,185,0,185,1.0,orf1ab,synonymous_variant,c.16479G>A,p.Leu5493Leu,p.L5493L,ivar,Unassigned +220338,NC_045512.2,21762,C,T,PASS,10,0,10,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220338,NC_045512.2,21764,ATACATG,A,PASS,10,10,10,1.0,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220338,NC_045512.2,21846,C,T,PASS,19,0,19,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220338,NC_045512.2,23403,A,G,PASS,732,2,730,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220338,NC_045512.2,23525,C,T,PASS,709,4,704,0.99,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220338,NC_045512.2,23599,T,G,PASS,254,0,254,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220338,NC_045512.2,23604,C,A,PASS,244,0,242,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220338,NC_045512.2,23854,C,A,PASS,10,0,10,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned 
+220338,NC_045512.2,23948,G,T,PASS,97,0,96,0.99,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220338,NC_045512.2,24130,C,A,PASS,162,0,162,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220338,NC_045512.2,24424,A,T,PASS,75,0,75,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220338,NC_045512.2,24469,T,A,PASS,127,0,127,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220338,NC_045512.2,24503,C,T,PASS,137,1,136,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220338,NC_045512.2,25000,C,T,PASS,12,0,12,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220338,NC_045512.2,25584,C,T,PASS,26,0,25,0.96,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220338,NC_045512.2,26270,C,T,PASS,91,0,91,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220338,NC_045512.2,26530,A,G,PASS,11,0,11,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220338,NC_045512.2,26577,C,G,PASS,11,0,11,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220338,NC_045512.2,27259,A,C,PASS,38,0,38,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220338,NC_045512.2,28271,A,T,PASS,36,0,36,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220338,NC_045512.2,28311,C,T,PASS,39,0,39,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220338,NC_045512.2,2832,A,G,PASS,24,0,24,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220338,NC_045512.2,28361,GGAGAACGCA,G,PASS,32,32,27,0.84,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220338,NC_045512.2,28881,GG,AA,PASS,1617,29,1583,0.98,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220338,NC_045512.2,28883,G,C,PASS,1606,0,1604,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned 
+220338,NC_045512.2,811,C,T,PASS,21,0,21,1.0,orf1ab,synonymous_variant,c.546C>T,p.Tyr182Tyr,p.Y182Y,ivar,Unassigned +220338,NC_045512.2,8393,G,A,PASS,1157,5,1150,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220339,NC_045512.2,10029,C,T,PASS,72,0,72,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.17 +220339,NC_045512.2,10449,C,A,PASS,257,0,257,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.17 +220339,NC_045512.2,11282,AGTTTGTCTG,A,PASS,507,502,426,0.84,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.17 +220339,NC_045512.2,11537,A,G,PASS,561,0,559,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.17 +220339,NC_045512.2,13195,T,C,PASS,3850,4,3846,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.17 +220339,NC_045512.2,14408,C,T,PASS,235,0,235,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.17 +220339,NC_045512.2,15240,C,T,PASS,83,0,83,1.0,orf1ab,missense_variant,c.14975C>T,p.Thr4992Ile,p.T4992I,ivar,BA.1.17 +220339,NC_045512.2,18163,A,G,PASS,92,0,92,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.17 +220339,NC_045512.2,21762,C,T,PASS,187,2,185,0.99,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.17 +220339,NC_045512.2,21764,ATACATG,A,PASS,192,189,166,0.86,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.17 +220339,NC_045512.2,21846,C,T,PASS,155,0,154,0.99,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.17 +220339,NC_045512.2,22578,G,A,PASS,38,0,38,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.17 +220339,NC_045512.2,22673,TC,CT,PASS,20,0,20,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.17 +220339,NC_045512.2,22679,T,C,PASS,23,0,23,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.17 
+220339,NC_045512.2,22686,C,T,PASS,24,0,24,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.17 +220339,NC_045512.2,23075,T,C,PASS,12,0,12,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1.17 +220339,NC_045512.2,23202,C,A,PASS,28,0,28,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.17 +220339,NC_045512.2,23403,A,G,PASS,2573,5,2568,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.17 +220339,NC_045512.2,23525,C,T,PASS,2372,11,2360,0.99,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.17 +220339,NC_045512.2,23599,T,G,PASS,877,0,877,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.17 +220339,NC_045512.2,23604,C,A,PASS,845,0,842,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.17 +220339,NC_045512.2,23854,C,A,PASS,175,2,171,0.98,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.17 +220339,NC_045512.2,23948,G,T,PASS,1194,2,1189,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.17 +220339,NC_045512.2,24130,C,A,PASS,1645,0,1631,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.17 +220339,NC_045512.2,24424,A,T,PASS,520,0,514,0.99,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.17 +220339,NC_045512.2,24469,T,A,PASS,983,2,973,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.17 +220339,NC_045512.2,24503,C,T,PASS,1108,16,1091,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.17 +220339,NC_045512.2,25000,C,T,PASS,98,1,97,0.99,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.17 +220339,NC_045512.2,25584,C,T,PASS,263,0,261,0.99,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.17 +220339,NC_045512.2,26270,C,T,PASS,738,4,731,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.17 +220339,NC_045512.2,26530,A,G,PASS,278,0,278,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.17 
+220339,NC_045512.2,26568,C,T,PASS,248,2,245,0.99,M,missense_variant,c.46C>T,p.Leu16Phe,p.L16F,ivar,BA.1.17 +220339,NC_045512.2,26577,C,G,PASS,245,0,245,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.17 +220339,NC_045512.2,26709,G,A,PASS,271,0,269,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.17 +220339,NC_045512.2,27259,A,C,PASS,717,0,715,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.17 +220339,NC_045512.2,27807,C,T,PASS,258,2,256,0.99,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.17 +220339,NC_045512.2,28271,A,T,PASS,1012,1,1010,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.17 +220339,NC_045512.2,28311,C,T,PASS,951,3,944,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.17 +220339,NC_045512.2,2832,A,G,PASS,344,0,344,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.17 +220339,NC_045512.2,28361,GGAGAACGCA,G,PASS,754,751,516,0.68,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.17 +220339,NC_045512.2,28881,GG,AA,PASS,1968,19,1949,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.17 +220339,NC_045512.2,28883,G,C,PASS,1960,1,1959,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.17 +220339,NC_045512.2,29772,T,C,PASS,160,1,159,0.99,S,downstream_gene_variant,c.*4388T>C,.,.,ivar,BA.1.17 +220339,NC_045512.2,3037,C,T,PASS,42,0,42,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.17 +220339,NC_045512.2,5386,T,G,PASS,38,0,38,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1.17 +220339,NC_045512.2,5672,C,T,PASS,269,2,267,0.99,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,BA.1.17 +220339,NC_045512.2,5924,G,A,PASS,113,1,112,0.99,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,BA.1.17 +220339,NC_045512.2,76,T,A,PASS,48,36,12,0.25,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,BA.1.17 
+220339,NC_045512.2,78,T,G,PASS,48,36,12,0.25,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,BA.1.17 +220339,NC_045512.2,8393,G,A,PASS,2673,17,2656,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.17 +220339,NC_045512.2,8652,T,C,PASS,2561,2,2559,1.0,orf1ab,missense_variant,c.8387T>C,p.Met2796Thr,p.M2796T,ivar,BA.1.17 +220407,NC_045512.2,13195,T,C,PASS,34,0,34,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220407,NC_045512.2,23403,A,G,PASS,51,0,51,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220407,NC_045512.2,23525,C,T,PASS,42,0,42,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220407,NC_045512.2,23599,T,G,PASS,12,0,12,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220407,NC_045512.2,23604,C,A,PASS,10,0,10,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220407,NC_045512.2,28881,GG,AA,PASS,133,1,132,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220407,NC_045512.2,28883,G,C,PASS,133,0,132,0.99,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220407,NC_045512.2,8393,G,A,PASS,27,0,27,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220433,NC_045512.2,13195,T,C,PASS,59,2,57,0.97,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220433,NC_045512.2,1437,C,T,PASS,15,0,15,1.0,orf1ab,missense_variant,c.1172C>T,p.Ser391Phe,p.S391F,ivar,Unassigned +220433,NC_045512.2,1451,A,T,PASS,15,8,7,0.47,orf1ab,missense_variant,c.1186A>T,p.Ile396Phe,p.I396F,ivar,Unassigned +220433,NC_045512.2,21846,C,T,PASS,13,0,13,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220433,NC_045512.2,23403,A,G,PASS,18,0,18,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220433,NC_045512.2,23525,C,T,PASS,11,0,11,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned 
+220433,NC_045512.2,23987,C,T,PASS,13,4,9,0.69,S,missense_variant,c.2425C>T,p.Pro809Ser,p.P809S,ivar,Unassigned +220433,NC_045512.2,24123,A,C,PASS,19,8,11,0.58,S,missense_variant,c.2561A>C,p.Lys854Thr,p.K854T,ivar,Unassigned +220433,NC_045512.2,24130,C,A,PASS,18,0,18,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220433,NC_045512.2,24469,T,A,PASS,13,0,13,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220433,NC_045512.2,24503,C,T,PASS,18,0,18,1.0,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220433,NC_045512.2,26270,C,T,PASS,234,0,234,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220433,NC_045512.2,26577,C,G,PASS,11,0,11,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220433,NC_045512.2,26862,C,A,ft,13,8,5,0.38,M,missense_variant,c.340C>A,p.Pro114Thr,p.P114T,ivar,Unassigned +220433,NC_045512.2,28271,A,T,PASS,67,0,67,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220433,NC_045512.2,28311,C,T,PASS,83,2,81,0.98,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220433,NC_045512.2,28881,GG,AA,PASS,429,0,425,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220433,NC_045512.2,28883,G,C,PASS,431,0,429,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220433,NC_045512.2,8393,G,A,PASS,366,0,366,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220518,NC_045512.2,12834,AT,A,ft,12,12,5,0.42,orf1ab,frameshift_variant,c.12572delT,p.Leu4191fs,p.L4191fs,ivar,Unassigned +220518,NC_045512.2,13195,T,C,PASS,68,0,68,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220518,NC_045512.2,23403,A,G,PASS,35,0,35,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220518,NC_045512.2,23525,C,T,PASS,51,0,51,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned 
+220518,NC_045512.2,23599,T,G,PASS,28,0,28,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220518,NC_045512.2,23604,C,A,PASS,28,0,28,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220518,NC_045512.2,23948,G,T,PASS,17,0,17,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220518,NC_045512.2,24130,C,A,PASS,23,0,23,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220518,NC_045512.2,24469,T,A,PASS,32,0,32,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220518,NC_045512.2,24503,C,T,PASS,46,0,46,1.0,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220518,NC_045512.2,26270,C,T,PASS,340,2,336,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220518,NC_045512.2,27259,A,C,PASS,12,0,12,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220518,NC_045512.2,28271,A,T,PASS,92,0,92,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220518,NC_045512.2,28311,C,T,PASS,104,2,102,0.98,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220518,NC_045512.2,28361,GGAGAACGCA,G,PASS,81,81,30,0.37,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220518,NC_045512.2,28881,GG,AA,PASS,569,4,565,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220518,NC_045512.2,28883,G,C,PASS,566,0,565,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220518,NC_045512.2,3183,A,G,PASS,48,22,26,0.54,orf1ab,missense_variant,c.2918A>G,p.Glu973Gly,p.E973G,ivar,Unassigned +220518,NC_045512.2,514,TGTTATG,T,PASS,205,205,169,0.82,orf1ab,conservative_inframe_deletion,c.253_258delATGGTT,p.Met85_Val86del,p.M85_V86del,ivar,Unassigned +220518,NC_045512.2,8393,G,A,PASS,779,2,777,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned 
+220529,NC_045512.2,10029,C,T,PASS,13,0,13,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,Unassigned +220529,NC_045512.2,10449,C,A,PASS,44,0,44,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220529,NC_045512.2,11282,AGTTTGTCTG,A,PASS,182,178,164,0.9,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220529,NC_045512.2,11537,A,G,PASS,173,0,173,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220529,NC_045512.2,13195,T,C,PASS,4328,6,4322,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220529,NC_045512.2,14408,C,T,PASS,79,4,75,0.95,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220529,NC_045512.2,16064,A,G,PASS,116,0,116,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,Unassigned +220529,NC_045512.2,16308,C,T,PASS,220,0,220,1.0,orf1ab,missense_variant,c.16043C>T,p.Ser5348Phe,p.S5348F,ivar,Unassigned +220529,NC_045512.2,18163,A,G,PASS,14,0,14,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,Unassigned +220529,NC_045512.2,21762,C,T,PASS,119,0,119,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220529,NC_045512.2,21764,ATACATG,A,PASS,122,119,108,0.89,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220529,NC_045512.2,21846,C,T,PASS,100,2,96,0.96,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220529,NC_045512.2,22578,G,A,PASS,13,0,13,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,Unassigned +220529,NC_045512.2,22599,G,A,PASS,10,0,10,1.0,S,missense_variant,c.1037G>A,p.Arg346Lys,p.R346K,ivar,Unassigned +220529,NC_045512.2,22679,T,C,PASS,11,0,11,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,Unassigned +220529,NC_045512.2,22686,C,T,PASS,11,0,11,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,Unassigned 
+220529,NC_045512.2,23403,A,G,PASS,2006,2,2004,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220529,NC_045512.2,23525,C,T,PASS,1968,2,1964,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220529,NC_045512.2,23599,T,G,PASS,712,0,712,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220529,NC_045512.2,23604,C,A,PASS,685,0,679,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220529,NC_045512.2,23854,C,A,PASS,25,0,25,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned +220529,NC_045512.2,23948,G,T,PASS,253,0,253,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220529,NC_045512.2,24130,C,A,PASS,342,0,338,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220529,NC_045512.2,24424,A,T,PASS,97,0,97,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220529,NC_045512.2,24469,T,A,PASS,548,1,547,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220529,NC_045512.2,24503,C,T,PASS,682,21,661,0.97,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220529,NC_045512.2,25000,C,T,PASS,60,0,60,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220529,NC_045512.2,25584,C,T,PASS,38,1,37,0.97,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220529,NC_045512.2,26270,C,T,PASS,138,0,138,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220529,NC_045512.2,26530,A,G,PASS,69,2,67,0.97,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220529,NC_045512.2,26577,C,G,PASS,93,0,93,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220529,NC_045512.2,26709,G,A,PASS,90,2,88,0.98,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220529,NC_045512.2,27259,A,C,PASS,80,0,80,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned 
+220529,NC_045512.2,27807,C,T,PASS,52,0,52,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220529,NC_045512.2,28271,A,T,PASS,775,0,775,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220529,NC_045512.2,28311,C,T,PASS,809,5,802,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220529,NC_045512.2,2832,A,G,PASS,42,0,42,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220529,NC_045512.2,28361,GGAGAACGCA,G,PASS,659,657,484,0.73,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220529,NC_045512.2,28881,GG,AA,PASS,2822,28,2789,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220529,NC_045512.2,28883,G,C,PASS,2814,0,2809,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220529,NC_045512.2,3037,C,T,PASS,22,0,22,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220529,NC_045512.2,8393,G,A,PASS,2312,8,2302,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220530,NC_045512.2,10029,C,T,PASS,11,0,11,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,Unassigned +220530,NC_045512.2,10449,C,A,PASS,13,0,13,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220530,NC_045512.2,11282,AGTTTGTCTG,A,PASS,34,34,28,0.82,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220530,NC_045512.2,11537,A,G,PASS,46,0,46,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220530,NC_045512.2,13195,T,C,PASS,1638,0,1638,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220530,NC_045512.2,14408,C,T,PASS,16,0,16,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220530,NC_045512.2,16064,A,G,PASS,11,0,11,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,Unassigned 
+220530,NC_045512.2,16308,C,T,PASS,44,0,44,1.0,orf1ab,missense_variant,c.16043C>T,p.Ser5348Phe,p.S5348F,ivar,Unassigned +220530,NC_045512.2,21762,C,T,PASS,13,0,13,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220530,NC_045512.2,21764,ATACATG,A,PASS,13,13,13,1.0,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220530,NC_045512.2,21846,C,T,PASS,19,0,19,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220530,NC_045512.2,23403,A,G,PASS,470,2,468,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220530,NC_045512.2,23525,C,T,PASS,495,2,493,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220530,NC_045512.2,23599,T,G,PASS,216,0,216,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220530,NC_045512.2,23604,C,A,PASS,215,0,213,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220530,NC_045512.2,23854,C,A,PASS,12,0,12,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned +220530,NC_045512.2,23948,G,T,PASS,31,0,31,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220530,NC_045512.2,24130,C,A,PASS,31,0,31,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220530,NC_045512.2,24424,A,T,PASS,39,0,37,0.95,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220530,NC_045512.2,24469,T,A,PASS,158,0,158,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220530,NC_045512.2,24503,C,T,PASS,191,2,189,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220530,NC_045512.2,25000,C,T,PASS,16,0,16,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220530,NC_045512.2,25584,C,T,PASS,26,0,26,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220530,NC_045512.2,26270,C,T,PASS,161,0,161,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned 
+220530,NC_045512.2,26530,A,G,PASS,46,0,46,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220530,NC_045512.2,26577,C,G,PASS,80,0,80,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220530,NC_045512.2,26709,G,A,PASS,61,0,61,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220530,NC_045512.2,27259,A,C,PASS,64,0,64,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220530,NC_045512.2,27406,C,T,PASS,65,0,65,1.0,ORF7a,missense_variant,c.13C>T,p.Leu5Phe,p.L5F,ivar,Unassigned +220530,NC_045512.2,27784,A,T,ft,16,11,5,0.31,ORF7b,missense_variant,c.29A>T,p.Tyr10Phe,p.Y10F,ivar,Unassigned +220530,NC_045512.2,27790,G,C,ft,13,8,5,0.38,ORF7b,missense_variant,c.35G>C,p.Cys12Ser,p.C12S,ivar,Unassigned +220530,NC_045512.2,27807,C,T,PASS,12,0,12,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220530,NC_045512.2,28271,A,T,PASS,189,0,188,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220530,NC_045512.2,28311,C,T,PASS,206,0,206,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220530,NC_045512.2,2832,A,G,PASS,78,0,78,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220530,NC_045512.2,28361,GGAGAACGCA,G,PASS,171,171,134,0.78,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220530,NC_045512.2,28403,G,A,PASS,112,78,34,0.3,N,missense_variant,c.130G>A,p.Gly44Ser,p.G44S,ivar,Unassigned +220530,NC_045512.2,2857,C,T,PASS,48,36,12,0.25,orf1ab,synonymous_variant,c.2592C>T,p.Leu864Leu,p.L864L,ivar,Unassigned +220530,NC_045512.2,28881,GG,AA,PASS,3635,36,3594,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220530,NC_045512.2,28883,G,C,PASS,3618,3,3608,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220530,NC_045512.2,4964,A,G,ft,10,6,4,0.4,orf1ab,missense_variant,c.4699A>G,p.Thr1567Ala,p.T1567A,ivar,Unassigned 
+220530,NC_045512.2,8393,G,A,PASS,1049,2,1046,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220531,NC_045512.2,10029,C,T,PASS,65,0,65,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.18 +220531,NC_045512.2,10449,C,A,PASS,294,0,293,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.18 +220531,NC_045512.2,11282,AGTTTGTCTG,A,PASS,608,606,530,0.87,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.18 +220531,NC_045512.2,11537,A,G,PASS,587,0,587,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.18 +220531,NC_045512.2,13195,T,C,PASS,5269,16,5253,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.18 +220531,NC_045512.2,14408,C,T,PASS,234,1,231,0.99,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.18 +220531,NC_045512.2,15336,T,C,ft,10,5,5,0.5,orf1ab,missense_variant,c.15071T>C,p.Leu5024Ser,p.L5024S,ivar,BA.1.18 +220531,NC_045512.2,15359,G,A,PASS,10,4,6,0.6,orf1ab,missense_variant,c.15094G>A,p.Ala5032Thr,p.A5032T,ivar,BA.1.18 +220531,NC_045512.2,18163,A,G,PASS,46,0,46,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.18 +220531,NC_045512.2,2172,A,G,PASS,3340,15,3325,1.0,orf1ab,missense_variant,c.1907A>G,p.Lys636Arg,p.K636R,ivar,BA.1.18 +220531,NC_045512.2,21762,C,T,PASS,443,0,443,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.18 +220531,NC_045512.2,21764,ATACATG,A,PASS,449,444,403,0.9,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.18 +220531,NC_045512.2,21846,C,T,PASS,384,3,381,0.99,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.18 +220531,NC_045512.2,22578,G,A,PASS,57,0,57,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.18 +220531,NC_045512.2,22673,TC,CT,PASS,39,0,39,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.18 
+220531,NC_045512.2,22679,T,C,PASS,46,0,46,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.18 +220531,NC_045512.2,22686,C,T,PASS,45,0,45,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.18 +220531,NC_045512.2,22882,T,G,PASS,13,0,13,1.0,S,missense_variant,c.1320T>G,p.Asn440Lys,p.N440K,ivar,BA.1.18 +220531,NC_045512.2,22898,G,A,PASS,13,0,13,1.0,S,missense_variant,c.1336G>A,p.Gly446Ser,p.G446S,ivar,BA.1.18 +220531,NC_045512.2,22992,G,A,PASS,11,0,11,1.0,S,missense_variant,c.1430G>A,p.Ser477Asn,p.S477N,ivar,BA.1.18 +220531,NC_045512.2,22995,C,A,PASS,11,0,11,1.0,S,missense_variant,c.1433C>A,p.Thr478Lys,p.T478K,ivar,BA.1.18 +220531,NC_045512.2,23013,A,C,PASS,11,0,11,1.0,S,missense_variant,c.1451A>C,p.Glu484Ala,p.E484A,ivar,BA.1.18 +220531,NC_045512.2,23040,A,G,PASS,15,0,15,1.0,S,missense_variant,c.1478A>G,p.Gln493Arg,p.Q493R,ivar,BA.1.18 +220531,NC_045512.2,23048,G,A,PASS,18,0,18,1.0,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1.18 +220531,NC_045512.2,23055,A,G,PASS,18,0,18,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1.18 +220531,NC_045512.2,23063,A,T,PASS,17,0,17,1.0,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1.18 +220531,NC_045512.2,23075,T,C,PASS,20,0,20,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1.18 +220531,NC_045512.2,23202,C,A,PASS,48,0,48,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.18 +220531,NC_045512.2,23403,A,G,PASS,3684,4,3680,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.18 +220531,NC_045512.2,23525,C,T,PASS,3562,10,3542,0.99,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.18 +220531,NC_045512.2,23599,T,G,PASS,1310,0,1308,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.18 +220531,NC_045512.2,23604,C,A,PASS,1248,0,1245,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.18 +220531,NC_045512.2,23854,C,A,PASS,153,0,153,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.18 
+220531,NC_045512.2,23948,G,T,PASS,1231,0,1229,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.18 +220531,NC_045512.2,24130,C,A,PASS,1843,2,1824,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.18 +220531,NC_045512.2,24424,A,T,PASS,589,0,589,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.18 +220531,NC_045512.2,24469,T,A,PASS,1156,4,1150,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.18 +220531,NC_045512.2,24503,C,T,PASS,1344,14,1330,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.18 +220531,NC_045512.2,25000,C,T,PASS,165,1,164,0.99,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.18 +220531,NC_045512.2,25584,C,T,PASS,263,0,263,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.18 +220531,NC_045512.2,25855,G,T,PASS,263,0,262,1.0,ORF3a,missense_variant,c.463G>T,p.Asp155Tyr,p.D155Y,ivar,BA.1.18 +220531,NC_045512.2,25931,CT,AC,PASS,276,1,275,1.0,ORF3a,missense_variant,c.539_540delCTinsAC,p.Ser180Tyr,p.S180Y,ivar,BA.1.18 +220531,NC_045512.2,26270,C,T,PASS,761,2,759,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.18 +220531,NC_045512.2,26530,A,G,PASS,192,0,192,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.18 +220531,NC_045512.2,26577,C,G,PASS,211,0,211,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.18 +220531,NC_045512.2,26709,G,A,PASS,190,1,189,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.18 +220531,NC_045512.2,27259,A,C,PASS,631,0,624,0.99,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.18 +220531,NC_045512.2,27807,C,T,PASS,247,2,245,0.99,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.18 +220531,NC_045512.2,27933,G,A,PASS,448,3,442,0.99,ORF8,missense_variant,c.40G>A,p.Ala14Thr,p.A14T,ivar,BA.1.18 +220531,NC_045512.2,28271,A,T,PASS,1392,0,1390,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.18 
+220531,NC_045512.2,28311,C,T,PASS,1393,1,1382,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.18 +220531,NC_045512.2,2832,A,G,PASS,216,0,216,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.18 +220531,NC_045512.2,28361,GGAGAACGCA,G,PASS,1099,1092,795,0.72,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.18 +220531,NC_045512.2,28881,GG,AA,PASS,2048,32,2014,0.98,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.18 +220531,NC_045512.2,28883,G,C,PASS,2025,2,2020,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.18 +220531,NC_045512.2,3037,C,T,PASS,58,0,58,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.18 +220531,NC_045512.2,5386,T,G,PASS,34,0,34,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1.18 +220531,NC_045512.2,5730,C,T,PASS,289,0,289,1.0,orf1ab,missense_variant,c.5465C>T,p.Thr1822Ile,p.T1822I,ivar,BA.1.18 +220531,NC_045512.2,8393,G,A,PASS,3368,17,3350,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.18 +220532,NC_045512.2,10449,C,A,PASS,26,0,26,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220532,NC_045512.2,11282,AGTTTGTCTG,A,PASS,38,38,33,0.87,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220532,NC_045512.2,11537,A,G,PASS,58,0,58,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220532,NC_045512.2,13195,T,C,PASS,3057,4,3053,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220532,NC_045512.2,14090,A,T,ft,13,9,4,0.31,orf1ab,missense_variant,c.13825A>T,p.Met4609Leu,p.M4609L,ivar,Unassigned +220532,NC_045512.2,14408,C,T,PASS,33,0,32,0.97,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220532,NC_045512.2,15933,C,T,PASS,69,4,65,0.94,orf1ab,missense_variant,c.15668C>T,p.Thr5223Ile,p.T5223I,ivar,Unassigned 
+220532,NC_045512.2,21762,C,T,PASS,51,0,51,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220532,NC_045512.2,21764,ATACATG,A,PASS,51,51,46,0.9,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220532,NC_045512.2,21846,C,T,PASS,72,0,72,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220532,NC_045512.2,23403,A,G,PASS,753,1,752,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220532,NC_045512.2,23525,C,T,PASS,730,0,730,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220532,NC_045512.2,23599,T,G,PASS,267,0,267,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220532,NC_045512.2,23604,C,A,PASS,263,0,261,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220532,NC_045512.2,23854,C,A,PASS,18,0,18,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned +220532,NC_045512.2,23948,G,T,PASS,81,0,81,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220532,NC_045512.2,24130,C,A,PASS,131,0,131,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220532,NC_045512.2,24424,A,T,PASS,26,0,26,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220532,NC_045512.2,24469,T,A,PASS,242,0,242,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220532,NC_045512.2,24503,C,T,PASS,289,2,287,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220532,NC_045512.2,25000,C,T,PASS,13,0,13,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220532,NC_045512.2,25584,C,T,PASS,13,0,13,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220532,NC_045512.2,26270,C,T,PASS,33,0,33,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220532,NC_045512.2,26530,A,G,PASS,31,0,31,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned 
+220532,NC_045512.2,26577,C,G,PASS,31,0,31,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220532,NC_045512.2,26654,G,GT,PASS,28,28,9,0.32,M,frameshift_variant,c.137dupT,p.Leu46fs,p.L46fs,ivar,Unassigned +220532,NC_045512.2,26709,G,A,PASS,33,0,33,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220532,NC_045512.2,27259,A,C,PASS,40,0,40,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220532,NC_045512.2,27807,C,T,PASS,22,3,19,0.86,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220532,NC_045512.2,28271,A,T,PASS,365,0,365,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220532,NC_045512.2,28311,C,T,PASS,335,0,335,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220532,NC_045512.2,2832,A,G,PASS,31,0,31,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220532,NC_045512.2,28361,GGAGAACGCA,G,PASS,279,279,202,0.72,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220532,NC_045512.2,28512,C,T,PASS,110,79,29,0.26,N,missense_variant,c.239C>T,p.Pro80Leu,p.P80L,ivar,Unassigned +220532,NC_045512.2,28881,GG,AA,PASS,2846,42,2800,0.98,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220532,NC_045512.2,28883,G,C,PASS,2820,3,2814,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220532,NC_045512.2,4097,T,TC,PASS,25,25,8,0.32,orf1ab,frameshift_variant,c.3832_3833insC,p.Phe1278fs,p.F1278fs,ivar,Unassigned +220532,NC_045512.2,4101,T,C,PASS,25,17,8,0.32,orf1ab,missense_variant,c.3836T>C,p.Leu1279Ser,p.L1279S,ivar,Unassigned +220532,NC_045512.2,5672,C,T,PASS,11,0,11,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned +220532,NC_045512.2,8393,G,A,PASS,1208,5,1203,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned 
+220533,NC_045512.2,13195,T,C,PASS,35,0,35,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220533,NC_045512.2,23403,A,G,PASS,19,0,19,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220533,NC_045512.2,23525,C,T,PASS,15,0,15,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220533,NC_045512.2,28881,GG,AA,PASS,335,4,331,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220533,NC_045512.2,28883,G,C,PASS,331,0,331,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220533,NC_045512.2,8393,G,A,PASS,102,0,102,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220534,NC_045512.2,10029,C,T,PASS,80,1,79,0.99,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.17 +220534,NC_045512.2,10449,C,A,PASS,302,0,300,0.99,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.17 +220534,NC_045512.2,11282,AGTTTGTCTG,A,PASS,1212,1193,1059,0.87,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.17 +220534,NC_045512.2,11537,A,G,PASS,576,0,576,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.17 +220534,NC_045512.2,13195,T,C,PASS,6107,17,6090,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.17 +220534,NC_045512.2,14408,C,T,PASS,315,0,315,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.17 +220534,NC_045512.2,18163,A,G,PASS,58,0,58,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.17 +220534,NC_045512.2,19374,C,T,PASS,426,3,423,0.99,orf1ab,missense_variant,c.19109C>T,p.Ser6370Phe,p.S6370F,ivar,BA.1.17 +220534,NC_045512.2,20743,A,C,PASS,3288,0,3285,1.0,orf1ab,missense_variant,c.20478A>C,p.Lys6826Asn,p.K6826N,ivar,BA.1.17 +220534,NC_045512.2,21762,C,T,PASS,429,4,425,0.99,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.17 
+220534,NC_045512.2,21764,ATACATG,A,PASS,434,429,398,0.92,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.17 +220534,NC_045512.2,21846,C,T,PASS,452,2,450,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.17 +220534,NC_045512.2,22193,AATT,A,PASS,19,19,16,0.84,S,disruptive_inframe_deletion,c.632_634delATT,p.Asn211_Leu212delinsIle,p.N211_L212delinsI,ivar,BA.1.17 +220534,NC_045512.2,22204,T,TGAGCCAGAA,PASS,16,16,13,0.81,S,disruptive_inframe_insertion,c.644_645insGCCAGAAGA,p.Arg214_Asp215insGluProGlu,p.R214_D215insEPE,ivar,BA.1.17 +220534,NC_045512.2,22578,G,A,PASS,87,0,87,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.17 +220534,NC_045512.2,22673,TC,CT,PASS,68,0,68,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.17 +220534,NC_045512.2,22679,T,C,PASS,95,0,95,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.17 +220534,NC_045512.2,22686,C,T,PASS,94,0,94,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.17 +220534,NC_045512.2,22882,T,G,PASS,13,0,13,1.0,S,missense_variant,c.1320T>G,p.Asn440Lys,p.N440K,ivar,BA.1.17 +220534,NC_045512.2,22898,G,A,PASS,11,0,11,1.0,S,missense_variant,c.1336G>A,p.Gly446Ser,p.G446S,ivar,BA.1.17 +220534,NC_045512.2,22992,G,A,PASS,20,0,20,1.0,S,missense_variant,c.1430G>A,p.Ser477Asn,p.S477N,ivar,BA.1.17 +220534,NC_045512.2,22995,C,A,PASS,20,0,20,1.0,S,missense_variant,c.1433C>A,p.Thr478Lys,p.T478K,ivar,BA.1.17 +220534,NC_045512.2,23013,A,C,PASS,24,0,24,1.0,S,missense_variant,c.1451A>C,p.Glu484Ala,p.E484A,ivar,BA.1.17 +220534,NC_045512.2,23040,A,G,PASS,33,0,33,1.0,S,missense_variant,c.1478A>G,p.Gln493Arg,p.Q493R,ivar,BA.1.17 +220534,NC_045512.2,23048,G,A,PASS,33,1,32,0.97,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1.17 +220534,NC_045512.2,23055,A,G,PASS,35,0,35,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1.17 
+220534,NC_045512.2,23063,A,T,PASS,38,0,38,1.0,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1.17 +220534,NC_045512.2,23075,T,C,PASS,42,0,42,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1.17 +220534,NC_045512.2,23202,C,A,PASS,77,0,77,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.17 +220534,NC_045512.2,23403,A,G,PASS,3887,4,3880,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.17 +220534,NC_045512.2,23525,C,T,PASS,3730,6,3714,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.17 +220534,NC_045512.2,23599,T,G,PASS,1409,0,1404,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.17 +220534,NC_045512.2,23604,C,A,PASS,1356,0,1350,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.17 +220534,NC_045512.2,23854,C,A,PASS,252,1,250,0.99,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.17 +220534,NC_045512.2,23948,G,T,PASS,1209,0,1206,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.17 +220534,NC_045512.2,24130,C,A,PASS,1902,2,1884,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.17 +220534,NC_045512.2,24424,A,T,PASS,860,1,856,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.17 +220534,NC_045512.2,24469,T,A,PASS,1573,3,1565,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.17 +220534,NC_045512.2,24503,C,T,PASS,1795,32,1760,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.17 +220534,NC_045512.2,25000,C,T,PASS,198,0,198,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.17 +220534,NC_045512.2,25584,C,T,PASS,452,9,441,0.98,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.17 +220534,NC_045512.2,26270,C,T,PASS,1216,6,1208,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.17 +220534,NC_045512.2,26530,A,G,PASS,353,0,353,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.17 
+220534,NC_045512.2,26577,C,G,PASS,356,0,355,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.17 +220534,NC_045512.2,26709,G,A,PASS,372,2,369,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.17 +220534,NC_045512.2,27259,A,C,PASS,803,0,799,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.17 +220534,NC_045512.2,27807,C,T,PASS,288,0,288,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.17 +220534,NC_045512.2,28271,A,T,PASS,1664,5,1652,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.17 +220534,NC_045512.2,28311,C,T,PASS,1664,11,1651,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.17 +220534,NC_045512.2,2832,A,G,PASS,326,0,326,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.17 +220534,NC_045512.2,28361,GGAGAACGCA,G,PASS,1461,1458,1071,0.73,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.17 +220534,NC_045512.2,28881,GG,AA,PASS,2169,23,2143,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.17 +220534,NC_045512.2,28883,G,C,PASS,2150,0,2147,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.17 +220534,NC_045512.2,3037,C,T,PASS,104,0,104,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.17 +220534,NC_045512.2,5386,T,G,PASS,33,0,33,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1.17 +220534,NC_045512.2,5672,C,T,PASS,610,2,608,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,BA.1.17 +220534,NC_045512.2,5924,G,A,PASS,241,0,241,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,BA.1.17 +220534,NC_045512.2,8393,G,A,PASS,3994,13,3975,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.17 +220534,NC_045512.2,8715,C,T,PASS,28,20,8,0.29,orf1ab,missense_variant,c.8450C>T,p.Thr2817Ile,p.T2817I,ivar,BA.1.17 +220535,NC_045512.2,10449,C,A,PASS,23,0,23,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned 
+220535,NC_045512.2,11282,AGTTTGTCTG,A,PASS,76,76,65,0.86,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220535,NC_045512.2,11537,A,G,PASS,61,0,61,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220535,NC_045512.2,13195,T,C,PASS,2278,1,2277,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220535,NC_045512.2,14408,C,T,PASS,40,0,40,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220535,NC_045512.2,14646,T,A,PASS,41,27,12,0.29,orf1ab,missense_variant,c.14381T>A,p.Leu4794Gln,p.L4794Q,ivar,Unassigned +220535,NC_045512.2,16064,A,G,PASS,42,0,42,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,Unassigned +220535,NC_045512.2,16308,C,T,PASS,133,0,133,1.0,orf1ab,missense_variant,c.16043C>T,p.Ser5348Phe,p.S5348F,ivar,Unassigned +220535,NC_045512.2,21762,C,T,PASS,42,0,42,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220535,NC_045512.2,21764,ATACATG,A,PASS,43,42,36,0.84,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220535,NC_045512.2,21846,C,T,PASS,55,0,55,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220535,NC_045512.2,23403,A,G,PASS,1687,0,1687,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220535,NC_045512.2,23525,C,T,PASS,1723,4,1719,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220535,NC_045512.2,23599,T,G,PASS,665,0,665,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220535,NC_045512.2,23604,C,A,PASS,648,0,645,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220535,NC_045512.2,23948,G,T,PASS,59,0,59,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220535,NC_045512.2,24130,C,A,PASS,93,0,93,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned 
+220535,NC_045512.2,24424,A,T,PASS,39,0,39,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220535,NC_045512.2,24469,T,A,PASS,245,1,244,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220535,NC_045512.2,24503,C,T,PASS,281,1,280,1.0,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220535,NC_045512.2,25000,C,T,PASS,19,0,19,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220535,NC_045512.2,25584,C,T,PASS,27,0,27,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220535,NC_045512.2,26270,C,T,PASS,194,0,194,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220535,NC_045512.2,26530,A,G,PASS,114,0,114,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220535,NC_045512.2,26577,C,G,PASS,120,1,119,0.99,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220535,NC_045512.2,26709,G,A,PASS,93,0,93,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220535,NC_045512.2,27259,A,C,PASS,78,0,78,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220535,NC_045512.2,27807,C,T,PASS,42,0,42,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220535,NC_045512.2,28271,A,T,PASS,684,0,684,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220535,NC_045512.2,28311,C,T,PASS,667,16,651,0.98,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220535,NC_045512.2,2832,A,G,PASS,25,0,25,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220535,NC_045512.2,28361,GGAGAACGCA,G,PASS,498,496,337,0.68,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220535,NC_045512.2,28512,C,T,PASS,234,171,62,0.26,N,missense_variant,c.239C>T,p.Pro80Leu,p.P80L,ivar,Unassigned +220535,NC_045512.2,28881,GG,AA,PASS,6177,65,6104,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned 
+220535,NC_045512.2,28883,G,C,PASS,6141,2,6131,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220535,NC_045512.2,3037,C,T,PASS,11,0,11,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220535,NC_045512.2,4273,T,A,PASS,31,13,18,0.58,orf1ab,synonymous_variant,c.4008T>A,p.Gly1336Gly,p.G1336G,ivar,Unassigned +220535,NC_045512.2,76,T,A,ft,11,7,4,0.36,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220535,NC_045512.2,78,T,G,ft,11,7,4,0.36,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,Unassigned +220535,NC_045512.2,8393,G,A,PASS,2646,16,2630,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220535,NC_045512.2,9572,G,A,PASS,1287,8,1277,0.99,orf1ab,missense_variant,c.9307G>A,p.Gly3103Ser,p.G3103S,ivar,Unassigned +220536,NC_045512.2,10449,C,A,PASS,12,0,12,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220536,NC_045512.2,11282,AGTTTGTCTG,A,PASS,36,35,32,0.89,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220536,NC_045512.2,11537,A,G,PASS,25,0,25,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220536,NC_045512.2,13195,T,C,PASS,1638,4,1634,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220536,NC_045512.2,14408,C,T,PASS,33,0,33,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220536,NC_045512.2,14646,T,A,PASS,43,27,16,0.37,orf1ab,missense_variant,c.14381T>A,p.Leu4794Gln,p.L4794Q,ivar,Unassigned +220536,NC_045512.2,21595,C,T,PASS,167,0,167,1.0,S,synonymous_variant,c.33C>T,p.Val11Val,p.V11V,ivar,Unassigned +220536,NC_045512.2,21762,C,T,PASS,31,2,29,0.94,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220536,NC_045512.2,21764,ATACATG,A,PASS,32,32,32,1.0,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned 
+220536,NC_045512.2,21846,C,T,PASS,34,0,34,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220536,NC_045512.2,21859,C,T,PASS,31,0,31,1.0,S,synonymous_variant,c.297C>T,p.Asn99Asn,p.N99N,ivar,Unassigned +220536,NC_045512.2,23403,A,G,PASS,1494,4,1490,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220536,NC_045512.2,23525,C,T,PASS,1441,16,1425,0.99,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220536,NC_045512.2,23599,T,G,PASS,586,0,586,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220536,NC_045512.2,23604,C,A,PASS,571,0,571,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220536,NC_045512.2,23948,G,T,PASS,38,0,38,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220536,NC_045512.2,24130,C,A,PASS,51,0,51,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220536,NC_045512.2,24424,A,T,PASS,23,0,23,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220536,NC_045512.2,24469,T,A,PASS,203,0,203,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220536,NC_045512.2,24503,C,T,PASS,240,0,240,1.0,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220536,NC_045512.2,25000,C,T,PASS,17,0,17,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220536,NC_045512.2,25577,T,C,PASS,14,0,14,1.0,ORF3a,missense_variant,c.185T>C,p.Ile62Thr,p.I62T,ivar,Unassigned +220536,NC_045512.2,26270,C,T,PASS,99,0,99,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220536,NC_045512.2,26530,A,G,PASS,36,0,36,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220536,NC_045512.2,26577,C,G,PASS,40,0,40,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220536,NC_045512.2,26709,G,A,PASS,42,0,42,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned 
+220536,NC_045512.2,27259,A,C,PASS,29,0,29,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220536,NC_045512.2,27807,C,T,PASS,12,0,12,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220536,NC_045512.2,28271,A,T,PASS,310,2,308,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220536,NC_045512.2,28311,C,T,PASS,327,2,325,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220536,NC_045512.2,2832,A,G,PASS,15,0,15,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220536,NC_045512.2,28361,GGAGAACGCA,G,PASS,223,222,162,0.73,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220536,NC_045512.2,28881,GG,AA,PASS,5160,55,5100,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220536,NC_045512.2,28883,G,C,PASS,5134,3,5127,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220536,NC_045512.2,3037,C,T,PASS,14,0,14,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220536,NC_045512.2,8393,G,A,PASS,2570,9,2558,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220537,NC_045512.2,10029,C,T,PASS,30,0,30,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.17 +220537,NC_045512.2,10449,C,A,PASS,81,0,79,0.98,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.17 +220537,NC_045512.2,11282,AGTTTGTCTG,A,PASS,360,355,322,0.89,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.17 +220537,NC_045512.2,11537,A,G,PASS,303,0,303,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.17 +220537,NC_045512.2,13195,T,C,PASS,5220,4,5213,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.17 +220537,NC_045512.2,14408,C,T,PASS,123,0,123,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.17 
+220537,NC_045512.2,18163,A,G,PASS,13,0,13,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.17 +220537,NC_045512.2,21762,C,T,PASS,202,1,201,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.17 +220537,NC_045512.2,21764,ATACATG,A,PASS,207,202,188,0.91,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.17 +220537,NC_045512.2,21846,C,T,PASS,220,0,220,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.17 +220537,NC_045512.2,22478,T,C,ft,15,11,4,0.27,S,missense_variant,c.916T>C,p.Phe306Leu,p.F306L,ivar,BA.1.17 +220537,NC_045512.2,22578,G,A,PASS,22,0,22,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.17 +220537,NC_045512.2,22673,TC,CT,PASS,18,0,18,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.17 +220537,NC_045512.2,22679,T,C,PASS,22,0,22,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.17 +220537,NC_045512.2,22686,C,T,PASS,22,0,22,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.17 +220537,NC_045512.2,23048,G,A,PASS,10,0,10,1.0,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1.17 +220537,NC_045512.2,23055,A,G,PASS,10,0,10,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1.17 +220537,NC_045512.2,23063,A,T,PASS,11,0,11,1.0,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1.17 +220537,NC_045512.2,23075,T,C,PASS,12,0,12,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1.17 +220537,NC_045512.2,23202,C,A,PASS,47,0,47,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.17 +220537,NC_045512.2,23403,A,G,PASS,3085,4,3081,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.17 +220537,NC_045512.2,23525,C,T,PASS,2906,14,2884,0.99,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.17 +220537,NC_045512.2,23599,T,G,PASS,1118,0,1116,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.17 
+220537,NC_045512.2,23604,C,A,PASS,1080,0,1077,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.17 +220537,NC_045512.2,23854,C,A,PASS,74,0,74,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.17 +220537,NC_045512.2,23948,G,T,PASS,689,2,686,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.17 +220537,NC_045512.2,24130,C,A,PASS,1007,0,997,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.17 +220537,NC_045512.2,24424,A,T,PASS,270,2,268,0.99,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.17 +220537,NC_045512.2,24469,T,A,PASS,895,0,888,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.17 +220537,NC_045512.2,24503,C,T,PASS,1054,21,1033,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.17 +220537,NC_045512.2,25000,C,T,PASS,93,0,93,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.17 +220537,NC_045512.2,25584,C,T,PASS,117,0,117,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.17 +220537,NC_045512.2,26270,C,T,PASS,578,0,576,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.17 +220537,NC_045512.2,26530,A,G,PASS,226,0,226,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.17 +220537,NC_045512.2,26577,C,G,PASS,274,0,274,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.17 +220537,NC_045512.2,26709,G,A,PASS,253,0,253,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.17 +220537,NC_045512.2,27259,A,C,PASS,315,0,315,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.17 +220537,NC_045512.2,27384,T,C,PASS,451,0,451,1.0,ORF6,synonymous_variant,c.183T>C,p.Asp61Asp,p.D61D,ivar,BA.1.17 +220537,NC_045512.2,27807,C,T,PASS,91,0,91,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.17 +220537,NC_045512.2,28271,A,T,PASS,918,2,913,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.17 +220537,NC_045512.2,28311,C,T,PASS,933,0,929,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.17 
+220537,NC_045512.2,2832,A,G,PASS,97,0,97,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.17 +220537,NC_045512.2,28361,GGAGAACGCA,G,PASS,774,770,558,0.72,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.17 +220537,NC_045512.2,28881,GG,AA,PASS,3048,38,3005,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.17 +220537,NC_045512.2,28883,G,C,PASS,3032,4,3020,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.17 +220537,NC_045512.2,3037,C,T,PASS,49,0,49,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.17 +220537,NC_045512.2,5386,T,G,PASS,15,0,15,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1.17 +220537,NC_045512.2,5672,C,T,PASS,149,0,149,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,BA.1.17 +220537,NC_045512.2,5924,G,A,PASS,77,0,77,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,BA.1.17 +220537,NC_045512.2,635,C,T,PASS,9649,48,9569,0.99,orf1ab,missense_variant,c.370C>T,p.Arg124Cys,p.R124C,ivar,BA.1.17 +220537,NC_045512.2,7984,T,C,PASS,1058,2,1056,1.0,orf1ab,synonymous_variant,c.7719T>C,p.Asp2573Asp,p.D2573D,ivar,BA.1.17 +220537,NC_045512.2,8393,G,A,PASS,2866,7,2859,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.17 +220538,NC_045512.2,10029,C,T,PASS,23,0,23,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.18 +220538,NC_045512.2,10449,C,A,PASS,82,0,82,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.18 +220538,NC_045512.2,11282,AGTTTGTCTG,A,PASS,493,491,443,0.9,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.18 +220538,NC_045512.2,11537,A,G,PASS,252,0,252,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.18 +220538,NC_045512.2,13195,T,C,PASS,4749,6,4741,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.18 
+220538,NC_045512.2,14408,C,T,PASS,81,0,79,0.98,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.18 +220538,NC_045512.2,18163,A,G,PASS,28,0,28,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.18 +220538,NC_045512.2,2172,A,G,PASS,1213,5,1207,1.0,orf1ab,missense_variant,c.1907A>G,p.Lys636Arg,p.K636R,ivar,BA.1.18 +220538,NC_045512.2,21762,C,T,PASS,158,0,158,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.18 +220538,NC_045512.2,21764,ATACATG,A,PASS,165,160,145,0.88,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.18 +220538,NC_045512.2,21846,C,T,PASS,170,0,170,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.18 +220538,NC_045512.2,22578,G,A,PASS,33,0,33,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.18 +220538,NC_045512.2,22673,TC,CT,PASS,19,1,18,0.95,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.18 +220538,NC_045512.2,22679,T,C,PASS,25,0,25,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.18 +220538,NC_045512.2,22686,C,T,PASS,25,0,25,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.18 +220538,NC_045512.2,23040,A,G,PASS,12,0,12,1.0,S,missense_variant,c.1478A>G,p.Gln493Arg,p.Q493R,ivar,BA.1.18 +220538,NC_045512.2,23048,G,A,PASS,13,0,13,1.0,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1.18 +220538,NC_045512.2,23055,A,G,PASS,13,0,13,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1.18 +220538,NC_045512.2,23063,A,T,PASS,13,0,13,1.0,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1.18 +220538,NC_045512.2,23075,T,C,PASS,10,0,10,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1.18 +220538,NC_045512.2,23202,C,A,PASS,22,0,22,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.18 +220538,NC_045512.2,23403,A,G,PASS,3286,10,3275,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.18 
+220538,NC_045512.2,23525,C,T,PASS,3216,2,3208,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.18 +220538,NC_045512.2,23599,T,G,PASS,1320,1,1317,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.18 +220538,NC_045512.2,23604,C,A,PASS,1275,0,1266,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.18 +220538,NC_045512.2,23854,C,A,PASS,51,0,50,0.98,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.18 +220538,NC_045512.2,23948,G,T,PASS,254,0,253,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.18 +220538,NC_045512.2,24130,C,A,PASS,419,0,417,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.18 +220538,NC_045512.2,24424,A,T,PASS,173,0,171,0.99,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.18 +220538,NC_045512.2,24469,T,A,PASS,675,2,669,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.18 +220538,NC_045512.2,24503,C,T,PASS,821,13,808,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.18 +220538,NC_045512.2,25000,C,T,PASS,104,0,104,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.18 +220538,NC_045512.2,25584,C,T,PASS,103,0,103,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.18 +220538,NC_045512.2,25855,G,T,PASS,79,0,79,1.0,ORF3a,missense_variant,c.463G>T,p.Asp155Tyr,p.D155Y,ivar,BA.1.18 +220538,NC_045512.2,25931,CT,AC,PASS,88,0,88,1.0,ORF3a,missense_variant,c.539_540delCTinsAC,p.Ser180Tyr,p.S180Y,ivar,BA.1.18 +220538,NC_045512.2,26270,C,T,PASS,315,0,315,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.18 +220538,NC_045512.2,26530,A,G,PASS,141,0,141,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.18 +220538,NC_045512.2,26577,C,G,PASS,152,0,152,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.18 +220538,NC_045512.2,26709,G,A,PASS,160,0,160,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.18 
+220538,NC_045512.2,27259,A,C,PASS,327,0,327,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.18 +220538,NC_045512.2,27519,C,T,PASS,303,0,303,1.0,ORF7a,synonymous_variant,c.126C>T,p.Gly42Gly,p.G42G,ivar,BA.1.18 +220538,NC_045512.2,27807,C,T,PASS,118,0,118,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.18 +220538,NC_045512.2,28271,A,T,PASS,985,4,981,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.18 +220538,NC_045512.2,28311,C,T,PASS,995,6,987,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.18 +220538,NC_045512.2,2832,A,G,PASS,59,0,59,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.18 +220538,NC_045512.2,28361,GGAGAACGCA,G,PASS,847,843,615,0.73,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.18 +220538,NC_045512.2,28881,GG,AA,PASS,2536,35,2498,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.18 +220538,NC_045512.2,28883,G,C,PASS,2511,2,2506,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.18 +220538,NC_045512.2,3037,C,T,PASS,42,0,42,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.18 +220538,NC_045512.2,5386,T,G,PASS,11,0,9,0.82,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1.18 +220538,NC_045512.2,5730,C,T,PASS,90,1,89,0.99,orf1ab,missense_variant,c.5465C>T,p.Thr1822Ile,p.T1822I,ivar,BA.1.18 +220538,NC_045512.2,8393,G,A,PASS,3456,17,3437,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.18 +220539,NC_045512.2,10449,C,A,PASS,32,0,32,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220539,NC_045512.2,11282,AGTTTGTCTG,A,PASS,101,100,83,0.82,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220539,NC_045512.2,11455,C,T,PASS,127,0,127,1.0,orf1ab,synonymous_variant,c.11190C>T,p.Ala3730Ala,p.A3730A,ivar,Unassigned 
+220539,NC_045512.2,11537,A,G,PASS,82,0,82,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220539,NC_045512.2,13195,T,C,PASS,4012,6,4006,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220539,NC_045512.2,14408,C,T,PASS,72,0,72,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220539,NC_045512.2,21762,C,T,PASS,89,0,89,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220539,NC_045512.2,21764,ATACATG,A,PASS,89,89,82,0.92,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220539,NC_045512.2,21846,C,T,PASS,73,0,73,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220539,NC_045512.2,23403,A,G,PASS,1894,4,1890,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220539,NC_045512.2,23525,C,T,PASS,1971,3,1968,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220539,NC_045512.2,23599,T,G,PASS,834,0,834,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220539,NC_045512.2,23604,C,A,PASS,809,0,808,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220539,NC_045512.2,23854,C,A,PASS,14,1,12,0.86,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned +220539,NC_045512.2,23948,G,T,PASS,146,0,146,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220539,NC_045512.2,23987,C,T,PASS,177,2,175,0.99,S,missense_variant,c.2425C>T,p.Pro809Ser,p.P809S,ivar,Unassigned +220539,NC_045512.2,24130,C,A,PASS,230,0,228,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220539,NC_045512.2,24424,A,T,PASS,57,0,57,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220539,NC_045512.2,24469,T,A,PASS,456,0,456,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220539,NC_045512.2,24503,C,T,PASS,539,6,532,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned 
+220539,NC_045512.2,25000,C,T,PASS,35,2,33,0.94,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220539,NC_045512.2,25584,C,T,PASS,37,0,37,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220539,NC_045512.2,26270,C,T,PASS,144,0,144,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220539,NC_045512.2,26530,A,G,PASS,35,0,35,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220539,NC_045512.2,26577,C,G,PASS,49,0,49,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220539,NC_045512.2,26709,G,A,PASS,21,0,21,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220539,NC_045512.2,27259,A,C,PASS,73,0,73,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220539,NC_045512.2,27807,C,T,PASS,37,0,37,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220539,NC_045512.2,2785,T,C,PASS,30,22,8,0.27,orf1ab,synonymous_variant,c.2520T>C,p.Asn840Asn,p.N840N,ivar,Unassigned +220539,NC_045512.2,28271,A,T,PASS,299,0,299,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220539,NC_045512.2,28311,C,T,PASS,288,2,286,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220539,NC_045512.2,2832,A,G,PASS,35,0,35,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220539,NC_045512.2,28361,GGAGAACGCA,G,PASS,204,202,147,0.72,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220539,NC_045512.2,28881,GG,AA,PASS,5835,60,5773,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220539,NC_045512.2,28883,G,C,PASS,5804,4,5791,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220539,NC_045512.2,3037,C,T,PASS,24,0,24,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220539,NC_045512.2,8393,G,A,PASS,3152,5,3147,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned 
+220540,NC_045512.2,11282,AGTTTGTCTG,A,PASS,12,12,12,1.0,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220540,NC_045512.2,13195,T,C,PASS,87,0,87,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220540,NC_045512.2,14646,T,A,PASS,12,4,8,0.67,orf1ab,missense_variant,c.14381T>A,p.Leu4794Gln,p.L4794Q,ivar,Unassigned +220540,NC_045512.2,2010,A,G,ft,10,6,4,0.4,orf1ab,missense_variant,c.1745A>G,p.Asp582Gly,p.D582G,ivar,Unassigned +220540,NC_045512.2,23403,A,G,PASS,282,0,282,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220540,NC_045512.2,23525,C,T,PASS,293,0,293,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220540,NC_045512.2,23599,T,G,PASS,121,0,121,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220540,NC_045512.2,23604,C,A,PASS,117,0,117,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220540,NC_045512.2,24130,C,A,PASS,12,0,12,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220540,NC_045512.2,26270,C,T,PASS,40,0,40,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220540,NC_045512.2,26530,A,G,PASS,12,0,12,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220540,NC_045512.2,26577,C,G,PASS,15,0,15,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220540,NC_045512.2,28271,A,T,PASS,41,2,39,0.95,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220540,NC_045512.2,28311,C,T,PASS,45,0,45,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220540,NC_045512.2,28361,GGAGAACGCA,G,PASS,33,33,25,0.76,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220540,NC_045512.2,28881,GG,AA,PASS,3161,17,3140,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned 
+220540,NC_045512.2,28883,G,C,PASS,3153,4,3146,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220540,NC_045512.2,8393,G,A,PASS,606,0,606,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220540,NC_045512.2,9572,G,A,PASS,96,2,94,0.98,orf1ab,missense_variant,c.9307G>A,p.Gly3103Ser,p.G3103S,ivar,Unassigned +220541,NC_045512.2,10029,C,T,PASS,11,0,11,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.1.1 +220541,NC_045512.2,10449,C,A,PASS,83,0,82,0.99,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.1.1 +220541,NC_045512.2,11282,AGTTTGTCTG,A,PASS,120,120,116,0.97,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.1.1 +220541,NC_045512.2,11537,A,G,PASS,197,0,197,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.1.1 +220541,NC_045512.2,13195,T,C,PASS,5495,3,5491,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.1.1 +220541,NC_045512.2,14408,C,T,PASS,104,0,104,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.1.1 +220541,NC_045512.2,16064,A,G,PASS,228,0,228,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,BA.1.1.1 +220541,NC_045512.2,16308,C,T,PASS,423,1,422,1.0,orf1ab,missense_variant,c.16043C>T,p.Ser5348Phe,p.S5348F,ivar,BA.1.1.1 +220541,NC_045512.2,18163,A,G,PASS,22,0,22,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.1.1 +220541,NC_045512.2,21762,C,T,PASS,147,0,147,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.1.1 +220541,NC_045512.2,21764,ATACATG,A,PASS,147,147,141,0.96,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.1.1 +220541,NC_045512.2,21846,C,T,PASS,142,0,142,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.1.1 +220541,NC_045512.2,22578,G,A,PASS,12,0,12,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.1.1 
+220541,NC_045512.2,22599,G,A,PASS,15,0,15,1.0,S,missense_variant,c.1037G>A,p.Arg346Lys,p.R346K,ivar,BA.1.1.1 +220541,NC_045512.2,22679,T,C,PASS,11,0,11,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.1.1 +220541,NC_045512.2,22686,C,T,PASS,11,0,11,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.1.1 +220541,NC_045512.2,23403,A,G,PASS,2543,1,2541,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.1.1 +220541,NC_045512.2,23525,C,T,PASS,2437,14,2421,0.99,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.1.1 +220541,NC_045512.2,23599,T,G,PASS,936,0,936,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.1.1 +220541,NC_045512.2,23604,C,A,PASS,908,2,901,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.1.1 +220541,NC_045512.2,23854,C,A,PASS,41,0,41,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.1.1 +220541,NC_045512.2,23948,G,T,PASS,581,0,580,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.1.1 +220541,NC_045512.2,24130,C,A,PASS,761,0,760,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.1.1 +220541,NC_045512.2,24424,A,T,PASS,135,0,135,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.1.1 +220541,NC_045512.2,24469,T,A,PASS,759,0,758,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.1.1 +220541,NC_045512.2,24503,C,T,PASS,912,18,894,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.1.1 +220541,NC_045512.2,25000,C,T,PASS,39,0,39,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.1.1 +220541,NC_045512.2,25584,C,T,PASS,60,2,58,0.97,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.1.1 +220541,NC_045512.2,26270,C,T,PASS,319,0,317,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.1.1 +220541,NC_045512.2,26530,A,G,PASS,157,0,157,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.1.1 
+220541,NC_045512.2,26577,C,G,PASS,174,0,174,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.1.1 +220541,NC_045512.2,26709,G,A,PASS,152,2,150,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.1.1 +220541,NC_045512.2,27259,A,C,PASS,165,0,165,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.1.1 +220541,NC_045512.2,27807,C,T,PASS,120,0,120,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.1.1 +220541,NC_045512.2,28271,A,T,PASS,1013,1,1010,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.1.1 +220541,NC_045512.2,28311,C,T,PASS,991,3,987,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.1.1 +220541,NC_045512.2,2832,A,G,PASS,41,0,41,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.1.1 +220541,NC_045512.2,28361,GGAGAACGCA,G,PASS,729,726,511,0.7,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.1.1 +220541,NC_045512.2,28512,C,T,PASS,407,288,106,0.26,N,missense_variant,c.239C>T,p.Pro80Leu,p.P80L,ivar,BA.1.1.1 +220541,NC_045512.2,28881,GG,AA,PASS,3023,30,2990,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.1.1 +220541,NC_045512.2,28883,G,C,PASS,3005,0,2998,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.1.1 +220541,NC_045512.2,3037,C,T,PASS,33,0,33,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.1.1 +220541,NC_045512.2,5386,T,G,PASS,16,0,16,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1.1.1 +220541,NC_045512.2,728,G,A,PASS,79,58,20,0.25,orf1ab,missense_variant,c.463G>A,p.Glu155Lys,p.E155K,ivar,BA.1.1.1 +220541,NC_045512.2,8393,G,A,PASS,3521,3,3515,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.1.1 +220541,NC_045512.2,9572,G,A,PASS,2527,14,2509,0.99,orf1ab,missense_variant,c.9307G>A,p.Gly3103Ser,p.G3103S,ivar,BA.1.1.1 +220542,NC_045512.2,10449,C,A,PASS,23,0,23,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned 
+220542,NC_045512.2,11537,A,G,PASS,83,0,83,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220542,NC_045512.2,13195,T,C,PASS,3374,13,3359,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220542,NC_045512.2,14408,C,T,PASS,61,0,61,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220542,NC_045512.2,16064,A,G,PASS,32,0,32,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,Unassigned +220542,NC_045512.2,16308,C,T,PASS,94,0,94,1.0,orf1ab,missense_variant,c.16043C>T,p.Ser5348Phe,p.S5348F,ivar,Unassigned +220542,NC_045512.2,21762,C,T,PASS,64,0,64,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220542,NC_045512.2,21764,ATACATG,A,PASS,65,64,64,0.98,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220542,NC_045512.2,21846,C,T,PASS,65,0,65,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220542,NC_045512.2,23403,A,G,PASS,1214,0,1214,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220542,NC_045512.2,23525,C,T,PASS,1267,3,1261,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220542,NC_045512.2,23599,T,G,PASS,536,1,534,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220542,NC_045512.2,23604,C,A,PASS,511,0,510,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220542,NC_045512.2,23948,G,T,PASS,34,0,34,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220542,NC_045512.2,24130,C,A,PASS,86,0,86,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220542,NC_045512.2,24469,T,A,PASS,299,0,297,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220542,NC_045512.2,24503,C,T,PASS,373,5,368,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220542,NC_045512.2,26530,A,G,PASS,11,0,11,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned 
+220542,NC_045512.2,26577,C,G,PASS,13,0,13,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220542,NC_045512.2,26709,G,A,PASS,17,0,17,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220542,NC_045512.2,27807,C,T,PASS,35,0,35,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220542,NC_045512.2,28271,A,T,PASS,768,0,767,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220542,NC_045512.2,28311,C,T,PASS,724,2,722,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220542,NC_045512.2,28361,GGAGAACGCA,G,PASS,451,450,317,0.7,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220542,NC_045512.2,28512,C,T,PASS,106,64,42,0.4,N,missense_variant,c.239C>T,p.Pro80Leu,p.P80L,ivar,Unassigned +220542,NC_045512.2,28881,GG,AA,PASS,6562,63,6491,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220542,NC_045512.2,28883,G,C,PASS,6525,2,6516,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220542,NC_045512.2,3037,C,T,PASS,13,1,12,0.92,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220542,NC_045512.2,4276,C,T,PASS,40,0,40,1.0,orf1ab,synonymous_variant,c.4011C>T,p.Tyr1337Tyr,p.Y1337Y,ivar,Unassigned +220542,NC_045512.2,8393,G,A,PASS,1787,5,1782,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220543,NC_045512.2,10029,C,T,PASS,66,0,66,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.1.1 +220543,NC_045512.2,10449,C,A,PASS,291,0,290,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.1.1 +220543,NC_045512.2,11282,AGTTTGTCTG,A,PASS,357,354,314,0.88,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.1.1 +220543,NC_045512.2,11537,A,G,PASS,556,1,554,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.1.1 
+220543,NC_045512.2,13195,T,C,PASS,7272,15,7253,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.1.1 +220543,NC_045512.2,14408,C,T,PASS,487,3,484,0.99,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.1.1 +220543,NC_045512.2,15336,T,C,PASS,19,12,7,0.37,orf1ab,missense_variant,c.15071T>C,p.Leu5024Ser,p.L5024S,ivar,BA.1.1.1 +220543,NC_045512.2,15359,G,A,PASS,15,7,8,0.53,orf1ab,missense_variant,c.15094G>A,p.Ala5032Thr,p.A5032T,ivar,BA.1.1.1 +220543,NC_045512.2,16064,A,G,PASS,636,1,635,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,BA.1.1.1 +220543,NC_045512.2,16308,C,T,PASS,1330,1,1329,1.0,orf1ab,missense_variant,c.16043C>T,p.Ser5348Phe,p.S5348F,ivar,BA.1.1.1 +220543,NC_045512.2,18163,A,G,PASS,43,0,43,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.1.1 +220543,NC_045512.2,21306,C,T,PASS,138,4,134,0.97,orf1ab,missense_variant,c.21041C>T,p.Ala7014Val,p.A7014V,ivar,BA.1.1.1 +220543,NC_045512.2,21762,C,T,PASS,541,1,540,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.1.1 +220543,NC_045512.2,21764,ATACATG,A,PASS,555,541,494,0.89,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.1.1 +220543,NC_045512.2,21846,C,T,PASS,533,2,531,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.1.1 +220543,NC_045512.2,22578,G,A,PASS,17,0,17,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.1.1 +220543,NC_045512.2,22599,G,A,PASS,15,0,15,1.0,S,missense_variant,c.1037G>A,p.Arg346Lys,p.R346K,ivar,BA.1.1.1 +220543,NC_045512.2,22673,TC,CT,PASS,17,0,17,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.1.1 +220543,NC_045512.2,22679,T,C,PASS,27,0,27,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.1.1 +220543,NC_045512.2,22686,C,T,PASS,26,0,26,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.1.1 
+220543,NC_045512.2,23013,A,C,PASS,10,0,10,1.0,S,missense_variant,c.1451A>C,p.Glu484Ala,p.E484A,ivar,BA.1.1.1 +220543,NC_045512.2,23040,A,G,PASS,10,0,10,1.0,S,missense_variant,c.1478A>G,p.Gln493Arg,p.Q493R,ivar,BA.1.1.1 +220543,NC_045512.2,23048,G,A,PASS,10,0,10,1.0,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1.1.1 +220543,NC_045512.2,23055,A,G,PASS,10,0,10,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1.1.1 +220543,NC_045512.2,23063,A,T,PASS,10,0,10,1.0,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1.1.1 +220543,NC_045512.2,23075,T,C,PASS,10,0,10,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1.1.1 +220543,NC_045512.2,23202,C,A,PASS,33,0,33,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.1.1 +220543,NC_045512.2,23403,A,G,PASS,5345,6,5339,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.1.1 +220543,NC_045512.2,23525,C,T,PASS,5055,8,5044,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.1.1 +220543,NC_045512.2,23599,T,G,PASS,1780,0,1780,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.1.1 +220543,NC_045512.2,23604,C,A,PASS,1726,0,1719,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.1.1 +220543,NC_045512.2,23854,C,A,PASS,106,0,106,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.1.1 +220543,NC_045512.2,23948,G,T,PASS,1529,0,1527,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.1.1 +220543,NC_045512.2,24130,C,A,PASS,2315,0,2289,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.1.1 +220543,NC_045512.2,24424,A,T,PASS,386,0,384,0.99,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.1.1 +220543,NC_045512.2,24469,T,A,PASS,1576,0,1573,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.1.1 +220543,NC_045512.2,24503,C,T,PASS,1890,45,1845,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.1.1 
+220543,NC_045512.2,25000,C,T,PASS,139,0,139,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.1.1 +220543,NC_045512.2,25584,C,T,PASS,235,1,234,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.1.1 +220543,NC_045512.2,26270,C,T,PASS,603,4,599,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.1.1 +220543,NC_045512.2,26530,A,G,PASS,60,0,60,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.1.1 +220543,NC_045512.2,26577,C,G,PASS,65,0,65,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.1.1 +220543,NC_045512.2,26709,G,A,PASS,72,0,72,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.1.1 +220543,NC_045512.2,27259,A,C,PASS,453,0,451,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.1.1 +220543,NC_045512.2,27807,C,T,PASS,142,0,141,0.99,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.1.1 +220543,NC_045512.2,28271,A,T,PASS,1561,4,1557,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.1.1 +220543,NC_045512.2,28311,C,T,PASS,1582,3,1577,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.1.1 +220543,NC_045512.2,2832,A,G,PASS,172,1,171,0.99,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.1.1 +220543,NC_045512.2,28361,GGAGAACGCA,G,PASS,1130,1128,735,0.65,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.1.1 +220543,NC_045512.2,28512,C,T,PASS,615,428,178,0.29,N,missense_variant,c.239C>T,p.Pro80Leu,p.P80L,ivar,BA.1.1.1 +220543,NC_045512.2,28881,GG,AA,PASS,2666,37,2626,0.98,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.1.1 +220543,NC_045512.2,28883,G,C,PASS,2644,3,2638,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.1.1 +220543,NC_045512.2,29451,C,T,PASS,5293,30,5263,0.99,N,missense_variant,c.1178C>T,p.Thr393Ile,p.T393I,ivar,BA.1.1.1 +220543,NC_045512.2,3037,C,T,PASS,78,1,77,0.99,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.1.1 
+220543,NC_045512.2,5386,T,G,PASS,26,0,26,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1.1.1 +220543,NC_045512.2,8393,G,A,PASS,4660,24,4632,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.1.1 +220544,NC_045512.2,10449,C,A,PASS,10,0,10,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.1.1 +220544,NC_045512.2,11282,AGTTTGTCTG,A,PASS,73,72,66,0.9,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.1.1 +220544,NC_045512.2,11537,A,G,PASS,46,0,46,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.1.1 +220544,NC_045512.2,13195,T,C,PASS,3335,1,3334,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.1.1 +220544,NC_045512.2,14408,C,T,PASS,36,0,36,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.1.1 +220544,NC_045512.2,16064,A,G,PASS,57,0,57,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,BA.1.1.1 +220544,NC_045512.2,16308,C,T,PASS,147,0,147,1.0,orf1ab,missense_variant,c.16043C>T,p.Ser5348Phe,p.S5348F,ivar,BA.1.1.1 +220544,NC_045512.2,206,C,A,PASS,32,18,12,0.38,orf1ab,upstream_gene_variant,c.-60C>A,.,.,ivar,BA.1.1.1 +220544,NC_045512.2,21762,C,T,PASS,78,0,78,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.1.1 +220544,NC_045512.2,21764,ATACATG,A,PASS,79,78,66,0.84,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.1.1 +220544,NC_045512.2,21846,C,T,PASS,38,0,38,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.1.1 +220544,NC_045512.2,23013,A,C,PASS,10,0,10,1.0,S,missense_variant,c.1451A>C,p.Glu484Ala,p.E484A,ivar,BA.1.1.1 +220544,NC_045512.2,23030,T,C,ft,10,7,3,0.3,S,missense_variant,c.1468T>C,p.Phe490Leu,p.F490L,ivar,BA.1.1.1 +220544,NC_045512.2,23040,A,G,PASS,10,0,10,1.0,S,missense_variant,c.1478A>G,p.Gln493Arg,p.Q493R,ivar,BA.1.1.1 
+220544,NC_045512.2,23048,G,A,PASS,11,0,11,1.0,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1.1.1 +220544,NC_045512.2,23055,A,G,PASS,11,0,11,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1.1.1 +220544,NC_045512.2,23063,A,T,PASS,13,0,13,1.0,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1.1.1 +220544,NC_045512.2,23075,T,C,PASS,16,0,16,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1.1.1 +220544,NC_045512.2,23202,C,A,PASS,10,0,10,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.1.1 +220544,NC_045512.2,23403,A,G,PASS,1658,2,1655,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.1.1 +220544,NC_045512.2,23525,C,T,PASS,1539,5,1533,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.1.1 +220544,NC_045512.2,23599,T,G,PASS,601,3,598,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.1.1 +220544,NC_045512.2,23604,C,A,PASS,583,0,583,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.1.1 +220544,NC_045512.2,23756,A,G,ft,11,8,3,0.27,S,missense_variant,c.2194A>G,p.Thr732Ala,p.T732A,ivar,BA.1.1.1 +220544,NC_045512.2,23854,C,A,PASS,12,0,12,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.1.1 +220544,NC_045512.2,23948,G,T,PASS,178,0,178,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.1.1 +220544,NC_045512.2,24130,C,A,PASS,282,0,279,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.1.1 +220544,NC_045512.2,24424,A,T,PASS,68,0,68,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.1.1 +220544,NC_045512.2,24469,T,A,PASS,322,0,322,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.1.1 +220544,NC_045512.2,24503,C,T,PASS,381,14,367,0.96,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.1.1 +220544,NC_045512.2,25000,C,T,PASS,18,0,18,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.1.1 
+220544,NC_045512.2,25584,C,T,PASS,27,0,27,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.1.1 +220544,NC_045512.2,26270,C,T,PASS,131,0,131,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.1.1 +220544,NC_045512.2,26530,A,G,PASS,119,0,119,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.1.1 +220544,NC_045512.2,26577,C,G,PASS,123,0,123,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.1.1 +220544,NC_045512.2,26709,G,A,PASS,133,2,131,0.98,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.1.1 +220544,NC_045512.2,27259,A,C,PASS,73,0,73,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.1.1 +220544,NC_045512.2,27807,C,T,PASS,15,0,15,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.1.1 +220544,NC_045512.2,2790,C,T,ft,15,11,4,0.27,orf1ab,missense_variant,c.2525C>T,p.Thr842Ile,p.T842I,ivar,BA.1.1.1 +220544,NC_045512.2,28271,A,T,PASS,206,0,206,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.1.1 +220544,NC_045512.2,28311,C,T,PASS,200,0,198,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.1.1 +220544,NC_045512.2,2832,A,G,PASS,15,0,15,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.1.1 +220544,NC_045512.2,28361,GGAGAACGCA,G,PASS,156,154,111,0.71,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.1.1 +220544,NC_045512.2,28881,GG,AA,PASS,6341,80,6256,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.1.1 +220544,NC_045512.2,28883,G,C,PASS,6310,0,6304,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.1.1 +220544,NC_045512.2,8393,G,A,PASS,1458,11,1447,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.1.1 +220544,NC_045512.2,9572,G,A,PASS,1323,9,1310,0.99,orf1ab,missense_variant,c.9307G>A,p.Gly3103Ser,p.G3103S,ivar,BA.1.1.1 +220545,NC_045512.2,10029,C,T,PASS,16,0,16,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,Unassigned 
+220545,NC_045512.2,10449,C,A,PASS,23,0,23,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220545,NC_045512.2,11282,AGTTTGTCTG,A,PASS,94,94,81,0.86,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220545,NC_045512.2,11537,A,G,PASS,99,0,99,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220545,NC_045512.2,11958,T,C,PASS,18,0,18,1.0,orf1ab,missense_variant,c.11693T>C,p.Ile3898Thr,p.I3898T,ivar,Unassigned +220545,NC_045512.2,13195,T,C,PASS,3708,5,3703,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220545,NC_045512.2,14408,C,T,PASS,59,0,59,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220545,NC_045512.2,14774,A,T,ft,13,9,4,0.31,orf1ab,missense_variant,c.14509A>T,p.Met4837Leu,p.M4837L,ivar,Unassigned +220545,NC_045512.2,16064,A,G,PASS,75,0,75,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,Unassigned +220545,NC_045512.2,18163,A,G,PASS,10,0,10,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,Unassigned +220545,NC_045512.2,21762,C,T,PASS,106,0,106,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220545,NC_045512.2,21764,ATACATG,A,PASS,109,106,91,0.83,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220545,NC_045512.2,21846,C,T,PASS,81,0,81,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220545,NC_045512.2,23403,A,G,PASS,1153,0,1153,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220545,NC_045512.2,23525,C,T,PASS,1133,2,1131,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220545,NC_045512.2,23599,T,G,PASS,445,0,445,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220545,NC_045512.2,23604,C,A,PASS,432,0,430,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned 
+220545,NC_045512.2,23854,C,A,PASS,24,0,24,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned +220545,NC_045512.2,23948,G,T,PASS,141,0,141,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220545,NC_045512.2,24130,C,A,PASS,177,0,177,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220545,NC_045512.2,24424,A,T,PASS,93,0,93,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220545,NC_045512.2,24469,T,A,PASS,454,0,451,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220545,NC_045512.2,24503,C,T,PASS,534,3,531,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220545,NC_045512.2,25000,C,T,PASS,35,0,35,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220545,NC_045512.2,25584,C,T,PASS,20,0,20,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220545,NC_045512.2,26270,C,T,PASS,77,0,77,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220545,NC_045512.2,26530,A,G,PASS,25,0,25,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220545,NC_045512.2,26577,C,G,PASS,36,0,36,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220545,NC_045512.2,26709,G,A,PASS,42,2,40,0.95,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220545,NC_045512.2,27259,A,C,PASS,62,0,62,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220545,NC_045512.2,27807,C,T,PASS,27,0,27,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220545,NC_045512.2,28238,T,C,PASS,556,0,556,1.0,ORF8,synonymous_variant,c.345T>C,p.Arg115Arg,p.R115R,ivar,Unassigned +220545,NC_045512.2,28271,A,T,PASS,638,4,630,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220545,NC_045512.2,28311,C,T,PASS,637,2,635,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned 
+220545,NC_045512.2,28361,GGAGAACGCA,G,PASS,462,461,341,0.74,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220545,NC_045512.2,28881,GG,AA,PASS,4829,76,4753,0.98,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220545,NC_045512.2,28883,G,C,PASS,4774,5,4765,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220545,NC_045512.2,3037,C,T,PASS,20,0,20,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220545,NC_045512.2,4835,G,C,PASS,55,35,20,0.36,orf1ab,missense_variant,c.4570G>C,p.Gly1524Arg,p.G1524R,ivar,Unassigned +220545,NC_045512.2,8393,G,A,PASS,1501,4,1495,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220546,NC_045512.2,10029,C,T,PASS,29,0,29,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.1.1 +220546,NC_045512.2,10449,C,A,PASS,74,0,74,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.1.1 +220546,NC_045512.2,11282,AGTTTGTCTG,A,PASS,229,225,199,0.87,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.1.1 +220546,NC_045512.2,11537,A,G,PASS,132,0,132,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.1.1 +220546,NC_045512.2,13195,T,C,PASS,4491,2,4489,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.1.1 +220546,NC_045512.2,14408,C,T,PASS,115,0,115,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.1.1 +220546,NC_045512.2,16064,A,G,PASS,219,0,219,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,BA.1.1.1 +220546,NC_045512.2,18163,A,G,PASS,14,0,14,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.1.1 +220546,NC_045512.2,21762,C,T,PASS,121,0,121,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.1.1 
+220546,NC_045512.2,21764,ATACATG,A,PASS,127,121,110,0.87,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.1.1 +220546,NC_045512.2,21846,C,T,PASS,121,0,121,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.1.1 +220546,NC_045512.2,22679,T,C,PASS,11,0,11,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.1.1 +220546,NC_045512.2,22686,C,T,PASS,10,0,10,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.1.1 +220546,NC_045512.2,23040,A,G,PASS,12,0,12,1.0,S,missense_variant,c.1478A>G,p.Gln493Arg,p.Q493R,ivar,BA.1.1.1 +220546,NC_045512.2,23048,G,A,PASS,12,0,12,1.0,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1.1.1 +220546,NC_045512.2,23055,A,G,PASS,12,0,12,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1.1.1 +220546,NC_045512.2,23063,A,T,PASS,12,0,12,1.0,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1.1.1 +220546,NC_045512.2,23075,T,C,PASS,13,0,13,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1.1.1 +220546,NC_045512.2,23202,C,A,PASS,30,0,30,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.1.1 +220546,NC_045512.2,23403,A,G,PASS,3367,2,3365,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.1.1 +220546,NC_045512.2,23525,C,T,PASS,3050,3,3045,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.1.1 +220546,NC_045512.2,23599,T,G,PASS,1142,0,1142,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.1.1 +220546,NC_045512.2,23604,C,A,PASS,1100,0,1094,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.1.1 +220546,NC_045512.2,23854,C,A,PASS,28,0,28,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.1.1 +220546,NC_045512.2,23948,G,T,PASS,349,1,344,0.99,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.1.1 +220546,NC_045512.2,24130,C,A,PASS,564,0,559,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.1.1 
+220546,NC_045512.2,24424,A,T,PASS,122,0,122,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.1.1 +220546,NC_045512.2,24469,T,A,PASS,553,0,548,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.1.1 +220546,NC_045512.2,24503,C,T,PASS,663,14,649,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.1.1 +220546,NC_045512.2,25000,C,T,PASS,53,0,53,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.1.1 +220546,NC_045512.2,25584,C,T,PASS,61,0,61,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.1.1 +220546,NC_045512.2,26270,C,T,PASS,368,2,366,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.1.1 +220546,NC_045512.2,26530,A,G,PASS,140,0,140,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.1.1 +220546,NC_045512.2,26577,C,G,PASS,173,0,173,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.1.1 +220546,NC_045512.2,26709,G,A,PASS,158,1,157,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.1.1 +220546,NC_045512.2,27259,A,C,PASS,165,0,165,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.1.1 +220546,NC_045512.2,27807,C,T,PASS,79,0,79,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.1.1 +220546,NC_045512.2,28238,T,C,PASS,607,0,607,1.0,ORF8,synonymous_variant,c.345T>C,p.Arg115Arg,p.R115R,ivar,BA.1.1.1 +220546,NC_045512.2,28271,A,T,PASS,751,0,749,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.1.1 +220546,NC_045512.2,28311,C,T,PASS,709,4,703,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.1.1 +220546,NC_045512.2,2832,A,G,PASS,42,0,42,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.1.1 +220546,NC_045512.2,28361,GGAGAACGCA,G,PASS,531,529,373,0.7,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.1.1 +220546,NC_045512.2,28512,C,T,PASS,385,283,99,0.26,N,missense_variant,c.239C>T,p.Pro80Leu,p.P80L,ivar,BA.1.1.1 
+220546,NC_045512.2,28881,GG,AA,PASS,2009,27,1980,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.1.1 +220546,NC_045512.2,28883,G,C,PASS,1988,0,1982,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.1.1 +220546,NC_045512.2,3037,C,T,PASS,24,0,24,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.1.1 +220546,NC_045512.2,5386,T,G,PASS,12,0,12,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1.1.1 +220546,NC_045512.2,76,T,A,ft,11,7,4,0.36,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,BA.1.1.1 +220546,NC_045512.2,78,T,G,ft,11,7,4,0.36,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,BA.1.1.1 +220546,NC_045512.2,8393,G,A,PASS,3173,9,3162,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.1.1 +220599,NC_045512.2,11537,A,G,PASS,13,0,13,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220599,NC_045512.2,13195,T,C,PASS,273,2,271,0.99,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220599,NC_045512.2,16064,A,G,PASS,19,0,19,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,Unassigned +220599,NC_045512.2,16853,G,T,PASS,16,0,14,0.88,orf1ab,missense_variant,c.16588G>T,p.Val5530Leu,p.V5530L,ivar,Unassigned +220599,NC_045512.2,23403,A,G,PASS,187,0,187,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220599,NC_045512.2,23525,C,T,PASS,144,0,144,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220599,NC_045512.2,23599,T,G,PASS,67,0,67,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220599,NC_045512.2,23604,C,A,PASS,67,0,67,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220599,NC_045512.2,23948,G,T,PASS,20,0,20,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220599,NC_045512.2,24130,C,A,PASS,36,0,36,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned 
+220599,NC_045512.2,24424,A,T,PASS,14,0,14,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220599,NC_045512.2,24469,T,A,PASS,30,0,30,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220599,NC_045512.2,24503,C,T,PASS,33,1,32,0.97,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220599,NC_045512.2,28881,GG,AA,PASS,166,5,161,0.97,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220599,NC_045512.2,28883,G,C,PASS,167,2,165,0.99,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220599,NC_045512.2,29400,C,T,PASS,89,0,89,1.0,N,missense_variant,c.1127C>T,p.Ala376Val,p.A376V,ivar,Unassigned +220599,NC_045512.2,3663,G,A,PASS,148,2,146,0.99,orf1ab,missense_variant,c.3398G>A,p.Ser1133Asn,p.S1133N,ivar,Unassigned +220599,NC_045512.2,4266,T,C,ft,11,8,3,0.27,orf1ab,missense_variant,c.4001T>C,p.Leu1334Ser,p.L1334S,ivar,Unassigned +220599,NC_045512.2,4363,A,T,ft,10,6,4,0.4,orf1ab,missense_variant,c.4098A>T,p.Glu1366Asp,p.E1366D,ivar,Unassigned +220599,NC_045512.2,8393,G,A,PASS,197,1,196,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220601,NC_045512.2,10029,C,T,PASS,335,2,333,0.99,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.1 +220601,NC_045512.2,10449,C,A,PASS,827,0,824,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.1 +220601,NC_045512.2,11282,AGTTTGTCTG,A,PASS,1412,1400,1185,0.84,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.1 +220601,NC_045512.2,11537,A,G,PASS,872,0,872,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.1 +220601,NC_045512.2,13195,T,C,PASS,2886,7,2877,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.1 +220601,NC_045512.2,14408,C,T,PASS,367,5,362,0.99,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.1 
+220601,NC_045512.2,15240,C,T,PASS,59,0,59,1.0,orf1ab,missense_variant,c.14975C>T,p.Thr4992Ile,p.T4992I,ivar,BA.1.1 +220601,NC_045512.2,18163,A,G,PASS,241,2,239,0.99,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.1 +220601,NC_045512.2,21762,C,T,PASS,460,2,458,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.1 +220601,NC_045512.2,21764,ATACATG,A,PASS,465,463,409,0.88,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.1 +220601,NC_045512.2,21846,C,T,PASS,446,6,440,0.99,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.1 +220601,NC_045512.2,21986,GGTGTTTATT,G,PASS,15,15,15,1.0,S,disruptive_inframe_deletion,c.425_433delGTGTTTATT,p.Gly142_Tyr145delinsAsp,p.G142_Y145delinsD,ivar,BA.1.1 +220601,NC_045512.2,22193,AATT,A,PASS,25,25,15,0.6,S,disruptive_inframe_deletion,c.632_634delATT,p.Asn211_Leu212delinsIle,p.N211_L212delinsI,ivar,BA.1.1 +220601,NC_045512.2,22204,T,TGAGCCAGAA,ft,14,14,5,0.36,S,disruptive_inframe_insertion,c.644_645insGCCAGAAGA,p.Arg214_Asp215insGluProGlu,p.R214_D215insEPE,ivar,BA.1.1 +220601,NC_045512.2,22578,G,A,PASS,130,2,128,0.98,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.1 +220601,NC_045512.2,22599,G,A,PASS,141,0,141,1.0,S,missense_variant,c.1037G>A,p.Arg346Lys,p.R346K,ivar,BA.1.1 +220601,NC_045512.2,22673,TC,CT,PASS,111,0,111,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.1 +220601,NC_045512.2,22679,T,C,PASS,162,0,162,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.1 +220601,NC_045512.2,22686,C,T,PASS,161,1,160,0.99,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.1 +220601,NC_045512.2,22813,G,T,PASS,20,0,20,1.0,S,missense_variant,c.1251G>T,p.Lys417Asn,p.K417N,ivar,BA.1.1 +220601,NC_045512.2,22882,T,G,PASS,10,0,10,1.0,S,missense_variant,c.1320T>G,p.Asn440Lys,p.N440K,ivar,BA.1.1 +220601,NC_045512.2,22898,G,A,PASS,13,0,13,1.0,S,missense_variant,c.1336G>A,p.Gly446Ser,p.G446S,ivar,BA.1.1 
+220601,NC_045512.2,22992,G,A,PASS,38,0,38,1.0,S,missense_variant,c.1430G>A,p.Ser477Asn,p.S477N,ivar,BA.1.1 +220601,NC_045512.2,22995,C,A,PASS,37,0,37,1.0,S,missense_variant,c.1433C>A,p.Thr478Lys,p.T478K,ivar,BA.1.1 +220601,NC_045512.2,23013,A,C,PASS,52,0,52,1.0,S,missense_variant,c.1451A>C,p.Glu484Ala,p.E484A,ivar,BA.1.1 +220601,NC_045512.2,23040,A,G,PASS,53,0,53,1.0,S,missense_variant,c.1478A>G,p.Gln493Arg,p.Q493R,ivar,BA.1.1 +220601,NC_045512.2,23048,G,A,PASS,56,0,56,1.0,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1.1 +220601,NC_045512.2,23055,A,G,PASS,57,0,57,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1.1 +220601,NC_045512.2,23063,A,T,PASS,62,0,62,1.0,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1.1 +220601,NC_045512.2,23075,T,C,PASS,67,0,67,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1.1 +220601,NC_045512.2,23202,C,A,PASS,101,1,99,0.98,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.1 +220601,NC_045512.2,23403,A,G,PASS,2063,4,2059,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.1 +220601,NC_045512.2,23525,C,T,PASS,1975,4,1971,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.1 +220601,NC_045512.2,23599,T,G,PASS,738,0,738,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.1 +220601,NC_045512.2,23604,C,A,PASS,705,2,699,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.1 +220601,NC_045512.2,23854,C,A,PASS,538,0,538,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.1 +220601,NC_045512.2,23948,G,T,PASS,1237,0,1235,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.1 +220601,NC_045512.2,24130,C,A,PASS,1834,0,1827,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.1 +220601,NC_045512.2,24424,A,T,PASS,1387,0,1383,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.1 +220601,NC_045512.2,24469,T,A,PASS,1593,3,1585,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.1 
+220601,NC_045512.2,24503,C,T,PASS,1682,21,1660,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.1 +220601,NC_045512.2,25000,C,T,PASS,565,1,564,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.1 +220601,NC_045512.2,25584,C,T,PASS,769,3,763,0.99,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.1 +220601,NC_045512.2,26270,C,T,PASS,1265,4,1260,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.1 +220601,NC_045512.2,26530,A,G,PASS,243,0,243,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.1 +220601,NC_045512.2,26577,C,G,PASS,223,0,223,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.1 +220601,NC_045512.2,26709,G,A,PASS,305,2,303,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.1 +220601,NC_045512.2,27259,A,C,PASS,843,0,843,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.1 +220601,NC_045512.2,27807,C,T,PASS,419,0,416,0.99,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.1 +220601,NC_045512.2,28271,A,T,PASS,834,0,833,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.1 +220601,NC_045512.2,28311,C,T,PASS,845,3,842,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.1 +220601,NC_045512.2,2832,A,G,PASS,1221,6,1215,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.1 +220601,NC_045512.2,28361,GGAGAACGCA,G,PASS,695,694,501,0.72,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.1 +220601,NC_045512.2,28877,AG,TC,PASS,632,2,629,1.0,N,synonymous_variant,c.604_605delAGinsTC,p.203,p.203,ivar,BA.1.1 +220601,NC_045512.2,28881,GG,AA,PASS,631,2,628,1.0,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.1 +220601,NC_045512.2,28883,G,C,PASS,629,0,629,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.1 +220601,NC_045512.2,3037,C,T,PASS,124,0,124,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.1 
+220601,NC_045512.2,5386,T,G,PASS,102,0,102,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1.1 +220601,NC_045512.2,6445,C,T,PASS,10,0,10,1.0,orf1ab,synonymous_variant,c.6180C>T,p.Asp2060Asp,p.D2060D,ivar,BA.1.1 +220601,NC_045512.2,6512,AGTT,A,PASS,12,12,10,0.83,orf1ab,disruptive_inframe_deletion,c.6248_6250delGTT,p.Ser2083_Leu2084delinsIle,p.S2083_L2084delinsI,ivar,BA.1.1 +220601,NC_045512.2,8393,G,A,PASS,1980,19,1959,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.1 +220624,NC_045512.2,10449,C,A,PASS,11,0,11,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220624,NC_045512.2,11282,AGTTTGTCTG,A,PASS,31,31,28,0.9,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220624,NC_045512.2,11537,A,G,PASS,58,0,58,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220624,NC_045512.2,13195,T,C,PASS,2327,1,2323,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220624,NC_045512.2,14408,C,T,PASS,26,0,26,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220624,NC_045512.2,16925,A,G,ft,24,18,6,0.25,orf1ab,missense_variant,c.16660A>G,p.Ile5554Val,p.I5554V,ivar,Unassigned +220624,NC_045512.2,20371,C,G,ft,23,17,6,0.26,orf1ab,stop_gained,c.20106C>G,p.Tyr6702*,p.Y6702*,ivar,Unassigned +220624,NC_045512.2,2172,A,G,PASS,181,5,176,0.97,orf1ab,missense_variant,c.1907A>G,p.Lys636Arg,p.K636R,ivar,Unassigned +220624,NC_045512.2,21762,C,T,PASS,21,0,21,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220624,NC_045512.2,21764,ATACATG,A,PASS,21,21,20,0.95,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220624,NC_045512.2,21846,C,T,PASS,25,0,25,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned 
+220624,NC_045512.2,23403,A,G,PASS,920,0,920,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220624,NC_045512.2,23525,C,T,PASS,910,0,907,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220624,NC_045512.2,23599,T,G,PASS,359,0,359,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220624,NC_045512.2,23604,C,A,PASS,351,0,351,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220624,NC_045512.2,23948,G,T,PASS,35,0,35,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220624,NC_045512.2,24130,C,A,PASS,29,0,29,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220624,NC_045512.2,24424,A,T,PASS,26,0,26,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220624,NC_045512.2,24469,T,A,PASS,145,0,142,0.98,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220624,NC_045512.2,24503,C,T,PASS,171,3,168,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220624,NC_045512.2,25584,C,T,PASS,17,0,17,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220624,NC_045512.2,25855,G,T,PASS,11,0,11,1.0,ORF3a,missense_variant,c.463G>T,p.Asp155Tyr,p.D155Y,ivar,Unassigned +220624,NC_045512.2,26270,C,T,PASS,112,0,112,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220624,NC_045512.2,26530,A,G,PASS,166,0,166,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220624,NC_045512.2,26577,C,G,PASS,175,0,175,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220624,NC_045512.2,26709,G,A,PASS,200,0,200,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220624,NC_045512.2,27259,A,C,PASS,51,0,51,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220624,NC_045512.2,27807,C,T,PASS,52,0,52,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned 
+220624,NC_045512.2,27933,G,A,PASS,186,0,186,1.0,ORF8,missense_variant,c.40G>A,p.Ala14Thr,p.A14T,ivar,Unassigned +220624,NC_045512.2,28271,A,T,PASS,345,2,343,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220624,NC_045512.2,28311,C,T,PASS,378,2,376,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220624,NC_045512.2,2832,A,G,PASS,18,0,18,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220624,NC_045512.2,28361,GGAGAACGCA,G,PASS,299,296,209,0.7,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220624,NC_045512.2,28512,C,T,PASS,182,132,46,0.25,N,missense_variant,c.239C>T,p.Pro80Leu,p.P80L,ivar,Unassigned +220624,NC_045512.2,28881,GG,AA,PASS,4288,63,4219,0.98,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220624,NC_045512.2,28883,G,C,PASS,4247,4,4241,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220624,NC_045512.2,3037,C,T,PASS,20,0,20,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220624,NC_045512.2,8393,G,A,PASS,2102,5,2097,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220625,NC_045512.2,10449,C,A,PASS,15,0,15,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220625,NC_045512.2,11282,AGTTTGTCTG,A,PASS,47,47,42,0.89,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220625,NC_045512.2,11537,A,G,PASS,14,0,14,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220625,NC_045512.2,13195,T,C,PASS,1350,2,1348,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220625,NC_045512.2,14408,C,T,PASS,19,0,19,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220625,NC_045512.2,21762,C,T,PASS,26,0,26,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned 
+220625,NC_045512.2,21764,ATACATG,A,PASS,26,26,22,0.85,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220625,NC_045512.2,21846,C,T,PASS,16,0,16,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220625,NC_045512.2,23403,A,G,PASS,467,0,467,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220625,NC_045512.2,23525,C,T,PASS,551,0,551,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220625,NC_045512.2,23599,T,G,PASS,216,0,216,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220625,NC_045512.2,23604,C,A,PASS,209,0,207,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220625,NC_045512.2,23948,G,T,PASS,33,0,33,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220625,NC_045512.2,24130,C,A,PASS,40,0,40,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220625,NC_045512.2,24424,A,T,PASS,38,0,37,0.97,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220625,NC_045512.2,24469,T,A,PASS,152,2,150,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220625,NC_045512.2,24503,C,T,PASS,165,2,163,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220625,NC_045512.2,25584,C,T,PASS,21,0,21,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220625,NC_045512.2,26270,C,T,PASS,74,0,74,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220625,NC_045512.2,26530,A,G,PASS,194,0,194,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220625,NC_045512.2,26577,C,G,PASS,195,0,195,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220625,NC_045512.2,26709,G,A,PASS,162,2,160,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220625,NC_045512.2,27259,A,C,PASS,40,0,40,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned 
+220625,NC_045512.2,27807,C,T,PASS,32,0,32,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220625,NC_045512.2,28271,A,T,PASS,286,3,281,0.98,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220625,NC_045512.2,28311,C,T,PASS,291,2,288,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220625,NC_045512.2,28361,GGAGAACGCA,G,PASS,230,223,167,0.73,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220625,NC_045512.2,28881,GG,AA,PASS,8535,93,8429,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220625,NC_045512.2,28883,G,C,PASS,8481,1,8473,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220625,NC_045512.2,8269,T,A,PASS,183,66,115,0.63,orf1ab,missense_variant,c.8004T>A,p.Asp2668Glu,p.D2668E,ivar,Unassigned +220625,NC_045512.2,8393,G,A,PASS,932,7,925,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220626,NC_045512.2,10449,C,A,PASS,33,0,33,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220626,NC_045512.2,11074,CT,C,PASS,41,41,13,0.32,orf1ab,frameshift_variant,c.10817delT,p.Leu3606fs,p.L3606fs,ivar,Unassigned +220626,NC_045512.2,11282,AGTTTGTCTG,A,PASS,86,85,78,0.91,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220626,NC_045512.2,11537,A,G,PASS,82,0,82,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220626,NC_045512.2,13195,T,C,PASS,2747,4,2743,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220626,NC_045512.2,14408,C,T,PASS,43,0,43,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220626,NC_045512.2,21762,C,T,PASS,56,0,56,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned 
+220626,NC_045512.2,21764,ATACATG,A,PASS,57,56,48,0.84,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220626,NC_045512.2,21846,C,T,PASS,51,2,49,0.96,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220626,NC_045512.2,21,C,T,PASS,10,0,10,1.0,orf1ab,upstream_gene_variant,c.-245C>T,.,.,ivar,Unassigned +220626,NC_045512.2,23403,A,G,PASS,878,3,875,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220626,NC_045512.2,23525,C,T,PASS,938,2,934,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220626,NC_045512.2,23599,T,G,PASS,371,0,370,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220626,NC_045512.2,23604,C,A,PASS,357,0,352,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220626,NC_045512.2,23948,G,T,PASS,196,0,195,0.99,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220626,NC_045512.2,24130,C,A,PASS,297,0,296,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220626,NC_045512.2,24424,A,T,PASS,52,0,52,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220626,NC_045512.2,24469,T,A,PASS,251,0,249,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220626,NC_045512.2,24503,C,T,PASS,313,2,311,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220626,NC_045512.2,25000,C,T,PASS,21,0,21,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220626,NC_045512.2,25584,C,T,PASS,42,0,42,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220626,NC_045512.2,26270,C,T,PASS,221,0,221,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220626,NC_045512.2,26530,A,G,PASS,111,0,111,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220626,NC_045512.2,26577,C,G,PASS,88,0,88,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned 
+220626,NC_045512.2,26709,G,A,PASS,113,0,113,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220626,NC_045512.2,27052,C,T,ft,14,10,4,0.29,M,missense_variant,c.530C>T,p.Ser177Phe,p.S177F,ivar,Unassigned +220626,NC_045512.2,27259,A,C,PASS,184,0,184,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220626,NC_045512.2,27807,C,T,PASS,52,0,52,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220626,NC_045512.2,28271,A,T,PASS,513,0,513,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220626,NC_045512.2,28311,C,T,PASS,534,0,534,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220626,NC_045512.2,2832,A,G,PASS,36,0,36,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220626,NC_045512.2,28361,GGAGAACGCA,G,PASS,424,423,278,0.66,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220626,NC_045512.2,28877,AG,TC,PASS,4416,5,4405,1.0,N,synonymous_variant,c.604_605delAGinsTC,p.203,p.203,ivar,Unassigned +220626,NC_045512.2,28881,GG,AA,PASS,4440,22,4409,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220626,NC_045512.2,28883,G,C,PASS,4441,0,4429,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220626,NC_045512.2,3037,C,T,PASS,30,0,30,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220626,NC_045512.2,76,T,A,ft,13,9,4,0.31,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220626,NC_045512.2,78,T,G,ft,13,9,4,0.31,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,Unassigned +220626,NC_045512.2,8393,G,A,PASS,2004,2,2002,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220627,NC_045512.2,10029,C,T,PASS,32,0,32,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1 +220627,NC_045512.2,10449,C,A,PASS,122,0,122,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1 
+220627,NC_045512.2,11282,AGTTTGTCTG,A,PASS,385,380,343,0.89,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1 +220627,NC_045512.2,11537,A,G,PASS,480,0,480,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1 +220627,NC_045512.2,13195,T,C,PASS,6218,9,6205,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1 +220627,NC_045512.2,14408,C,T,PASS,260,0,258,0.99,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1 +220627,NC_045512.2,18163,A,G,PASS,37,0,37,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1 +220627,NC_045512.2,21762,C,T,PASS,324,2,319,0.98,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1 +220627,NC_045512.2,21764,ATACATG,A,PASS,333,322,292,0.88,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1 +220627,NC_045512.2,21795,G,A,PASS,361,3,355,0.98,S,missense_variant,c.233G>A,p.Arg78Lys,p.R78K,ivar,BA.1 +220627,NC_045512.2,21846,C,T,PASS,346,0,346,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1 +220627,NC_045512.2,22193,AATT,A,PASS,19,19,18,0.95,S,disruptive_inframe_deletion,c.632_634delATT,p.Asn211_Leu212delinsIle,p.N211_L212delinsI,ivar,BA.1 +220627,NC_045512.2,22204,T,TGAGCCAGAA,PASS,17,17,14,0.82,S,disruptive_inframe_insertion,c.644_645insGCCAGAAGA,p.Arg214_Asp215insGluProGlu,p.R214_D215insEPE,ivar,BA.1 +220627,NC_045512.2,22578,G,A,PASS,36,0,36,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1 +220627,NC_045512.2,22673,TC,CT,PASS,33,0,33,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1 +220627,NC_045512.2,22679,T,C,PASS,42,0,42,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1 +220627,NC_045512.2,22686,C,T,PASS,42,0,42,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1 +220627,NC_045512.2,23063,A,T,PASS,10,0,10,1.0,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1 
+220627,NC_045512.2,23075,T,C,PASS,12,0,12,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1 +220627,NC_045512.2,23202,C,A,PASS,40,0,39,0.98,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1 +220627,NC_045512.2,23403,A,G,PASS,2548,5,2543,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1 +220627,NC_045512.2,23525,C,T,PASS,2427,2,2416,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1 +220627,NC_045512.2,23599,T,G,PASS,916,0,916,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1 +220627,NC_045512.2,23604,C,A,PASS,885,0,881,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1 +220627,NC_045512.2,23854,C,A,PASS,66,0,64,0.97,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1 +220627,NC_045512.2,23948,G,T,PASS,817,1,810,0.99,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1 +220627,NC_045512.2,24130,C,A,PASS,1219,0,1209,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1 +220627,NC_045512.2,24424,A,T,PASS,314,0,312,0.99,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1 +220627,NC_045512.2,24469,T,A,PASS,1245,0,1245,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1 +220627,NC_045512.2,24503,C,T,PASS,1525,27,1496,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1 +220627,NC_045512.2,25000,C,T,PASS,110,0,110,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1 +220627,NC_045512.2,25584,C,T,PASS,182,3,179,0.98,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1 +220627,NC_045512.2,26270,C,T,PASS,987,2,984,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1 +220627,NC_045512.2,26530,A,G,PASS,399,0,399,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1 +220627,NC_045512.2,26577,C,G,PASS,422,0,422,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1 +220627,NC_045512.2,26709,G,A,PASS,464,3,461,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1 
+220627,NC_045512.2,27259,A,C,PASS,444,0,441,0.99,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1 +220627,NC_045512.2,27807,C,T,PASS,143,0,143,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1 +220627,NC_045512.2,28271,A,T,PASS,3928,8,3914,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1 +220627,NC_045512.2,28311,C,T,PASS,4161,5,4146,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1 +220627,NC_045512.2,2832,A,G,PASS,161,2,159,0.99,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1 +220627,NC_045512.2,28361,GGAGAACGCA,G,PASS,3059,3037,1264,0.41,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1 +220627,NC_045512.2,28881,GG,AA,PASS,2726,36,2687,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1 +220627,NC_045512.2,28883,G,C,PASS,2704,2,2697,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1 +220627,NC_045512.2,29195,G,T,PASS,2958,0,2935,0.99,N,missense_variant,c.922G>T,p.Ala308Ser,p.A308S,ivar,BA.1 +220627,NC_045512.2,3037,C,T,PASS,68,0,68,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1 +220627,NC_045512.2,5386,T,G,PASS,16,0,16,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1 +220627,NC_045512.2,5515,G,T,PASS,26,0,26,1.0,orf1ab,synonymous_variant,c.5250G>T,p.Val1750Val,p.V1750V,ivar,BA.1 +220627,NC_045512.2,7321,C,T,ft,12,8,4,0.33,orf1ab,synonymous_variant,c.7056C>T,p.Ser2352Ser,p.S2352S,ivar,BA.1 +220627,NC_045512.2,8393,G,A,PASS,2820,7,2811,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1 +220628,NC_045512.2,10029,C,T,PASS,42,0,42,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.15.1 +220628,NC_045512.2,10135,T,C,PASS,138,0,138,1.0,orf1ab,synonymous_variant,c.9870T>C,p.Leu3290Leu,p.L3290L,ivar,BA.1.15.1 +220628,NC_045512.2,10449,C,A,PASS,143,0,141,0.99,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.15.1 
+220628,NC_045512.2,11282,AGTTTGTCTG,A,PASS,551,550,482,0.87,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.15.1 +220628,NC_045512.2,11537,A,G,PASS,649,0,649,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.15.1 +220628,NC_045512.2,13195,T,C,PASS,5534,17,5515,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.15.1 +220628,NC_045512.2,14408,C,T,PASS,273,0,273,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.15.1 +220628,NC_045512.2,18163,A,G,PASS,26,0,26,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.15.1 +220628,NC_045512.2,21762,C,T,PASS,404,2,402,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.15.1 +220628,NC_045512.2,21764,ATACATG,A,PASS,409,405,357,0.87,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.15.1 +220628,NC_045512.2,21846,C,T,PASS,379,2,375,0.99,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.15.1 +220628,NC_045512.2,21,C,T,PASS,28,0,28,1.0,orf1ab,upstream_gene_variant,c.-245C>T,.,.,ivar,BA.1.15.1 +220628,NC_045512.2,22578,G,A,PASS,37,0,37,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.15.1 +220628,NC_045512.2,22673,TC,CT,PASS,14,0,14,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.15.1 +220628,NC_045512.2,22679,T,C,PASS,21,0,21,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.15.1 +220628,NC_045512.2,22686,C,T,PASS,20,0,20,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.15.1 +220628,NC_045512.2,22992,G,A,PASS,12,0,12,1.0,S,missense_variant,c.1430G>A,p.Ser477Asn,p.S477N,ivar,BA.1.15.1 +220628,NC_045512.2,22995,C,A,PASS,12,0,10,0.83,S,missense_variant,c.1433C>A,p.Thr478Lys,p.T478K,ivar,BA.1.15.1 +220628,NC_045512.2,23013,A,C,PASS,15,0,15,1.0,S,missense_variant,c.1451A>C,p.Glu484Ala,p.E484A,ivar,BA.1.15.1 
+220628,NC_045512.2,23040,A,G,PASS,16,0,16,1.0,S,missense_variant,c.1478A>G,p.Gln493Arg,p.Q493R,ivar,BA.1.15.1 +220628,NC_045512.2,23048,G,A,PASS,15,0,15,1.0,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1.15.1 +220628,NC_045512.2,23055,A,G,PASS,15,0,15,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1.15.1 +220628,NC_045512.2,23063,A,T,PASS,16,0,16,1.0,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1.15.1 +220628,NC_045512.2,23075,T,C,PASS,16,0,16,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1.15.1 +220628,NC_045512.2,23202,C,A,PASS,37,0,37,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.15.1 +220628,NC_045512.2,23403,A,G,PASS,3111,5,3106,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.15.1 +220628,NC_045512.2,23525,C,T,PASS,2843,8,2834,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.15.1 +220628,NC_045512.2,23599,T,G,PASS,962,0,962,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.15.1 +220628,NC_045512.2,23604,C,A,PASS,912,0,906,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.15.1 +220628,NC_045512.2,23854,C,A,PASS,88,1,87,0.99,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.15.1 +220628,NC_045512.2,23948,G,T,PASS,794,0,788,0.99,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.15.1 +220628,NC_045512.2,24130,C,A,PASS,1286,0,1279,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.15.1 +220628,NC_045512.2,24424,A,T,PASS,354,0,353,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.15.1 +220628,NC_045512.2,24469,T,A,PASS,1256,4,1247,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.15.1 +220628,NC_045512.2,24503,C,T,PASS,1507,30,1473,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.15.1 +220628,NC_045512.2,24803,A,G,PASS,632,0,632,1.0,S,missense_variant,c.3241A>G,p.Ile1081Val,p.I1081V,ivar,BA.1.15.1 
+220628,NC_045512.2,25000,C,T,PASS,102,3,99,0.97,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.15.1 +220628,NC_045512.2,25584,C,T,PASS,85,0,85,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.15.1 +220628,NC_045512.2,25708,C,T,PASS,106,0,106,1.0,ORF3a,missense_variant,c.316C>T,p.Leu106Phe,p.L106F,ivar,BA.1.15.1 +220628,NC_045512.2,26270,C,T,PASS,620,0,620,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.15.1 +220628,NC_045512.2,26530,A,G,PASS,288,0,288,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.15.1 +220628,NC_045512.2,26577,C,G,PASS,372,0,372,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.15.1 +220628,NC_045512.2,26709,G,A,PASS,304,2,302,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.15.1 +220628,NC_045512.2,27259,A,C,PASS,303,0,303,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.15.1 +220628,NC_045512.2,27667,G,T,PASS,76,56,20,0.26,ORF7a,stop_gained,c.274G>T,p.Glu92*,p.E92*,ivar,BA.1.15.1 +220628,NC_045512.2,27807,C,T,PASS,93,0,93,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.15.1 +220628,NC_045512.2,28271,A,T,PASS,1719,2,1712,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.15.1 +220628,NC_045512.2,28311,C,T,PASS,1718,5,1712,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.15.1 +220628,NC_045512.2,2832,A,G,PASS,220,0,220,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.15.1 +220628,NC_045512.2,28361,GGAGAACGCA,G,PASS,1218,1209,726,0.6,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.15.1 +220628,NC_045512.2,28363,A,T,PASS,16,12,4,0.25,N,synonymous_variant,c.90A>T,p.Gly30Gly,p.G30G,ivar,BA.1.15.1 +220628,NC_045512.2,28881,GG,AA,PASS,2323,23,2299,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.15.1 +220628,NC_045512.2,28883,G,C,PASS,2314,1,2311,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.15.1 
+220628,NC_045512.2,29301,A,G,PASS,3873,8,3862,1.0,N,missense_variant,c.1028A>G,p.Asp343Gly,p.D343G,ivar,BA.1.15.1 +220628,NC_045512.2,3037,C,T,PASS,99,0,99,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.15.1 +220628,NC_045512.2,5386,T,G,PASS,34,0,34,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1.15.1 +220628,NC_045512.2,694,T,A,PASS,6454,3969,2458,0.38,orf1ab,missense_variant,c.429T>A,p.Phe143Leu,p.F143L,ivar,BA.1.15.1 +220628,NC_045512.2,728,G,A,PASS,296,218,78,0.26,orf1ab,missense_variant,c.463G>A,p.Glu155Lys,p.E155K,ivar,BA.1.15.1 +220628,NC_045512.2,7488,C,T,PASS,1405,1041,364,0.26,orf1ab,missense_variant,c.7223C>T,p.Thr2408Ile,p.T2408I,ivar,BA.1.15.1 +220628,NC_045512.2,8393,G,A,PASS,3090,10,3074,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.15.1 +220629,NC_045512.2,11282,AGTTTGTCTG,A,PASS,38,38,34,0.89,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220629,NC_045512.2,11537,A,G,PASS,34,0,34,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220629,NC_045512.2,13195,T,C,PASS,1615,3,1612,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220629,NC_045512.2,14408,C,T,PASS,28,0,28,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220629,NC_045512.2,14646,T,A,PASS,36,22,14,0.39,orf1ab,missense_variant,c.14381T>A,p.Leu4794Gln,p.L4794Q,ivar,Unassigned +220629,NC_045512.2,15738,C,T,PASS,1840,8,1832,1.0,orf1ab,missense_variant,c.15473C>T,p.Ser5158Leu,p.S5158L,ivar,Unassigned +220629,NC_045512.2,21762,C,T,PASS,26,0,26,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220629,NC_045512.2,21764,ATACATG,A,PASS,29,26,26,0.9,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220629,NC_045512.2,21846,C,T,PASS,43,0,43,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned 
+220629,NC_045512.2,23403,A,G,PASS,916,0,916,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220629,NC_045512.2,23525,C,T,PASS,950,0,949,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220629,NC_045512.2,23599,T,G,PASS,431,0,431,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220629,NC_045512.2,23604,C,A,PASS,415,0,413,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220629,NC_045512.2,23811,G,A,ft,11,6,5,0.45,S,missense_variant,c.2249G>A,p.Ser750Asn,p.S750N,ivar,Unassigned +220629,NC_045512.2,23853,AC,A,ft,13,13,4,0.31,S,frameshift_variant,c.2293delC,p.Arg765fs,p.R765fs,ivar,Unassigned +220629,NC_045512.2,23948,G,T,PASS,45,0,45,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220629,NC_045512.2,24130,C,A,PASS,85,0,85,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220629,NC_045512.2,24424,A,T,PASS,44,0,44,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220629,NC_045512.2,24469,T,A,PASS,346,0,346,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220629,NC_045512.2,24503,C,T,PASS,398,4,394,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220629,NC_045512.2,25584,C,T,PASS,23,0,23,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220629,NC_045512.2,25893,T,C,ft,16,12,4,0.25,ORF3a,synonymous_variant,c.501T>C,p.Ile167Ile,p.I167I,ivar,Unassigned +220629,NC_045512.2,26270,C,T,PASS,711,0,711,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220629,NC_045512.2,26530,A,G,PASS,243,0,243,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220629,NC_045512.2,26577,C,G,PASS,236,0,236,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220629,NC_045512.2,26709,G,A,PASS,290,2,288,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned 
+220629,NC_045512.2,27259,A,C,PASS,76,0,76,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220629,NC_045512.2,27670,G,T,PASS,18,0,18,1.0,ORF7a,missense_variant,c.277G>T,p.Val93Phe,p.V93F,ivar,Unassigned +220629,NC_045512.2,27807,C,T,PASS,22,0,22,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220629,NC_045512.2,28271,A,T,PASS,1156,0,1152,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220629,NC_045512.2,28311,C,T,PASS,1197,0,1193,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220629,NC_045512.2,2832,A,G,PASS,10,0,10,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220629,NC_045512.2,28361,GGAGAACGCA,G,PASS,975,969,682,0.7,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220629,NC_045512.2,28881,GG,AA,PASS,17113,95,17009,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220629,NC_045512.2,28883,G,C,PASS,17089,4,17063,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220629,NC_045512.2,5672,C,T,PASS,18,0,18,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned +220629,NC_045512.2,5924,G,A,PASS,11,0,11,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,Unassigned +220629,NC_045512.2,685,AAAGTCATTT,A,PASS,11104,11008,5619,0.51,orf1ab,conservative_inframe_deletion,c.421_429delAAGTCATTT,p.Lys141_Phe143del,p.K141_F143del,ivar,Unassigned +220629,NC_045512.2,694,T,A,PASS,13,9,4,0.31,orf1ab,missense_variant,c.429T>A,p.Phe143Leu,p.F143L,ivar,Unassigned +220629,NC_045512.2,7690,A,G,PASS,29,21,8,0.28,orf1ab,synonymous_variant,c.7425A>G,p.Leu2475Leu,p.L2475L,ivar,Unassigned +220629,NC_045512.2,76,T,A,PASS,18,6,12,0.67,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220629,NC_045512.2,8393,G,A,PASS,974,1,973,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned 
+220630,NC_045512.2,10449,C,A,PASS,35,0,35,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220630,NC_045512.2,11282,AGTTTGTCTG,A,PASS,64,64,58,0.91,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220630,NC_045512.2,11537,A,G,PASS,134,0,134,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220630,NC_045512.2,13195,T,C,PASS,2893,5,2888,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220630,NC_045512.2,13544,T,C,ft,13,9,4,0.31,orf1ab,synonymous_variant,c.13279T>C,p.Leu4427Leu,p.L4427L,ivar,Unassigned +220630,NC_045512.2,14408,C,T,PASS,69,1,68,0.99,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220630,NC_045512.2,14646,T,A,PASS,123,91,32,0.26,orf1ab,missense_variant,c.14381T>A,p.Leu4794Gln,p.L4794Q,ivar,Unassigned +220630,NC_045512.2,21762,C,T,PASS,121,0,121,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220630,NC_045512.2,21764,ATACATG,A,PASS,122,121,111,0.91,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220630,NC_045512.2,21846,C,T,PASS,117,0,117,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220630,NC_045512.2,23403,A,G,PASS,2458,2,2456,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220630,NC_045512.2,23525,C,T,PASS,2286,6,2280,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220630,NC_045512.2,23599,T,G,PASS,851,0,847,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220630,NC_045512.2,23604,C,A,PASS,823,0,815,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220630,NC_045512.2,23948,G,T,PASS,69,0,69,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220630,NC_045512.2,24130,C,A,PASS,115,0,115,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned 
+220630,NC_045512.2,24424,A,T,PASS,20,0,20,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220630,NC_045512.2,24469,T,A,PASS,294,0,291,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220630,NC_045512.2,24503,C,T,PASS,365,7,356,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220630,NC_045512.2,25000,C,T,PASS,28,0,28,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220630,NC_045512.2,25584,C,T,PASS,14,0,14,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220630,NC_045512.2,26270,C,T,PASS,131,2,129,0.98,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220630,NC_045512.2,26530,A,G,PASS,89,0,89,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220630,NC_045512.2,26577,C,G,PASS,75,0,75,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220630,NC_045512.2,26709,G,A,PASS,82,2,80,0.98,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220630,NC_045512.2,27259,A,C,PASS,79,0,79,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220630,NC_045512.2,27670,G,T,PASS,70,0,70,1.0,ORF7a,missense_variant,c.277G>T,p.Val93Phe,p.V93F,ivar,Unassigned +220630,NC_045512.2,27807,C,T,PASS,105,1,104,0.99,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220630,NC_045512.2,28271,A,T,PASS,798,0,796,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220630,NC_045512.2,28311,C,T,PASS,837,3,833,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220630,NC_045512.2,2832,A,G,PASS,16,0,16,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220630,NC_045512.2,28361,GGAGAACGCA,G,PASS,631,629,451,0.71,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220630,NC_045512.2,28512,C,T,PASS,400,289,104,0.26,N,missense_variant,c.239C>T,p.Pro80Leu,p.P80L,ivar,Unassigned 
+220630,NC_045512.2,28881,GG,AA,PASS,2288,23,2261,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220630,NC_045512.2,28883,G,C,PASS,2275,0,2270,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220630,NC_045512.2,3037,C,T,PASS,20,0,20,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220630,NC_045512.2,728,G,A,ft,19,14,5,0.26,orf1ab,missense_variant,c.463G>A,p.Glu155Lys,p.E155K,ivar,Unassigned +220630,NC_045512.2,8393,G,A,PASS,2760,9,2748,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220631,NC_045512.2,10029,C,T,PASS,12,0,12,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,AY.127 +220631,NC_045512.2,11201,A,G,PASS,84,1,83,0.99,orf1ab,missense_variant,c.10936A>G,p.Thr3646Ala,p.T3646A,ivar,AY.127 +220631,NC_045512.2,11332,A,G,PASS,241,0,241,1.0,orf1ab,synonymous_variant,c.11067A>G,p.Val3689Val,p.V3689V,ivar,AY.127 +220631,NC_045512.2,1420,C,T,PASS,228,2,226,0.99,orf1ab,synonymous_variant,c.1155C>T,p.Ala385Ala,p.A385A,ivar,AY.127 +220631,NC_045512.2,14408,C,T,PASS,45,0,45,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,AY.127 +220631,NC_045512.2,14511,G,C,PASS,84,0,84,1.0,orf1ab,missense_variant,c.14246G>C,p.Arg4749Thr,p.R4749T,ivar,AY.127 +220631,NC_045512.2,15535,G,A,ft,16,12,4,0.25,orf1ab,synonymous_variant,c.15270G>A,p.Lys5090Lys,p.K5090K,ivar,AY.127 +220631,NC_045512.2,16466,C,T,PASS,221,0,221,1.0,orf1ab,missense_variant,c.16201C>T,p.His5401Tyr,p.H5401Y,ivar,AY.127 +220631,NC_045512.2,18788,C,T,PASS,1387,2,1385,1.0,orf1ab,stop_gained,c.18523C>T,p.Gln6175*,p.Q6175*,ivar,AY.127 +220631,NC_045512.2,19220,C,T,PASS,387,0,387,1.0,orf1ab,synonymous_variant,c.18955C>T,p.Leu6319Leu,p.L6319L,ivar,AY.127 +220631,NC_045512.2,21057,C,T,PASS,31,1,30,0.97,orf1ab,missense_variant,c.20792C>T,p.Thr6931Ile,p.T6931I,ivar,AY.127 +220631,NC_045512.2,210,G,T,PASS,39,0,39,1.0,orf1ab,upstream_gene_variant,c.-56G>T,.,.,ivar,AY.127 
+220631,NC_045512.2,21618,C,G,PASS,390,0,390,1.0,S,missense_variant,c.56C>G,p.Thr19Arg,p.T19R,ivar,AY.127 +220631,NC_045512.2,21846,C,T,PASS,138,0,138,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,AY.127 +220631,NC_045512.2,22917,T,G,PASS,10,0,10,1.0,S,missense_variant,c.1355T>G,p.Leu452Arg,p.L452R,ivar,AY.127 +220631,NC_045512.2,23403,A,G,PASS,2800,7,2793,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,AY.127 +220631,NC_045512.2,23604,C,G,PASS,796,0,794,1.0,S,missense_variant,c.2042C>G,p.Pro681Arg,p.P681R,ivar,AY.127 +220631,NC_045512.2,24410,G,A,PASS,75,0,75,1.0,S,missense_variant,c.2848G>A,p.Asp950Asn,p.D950N,ivar,AY.127 +220631,NC_045512.2,25469,C,T,PASS,43,0,43,1.0,ORF3a,missense_variant,c.77C>T,p.Ser26Leu,p.S26L,ivar,AY.127 +220631,NC_045512.2,25518,T,C,PASS,43,0,43,1.0,ORF3a,synonymous_variant,c.126T>C,p.Pro42Pro,p.P42P,ivar,AY.127 +220631,NC_045512.2,25538,G,T,PASS,45,0,45,1.0,ORF3a,missense_variant,c.146G>T,p.Gly49Val,p.G49V,ivar,AY.127 +220631,NC_045512.2,26054,C,A,PASS,83,0,83,1.0,ORF3a,missense_variant,c.662C>A,p.Thr221Lys,p.T221K,ivar,AY.127 +220631,NC_045512.2,26172,G,T,PASS,94,0,92,0.98,ORF3a,missense_variant,c.780G>T,p.Met260Ile,p.M260I,ivar,AY.127 +220631,NC_045512.2,26767,T,C,PASS,376,0,376,1.0,M,missense_variant,c.245T>C,p.Ile82Thr,p.I82T,ivar,AY.127 +220631,NC_045512.2,27590,C,T,PASS,115,1,114,0.99,ORF7a,missense_variant,c.197C>T,p.Ala66Val,p.A66V,ivar,AY.127 +220631,NC_045512.2,27638,T,C,PASS,107,0,107,1.0,ORF7a,missense_variant,c.245T>C,p.Val82Ala,p.V82A,ivar,AY.127 +220631,NC_045512.2,27722,T,C,PASS,91,0,91,1.0,ORF7a,missense_variant,c.329T>C,p.Ile110Thr,p.I110T,ivar,AY.127 +220631,NC_045512.2,27752,C,T,PASS,109,0,109,1.0,ORF7a,missense_variant,c.359C>T,p.Thr120Ile,p.T120I,ivar,AY.127 +220631,NC_045512.2,27874,C,T,PASS,146,0,146,1.0,ORF7b,missense_variant,c.119C>T,p.Thr40Ile,p.T40I,ivar,AY.127 
+220631,NC_045512.2,28247,AGATTTC,A,PASS,974,967,888,0.91,ORF8,conservative_inframe_deletion,c.355_360delGATTTC,p.Asp119_Phe120del,p.D119_F120del,ivar,AY.127 +220631,NC_045512.2,28253,C,A,PASS,105,1,102,0.97,ORF8,missense_variant,c.360C>A,p.Phe120Leu,p.F120L,ivar,AY.127 +220631,NC_045512.2,28270,TA,T,PASS,1218,1212,1147,0.94,N,upstream_gene_variant,c.-3delA,.,.,ivar,AY.127 +220631,NC_045512.2,28378,G,T,PASS,764,0,753,0.99,N,synonymous_variant,c.105G>T,p.Ala35Ala,p.A35A,ivar,AY.127 +220631,NC_045512.2,28461,A,G,PASS,480,0,480,1.0,N,missense_variant,c.188A>G,p.Asp63Gly,p.D63G,ivar,AY.127 +220631,NC_045512.2,28881,G,T,PASS,4129,17,4080,0.99,N,missense_variant,c.608G>T,p.Arg203Met,p.R203M,ivar,AY.127 +220631,NC_045512.2,28916,G,T,PASS,1270,0,1258,0.99,N,missense_variant,c.643G>T,p.Gly215Cys,p.G215C,ivar,AY.127 +220631,NC_045512.2,29402,G,T,PASS,6716,4,6694,1.0,N,missense_variant,c.1129G>T,p.Asp377Tyr,p.D377Y,ivar,AY.127 +220631,NC_045512.2,29734,G,A,PASS,113,0,113,1.0,S,downstream_gene_variant,c.*4350G>A,.,.,ivar,AY.127 +220631,NC_045512.2,29742,G,T,PASS,109,0,109,1.0,S,downstream_gene_variant,c.*4358G>T,.,.,ivar,AY.127 +220631,NC_045512.2,29751,G,T,PASS,113,0,111,0.98,S,downstream_gene_variant,c.*4367G>T,.,.,ivar,AY.127 +220631,NC_045512.2,3037,C,T,PASS,420,0,420,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,AY.127 +220631,NC_045512.2,3411,C,T,PASS,995,12,979,0.98,orf1ab,missense_variant,c.3146C>T,p.Ala1049Val,p.A1049V,ivar,AY.127 +220631,NC_045512.2,4181,G,T,PASS,27,0,27,1.0,orf1ab,missense_variant,c.3916G>T,p.Ala1306Ser,p.A1306S,ivar,AY.127 +220631,NC_045512.2,7526,G,A,PASS,1855,12,1843,0.99,orf1ab,missense_variant,c.7261G>A,p.Val2421Ile,p.V2421I,ivar,AY.127 +220631,NC_045512.2,8010,A,G,PASS,1725,54,1670,0.97,orf1ab,missense_variant,c.7745A>G,p.Asp2582Gly,p.D2582G,ivar,AY.127 +220631,NC_045512.2,8986,C,T,PASS,22,0,22,1.0,orf1ab,synonymous_variant,c.8721C>T,p.Asp2907Asp,p.D2907D,ivar,AY.127 
+220631,NC_045512.2,9053,G,T,PASS,28,0,28,1.0,orf1ab,missense_variant,c.8788G>T,p.Val2930Leu,p.V2930L,ivar,AY.127 +220633,NC_045512.2,10029,C,T,PASS,30,0,30,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.17 +220633,NC_045512.2,10449,C,A,PASS,122,0,122,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.17 +220633,NC_045512.2,11282,AGTTTGTCTG,A,PASS,246,246,223,0.91,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.17 +220633,NC_045512.2,11537,A,G,PASS,139,0,139,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.17 +220633,NC_045512.2,13195,T,C,PASS,7290,16,7274,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.17 +220633,NC_045512.2,14408,C,T,PASS,146,0,146,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.17 +220633,NC_045512.2,18163,A,G,PASS,30,0,30,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.17 +220633,NC_045512.2,21762,C,T,PASS,154,0,154,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.17 +220633,NC_045512.2,21764,ATACATG,A,PASS,156,154,140,0.9,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.17 +220633,NC_045512.2,21846,C,T,PASS,158,0,158,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.17 +220633,NC_045512.2,22578,G,A,PASS,20,0,20,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.17 +220633,NC_045512.2,22673,TC,CT,PASS,11,0,11,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.17 +220633,NC_045512.2,22679,T,C,PASS,13,0,13,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.17 +220633,NC_045512.2,22686,C,T,PASS,13,0,13,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.17 +220633,NC_045512.2,23403,A,G,PASS,3760,2,3757,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.17 
+220633,NC_045512.2,23525,C,T,PASS,3604,13,3581,0.99,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.17 +220633,NC_045512.2,23599,T,G,PASS,1393,0,1391,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.17 +220633,NC_045512.2,23604,C,A,PASS,1340,0,1337,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.17 +220633,NC_045512.2,23854,C,A,PASS,60,0,60,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.17 +220633,NC_045512.2,23948,G,T,PASS,659,0,658,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.17 +220633,NC_045512.2,24130,C,A,PASS,1017,1,1009,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.17 +220633,NC_045512.2,24424,A,T,PASS,219,1,216,0.99,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.17 +220633,NC_045512.2,24469,T,A,PASS,888,0,885,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.17 +220633,NC_045512.2,24503,C,T,PASS,1069,13,1054,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.17 +220633,NC_045512.2,25000,C,T,PASS,103,2,101,0.98,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.17 +220633,NC_045512.2,25584,C,T,PASS,173,0,173,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.17 +220633,NC_045512.2,26270,C,T,PASS,598,0,597,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.17 +220633,NC_045512.2,26530,A,G,PASS,161,0,161,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.17 +220633,NC_045512.2,26577,C,G,PASS,181,0,181,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.17 +220633,NC_045512.2,26709,G,A,PASS,150,0,150,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.17 +220633,NC_045512.2,27259,A,C,PASS,464,0,464,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.17 +220633,NC_045512.2,27670,G,T,PASS,112,0,111,0.99,ORF7a,missense_variant,c.277G>T,p.Val93Phe,p.V93F,ivar,BA.1.17 
+220633,NC_045512.2,27807,C,T,PASS,123,1,122,0.99,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.17 +220633,NC_045512.2,28271,A,T,PASS,1502,5,1488,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.17 +220633,NC_045512.2,28311,C,T,PASS,1544,2,1540,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.17 +220633,NC_045512.2,2832,A,G,PASS,135,0,135,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.17 +220633,NC_045512.2,28361,GGAGAACGCA,G,PASS,1103,1083,619,0.56,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.17 +220633,NC_045512.2,28881,GG,AA,PASS,5114,72,5038,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.17 +220633,NC_045512.2,28883,G,C,PASS,5062,6,5048,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.17 +220633,NC_045512.2,3037,C,T,PASS,35,0,35,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.17 +220633,NC_045512.2,5672,C,T,PASS,153,0,153,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,BA.1.17 +220633,NC_045512.2,5924,G,A,PASS,66,0,66,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,BA.1.17 +220633,NC_045512.2,8393,G,A,PASS,5806,16,5787,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.17 +220634,NC_045512.2,11282,AGTTTGTCTG,A,PASS,61,61,50,0.82,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220634,NC_045512.2,11537,A,G,PASS,27,0,27,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220634,NC_045512.2,13195,T,C,PASS,1103,6,1097,0.99,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220634,NC_045512.2,21762,C,T,PASS,24,0,24,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220634,NC_045512.2,21764,ATACATG,A,PASS,24,24,19,0.79,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned 
+220634,NC_045512.2,21846,C,T,PASS,25,0,25,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220634,NC_045512.2,23403,A,G,PASS,955,2,953,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220634,NC_045512.2,23525,C,T,PASS,930,0,928,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220634,NC_045512.2,23599,T,G,PASS,394,0,394,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220634,NC_045512.2,23604,C,A,PASS,379,0,374,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220634,NC_045512.2,23948,G,T,PASS,35,0,35,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220634,NC_045512.2,24130,C,A,PASS,54,0,54,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220634,NC_045512.2,24424,A,T,PASS,41,0,41,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220634,NC_045512.2,24469,T,A,PASS,182,0,182,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220634,NC_045512.2,24503,C,T,PASS,193,4,189,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220634,NC_045512.2,25584,C,T,PASS,20,0,20,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220634,NC_045512.2,26270,C,T,PASS,186,2,184,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220634,NC_045512.2,26465,T,C,ft,11,8,3,0.27,E,missense_variant,c.221T>C,p.Leu74Pro,p.L74P,ivar,Unassigned +220634,NC_045512.2,26530,A,G,PASS,202,0,202,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220634,NC_045512.2,26577,C,G,PASS,209,0,209,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220634,NC_045512.2,26709,G,A,PASS,216,0,216,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220634,NC_045512.2,27259,A,C,PASS,90,0,90,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned 
+220634,NC_045512.2,27384,T,C,PASS,72,0,72,1.0,ORF6,synonymous_variant,c.183T>C,p.Asp61Asp,p.D61D,ivar,Unassigned +220634,NC_045512.2,27807,C,T,PASS,34,0,34,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220634,NC_045512.2,28271,A,T,PASS,256,0,254,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220634,NC_045512.2,28311,C,T,PASS,270,0,270,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220634,NC_045512.2,2832,A,G,PASS,23,0,23,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220634,NC_045512.2,28361,GGAGAACGCA,G,PASS,202,201,147,0.73,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220634,NC_045512.2,28881,GG,AA,PASS,7905,96,7806,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220634,NC_045512.2,28883,G,C,PASS,7846,1,7833,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220634,NC_045512.2,5672,C,T,PASS,24,0,24,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned +220634,NC_045512.2,5924,G,A,PASS,12,0,12,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,Unassigned +220634,NC_045512.2,7984,T,C,PASS,214,0,214,1.0,orf1ab,synonymous_variant,c.7719T>C,p.Asp2573Asp,p.D2573D,ivar,Unassigned +220634,NC_045512.2,8393,G,A,PASS,2278,6,2271,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220635,NC_045512.2,10029,C,T,PASS,10,0,10,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,Unassigned +220635,NC_045512.2,10135,T,C,PASS,12,0,12,1.0,orf1ab,synonymous_variant,c.9870T>C,p.Leu3290Leu,p.L3290L,ivar,Unassigned +220635,NC_045512.2,10449,C,A,PASS,28,0,28,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220635,NC_045512.2,11074,CT,C,PASS,32,30,9,0.28,orf1ab,frameshift_variant,c.10817delT,p.Leu3606fs,p.L3606fs,ivar,Unassigned 
+220635,NC_045512.2,11282,AGTTTGTCTG,A,PASS,114,114,105,0.92,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220635,NC_045512.2,11537,A,G,PASS,25,0,25,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220635,NC_045512.2,12864,A,C,PASS,80,0,78,0.98,orf1ab,missense_variant,c.12599A>C,p.Asp4200Ala,p.D4200A,ivar,Unassigned +220635,NC_045512.2,13195,T,C,PASS,1827,0,1827,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220635,NC_045512.2,14408,C,T,PASS,15,0,15,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220635,NC_045512.2,21762,C,T,PASS,33,0,33,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220635,NC_045512.2,21764,ATACATG,A,PASS,35,35,33,0.94,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220635,NC_045512.2,21846,C,T,PASS,33,0,33,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220635,NC_045512.2,21856,T,C,PASS,30,0,30,1.0,S,synonymous_variant,c.294T>C,p.Ser98Ser,p.S98S,ivar,Unassigned +220635,NC_045512.2,22578,G,A,PASS,15,0,15,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,Unassigned +220635,NC_045512.2,23403,A,G,PASS,713,2,711,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220635,NC_045512.2,23525,C,T,PASS,852,0,848,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220635,NC_045512.2,23599,T,G,PASS,337,0,335,0.99,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220635,NC_045512.2,23604,C,A,PASS,327,0,325,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220635,NC_045512.2,23854,C,A,PASS,10,0,10,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned +220635,NC_045512.2,23948,G,T,PASS,68,0,65,0.96,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned 
+220635,NC_045512.2,24130,C,A,PASS,110,0,110,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220635,NC_045512.2,24424,A,T,PASS,52,0,52,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220635,NC_045512.2,24469,T,A,PASS,197,0,197,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220635,NC_045512.2,24503,C,T,PASS,238,6,232,0.97,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220635,NC_045512.2,25584,C,T,PASS,32,0,32,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220635,NC_045512.2,25708,C,T,PASS,39,0,39,1.0,ORF3a,missense_variant,c.316C>T,p.Leu106Phe,p.L106F,ivar,Unassigned +220635,NC_045512.2,26270,C,T,PASS,278,0,276,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220635,NC_045512.2,26530,A,G,PASS,71,1,70,0.99,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220635,NC_045512.2,26577,C,G,PASS,96,0,96,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220635,NC_045512.2,26709,G,A,PASS,76,0,76,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220635,NC_045512.2,27259,A,C,PASS,122,0,122,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220635,NC_045512.2,27807,C,T,PASS,19,0,19,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220635,NC_045512.2,28271,A,T,PASS,593,0,592,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220635,NC_045512.2,28311,C,T,PASS,617,0,615,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220635,NC_045512.2,2832,A,G,PASS,17,0,17,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220635,NC_045512.2,28361,GGAGAACGCA,G,PASS,483,482,327,0.68,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220635,NC_045512.2,28881,GG,AA,PASS,6445,87,6352,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned 
+220635,NC_045512.2,28883,G,C,PASS,6389,4,6374,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220635,NC_045512.2,29301,A,G,PASS,2221,1,2219,1.0,N,missense_variant,c.1028A>G,p.Asp343Gly,p.D343G,ivar,Unassigned +220635,NC_045512.2,3105,A,G,PASS,110,2,108,0.98,orf1ab,missense_variant,c.2840A>G,p.Tyr947Cys,p.Y947C,ivar,Unassigned +220635,NC_045512.2,76,T,A,ft,19,13,6,0.32,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220635,NC_045512.2,78,T,G,ft,19,13,6,0.32,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,Unassigned +220635,NC_045512.2,8393,G,A,PASS,2010,9,2001,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220636,NC_045512.2,10449,C,A,PASS,12,0,12,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220636,NC_045512.2,11282,AGTTTGTCTG,A,PASS,40,40,39,0.98,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220636,NC_045512.2,11537,A,G,PASS,38,1,37,0.97,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220636,NC_045512.2,12864,A,C,PASS,59,0,59,1.0,orf1ab,missense_variant,c.12599A>C,p.Asp4200Ala,p.D4200A,ivar,Unassigned +220636,NC_045512.2,13195,T,C,PASS,2304,3,2301,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220636,NC_045512.2,14408,C,T,PASS,18,0,18,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220636,NC_045512.2,18047,A,T,PASS,12,4,8,0.67,orf1ab,missense_variant,c.17782A>T,p.Met5928Leu,p.M5928L,ivar,Unassigned +220636,NC_045512.2,21762,C,T,PASS,21,0,21,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220636,NC_045512.2,21764,ATACATG,A,PASS,24,21,21,0.88,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220636,NC_045512.2,21846,C,T,PASS,36,0,36,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned 
+220636,NC_045512.2,21856,T,C,PASS,38,0,38,1.0,S,synonymous_variant,c.294T>C,p.Ser98Ser,p.S98S,ivar,Unassigned +220636,NC_045512.2,23403,A,G,PASS,706,2,704,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220636,NC_045512.2,23525,C,T,PASS,762,0,762,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220636,NC_045512.2,23599,T,G,PASS,299,0,299,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220636,NC_045512.2,23604,C,A,PASS,295,0,295,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220636,NC_045512.2,23948,G,T,PASS,33,0,33,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220636,NC_045512.2,24130,C,A,PASS,39,0,38,0.97,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220636,NC_045512.2,24424,A,T,PASS,21,0,21,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220636,NC_045512.2,24469,T,A,PASS,277,0,277,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220636,NC_045512.2,24503,C,T,PASS,304,1,303,1.0,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220636,NC_045512.2,25584,C,T,PASS,23,0,23,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220636,NC_045512.2,25708,C,T,PASS,18,0,18,1.0,ORF3a,missense_variant,c.316C>T,p.Leu106Phe,p.L106F,ivar,Unassigned +220636,NC_045512.2,26270,C,T,PASS,340,2,338,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220636,NC_045512.2,26530,A,G,PASS,121,0,121,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220636,NC_045512.2,26577,C,G,PASS,140,0,140,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220636,NC_045512.2,26709,G,A,PASS,138,0,138,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220636,NC_045512.2,27259,A,C,PASS,28,0,28,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned 
+220636,NC_045512.2,2754,T,C,PASS,102,1,101,0.99,orf1ab,missense_variant,c.2489T>C,p.Val830Ala,p.V830A,ivar,Unassigned +220636,NC_045512.2,27711,A,G,ft,13,9,4,0.31,ORF7a,synonymous_variant,c.318A>G,p.Ala106Ala,p.A106A,ivar,Unassigned +220636,NC_045512.2,27807,C,T,PASS,15,1,14,0.93,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220636,NC_045512.2,28271,A,T,PASS,490,1,489,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220636,NC_045512.2,28311,C,T,PASS,483,1,480,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220636,NC_045512.2,2832,A,G,PASS,132,0,132,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220636,NC_045512.2,28361,GGAGAACGCA,G,PASS,377,373,254,0.67,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220636,NC_045512.2,28881,GG,AA,PASS,8195,126,8059,0.98,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220636,NC_045512.2,28883,G,C,PASS,8109,0,8105,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220636,NC_045512.2,29301,A,G,PASS,3626,4,3622,1.0,N,missense_variant,c.1028A>G,p.Asp343Gly,p.D343G,ivar,Unassigned +220636,NC_045512.2,3105,A,G,PASS,56,0,56,1.0,orf1ab,missense_variant,c.2840A>G,p.Tyr947Cys,p.Y947C,ivar,Unassigned +220636,NC_045512.2,4097,T,TC,ft,17,17,5,0.29,orf1ab,frameshift_variant,c.3832_3833insC,p.Phe1278fs,p.F1278fs,ivar,Unassigned +220636,NC_045512.2,4101,T,C,ft,18,12,6,0.33,orf1ab,missense_variant,c.3836T>C,p.Leu1279Ser,p.L1279S,ivar,Unassigned +220636,NC_045512.2,4166,A,T,PASS,52,26,26,0.5,orf1ab,missense_variant,c.3901A>T,p.Ile1301Leu,p.I1301L,ivar,Unassigned +220636,NC_045512.2,4171,T,C,PASS,51,25,26,0.51,orf1ab,synonymous_variant,c.3906T>C,p.Pro1302Pro,p.P1302P,ivar,Unassigned +220636,NC_045512.2,728,G,A,ft,11,8,3,0.27,orf1ab,missense_variant,c.463G>A,p.Glu155Lys,p.E155K,ivar,Unassigned 
+220636,NC_045512.2,76,T,A,PASS,10,3,7,0.7,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220636,NC_045512.2,78,T,G,PASS,10,3,7,0.7,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,Unassigned +220636,NC_045512.2,8393,G,A,PASS,1312,23,1286,0.98,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220637,NC_045512.2,10029,C,T,PASS,34,0,34,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.17 +220637,NC_045512.2,10449,C,A,PASS,121,0,121,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.17 +220637,NC_045512.2,11282,AGTTTGTCTG,A,PASS,312,310,269,0.86,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.17 +220637,NC_045512.2,11537,A,G,PASS,289,0,289,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.17 +220637,NC_045512.2,13195,T,C,PASS,7412,12,7399,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.17 +220637,NC_045512.2,14408,C,T,PASS,177,2,175,0.99,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.17 +220637,NC_045512.2,18163,A,G,PASS,30,0,30,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.17 +220637,NC_045512.2,21762,C,T,PASS,275,0,275,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.17 +220637,NC_045512.2,21764,ATACATG,A,PASS,280,276,255,0.91,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.17 +220637,NC_045512.2,21846,C,T,PASS,292,0,292,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.17 +220637,NC_045512.2,22578,G,A,PASS,25,0,25,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.17 +220637,NC_045512.2,22673,TC,CT,PASS,23,0,23,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.17 +220637,NC_045512.2,22679,T,C,PASS,29,0,29,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.17 
+220637,NC_045512.2,22686,C,T,PASS,28,0,28,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.17 +220637,NC_045512.2,23202,C,A,PASS,20,0,20,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.17 +220637,NC_045512.2,23403,A,G,PASS,3875,6,3869,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.17 +220637,NC_045512.2,23525,C,T,PASS,3568,12,3551,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.17 +220637,NC_045512.2,23599,T,G,PASS,1289,0,1286,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.17 +220637,NC_045512.2,23604,C,A,PASS,1255,0,1246,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.17 +220637,NC_045512.2,23854,C,A,PASS,90,0,90,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.17 +220637,NC_045512.2,23948,G,T,PASS,810,0,810,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.17 +220637,NC_045512.2,24130,C,A,PASS,1321,0,1311,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.17 +220637,NC_045512.2,24424,A,T,PASS,329,2,323,0.98,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.17 +220637,NC_045512.2,24469,T,A,PASS,1351,1,1347,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.17 +220637,NC_045512.2,24503,C,T,PASS,1588,29,1552,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.17 +220637,NC_045512.2,25000,C,T,PASS,121,0,121,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.17 +220637,NC_045512.2,25584,C,T,PASS,173,0,173,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.17 +220637,NC_045512.2,26270,C,T,PASS,1076,2,1070,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.17 +220637,NC_045512.2,26530,A,G,PASS,346,2,344,0.99,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.17 +220637,NC_045512.2,26577,C,G,PASS,342,0,342,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.17 
+220637,NC_045512.2,26709,G,A,PASS,373,1,371,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.17 +220637,NC_045512.2,27259,A,C,PASS,412,0,412,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.17 +220637,NC_045512.2,27807,C,T,PASS,128,0,127,0.99,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.17 +220637,NC_045512.2,28271,A,T,PASS,1853,2,1851,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.17 +220637,NC_045512.2,28311,C,T,PASS,1865,8,1857,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.17 +220637,NC_045512.2,2832,A,G,PASS,140,0,140,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.17 +220637,NC_045512.2,28361,GGAGAACGCA,G,PASS,1461,1454,930,0.64,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.17 +220637,NC_045512.2,28363,A,T,PASS,14,10,4,0.29,N,synonymous_variant,c.90A>T,p.Gly30Gly,p.G30G,ivar,BA.1.17 +220637,NC_045512.2,28881,GG,AA,PASS,3791,57,3732,0.98,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.17 +220637,NC_045512.2,28883,G,C,PASS,3743,3,3735,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.17 +220637,NC_045512.2,29772,T,C,PASS,48,1,47,0.98,S,downstream_gene_variant,c.*4388T>C,.,.,ivar,BA.1.17 +220637,NC_045512.2,3037,C,T,PASS,52,3,49,0.94,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.17 +220637,NC_045512.2,5386,T,G,PASS,22,0,22,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1.17 +220637,NC_045512.2,5672,C,T,PASS,278,0,278,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,BA.1.17 +220637,NC_045512.2,5924,G,A,PASS,105,0,105,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,BA.1.17 +220637,NC_045512.2,8393,G,A,PASS,3953,20,3931,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.17 +220637,NC_045512.2,8652,T,C,PASS,3517,5,3510,1.0,orf1ab,missense_variant,c.8387T>C,p.Met2796Thr,p.M2796T,ivar,BA.1.17 
+220638,NC_045512.2,10029,C,T,PASS,19,0,19,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.17 +220638,NC_045512.2,10449,C,A,PASS,95,0,95,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.17 +220638,NC_045512.2,1051,A,G,PASS,3027,5,3022,1.0,orf1ab,synonymous_variant,c.786A>G,p.Lys262Lys,p.K262K,ivar,BA.1.17 +220638,NC_045512.2,11282,AGTTTGTCTG,A,PASS,254,252,219,0.86,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.17 +220638,NC_045512.2,11537,A,G,PASS,215,0,215,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.17 +220638,NC_045512.2,13195,T,C,PASS,4423,4,4419,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.17 +220638,NC_045512.2,14408,C,T,PASS,120,3,117,0.98,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.17 +220638,NC_045512.2,15359,G,A,ft,11,5,5,0.45,orf1ab,missense_variant,c.15094G>A,p.Ala5032Thr,p.A5032T,ivar,BA.1.17 +220638,NC_045512.2,16887,C,T,PASS,704,4,700,0.99,orf1ab,missense_variant,c.16622C>T,p.Thr5541Ile,p.T5541I,ivar,BA.1.17 +220638,NC_045512.2,18163,A,G,PASS,21,0,21,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.17 +220638,NC_045512.2,18326,C,T,PASS,7551,39,7508,0.99,orf1ab,synonymous_variant,c.18061C>T,p.Leu6021Leu,p.L6021L,ivar,BA.1.17 +220638,NC_045512.2,21762,C,T,PASS,218,0,218,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.17 +220638,NC_045512.2,21764,ATACATG,A,PASS,227,218,201,0.89,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.17 +220638,NC_045512.2,21846,C,T,PASS,221,0,219,0.99,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.17 +220638,NC_045512.2,22193,AATT,A,PASS,11,11,11,1.0,S,disruptive_inframe_deletion,c.632_634delATT,p.Asn211_Leu212delinsIle,p.N211_L212delinsI,ivar,BA.1.17 
+220638,NC_045512.2,22204,T,TGAGCCAGAA,ft,11,11,9,0.82,S,disruptive_inframe_insertion,c.644_645insGCCAGAAGA,p.Arg214_Asp215insGluProGlu,p.R214_D215insEPE,ivar,BA.1.17 +220638,NC_045512.2,22578,G,A,PASS,17,0,17,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.17 +220638,NC_045512.2,22673,T,C,PASS,10,0,10,1.0,S,missense_variant,c.1111T>C,p.Ser371Pro,p.S371P,ivar,BA.1.17 +220638,NC_045512.2,22679,T,C,PASS,18,0,18,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.17 +220638,NC_045512.2,22686,C,T,PASS,18,0,18,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.17 +220638,NC_045512.2,23013,A,C,PASS,10,0,10,1.0,S,missense_variant,c.1451A>C,p.Glu484Ala,p.E484A,ivar,BA.1.17 +220638,NC_045512.2,23040,A,G,PASS,15,0,15,1.0,S,missense_variant,c.1478A>G,p.Gln493Arg,p.Q493R,ivar,BA.1.17 +220638,NC_045512.2,23048,G,A,PASS,15,0,15,1.0,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1.17 +220638,NC_045512.2,23055,A,G,PASS,15,0,15,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1.17 +220638,NC_045512.2,23063,A,T,PASS,15,0,15,1.0,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1.17 +220638,NC_045512.2,23075,T,C,PASS,17,0,17,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1.17 +220638,NC_045512.2,23202,C,A,PASS,24,0,24,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.17 +220638,NC_045512.2,23403,A,G,PASS,3470,2,3468,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.17 +220638,NC_045512.2,23525,C,T,PASS,3180,6,3174,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.17 +220638,NC_045512.2,23599,T,G,PASS,1176,0,1176,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.17 +220638,NC_045512.2,23604,C,A,PASS,1131,2,1124,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.17 +220638,NC_045512.2,23854,C,A,PASS,57,0,57,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.17 
+220638,NC_045512.2,23948,G,T,PASS,760,0,755,0.99,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.17 +220638,NC_045512.2,24130,C,A,PASS,1140,1,1136,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.17 +220638,NC_045512.2,24424,A,T,PASS,228,2,224,0.98,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.17 +220638,NC_045512.2,24469,T,A,PASS,848,0,844,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.17 +220638,NC_045512.2,24503,C,T,PASS,979,18,961,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.17 +220638,NC_045512.2,25000,C,T,PASS,110,2,108,0.98,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.17 +220638,NC_045512.2,25584,C,T,PASS,133,0,133,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.17 +220638,NC_045512.2,26270,C,T,PASS,779,0,779,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.17 +220638,NC_045512.2,26530,A,G,PASS,350,0,350,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.17 +220638,NC_045512.2,26577,C,G,PASS,369,0,369,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.17 +220638,NC_045512.2,26709,G,A,PASS,422,0,422,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.17 +220638,NC_045512.2,27259,A,C,PASS,515,0,515,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.17 +220638,NC_045512.2,27384,T,C,PASS,601,0,601,1.0,ORF6,synonymous_variant,c.183T>C,p.Asp61Asp,p.D61D,ivar,BA.1.17 +220638,NC_045512.2,27807,C,T,PASS,135,1,134,0.99,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.17 +220638,NC_045512.2,28271,A,T,PASS,1839,6,1831,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.17 +220638,NC_045512.2,28311,C,T,PASS,1989,8,1976,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.17 +220638,NC_045512.2,2832,A,G,PASS,111,2,109,0.98,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.17 
+220638,NC_045512.2,28361,GGAGAACGCA,G,PASS,1474,1463,704,0.48,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.17 +220638,NC_045512.2,28881,GG,AA,PASS,2234,12,2219,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.17 +220638,NC_045512.2,28883,G,C,PASS,2229,0,2227,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.17 +220638,NC_045512.2,3037,C,T,PASS,41,0,41,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.17 +220638,NC_045512.2,5672,C,T,PASS,257,0,257,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,BA.1.17 +220638,NC_045512.2,5924,G,A,PASS,85,0,85,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,BA.1.17 +220638,NC_045512.2,7984,T,C,PASS,1178,0,1178,1.0,orf1ab,synonymous_variant,c.7719T>C,p.Asp2573Asp,p.D2573D,ivar,BA.1.17 +220638,NC_045512.2,8393,G,A,PASS,3255,8,3241,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.17 +220638,NC_045512.2,9430,C,T,ft,14,9,5,0.36,orf1ab,synonymous_variant,c.9165C>T,p.Ile3055Ile,p.I3055I,ivar,BA.1.17 +220639,NC_045512.2,10449,C,A,PASS,16,0,16,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220639,NC_045512.2,11282,AGTTTGTCTG,A,PASS,57,57,52,0.91,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220639,NC_045512.2,11537,A,G,PASS,59,0,59,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220639,NC_045512.2,13195,T,C,PASS,3114,3,3110,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220639,NC_045512.2,14408,C,T,PASS,24,0,23,0.96,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220639,NC_045512.2,20337,T,C,ft,10,7,3,0.3,orf1ab,missense_variant,c.20072T>C,p.Ile6691Thr,p.I6691T,ivar,Unassigned +220639,NC_045512.2,21762,C,T,PASS,38,0,38,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned 
+220639,NC_045512.2,21764,ATACATG,A,PASS,38,38,36,0.95,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220639,NC_045512.2,21846,C,T,PASS,40,0,40,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220639,NC_045512.2,23403,A,G,PASS,1179,2,1177,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220639,NC_045512.2,23525,C,T,PASS,1112,0,1112,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220639,NC_045512.2,23599,T,G,PASS,414,0,414,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220639,NC_045512.2,23604,C,A,PASS,400,0,399,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220639,NC_045512.2,23948,G,T,PASS,80,0,80,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220639,NC_045512.2,24130,C,A,PASS,119,0,119,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220639,NC_045512.2,24424,A,T,PASS,14,0,14,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220639,NC_045512.2,24469,T,A,PASS,311,2,309,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220639,NC_045512.2,24503,C,T,PASS,388,8,380,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220639,NC_045512.2,25570,T,C,ft,20,14,6,0.3,ORF3a,missense_variant,c.178T>C,p.Ser60Pro,p.S60P,ivar,Unassigned +220639,NC_045512.2,25584,C,T,PASS,20,0,20,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220639,NC_045512.2,26270,C,T,PASS,53,0,53,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220639,NC_045512.2,26709,G,A,PASS,11,0,11,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220639,NC_045512.2,27259,A,C,PASS,35,0,35,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220639,NC_045512.2,27807,C,T,PASS,21,0,21,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned 
+220639,NC_045512.2,28271,A,T,PASS,460,2,456,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220639,NC_045512.2,28311,C,T,PASS,473,0,471,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220639,NC_045512.2,2832,A,G,PASS,17,0,17,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220639,NC_045512.2,28361,GGAGAACGCA,G,PASS,321,319,210,0.65,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220639,NC_045512.2,28881,GG,AA,PASS,6361,88,6267,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220639,NC_045512.2,28883,G,C,PASS,6288,3,6278,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220639,NC_045512.2,5672,C,T,PASS,35,0,35,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned +220639,NC_045512.2,5924,G,A,PASS,10,0,10,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,Unassigned +220639,NC_045512.2,685,AAAGTCATTT,A,PASS,11186,11106,5685,0.51,orf1ab,conservative_inframe_deletion,c.421_429delAAGTCATTT,p.Lys141_Phe143del,p.K141_F143del,ivar,Unassigned +220639,NC_045512.2,8393,G,A,PASS,1511,2,1506,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220640,NC_045512.2,10449,C,A,PASS,18,0,18,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220640,NC_045512.2,11282,AGTTTGTCTG,A,PASS,43,43,43,1.0,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220640,NC_045512.2,11537,A,G,PASS,55,0,55,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220640,NC_045512.2,13195,T,C,PASS,2220,3,2217,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220640,NC_045512.2,14408,C,T,PASS,29,0,29,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned 
+220640,NC_045512.2,19834,C,T,ft,12,8,4,0.33,orf1ab,synonymous_variant,c.19569C>T,p.Tyr6523Tyr,p.Y6523Y,ivar,Unassigned +220640,NC_045512.2,21762,C,T,PASS,43,0,43,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220640,NC_045512.2,21764,ATACATG,A,PASS,43,43,42,0.98,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220640,NC_045512.2,21846,C,T,PASS,37,0,37,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220640,NC_045512.2,23403,A,G,PASS,1160,1,1159,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220640,NC_045512.2,23525,C,T,PASS,1142,3,1139,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220640,NC_045512.2,23599,T,G,PASS,447,0,447,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220640,NC_045512.2,23604,C,A,PASS,424,0,424,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220640,NC_045512.2,23948,G,T,PASS,99,0,97,0.98,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220640,NC_045512.2,24130,C,A,PASS,179,0,177,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220640,NC_045512.2,24424,A,T,PASS,40,0,40,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220640,NC_045512.2,24469,T,A,PASS,333,0,332,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220640,NC_045512.2,24503,C,T,PASS,408,4,404,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220640,NC_045512.2,25000,C,T,PASS,14,0,14,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220640,NC_045512.2,25584,C,T,PASS,32,0,32,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220640,NC_045512.2,26270,C,T,PASS,220,0,220,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220640,NC_045512.2,26530,A,G,PASS,150,2,148,0.99,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned 
+220640,NC_045512.2,26577,C,G,PASS,136,0,136,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220640,NC_045512.2,26709,G,A,PASS,135,0,135,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220640,NC_045512.2,27259,A,C,PASS,62,0,62,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220640,NC_045512.2,27807,C,T,PASS,19,0,19,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220640,NC_045512.2,28271,A,T,PASS,374,0,374,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220640,NC_045512.2,28311,C,T,PASS,386,0,386,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220640,NC_045512.2,2832,A,G,PASS,47,0,47,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220640,NC_045512.2,28361,GGAGAACGCA,G,PASS,306,304,221,0.72,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220640,NC_045512.2,28881,GG,AA,PASS,6266,77,6185,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220640,NC_045512.2,28883,G,C,PASS,6208,1,6194,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220640,NC_045512.2,3037,C,T,PASS,10,0,10,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220640,NC_045512.2,3241,C,T,PASS,261,0,261,1.0,orf1ab,synonymous_variant,c.2976C>T,p.Asp992Asp,p.D992D,ivar,Unassigned +220640,NC_045512.2,5924,G,A,PASS,11,0,11,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,Unassigned +220640,NC_045512.2,76,T,A,ft,15,9,6,0.4,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220640,NC_045512.2,78,T,G,ft,15,9,6,0.4,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,Unassigned +220640,NC_045512.2,8393,G,A,PASS,1676,2,1671,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220642,NC_045512.2,11072,TTC,T,ft,16,14,4,0.25,orf1ab,frameshift_variant,c.10809_10810delCT,p.Leu3606fs,p.L3606fs,ivar,B.1.617.2 
+220642,NC_045512.2,11201,A,G,PASS,11,0,11,1.0,orf1ab,missense_variant,c.10936A>G,p.Thr3646Ala,p.T3646A,ivar,B.1.617.2 +220642,NC_045512.2,11332,A,G,PASS,27,0,26,0.96,orf1ab,synonymous_variant,c.11067A>G,p.Val3689Val,p.V3689V,ivar,B.1.617.2 +220642,NC_045512.2,14646,T,A,ft,23,17,6,0.26,orf1ab,missense_variant,c.14381T>A,p.Leu4794Gln,p.L4794Q,ivar,B.1.617.2 +220642,NC_045512.2,15952,C,A,PASS,22,0,22,1.0,orf1ab,synonymous_variant,c.15687C>A,p.Ser5229Ser,p.S5229S,ivar,B.1.617.2 +220642,NC_045512.2,16466,C,T,PASS,41,0,41,1.0,orf1ab,missense_variant,c.16201C>T,p.His5401Tyr,p.H5401Y,ivar,B.1.617.2 +220642,NC_045512.2,18744,C,T,PASS,99,1,98,0.99,orf1ab,missense_variant,c.18479C>T,p.Thr6160Met,p.T6160M,ivar,B.1.617.2 +220642,NC_045512.2,19220,C,T,PASS,111,0,110,0.99,orf1ab,synonymous_variant,c.18955C>T,p.Leu6319Leu,p.L6319L,ivar,B.1.617.2 +220642,NC_045512.2,19590,T,C,ft,11,8,3,0.27,orf1ab,missense_variant,c.19325T>C,p.Ile6442Thr,p.I6442T,ivar,B.1.617.2 +220642,NC_045512.2,210,G,T,PASS,18,0,18,1.0,orf1ab,upstream_gene_variant,c.-56G>T,.,.,ivar,B.1.617.2 +220642,NC_045512.2,21618,C,G,PASS,12,0,12,1.0,S,missense_variant,c.56C>G,p.Thr19Arg,p.T19R,ivar,B.1.617.2 +220642,NC_045512.2,21682,C,T,PASS,23,0,23,1.0,S,synonymous_variant,c.120C>T,p.Asp40Asp,p.D40D,ivar,B.1.617.2 +220642,NC_045512.2,23403,A,G,PASS,557,0,557,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,B.1.617.2 +220642,NC_045512.2,23604,C,G,PASS,226,0,224,0.99,S,missense_variant,c.2042C>G,p.Pro681Arg,p.P681R,ivar,B.1.617.2 +220642,NC_045512.2,24410,G,A,PASS,21,0,21,1.0,S,missense_variant,c.2848G>A,p.Asp950Asn,p.D950N,ivar,B.1.617.2 +220642,NC_045512.2,26767,T,C,PASS,183,0,183,1.0,M,missense_variant,c.245T>C,p.Ile82Thr,p.I82T,ivar,B.1.617.2 +220642,NC_045512.2,27752,C,T,PASS,10,0,10,1.0,ORF7a,missense_variant,c.359C>T,p.Thr120Ile,p.T120I,ivar,B.1.617.2 +220642,NC_045512.2,27874,C,T,PASS,14,0,14,1.0,ORF7b,missense_variant,c.119C>T,p.Thr40Ile,p.T40I,ivar,B.1.617.2 
+220642,NC_045512.2,28179,G,A,PASS,111,79,32,0.29,ORF8,missense_variant,c.286G>A,p.Gly96Ser,p.G96S,ivar,B.1.617.2 +220642,NC_045512.2,28247,AGATTTC,A,PASS,169,163,155,0.92,ORF8,conservative_inframe_deletion,c.355_360delGATTTC,p.Asp119_Phe120del,p.D119_F120del,ivar,B.1.617.2 +220642,NC_045512.2,28253,C,A,PASS,10,0,10,1.0,ORF8,missense_variant,c.360C>A,p.Phe120Leu,p.F120L,ivar,B.1.617.2 +220642,NC_045512.2,28270,TA,T,PASS,245,245,234,0.96,N,upstream_gene_variant,c.-3delA,.,.,ivar,B.1.617.2 +220642,NC_045512.2,28299,A,T,PASS,251,0,250,1.0,N,missense_variant,c.26A>T,p.Gln9Leu,p.Q9L,ivar,B.1.617.2 +220642,NC_045512.2,28326,G,T,PASS,239,1,236,0.99,N,missense_variant,c.53G>T,p.Gly18Val,p.G18V,ivar,B.1.617.2 +220642,NC_045512.2,28461,A,G,PASS,100,0,100,1.0,N,missense_variant,c.188A>G,p.Asp63Gly,p.D63G,ivar,B.1.617.2 +220642,NC_045512.2,28486,C,A,PASS,108,0,108,1.0,N,synonymous_variant,c.213C>A,p.Gly71Gly,p.G71G,ivar,B.1.617.2 +220642,NC_045512.2,28881,G,T,PASS,10522,44,10412,0.99,N,missense_variant,c.608G>T,p.Arg203Met,p.R203M,ivar,B.1.617.2 +220642,NC_045512.2,28916,G,T,PASS,434,0,429,0.99,N,missense_variant,c.643G>T,p.Gly215Cys,p.G215C,ivar,B.1.617.2 +220642,NC_045512.2,29260,G,T,PASS,1390,0,1387,1.0,N,synonymous_variant,c.987G>T,p.Thr329Thr,p.T329T,ivar,B.1.617.2 +220642,NC_045512.2,29402,G,T,PASS,2068,0,2058,1.0,N,missense_variant,c.1129G>T,p.Asp377Tyr,p.D377Y,ivar,B.1.617.2 +220642,NC_045512.2,3037,C,T,PASS,21,0,21,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,B.1.617.2 +220642,NC_045512.2,7945,C,T,PASS,279,4,275,0.99,orf1ab,synonymous_variant,c.7680C>T,p.Tyr2560Tyr,p.Y2560Y,ivar,B.1.617.2 +220644,NC_045512.2,11282,AGTTTGTCTG,A,PASS,32,32,29,0.91,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220644,NC_045512.2,11537,A,G,PASS,32,0,32,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned 
+220644,NC_045512.2,13195,T,C,PASS,1360,2,1358,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220644,NC_045512.2,14408,C,T,PASS,23,0,23,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220644,NC_045512.2,21762,C,T,PASS,35,0,35,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220644,NC_045512.2,21764,ATACATG,A,PASS,35,35,29,0.83,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220644,NC_045512.2,21846,C,T,PASS,37,4,33,0.89,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220644,NC_045512.2,23403,A,G,PASS,708,0,707,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220644,NC_045512.2,23525,C,T,PASS,716,3,713,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220644,NC_045512.2,23599,T,G,PASS,356,0,356,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220644,NC_045512.2,23604,C,A,PASS,349,0,347,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220644,NC_045512.2,23948,G,T,PASS,71,0,71,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220644,NC_045512.2,24130,C,A,PASS,90,0,90,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220644,NC_045512.2,24263,CA,C,ft,16,16,6,0.38,S,frameshift_variant,c.2704delA,p.Met902fs,p.M902fs,ivar,Unassigned +220644,NC_045512.2,24424,A,T,PASS,16,0,16,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220644,NC_045512.2,24469,T,A,PASS,168,0,168,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220644,NC_045512.2,24503,C,T,PASS,200,4,196,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220644,NC_045512.2,25584,C,T,PASS,24,0,24,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220644,NC_045512.2,26270,C,T,PASS,378,0,378,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned 
+220644,NC_045512.2,26530,A,G,PASS,116,2,114,0.98,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220644,NC_045512.2,26577,C,G,PASS,114,0,114,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220644,NC_045512.2,26709,G,A,PASS,135,0,135,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220644,NC_045512.2,27259,A,C,PASS,111,0,111,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220644,NC_045512.2,2754,T,C,PASS,101,13,88,0.87,orf1ab,missense_variant,c.2489T>C,p.Val830Ala,p.V830A,ivar,Unassigned +220644,NC_045512.2,27621,G,C,PASS,11,0,11,1.0,ORF7a,missense_variant,c.228G>C,p.Gln76His,p.Q76H,ivar,Unassigned +220644,NC_045512.2,27628,GC,G,ft,13,13,4,0.31,ORF7a,frameshift_variant,c.237delC,p.Arg80fs,p.R80fs,ivar,Unassigned +220644,NC_045512.2,27807,C,T,PASS,28,0,28,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220644,NC_045512.2,28271,A,T,PASS,550,2,546,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220644,NC_045512.2,28311,C,T,PASS,561,3,558,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220644,NC_045512.2,2832,A,G,PASS,97,0,97,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220644,NC_045512.2,28361,GGAGAACGCA,G,PASS,409,407,262,0.64,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220644,NC_045512.2,28881,GG,AA,PASS,11494,98,11384,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220644,NC_045512.2,28883,G,C,PASS,11443,5,11421,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220644,NC_045512.2,4097,T,TC,PASS,24,24,14,0.58,orf1ab,frameshift_variant,c.3832_3833insC,p.Phe1278fs,p.F1278fs,ivar,Unassigned +220644,NC_045512.2,4101,T,C,PASS,27,10,17,0.63,orf1ab,missense_variant,c.3836T>C,p.Leu1279Ser,p.L1279S,ivar,Unassigned 
+220644,NC_045512.2,4699,T,C,ft,10,7,3,0.3,orf1ab,synonymous_variant,c.4434T>C,p.Ser1478Ser,p.S1478S,ivar,Unassigned +220644,NC_045512.2,5672,C,T,PASS,13,0,13,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned +220644,NC_045512.2,5924,G,A,PASS,11,0,11,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,Unassigned +220644,NC_045512.2,76,T,A,ft,10,6,4,0.4,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220644,NC_045512.2,78,T,G,ft,10,6,4,0.4,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,Unassigned +220644,NC_045512.2,8393,G,A,PASS,1470,0,1470,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220646,NC_045512.2,10029,C,T,PASS,18,0,18,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,Unassigned +220646,NC_045512.2,10449,C,A,PASS,27,0,27,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220646,NC_045512.2,11282,AGTTTGTCTG,A,PASS,84,84,74,0.88,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220646,NC_045512.2,11537,A,G,PASS,69,0,69,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220646,NC_045512.2,13195,T,C,PASS,3077,6,3071,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220646,NC_045512.2,14408,C,T,PASS,48,0,48,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220646,NC_045512.2,16064,A,G,PASS,37,0,37,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,Unassigned +220646,NC_045512.2,16950,T,A,PASS,136,98,38,0.28,orf1ab,missense_variant,c.16685T>A,p.Leu5562Gln,p.L5562Q,ivar,Unassigned +220646,NC_045512.2,18163,A,G,PASS,17,0,17,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,Unassigned +220646,NC_045512.2,21762,C,T,PASS,45,0,45,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned 
+220646,NC_045512.2,21764,ATACATG,A,PASS,45,45,42,0.93,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220646,NC_045512.2,21846,C,T,PASS,46,0,46,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220646,NC_045512.2,22578,G,A,PASS,10,0,10,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,Unassigned +220646,NC_045512.2,23403,A,G,PASS,773,0,773,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220646,NC_045512.2,23525,C,T,PASS,776,2,774,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220646,NC_045512.2,23599,T,G,PASS,337,0,337,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220646,NC_045512.2,23604,C,A,PASS,326,0,324,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220646,NC_045512.2,23854,C,A,PASS,18,0,17,0.94,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned +220646,NC_045512.2,23948,G,T,PASS,99,0,99,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220646,NC_045512.2,24130,C,A,PASS,158,0,156,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220646,NC_045512.2,24424,A,T,PASS,114,0,114,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220646,NC_045512.2,24469,T,A,PASS,545,6,539,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220646,NC_045512.2,24503,C,T,PASS,626,7,619,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220646,NC_045512.2,25000,C,T,PASS,31,0,31,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220646,NC_045512.2,25584,C,T,PASS,51,0,51,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220646,NC_045512.2,26270,C,T,PASS,516,2,514,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220646,NC_045512.2,26530,A,G,PASS,515,0,515,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned 
+220646,NC_045512.2,26577,C,G,PASS,562,0,562,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220646,NC_045512.2,26709,G,A,PASS,563,2,559,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220646,NC_045512.2,27259,A,C,PASS,166,0,166,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220646,NC_045512.2,27807,C,T,PASS,44,0,44,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220646,NC_045512.2,28271,A,T,PASS,690,0,690,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220646,NC_045512.2,28311,C,T,PASS,717,16,699,0.97,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220646,NC_045512.2,2832,A,G,PASS,37,0,37,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220646,NC_045512.2,28361,GGAGAACGCA,G,PASS,572,566,417,0.73,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220646,NC_045512.2,28881,GG,AA,PASS,8092,117,7963,0.98,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220646,NC_045512.2,28883,G,C,PASS,8026,5,8016,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220646,NC_045512.2,3037,C,T,PASS,15,0,15,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220646,NC_045512.2,8393,G,A,PASS,1947,10,1934,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220647,NC_045512.2,10029,C,T,PASS,27,0,27,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1 +220647,NC_045512.2,10449,C,A,PASS,65,0,65,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1 +220647,NC_045512.2,10647,C,T,PASS,55,12,43,0.78,orf1ab,missense_variant,c.10382C>T,p.Thr3461Ile,p.T3461I,ivar,BA.1 +220647,NC_045512.2,11005,C,T,PASS,16,4,12,0.75,orf1ab,synonymous_variant,c.10740C>T,p.His3580His,p.H3580H,ivar,BA.1 
+220647,NC_045512.2,11282,AGTTTGTCTG,A,PASS,61,61,57,0.93,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1 +220647,NC_045512.2,11537,A,G,PASS,177,0,177,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1 +220647,NC_045512.2,13195,T,C,PASS,6907,12,6895,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1 +220647,NC_045512.2,14408,C,T,PASS,123,0,123,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1 +220647,NC_045512.2,15240,C,T,PASS,10,0,10,1.0,orf1ab,missense_variant,c.14975C>T,p.Thr4992Ile,p.T4992I,ivar,BA.1 +220647,NC_045512.2,15359,G,A,PASS,10,4,6,0.6,orf1ab,missense_variant,c.15094G>A,p.Ala5032Thr,p.A5032T,ivar,BA.1 +220647,NC_045512.2,16064,A,G,PASS,224,42,182,0.81,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,BA.1 +220647,NC_045512.2,18163,A,G,PASS,20,0,20,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1 +220647,NC_045512.2,21762,C,T,PASS,165,2,161,0.98,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1 +220647,NC_045512.2,21764,ATACATG,A,PASS,167,164,153,0.92,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1 +220647,NC_045512.2,21846,C,T,PASS,158,0,158,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1 +220647,NC_045512.2,21,C,T,PASS,12,0,12,1.0,orf1ab,upstream_gene_variant,c.-245C>T,.,.,ivar,BA.1 +220647,NC_045512.2,22578,G,A,PASS,15,0,15,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1 +220647,NC_045512.2,22599,G,A,PASS,15,4,11,0.73,S,missense_variant,c.1037G>A,p.Arg346Lys,p.R346K,ivar,BA.1 +220647,NC_045512.2,22679,T,C,PASS,16,0,16,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1 +220647,NC_045512.2,22686,C,T,PASS,16,0,16,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1 +220647,NC_045512.2,23013,A,C,PASS,11,0,11,1.0,S,missense_variant,c.1451A>C,p.Glu484Ala,p.E484A,ivar,BA.1 
+220647,NC_045512.2,23040,A,G,PASS,11,0,11,1.0,S,missense_variant,c.1478A>G,p.Gln493Arg,p.Q493R,ivar,BA.1 +220647,NC_045512.2,23048,G,A,PASS,11,0,11,1.0,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1 +220647,NC_045512.2,23055,A,G,PASS,11,0,11,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1 +220647,NC_045512.2,23063,A,T,PASS,11,0,11,1.0,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1 +220647,NC_045512.2,23075,T,C,PASS,10,0,10,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1 +220647,NC_045512.2,23403,A,G,PASS,2817,2,2815,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1 +220647,NC_045512.2,23525,C,T,PASS,2713,5,2696,0.99,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1 +220647,NC_045512.2,23599,T,G,PASS,1006,0,1005,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1 +220647,NC_045512.2,23604,C,A,PASS,972,0,966,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1 +220647,NC_045512.2,23854,C,A,PASS,58,0,57,0.98,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1 +220647,NC_045512.2,23948,G,T,PASS,380,0,378,0.99,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1 +220647,NC_045512.2,24130,C,A,PASS,569,0,567,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1 +220647,NC_045512.2,24424,A,T,PASS,225,0,223,0.99,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1 +220647,NC_045512.2,24469,T,A,PASS,865,1,862,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1 +220647,NC_045512.2,24503,C,T,PASS,1063,11,1050,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1 +220647,NC_045512.2,25000,C,T,PASS,78,0,78,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1 +220647,NC_045512.2,25531,A,G,PASS,101,27,74,0.73,ORF3a,missense_variant,c.139A>G,p.Ile47Val,p.I47V,ivar,BA.1 +220647,NC_045512.2,25584,C,T,PASS,104,0,102,0.98,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1 
+220647,NC_045512.2,26270,C,T,PASS,714,1,713,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1 +220647,NC_045512.2,26530,A,G,PASS,619,0,619,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1 +220647,NC_045512.2,26577,C,G,PASS,691,2,689,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1 +220647,NC_045512.2,26709,G,A,PASS,724,4,720,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1 +220647,NC_045512.2,27259,A,C,PASS,416,0,415,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1 +220647,NC_045512.2,27807,C,T,PASS,126,0,126,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1 +220647,NC_045512.2,28271,A,T,PASS,1379,0,1377,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1 +220647,NC_045512.2,28311,C,T,PASS,1323,4,1317,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1 +220647,NC_045512.2,2832,A,G,PASS,91,0,91,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1 +220647,NC_045512.2,28361,GGAGAACGCA,G,PASS,976,967,669,0.69,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1 +220647,NC_045512.2,28881,GG,AA,PASS,5179,67,5106,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1 +220647,NC_045512.2,28883,G,C,PASS,5137,2,5122,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1 +220647,NC_045512.2,3037,C,T,PASS,29,0,29,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1 +220647,NC_045512.2,5386,T,G,PASS,18,0,18,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1 +220647,NC_045512.2,8393,G,A,PASS,3724,9,3714,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1 +220648,NC_045512.2,11074,CT,C,ft,15,15,4,0.27,orf1ab,frameshift_variant,c.10817delT,p.Leu3606fs,p.L3606fs,ivar,Unassigned +220648,NC_045512.2,11282,AGTTTGTCTG,A,PASS,60,60,57,0.95,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned 
+220648,NC_045512.2,11537,A,G,PASS,20,0,20,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220648,NC_045512.2,13195,T,C,PASS,1619,0,1619,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220648,NC_045512.2,14408,C,T,PASS,14,0,14,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220648,NC_045512.2,14717,A,G,ft,13,9,4,0.31,orf1ab,missense_variant,c.14452A>G,p.Arg4818Gly,p.R4818G,ivar,Unassigned +220648,NC_045512.2,1513,C,T,PASS,51,34,17,0.33,orf1ab,synonymous_variant,c.1248C>T,p.Cys416Cys,p.C416C,ivar,Unassigned +220648,NC_045512.2,21762,C,T,PASS,13,0,13,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220648,NC_045512.2,21764,ATACATG,A,PASS,13,13,11,0.85,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220648,NC_045512.2,23403,A,G,PASS,339,0,339,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220648,NC_045512.2,23525,C,T,PASS,317,0,317,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220648,NC_045512.2,23599,T,G,PASS,160,0,160,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220648,NC_045512.2,23604,C,A,PASS,149,0,148,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220648,NC_045512.2,23948,G,T,PASS,20,0,20,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220648,NC_045512.2,24130,C,A,PASS,45,0,45,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220648,NC_045512.2,24424,A,T,PASS,18,0,18,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220648,NC_045512.2,24469,T,A,PASS,106,0,106,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220648,NC_045512.2,24503,C,T,PASS,131,1,130,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned 
+220648,NC_045512.2,25584,C,T,PASS,11,0,11,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220648,NC_045512.2,26270,C,T,PASS,24,2,22,0.92,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220648,NC_045512.2,26530,A,G,PASS,80,0,80,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220648,NC_045512.2,26577,C,G,PASS,68,0,68,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220648,NC_045512.2,26709,G,A,PASS,56,0,56,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220648,NC_045512.2,27259,A,C,PASS,22,0,22,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220648,NC_045512.2,28271,A,T,PASS,75,2,73,0.97,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220648,NC_045512.2,28311,C,T,PASS,78,2,76,0.97,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220648,NC_045512.2,28361,GGAGAACGCA,G,PASS,58,57,37,0.64,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220648,NC_045512.2,28881,GG,AA,PASS,6310,41,6263,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220648,NC_045512.2,28883,G,C,PASS,6308,2,6297,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220648,NC_045512.2,8393,G,A,PASS,930,4,924,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220649,NC_045512.2,11282,AGTTTGTCTG,A,PASS,22,22,21,0.95,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1 +220649,NC_045512.2,11537,A,G,PASS,52,0,52,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1 +220649,NC_045512.2,13195,T,C,PASS,1944,4,1940,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1 +220649,NC_045512.2,14408,C,T,PASS,27,0,27,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1 
+220649,NC_045512.2,14646,T,A,PASS,35,21,14,0.4,orf1ab,missense_variant,c.14381T>A,p.Leu4794Gln,p.L4794Q,ivar,BA.1 +220649,NC_045512.2,19275,T,C,PASS,37,5,32,0.86,orf1ab,missense_variant,c.19010T>C,p.Leu6337Pro,p.L6337P,ivar,BA.1 +220649,NC_045512.2,21762,C,T,PASS,68,0,68,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1 +220649,NC_045512.2,21764,ATACATG,A,PASS,68,68,60,0.88,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1 +220649,NC_045512.2,21846,C,T,PASS,79,0,79,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1 +220649,NC_045512.2,22673,TC,CT,PASS,12,0,12,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1 +220649,NC_045512.2,22679,T,C,PASS,12,0,12,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1 +220649,NC_045512.2,22686,C,T,PASS,11,0,11,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1 +220649,NC_045512.2,23403,A,G,PASS,886,0,886,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1 +220649,NC_045512.2,23525,C,T,PASS,950,4,946,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1 +220649,NC_045512.2,23599,T,G,PASS,385,0,385,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1 +220649,NC_045512.2,23604,C,A,PASS,374,0,374,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1 +220649,NC_045512.2,23854,C,A,PASS,14,0,14,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1 +220649,NC_045512.2,23948,G,T,PASS,159,0,159,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1 +220649,NC_045512.2,24130,C,A,PASS,219,0,217,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1 +220649,NC_045512.2,24424,A,T,PASS,16,0,16,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1 +220649,NC_045512.2,24469,T,A,PASS,253,0,250,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1 +220649,NC_045512.2,24503,C,T,PASS,310,6,304,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1 
+220649,NC_045512.2,25000,C,T,PASS,17,0,17,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1 +220649,NC_045512.2,25482,C,T,PASS,30,0,30,1.0,ORF3a,synonymous_variant,c.90C>T,p.Arg30Arg,p.R30R,ivar,BA.1 +220649,NC_045512.2,25584,C,T,PASS,20,0,20,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1 +220649,NC_045512.2,26270,C,T,PASS,293,0,293,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1 +220649,NC_045512.2,26530,A,G,PASS,178,0,178,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1 +220649,NC_045512.2,26577,C,G,PASS,169,0,169,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1 +220649,NC_045512.2,26709,G,A,PASS,149,0,149,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1 +220649,NC_045512.2,27259,A,C,PASS,88,0,88,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1 +220649,NC_045512.2,2754,T,C,PASS,25,13,12,0.48,orf1ab,missense_variant,c.2489T>C,p.Val830Ala,p.V830A,ivar,BA.1 +220649,NC_045512.2,27670,G,T,PASS,18,0,18,1.0,ORF7a,missense_variant,c.277G>T,p.Val93Phe,p.V93F,ivar,BA.1 +220649,NC_045512.2,27807,C,T,PASS,30,0,30,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1 +220649,NC_045512.2,2815,TA,T,PASS,34,34,19,0.56,orf1ab,frameshift_variant,c.2553delA,p.Val852fs,p.V852fs,ivar,BA.1 +220649,NC_045512.2,28271,A,T,PASS,719,1,718,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1 +220649,NC_045512.2,28311,C,T,PASS,808,2,802,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1 +220649,NC_045512.2,2832,A,G,PASS,31,0,31,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1 +220649,NC_045512.2,28361,GGAGAACGCA,G,PASS,558,557,301,0.54,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1 +220649,NC_045512.2,28363,A,T,PASS,23,13,6,0.26,N,synonymous_variant,c.90A>T,p.Gly30Gly,p.G30G,ivar,BA.1 +220649,NC_045512.2,28881,GG,AA,PASS,10996,110,10874,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1 
+220649,NC_045512.2,28883,G,C,PASS,10931,9,10905,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1 +220649,NC_045512.2,5672,C,T,PASS,19,0,19,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,BA.1 +220649,NC_045512.2,76,T,A,ft,14,8,6,0.43,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,BA.1 +220649,NC_045512.2,78,T,G,PASS,12,6,6,0.5,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,BA.1 +220649,NC_045512.2,8393,G,A,PASS,2396,9,2383,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1 +220650,NC_045512.2,10449,C,A,PASS,27,0,27,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220650,NC_045512.2,10647,C,T,PASS,14,0,14,1.0,orf1ab,missense_variant,c.10382C>T,p.Thr3461Ile,p.T3461I,ivar,Unassigned +220650,NC_045512.2,11282,AGTTTGTCTG,A,PASS,20,20,18,0.9,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220650,NC_045512.2,11537,A,G,PASS,65,0,65,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220650,NC_045512.2,13195,T,C,PASS,3292,5,3287,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220650,NC_045512.2,14408,C,T,PASS,82,0,82,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220650,NC_045512.2,16064,A,G,PASS,70,0,70,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,Unassigned +220650,NC_045512.2,21762,C,T,PASS,63,0,63,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220650,NC_045512.2,21764,ATACATG,A,PASS,63,63,53,0.84,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220650,NC_045512.2,21846,C,T,PASS,94,1,93,0.99,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220650,NC_045512.2,23403,A,G,PASS,993,2,991,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned 
+220650,NC_045512.2,23525,C,T,PASS,908,0,908,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220650,NC_045512.2,23599,T,G,PASS,328,0,328,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220650,NC_045512.2,23604,C,A,PASS,312,0,311,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220650,NC_045512.2,23854,C,A,PASS,12,0,12,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned +220650,NC_045512.2,23948,G,T,PASS,190,0,190,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220650,NC_045512.2,24130,C,A,PASS,240,0,240,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220650,NC_045512.2,24424,A,T,PASS,42,0,42,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220650,NC_045512.2,24469,T,A,PASS,455,0,454,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220650,NC_045512.2,24503,C,T,PASS,551,10,540,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220650,NC_045512.2,25000,C,T,PASS,17,0,17,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220650,NC_045512.2,25531,A,G,PASS,28,0,28,1.0,ORF3a,missense_variant,c.139A>G,p.Ile47Val,p.I47V,ivar,Unassigned +220650,NC_045512.2,25584,C,T,PASS,26,0,26,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220650,NC_045512.2,26270,C,T,PASS,249,0,249,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220650,NC_045512.2,26530,A,G,PASS,144,0,144,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220650,NC_045512.2,26577,C,G,PASS,147,0,147,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220650,NC_045512.2,26709,G,A,PASS,176,1,175,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220650,NC_045512.2,27259,A,C,PASS,126,0,126,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned 
+220650,NC_045512.2,27807,C,T,PASS,26,0,26,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220650,NC_045512.2,2785,T,C,PASS,79,51,28,0.35,orf1ab,synonymous_variant,c.2520T>C,p.Asn840Asn,p.N840N,ivar,Unassigned +220650,NC_045512.2,28271,A,T,PASS,795,2,791,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220650,NC_045512.2,28311,C,T,PASS,801,2,799,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220650,NC_045512.2,2832,A,G,PASS,88,0,88,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220650,NC_045512.2,28361,GGAGAACGCA,G,PASS,589,587,367,0.62,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220650,NC_045512.2,28881,GG,AA,PASS,6879,67,6807,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220650,NC_045512.2,28883,G,C,PASS,6840,2,6824,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220650,NC_045512.2,76,T,A,PASS,21,13,8,0.38,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220650,NC_045512.2,78,T,G,PASS,21,13,8,0.38,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,Unassigned +220650,NC_045512.2,8393,G,A,PASS,2169,2,2167,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220651,NC_045512.2,10323,A,G,PASS,11,0,11,1.0,orf1ab,missense_variant,c.10058A>G,p.Lys3353Arg,p.K3353R,ivar,Unassigned +220651,NC_045512.2,10333,A,G,ft,11,8,3,0.27,orf1ab,synonymous_variant,c.10068A>G,p.Thr3356Thr,p.T3356T,ivar,Unassigned +220651,NC_045512.2,10449,C,A,PASS,22,0,22,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220651,NC_045512.2,11282,AGTTTGTCTG,A,PASS,34,33,26,0.76,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220651,NC_045512.2,11537,A,G,PASS,32,0,32,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned 
+220651,NC_045512.2,13195,T,C,PASS,1396,7,1389,0.99,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220651,NC_045512.2,14408,C,T,PASS,20,0,20,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220651,NC_045512.2,21740,T,G,PASS,27,15,12,0.44,S,missense_variant,c.178T>G,p.Ser60Ala,p.S60A,ivar,Unassigned +220651,NC_045512.2,21762,C,T,PASS,24,2,22,0.92,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220651,NC_045512.2,21764,ATACATG,A,PASS,24,24,24,1.0,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220651,NC_045512.2,21843,C,A,PASS,31,19,12,0.39,S,missense_variant,c.281C>A,p.Ser94Tyr,p.S94Y,ivar,Unassigned +220651,NC_045512.2,21846,C,T,PASS,31,0,31,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220651,NC_045512.2,23403,A,G,PASS,827,0,827,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220651,NC_045512.2,23525,C,T,PASS,880,1,878,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220651,NC_045512.2,23599,T,G,PASS,393,0,393,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220651,NC_045512.2,23604,C,A,PASS,380,1,378,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220651,NC_045512.2,23948,G,T,PASS,26,0,26,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220651,NC_045512.2,24130,C,A,PASS,44,0,44,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220651,NC_045512.2,24424,A,T,PASS,22,0,22,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220651,NC_045512.2,24469,T,A,PASS,263,0,262,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220651,NC_045512.2,24503,C,T,PASS,300,4,296,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220651,NC_045512.2,25584,C,T,PASS,17,0,17,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned 
+220651,NC_045512.2,2592,C,A,PASS,24,16,8,0.33,orf1ab,missense_variant,c.2327C>A,p.Ala776Asp,p.A776D,ivar,Unassigned +220651,NC_045512.2,26270,C,T,PASS,295,0,295,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220651,NC_045512.2,26530,A,G,PASS,223,0,223,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220651,NC_045512.2,26577,C,G,PASS,230,0,230,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220651,NC_045512.2,26709,G,A,PASS,234,0,234,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220651,NC_045512.2,27259,A,C,PASS,73,0,71,0.97,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220651,NC_045512.2,27807,C,T,PASS,24,0,24,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220651,NC_045512.2,28271,A,T,PASS,194,1,193,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220651,NC_045512.2,28311,C,T,PASS,223,2,221,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220651,NC_045512.2,2832,A,G,PASS,28,0,28,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220651,NC_045512.2,28361,GGAGAACGCA,G,PASS,186,186,128,0.69,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220651,NC_045512.2,28881,GG,AA,PASS,11290,87,11197,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220651,NC_045512.2,28883,G,C,PASS,11251,5,11237,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220651,NC_045512.2,4301,C,T,ft,12,8,4,0.33,orf1ab,missense_variant,c.4036C>T,p.Leu1346Phe,p.L1346F,ivar,Unassigned +220651,NC_045512.2,5672,C,T,PASS,16,0,16,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned +220651,NC_045512.2,7680,A,G,PASS,44,31,13,0.3,orf1ab,missense_variant,c.7415A>G,p.Asp2472Gly,p.D2472G,ivar,Unassigned +220651,NC_045512.2,76,T,A,ft,16,12,4,0.25,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned 
+220651,NC_045512.2,78,T,G,ft,16,12,4,0.25,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,Unassigned +220651,NC_045512.2,8393,G,A,PASS,1490,7,1483,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220652,NC_045512.2,10135,T,C,PASS,11,0,11,1.0,orf1ab,synonymous_variant,c.9870T>C,p.Leu3290Leu,p.L3290L,ivar,Unassigned +220652,NC_045512.2,10449,C,A,PASS,21,0,21,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220652,NC_045512.2,11282,AGTTTGTCTG,A,PASS,58,58,53,0.91,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220652,NC_045512.2,113,A,G,PASS,33,20,13,0.39,orf1ab,upstream_gene_variant,c.-153A>G,.,.,ivar,Unassigned +220652,NC_045512.2,11537,A,G,PASS,41,0,41,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220652,NC_045512.2,116,G,A,PASS,36,23,13,0.36,orf1ab,upstream_gene_variant,c.-150G>A,.,.,ivar,Unassigned +220652,NC_045512.2,13195,T,C,PASS,2217,3,2214,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220652,NC_045512.2,14408,C,T,PASS,19,0,19,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220652,NC_045512.2,14646,T,A,PASS,49,35,14,0.29,orf1ab,missense_variant,c.14381T>A,p.Leu4794Gln,p.L4794Q,ivar,Unassigned +220652,NC_045512.2,21762,C,T,PASS,48,0,48,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220652,NC_045512.2,21764,ATACATG,A,PASS,49,48,45,0.92,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220652,NC_045512.2,21846,C,T,PASS,62,0,62,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220652,NC_045512.2,23403,A,G,PASS,858,0,858,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220652,NC_045512.2,23525,C,T,PASS,930,0,930,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned 
+220652,NC_045512.2,23599,T,G,PASS,413,0,413,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220652,NC_045512.2,23604,C,A,PASS,403,1,400,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220652,NC_045512.2,23854,C,A,PASS,14,0,14,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned +220652,NC_045512.2,23948,G,T,PASS,181,2,179,0.99,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220652,NC_045512.2,24130,C,A,PASS,220,0,219,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220652,NC_045512.2,24424,A,T,PASS,78,0,77,0.99,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220652,NC_045512.2,24469,T,A,PASS,425,0,425,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220652,NC_045512.2,24503,C,T,PASS,509,17,492,0.97,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220652,NC_045512.2,24803,A,G,PASS,190,0,190,1.0,S,missense_variant,c.3241A>G,p.Ile1081Val,p.I1081V,ivar,Unassigned +220652,NC_045512.2,25000,C,T,PASS,14,0,14,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220652,NC_045512.2,25584,C,T,PASS,19,0,19,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220652,NC_045512.2,25708,C,T,PASS,19,2,17,0.89,ORF3a,missense_variant,c.316C>T,p.Leu106Phe,p.L106F,ivar,Unassigned +220652,NC_045512.2,26270,C,T,PASS,493,0,493,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220652,NC_045512.2,26530,A,G,PASS,207,0,207,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220652,NC_045512.2,26577,C,G,PASS,193,0,193,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220652,NC_045512.2,26709,G,A,PASS,201,1,199,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220652,NC_045512.2,27259,A,C,PASS,95,0,95,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned 
+220652,NC_045512.2,2755,G,A,PASS,92,69,23,0.25,orf1ab,synonymous_variant,c.2490G>A,p.Val830Val,p.V830V,ivar,Unassigned +220652,NC_045512.2,27807,C,T,PASS,31,0,31,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220652,NC_045512.2,28271,A,T,PASS,646,0,644,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220652,NC_045512.2,28311,C,T,PASS,720,2,718,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220652,NC_045512.2,2832,A,G,PASS,95,0,95,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220652,NC_045512.2,28361,GGAGAACGCA,G,PASS,559,554,308,0.55,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220652,NC_045512.2,28881,GG,AA,PASS,9059,70,8983,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220652,NC_045512.2,28883,G,C,PASS,9032,4,9012,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220652,NC_045512.2,29301,A,G,PASS,3790,4,3786,1.0,N,missense_variant,c.1028A>G,p.Asp343Gly,p.D343G,ivar,Unassigned +220652,NC_045512.2,3037,C,T,PASS,11,0,11,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220652,NC_045512.2,7488,C,T,PASS,228,0,228,1.0,orf1ab,missense_variant,c.7223C>T,p.Thr2408Ile,p.T2408I,ivar,Unassigned +220652,NC_045512.2,76,T,A,ft,16,12,4,0.25,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220652,NC_045512.2,8393,G,A,PASS,2474,4,2469,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220653,NC_045512.2,11282,AGTTTGTCTG,A,ft,10,10,9,0.9,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220653,NC_045512.2,11537,A,G,PASS,10,0,10,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220653,NC_045512.2,13195,T,C,PASS,387,0,387,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned 
+220653,NC_045512.2,14560,T,A,ft,10,7,3,0.3,orf1ab,stop_gained,c.14295T>A,p.Cys4765*,p.C4765*,ivar,Unassigned +220653,NC_045512.2,23403,A,G,PASS,311,0,311,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220653,NC_045512.2,23525,C,T,PASS,312,2,310,0.99,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220653,NC_045512.2,23599,T,G,PASS,157,0,157,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220653,NC_045512.2,23604,C,A,PASS,154,0,154,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220653,NC_045512.2,23948,G,T,PASS,31,0,31,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220653,NC_045512.2,24130,C,A,PASS,25,0,25,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220653,NC_045512.2,24469,T,A,PASS,63,0,61,0.97,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220653,NC_045512.2,24503,C,T,PASS,71,0,71,1.0,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220653,NC_045512.2,26270,C,T,PASS,393,0,393,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220653,NC_045512.2,2633,T,A,PASS,11,5,6,0.55,orf1ab,missense_variant,c.2368T>A,p.Leu790Met,p.L790M,ivar,Unassigned +220653,NC_045512.2,26530,A,G,PASS,113,0,113,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220653,NC_045512.2,26577,C,G,PASS,115,0,113,0.98,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220653,NC_045512.2,26709,G,A,PASS,95,0,95,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220653,NC_045512.2,2702,T,C,ft,11,8,3,0.27,orf1ab,missense_variant,c.2437T>C,p.Phe813Leu,p.F813L,ivar,Unassigned +220653,NC_045512.2,27259,A,C,PASS,30,0,30,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220653,NC_045512.2,27807,C,T,PASS,17,0,17,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned 
+220653,NC_045512.2,28271,A,T,PASS,184,0,184,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220653,NC_045512.2,28311,C,T,PASS,212,1,211,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220653,NC_045512.2,2832,A,G,PASS,14,0,14,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220653,NC_045512.2,28361,GGAGAACGCA,G,PASS,166,166,120,0.72,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220653,NC_045512.2,28881,GG,AA,PASS,6955,64,6886,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220653,NC_045512.2,28883,G,C,PASS,6937,2,6917,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220653,NC_045512.2,5672,C,T,PASS,14,0,14,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned +220653,NC_045512.2,685,AAAGTCATTT,A,PASS,4513,4475,2142,0.47,orf1ab,conservative_inframe_deletion,c.421_429delAAGTCATTT,p.Lys141_Phe143del,p.K141_F143del,ivar,Unassigned +220653,NC_045512.2,694,T,A,PASS,14,9,5,0.36,orf1ab,missense_variant,c.429T>A,p.Phe143Leu,p.F143L,ivar,Unassigned +220653,NC_045512.2,76,T,A,PASS,10,4,6,0.6,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220653,NC_045512.2,8393,G,A,PASS,618,0,618,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220654,NC_045512.2,10029,C,T,PASS,14,0,14,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,Unassigned +220654,NC_045512.2,10449,C,A,PASS,29,0,29,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220654,NC_045512.2,11282,AGTTTGTCTG,A,PASS,106,106,97,0.92,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220654,NC_045512.2,11537,A,G,PASS,65,0,65,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned 
+220654,NC_045512.2,13195,T,C,PASS,2838,1,2837,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220654,NC_045512.2,14408,C,T,PASS,21,0,21,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220654,NC_045512.2,18163,A,G,PASS,15,0,15,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,Unassigned +220654,NC_045512.2,21762,C,T,PASS,35,0,35,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220654,NC_045512.2,21764,ATACATG,A,PASS,37,35,33,0.89,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220654,NC_045512.2,21846,C,T,PASS,62,0,62,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220654,NC_045512.2,22578,G,A,PASS,10,0,10,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,Unassigned +220654,NC_045512.2,23403,A,G,PASS,780,0,780,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220654,NC_045512.2,23525,C,T,PASS,754,0,754,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220654,NC_045512.2,23599,T,G,PASS,331,0,331,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220654,NC_045512.2,23604,C,A,PASS,321,0,317,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220654,NC_045512.2,23854,C,A,PASS,26,0,26,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned +220654,NC_045512.2,23948,G,T,PASS,258,2,255,0.99,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220654,NC_045512.2,24130,C,A,PASS,310,0,308,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220654,NC_045512.2,24424,A,T,PASS,93,0,93,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220654,NC_045512.2,24469,T,A,PASS,384,0,384,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220654,NC_045512.2,24503,C,T,PASS,439,6,433,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned 
+220654,NC_045512.2,25000,C,T,PASS,26,0,26,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220654,NC_045512.2,25584,C,T,PASS,32,0,32,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220654,NC_045512.2,26270,C,T,PASS,591,3,588,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220654,NC_045512.2,26530,A,G,PASS,261,0,261,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220654,NC_045512.2,26577,C,G,PASS,241,0,241,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220654,NC_045512.2,26709,G,A,PASS,269,2,267,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220654,NC_045512.2,27259,A,C,PASS,227,0,225,0.99,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220654,NC_045512.2,27807,C,T,PASS,44,0,44,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220654,NC_045512.2,28271,A,T,PASS,695,0,693,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220654,NC_045512.2,28311,C,T,PASS,714,0,712,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220654,NC_045512.2,2832,A,G,PASS,19,0,19,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220654,NC_045512.2,28361,GGAGAACGCA,G,PASS,538,535,345,0.64,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220654,NC_045512.2,28881,GG,AA,PASS,5172,63,5108,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220654,NC_045512.2,28883,G,C,PASS,5131,0,5124,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220654,NC_045512.2,3037,C,T,PASS,17,0,17,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220654,NC_045512.2,5672,C,T,PASS,73,0,73,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned +220654,NC_045512.2,5924,G,A,PASS,24,0,24,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,Unassigned 
+220654,NC_045512.2,76,T,A,PASS,21,13,8,0.38,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220654,NC_045512.2,78,T,G,PASS,21,13,8,0.38,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,Unassigned +220654,NC_045512.2,8393,G,A,PASS,1798,4,1792,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220655,NC_045512.2,10029,C,T,PASS,13,0,13,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.17 +220655,NC_045512.2,10449,C,A,PASS,66,0,66,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.17 +220655,NC_045512.2,11282,AGTTTGTCTG,A,PASS,124,124,112,0.9,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.17 +220655,NC_045512.2,11537,A,G,PASS,249,2,247,0.99,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.17 +220655,NC_045512.2,13195,T,C,PASS,6068,4,6064,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.17 +220655,NC_045512.2,14408,C,T,PASS,167,2,164,0.98,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.17 +220655,NC_045512.2,17797,G,T,PASS,11,5,6,0.55,orf1ab,synonymous_variant,c.17532G>T,p.Leu5844Leu,p.L5844L,ivar,BA.1.17 +220655,NC_045512.2,18163,A,G,PASS,13,0,13,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.17 +220655,NC_045512.2,21762,C,T,PASS,224,0,224,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.17 +220655,NC_045512.2,21764,ATACATG,A,PASS,226,224,213,0.94,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.17 +220655,NC_045512.2,21846,C,T,PASS,271,0,271,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.17 +220655,NC_045512.2,21,C,T,PASS,14,0,14,1.0,orf1ab,upstream_gene_variant,c.-245C>T,.,.,ivar,BA.1.17 +220655,NC_045512.2,2270,A,G,PASS,22,0,22,1.0,orf1ab,missense_variant,c.2005A>G,p.Lys669Glu,p.K669E,ivar,BA.1.17 
+220655,NC_045512.2,23013,A,C,PASS,13,0,13,1.0,S,missense_variant,c.1451A>C,p.Glu484Ala,p.E484A,ivar,BA.1.17 +220655,NC_045512.2,23040,A,G,PASS,11,0,11,1.0,S,missense_variant,c.1478A>G,p.Gln493Arg,p.Q493R,ivar,BA.1.17 +220655,NC_045512.2,23048,G,A,PASS,11,0,11,1.0,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1.17 +220655,NC_045512.2,23055,A,G,PASS,12,0,12,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1.17 +220655,NC_045512.2,23063,A,T,PASS,11,2,9,0.82,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1.17 +220655,NC_045512.2,23075,T,C,PASS,18,0,18,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1.17 +220655,NC_045512.2,23202,C,A,PASS,23,0,23,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.17 +220655,NC_045512.2,23403,A,G,PASS,1843,2,1841,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.17 +220655,NC_045512.2,23525,C,T,PASS,1813,2,1808,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.17 +220655,NC_045512.2,23599,T,G,PASS,652,0,652,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.17 +220655,NC_045512.2,23604,C,A,PASS,630,0,620,0.98,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.17 +220655,NC_045512.2,23854,C,A,PASS,31,0,31,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.17 +220655,NC_045512.2,23948,G,T,PASS,445,0,445,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.17 +220655,NC_045512.2,24130,C,A,PASS,779,0,777,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.17 +220655,NC_045512.2,24424,A,T,PASS,137,0,136,0.99,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.17 +220655,NC_045512.2,24469,T,A,PASS,827,0,825,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.17 +220655,NC_045512.2,24503,C,T,PASS,960,7,950,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.17 +220655,NC_045512.2,25000,C,T,PASS,48,0,48,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.17 
+220655,NC_045512.2,25584,C,T,PASS,85,0,85,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.17 +220655,NC_045512.2,25855,G,A,PASS,101,0,101,1.0,ORF3a,missense_variant,c.463G>A,p.Asp155Asn,p.D155N,ivar,BA.1.17 +220655,NC_045512.2,26270,C,T,PASS,450,2,448,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.17 +220655,NC_045512.2,26530,A,G,PASS,478,1,477,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.17 +220655,NC_045512.2,26577,C,G,PASS,554,0,554,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.17 +220655,NC_045512.2,26709,G,A,PASS,578,4,571,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.17 +220655,NC_045512.2,27259,A,C,PASS,283,0,281,0.99,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.17 +220655,NC_045512.2,27807,C,T,PASS,164,1,163,0.99,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.17 +220655,NC_045512.2,28271,A,T,PASS,2207,4,2199,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.17 +220655,NC_045512.2,28311,C,T,PASS,2296,7,2282,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.17 +220655,NC_045512.2,2832,A,G,PASS,60,0,60,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.17 +220655,NC_045512.2,28361,GGAGAACGCA,G,PASS,1753,1743,1191,0.68,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.17 +220655,NC_045512.2,28881,GG,AA,PASS,5261,74,5183,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.17 +220655,NC_045512.2,28883,G,C,PASS,5202,6,5186,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.17 +220655,NC_045512.2,29772,T,C,PASS,24,0,24,1.0,S,downstream_gene_variant,c.*4388T>C,.,.,ivar,BA.1.17 +220655,NC_045512.2,3037,C,T,PASS,24,0,24,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.17 +220655,NC_045512.2,5672,C,T,PASS,76,1,75,0.99,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,BA.1.17 
+220655,NC_045512.2,5924,G,A,PASS,36,0,36,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,BA.1.17 +220655,NC_045512.2,76,T,A,ft,24,18,6,0.25,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,BA.1.17 +220655,NC_045512.2,78,T,G,ft,24,18,6,0.25,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,BA.1.17 +220655,NC_045512.2,8393,G,A,PASS,2376,3,2367,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.17 +220655,NC_045512.2,8652,T,C,PASS,2140,8,2132,1.0,orf1ab,missense_variant,c.8387T>C,p.Met2796Thr,p.M2796T,ivar,BA.1.17 +220656,NC_045512.2,11537,A,G,PASS,16,0,16,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220656,NC_045512.2,13195,T,C,PASS,831,0,831,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220656,NC_045512.2,13975,G,A,ft,11,8,3,0.27,orf1ab,stop_retained_variant,c.13710G>A,p.Ter4570Ter,p.*4570*,ivar,Unassigned +220656,NC_045512.2,14408,C,T,PASS,19,0,19,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220656,NC_045512.2,16409,C,T,PASS,31,22,9,0.29,orf1ab,stop_gained,c.16144C>T,p.Gln5382*,p.Q5382*,ivar,Unassigned +220656,NC_045512.2,21762,C,T,PASS,19,0,19,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220656,NC_045512.2,21764,ATACATG,A,PASS,19,19,17,0.89,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220656,NC_045512.2,21846,C,T,PASS,27,0,27,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220656,NC_045512.2,23403,A,G,PASS,428,0,428,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220656,NC_045512.2,23525,C,T,PASS,502,4,498,0.99,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220656,NC_045512.2,23599,T,G,PASS,215,0,215,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220656,NC_045512.2,23604,C,A,PASS,206,0,204,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned 
+220656,NC_045512.2,23948,G,T,PASS,45,0,45,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220656,NC_045512.2,24130,C,A,PASS,50,0,50,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220656,NC_045512.2,24469,T,A,PASS,80,0,80,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220656,NC_045512.2,24503,C,T,PASS,87,1,86,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220656,NC_045512.2,26577,C,G,PASS,16,0,16,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220656,NC_045512.2,26598,T,C,ft,14,10,4,0.29,M,missense_variant,c.76T>C,p.Phe26Leu,p.F26L,ivar,Unassigned +220656,NC_045512.2,26709,G,A,PASS,15,0,15,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220656,NC_045512.2,26712,T,C,PASS,15,5,10,0.67,M,missense_variant,c.190T>C,p.Cys64Arg,p.C64R,ivar,Unassigned +220656,NC_045512.2,27259,A,C,PASS,13,0,13,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220656,NC_045512.2,28271,A,T,PASS,136,0,134,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220656,NC_045512.2,28311,C,T,PASS,133,0,132,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220656,NC_045512.2,28361,GGAGAACGCA,G,PASS,98,97,63,0.64,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220656,NC_045512.2,28877,AG,TC,PASS,7768,7,7718,0.99,N,synonymous_variant,c.604_605delAGinsTC,p.203,p.203,ivar,Unassigned +220656,NC_045512.2,28881,GG,AA,PASS,7783,17,7748,1.0,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220656,NC_045512.2,28883,G,C,PASS,7807,2,7798,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220656,NC_045512.2,4920,C,T,ft,12,9,3,0.25,orf1ab,missense_variant,c.4655C>T,p.Thr1552Ile,p.T1552I,ivar,Unassigned +220656,NC_045512.2,8393,G,A,PASS,610,3,607,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned 
+220657,NC_045512.2,10647,C,T,PASS,19,0,19,1.0,orf1ab,missense_variant,c.10382C>T,p.Thr3461Ile,p.T3461I,ivar,Unassigned +220657,NC_045512.2,11537,A,G,PASS,34,0,34,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220657,NC_045512.2,13195,T,C,PASS,1613,0,1613,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220657,NC_045512.2,14408,C,T,PASS,22,0,22,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220657,NC_045512.2,15966,T,C,PASS,20,12,8,0.4,orf1ab,missense_variant,c.15701T>C,p.Val5234Ala,p.V5234A,ivar,Unassigned +220657,NC_045512.2,16064,A,G,PASS,14,0,14,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,Unassigned +220657,NC_045512.2,21762,C,T,PASS,37,0,37,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220657,NC_045512.2,21764,ATACATG,A,PASS,38,37,30,0.79,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220657,NC_045512.2,21846,C,T,PASS,36,0,36,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220657,NC_045512.2,23403,A,G,PASS,510,2,508,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220657,NC_045512.2,23525,C,T,PASS,541,0,541,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220657,NC_045512.2,23599,T,G,PASS,264,0,264,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220657,NC_045512.2,23604,C,A,PASS,253,0,249,0.98,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220657,NC_045512.2,23948,G,T,PASS,44,0,44,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220657,NC_045512.2,24130,C,A,PASS,53,0,53,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220657,NC_045512.2,24424,A,T,PASS,26,0,26,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned 
+220657,NC_045512.2,24469,T,A,PASS,149,0,146,0.98,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220657,NC_045512.2,24503,C,T,PASS,170,4,166,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220657,NC_045512.2,26270,C,T,PASS,139,0,139,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220657,NC_045512.2,26577,C,G,PASS,11,0,11,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220657,NC_045512.2,26709,G,A,PASS,10,0,10,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220657,NC_045512.2,27259,A,C,PASS,44,0,43,0.98,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220657,NC_045512.2,27807,C,T,PASS,10,0,10,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220657,NC_045512.2,28271,A,T,PASS,737,0,737,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220657,NC_045512.2,28311,C,T,PASS,713,2,711,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220657,NC_045512.2,28361,GGAGAACGCA,G,PASS,527,525,343,0.65,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220657,NC_045512.2,28425,C,T,PASS,95,70,25,0.26,N,missense_variant,c.152C>T,p.Ser51Phe,p.S51F,ivar,Unassigned +220657,NC_045512.2,28881,GG,AA,PASS,9546,60,9475,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220657,NC_045512.2,28883,G,C,PASS,9546,3,9522,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220657,NC_045512.2,8393,G,A,PASS,1051,5,1046,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220658,NC_045512.2,11282,AGTTTGTCTG,A,PASS,20,20,18,0.9,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220658,NC_045512.2,11454,C,T,PASS,29,0,27,0.93,orf1ab,missense_variant,c.11189C>T,p.Ala3730Val,p.A3730V,ivar,Unassigned 
+220658,NC_045512.2,11537,A,G,PASS,22,0,22,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220658,NC_045512.2,13195,T,C,PASS,1324,7,1317,0.99,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220658,NC_045512.2,20936,C,T,ft,15,11,4,0.27,orf1ab,missense_variant,c.20671C>T,p.Arg6891Cys,p.R6891C,ivar,Unassigned +220658,NC_045512.2,21266,A,G,ft,12,8,4,0.33,orf1ab,missense_variant,c.21001A>G,p.Lys7001Glu,p.K7001E,ivar,Unassigned +220658,NC_045512.2,21762,C,T,PASS,27,0,27,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220658,NC_045512.2,21764,ATACATG,A,PASS,27,27,24,0.89,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220658,NC_045512.2,21846,C,T,PASS,19,0,19,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220658,NC_045512.2,22679,T,C,PASS,12,0,12,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,Unassigned +220658,NC_045512.2,22686,C,T,PASS,11,0,11,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,Unassigned +220658,NC_045512.2,23403,A,G,PASS,786,0,786,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220658,NC_045512.2,23525,C,T,PASS,772,0,772,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220658,NC_045512.2,23599,T,G,PASS,327,0,327,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220658,NC_045512.2,23604,C,A,PASS,323,0,323,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220658,NC_045512.2,23854,C,A,PASS,11,0,11,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned +220658,NC_045512.2,23948,G,T,PASS,89,0,89,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220658,NC_045512.2,24130,C,A,PASS,118,0,118,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220658,NC_045512.2,24424,A,T,PASS,36,0,36,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned 
+220658,NC_045512.2,24469,T,A,PASS,233,0,232,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220658,NC_045512.2,24503,C,T,PASS,264,11,253,0.96,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220658,NC_045512.2,25584,C,T,PASS,23,0,23,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220658,NC_045512.2,26270,C,T,PASS,231,2,229,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220658,NC_045512.2,26530,A,G,PASS,217,0,217,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220658,NC_045512.2,26577,C,G,PASS,261,0,261,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220658,NC_045512.2,26709,G,A,PASS,244,0,244,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220658,NC_045512.2,27259,A,C,PASS,60,0,60,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220658,NC_045512.2,27807,C,T,PASS,31,0,31,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220658,NC_045512.2,28271,A,T,PASS,725,0,723,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220658,NC_045512.2,28311,C,T,PASS,708,0,707,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220658,NC_045512.2,2832,A,G,PASS,13,0,13,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220658,NC_045512.2,28361,GGAGAACGCA,G,PASS,566,561,435,0.77,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220658,NC_045512.2,28881,GG,AA,PASS,10102,94,9997,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220658,NC_045512.2,28883,G,C,PASS,10039,10,10008,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220658,NC_045512.2,3037,C,T,PASS,19,0,19,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220658,NC_045512.2,5672,C,T,PASS,11,0,11,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned 
+220658,NC_045512.2,8393,G,A,PASS,885,0,885,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220659,NC_045512.2,10449,C,A,PASS,67,0,67,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.17 +220659,NC_045512.2,11282,AGTTTGTCTG,A,PASS,147,147,129,0.88,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.17 +220659,NC_045512.2,11537,A,G,PASS,153,0,153,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.17 +220659,NC_045512.2,13195,T,C,PASS,5113,5,5106,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.17 +220659,NC_045512.2,14408,C,T,PASS,87,0,87,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.17 +220659,NC_045512.2,18163,A,G,PASS,26,0,26,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.17 +220659,NC_045512.2,21762,C,T,PASS,130,2,128,0.98,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.17 +220659,NC_045512.2,21764,ATACATG,A,PASS,132,132,117,0.89,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.17 +220659,NC_045512.2,21846,C,T,PASS,126,0,126,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.17 +220659,NC_045512.2,22679,T,C,PASS,12,0,12,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.17 +220659,NC_045512.2,22686,C,T,PASS,12,0,12,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.17 +220659,NC_045512.2,23202,C,A,PASS,12,0,12,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.17 +220659,NC_045512.2,23403,A,G,PASS,3188,6,3182,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.17 +220659,NC_045512.2,23525,C,T,PASS,3075,2,3067,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.17 +220659,NC_045512.2,23599,T,G,PASS,1225,0,1223,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.17 
+220659,NC_045512.2,23604,C,A,PASS,1183,1,1178,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.17 +220659,NC_045512.2,23854,C,A,PASS,44,0,44,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.17 +220659,NC_045512.2,23948,G,T,PASS,575,0,574,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.17 +220659,NC_045512.2,24130,C,A,PASS,852,0,845,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.17 +220659,NC_045512.2,24424,A,T,PASS,87,1,85,0.98,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.17 +220659,NC_045512.2,24469,T,A,PASS,624,1,618,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.17 +220659,NC_045512.2,24503,C,T,PASS,757,12,745,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.17 +220659,NC_045512.2,25000,C,T,PASS,40,1,39,0.98,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.17 +220659,NC_045512.2,25584,C,T,PASS,88,0,88,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.17 +220659,NC_045512.2,26270,C,T,PASS,546,1,545,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.17 +220659,NC_045512.2,26530,A,G,PASS,449,2,447,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.17 +220659,NC_045512.2,26577,C,G,PASS,432,0,432,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.17 +220659,NC_045512.2,26709,G,A,PASS,479,2,477,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.17 +220659,NC_045512.2,27259,A,C,PASS,294,0,294,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.17 +220659,NC_045512.2,27807,C,T,PASS,180,1,179,0.99,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.17 +220659,NC_045512.2,27970,C,T,PASS,696,19,675,0.97,ORF8,missense_variant,c.77C>T,p.Thr26Ile,p.T26I,ivar,BA.1.17 +220659,NC_045512.2,28271,A,T,PASS,1877,3,1872,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.17 +220659,NC_045512.2,28311,C,T,PASS,1989,8,1980,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.17 
+220659,NC_045512.2,2832,A,G,PASS,68,0,68,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.17 +220659,NC_045512.2,28361,GGAGAACGCA,G,PASS,1571,1561,969,0.62,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.17 +220659,NC_045512.2,28881,GG,AA,PASS,3757,52,3700,0.98,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.17 +220659,NC_045512.2,28883,G,C,PASS,3725,2,3716,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.17 +220659,NC_045512.2,3037,C,T,PASS,23,0,23,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.17 +220659,NC_045512.2,5672,C,T,PASS,91,0,91,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,BA.1.17 +220659,NC_045512.2,5924,G,A,PASS,34,0,34,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,BA.1.17 +220659,NC_045512.2,76,T,A,PASS,24,10,14,0.58,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,BA.1.17 +220659,NC_045512.2,78,T,G,PASS,23,9,14,0.61,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,BA.1.17 +220659,NC_045512.2,7984,T,C,PASS,934,1,933,1.0,orf1ab,synonymous_variant,c.7719T>C,p.Asp2573Asp,p.D2573D,ivar,BA.1.17 +220659,NC_045512.2,8393,G,A,PASS,3572,10,3552,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.17 +220659,NC_045512.2,999,C,T,PASS,2162,22,2138,0.99,orf1ab,missense_variant,c.734C>T,p.Ser245Phe,p.S245F,ivar,BA.1.17 +220660,NC_045512.2,10029,C,T,PASS,15,0,15,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.1 +220660,NC_045512.2,10449,C,A,PASS,59,0,59,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.1 +220660,NC_045512.2,11282,AGTTTGTCTG,A,PASS,155,151,132,0.85,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.1 +220660,NC_045512.2,11537,A,G,PASS,138,0,138,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.1 
+220660,NC_045512.2,13195,T,C,PASS,4902,11,4891,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.1 +220660,NC_045512.2,13750,T,A,ft,15,11,4,0.27,orf1ab,missense_variant,c.13485T>A,p.Ser4495Arg,p.S4495R,ivar,BA.1.1 +220660,NC_045512.2,14408,C,T,PASS,103,0,103,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.1 +220660,NC_045512.2,15336,T,C,PASS,11,2,9,0.82,orf1ab,missense_variant,c.15071T>C,p.Leu5024Ser,p.L5024S,ivar,BA.1.1 +220660,NC_045512.2,15359,G,A,PASS,14,7,7,0.5,orf1ab,missense_variant,c.15094G>A,p.Ala5032Thr,p.A5032T,ivar,BA.1.1 +220660,NC_045512.2,16744,G,A,PASS,344,0,344,1.0,orf1ab,synonymous_variant,c.16479G>A,p.Leu5493Leu,p.L5493L,ivar,BA.1.1 +220660,NC_045512.2,18163,A,G,PASS,20,0,20,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.1 +220660,NC_045512.2,21762,C,T,PASS,186,0,186,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.1 +220660,NC_045512.2,21764,ATACATG,A,PASS,190,187,174,0.92,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.1 +220660,NC_045512.2,21846,C,T,PASS,164,0,164,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.1 +220660,NC_045512.2,22578,G,A,PASS,13,0,13,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.1 +220660,NC_045512.2,23202,C,A,PASS,11,0,11,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.1 +220660,NC_045512.2,23403,A,G,PASS,3655,3,3650,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.1 +220660,NC_045512.2,23525,C,T,PASS,3613,12,3596,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.1 +220660,NC_045512.2,23599,T,G,PASS,1227,0,1225,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.1 +220660,NC_045512.2,23604,C,A,PASS,1189,2,1175,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.1 +220660,NC_045512.2,23854,C,A,PASS,22,0,22,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.1 
+220660,NC_045512.2,23948,G,T,PASS,242,0,239,0.99,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.1 +220660,NC_045512.2,24130,C,A,PASS,408,0,407,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.1 +220660,NC_045512.2,24424,A,T,PASS,95,0,95,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.1 +220660,NC_045512.2,24469,T,A,PASS,525,0,521,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.1 +220660,NC_045512.2,24503,C,T,PASS,600,6,592,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.1 +220660,NC_045512.2,25000,C,T,PASS,48,0,48,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.1 +220660,NC_045512.2,2500,A,G,PASS,21,0,21,1.0,orf1ab,synonymous_variant,c.2235A>G,p.Glu745Glu,p.E745E,ivar,BA.1.1 +220660,NC_045512.2,25584,C,T,PASS,60,0,60,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.1 +220660,NC_045512.2,26270,C,T,PASS,318,1,317,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.1 +220660,NC_045512.2,26530,A,G,PASS,309,0,309,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.1 +220660,NC_045512.2,26577,C,G,PASS,350,0,350,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.1 +220660,NC_045512.2,26709,G,A,PASS,356,3,353,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.1 +220660,NC_045512.2,27259,A,C,PASS,249,0,249,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.1 +220660,NC_045512.2,27807,C,T,PASS,129,1,128,0.99,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.1 +220660,NC_045512.2,28271,A,T,PASS,1073,0,1070,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.1 +220660,NC_045512.2,28311,C,T,PASS,1105,5,1097,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.1 +220660,NC_045512.2,2832,A,G,PASS,32,0,32,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.1 
+220660,NC_045512.2,28361,GGAGAACGCA,G,PASS,776,768,491,0.63,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.1 +220660,NC_045512.2,28881,GG,AA,PASS,3182,53,3121,0.98,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.1 +220660,NC_045512.2,28883,G,C,PASS,3146,1,3138,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.1 +220660,NC_045512.2,3037,C,T,PASS,27,0,27,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.1 +220660,NC_045512.2,3264,C,T,PASS,1132,6,1125,0.99,orf1ab,missense_variant,c.2999C>T,p.Thr1000Ile,p.T1000I,ivar,BA.1.1 +220660,NC_045512.2,5386,T,G,PASS,10,0,10,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1.1 +220660,NC_045512.2,5512,C,T,ft,15,11,4,0.27,orf1ab,synonymous_variant,c.5247C>T,p.Asn1749Asn,p.N1749N,ivar,BA.1.1 +220660,NC_045512.2,76,T,A,PASS,36,24,12,0.33,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,BA.1.1 +220660,NC_045512.2,78,T,G,PASS,36,24,12,0.33,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,BA.1.1 +220660,NC_045512.2,8393,G,A,PASS,3635,10,3623,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.1 +220661,NC_045512.2,10029,C,T,PASS,28,2,26,0.93,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.1 +220661,NC_045512.2,10449,C,A,PASS,83,0,83,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.1 +220661,NC_045512.2,11282,AGTTTGTCTG,A,PASS,268,262,243,0.91,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.1 +220661,NC_045512.2,11537,A,G,PASS,203,0,203,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.1 +220661,NC_045512.2,13195,T,C,PASS,7311,6,7305,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.1 +220661,NC_045512.2,13678,T,C,PASS,10,0,10,1.0,orf1ab,synonymous_variant,c.13413T>C,p.Thr4471Thr,p.T4471T,ivar,BA.1.1 
+220661,NC_045512.2,14408,C,T,PASS,157,0,157,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.1 +220661,NC_045512.2,18163,A,G,PASS,14,1,13,0.93,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.1 +220661,NC_045512.2,20081,C,T,ft,11,7,4,0.36,orf1ab,synonymous_variant,c.19816C>T,p.Leu6606Leu,p.L6606L,ivar,BA.1.1 +220661,NC_045512.2,20085,AG,A,ft,12,12,4,0.33,orf1ab,frameshift_variant,c.19822delG,p.Val6608fs,p.V6608fs,ivar,BA.1.1 +220661,NC_045512.2,21595,C,T,PASS,626,3,621,0.99,S,synonymous_variant,c.33C>T,p.Val11Val,p.V11V,ivar,BA.1.1 +220661,NC_045512.2,21762,C,T,PASS,151,2,149,0.99,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.1 +220661,NC_045512.2,21764,ATACATG,A,PASS,152,151,145,0.95,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.1 +220661,NC_045512.2,21846,C,T,PASS,232,1,231,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.1 +220661,NC_045512.2,22992,G,A,PASS,13,0,13,1.0,S,missense_variant,c.1430G>A,p.Ser477Asn,p.S477N,ivar,BA.1.1 +220661,NC_045512.2,22995,C,A,PASS,13,0,13,1.0,S,missense_variant,c.1433C>A,p.Thr478Lys,p.T478K,ivar,BA.1.1 +220661,NC_045512.2,23013,A,C,PASS,11,0,11,1.0,S,missense_variant,c.1451A>C,p.Glu484Ala,p.E484A,ivar,BA.1.1 +220661,NC_045512.2,23040,A,G,PASS,11,0,11,1.0,S,missense_variant,c.1478A>G,p.Gln493Arg,p.Q493R,ivar,BA.1.1 +220661,NC_045512.2,23048,G,A,PASS,11,0,11,1.0,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1.1 +220661,NC_045512.2,23055,A,G,PASS,11,0,11,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1.1 +220661,NC_045512.2,23063,A,T,PASS,11,0,11,1.0,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1.1 +220661,NC_045512.2,23202,C,A,PASS,23,0,23,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.1 +220661,NC_045512.2,23403,A,G,PASS,3174,1,3172,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.1 
+220661,NC_045512.2,23525,C,T,PASS,3000,2,2995,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.1 +220661,NC_045512.2,23599,T,G,PASS,1121,0,1121,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.1 +220661,NC_045512.2,23604,C,A,PASS,1072,0,1066,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.1 +220661,NC_045512.2,23854,C,A,PASS,29,0,29,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.1 +220661,NC_045512.2,23948,G,T,PASS,427,0,427,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.1 +220661,NC_045512.2,24130,C,A,PASS,638,0,636,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.1 +220661,NC_045512.2,24424,A,T,PASS,151,2,149,0.99,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.1 +220661,NC_045512.2,24469,T,A,PASS,857,0,855,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.1 +220661,NC_045512.2,24503,C,T,PASS,1013,12,1001,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.1 +220661,NC_045512.2,2470,C,T,PASS,13,0,13,1.0,orf1ab,synonymous_variant,c.2205C>T,p.Ala735Ala,p.A735A,ivar,BA.1.1 +220661,NC_045512.2,25000,C,T,PASS,59,2,57,0.97,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.1 +220661,NC_045512.2,25584,C,T,PASS,70,0,70,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.1 +220661,NC_045512.2,26270,C,T,PASS,516,0,516,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.1 +220661,NC_045512.2,26530,A,G,PASS,322,0,322,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.1 +220661,NC_045512.2,26577,C,G,PASS,284,0,283,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.1 +220661,NC_045512.2,26709,G,A,PASS,337,7,330,0.98,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.1 +220661,NC_045512.2,27259,A,C,PASS,178,0,178,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.1 +220661,NC_045512.2,27807,C,T,PASS,70,0,70,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.1 
+220661,NC_045512.2,28271,A,T,PASS,1288,0,1287,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.1 +220661,NC_045512.2,28311,C,T,PASS,1327,5,1322,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.1 +220661,NC_045512.2,2832,A,G,PASS,29,0,29,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.1 +220661,NC_045512.2,28361,GGAGAACGCA,G,PASS,988,984,611,0.62,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.1 +220661,NC_045512.2,28881,GG,AA,PASS,4858,62,4792,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.1 +220661,NC_045512.2,28883,G,C,PASS,4828,1,4820,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.1 +220661,NC_045512.2,3037,C,T,PASS,31,0,31,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.1 +220661,NC_045512.2,8393,G,A,PASS,2995,5,2985,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.1 +220663,NC_045512.2,11537,A,G,PASS,11,0,11,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220663,NC_045512.2,12864,A,C,PASS,17,0,17,1.0,orf1ab,missense_variant,c.12599A>C,p.Asp4200Ala,p.D4200A,ivar,Unassigned +220663,NC_045512.2,1312,G,T,PASS,475,0,473,1.0,orf1ab,missense_variant,c.1047G>T,p.Leu349Phe,p.L349F,ivar,Unassigned +220663,NC_045512.2,13195,T,C,PASS,259,0,259,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220663,NC_045512.2,18622,A,G,ft,16,11,5,0.31,orf1ab,synonymous_variant,c.18357A>G,p.Leu6119Leu,p.L6119L,ivar,Unassigned +220663,NC_045512.2,22578,G,A,PASS,10,0,10,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,Unassigned +220663,NC_045512.2,23403,A,G,PASS,89,0,89,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220663,NC_045512.2,23525,C,T,PASS,106,0,106,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220663,NC_045512.2,23599,T,G,PASS,65,0,65,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned 
+220663,NC_045512.2,23604,C,A,PASS,61,0,61,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220663,NC_045512.2,23948,G,T,PASS,13,0,13,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220663,NC_045512.2,24130,C,A,PASS,37,0,37,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220663,NC_045512.2,24424,A,T,PASS,61,0,61,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220663,NC_045512.2,24469,T,A,PASS,139,0,139,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220663,NC_045512.2,24503,C,T,PASS,158,0,158,1.0,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220663,NC_045512.2,26270,C,T,PASS,479,2,477,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220663,NC_045512.2,26530,A,G,PASS,156,0,156,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220663,NC_045512.2,26577,C,G,PASS,120,0,120,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220663,NC_045512.2,26709,G,A,PASS,166,2,163,0.98,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220663,NC_045512.2,27259,A,C,PASS,23,0,23,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220663,NC_045512.2,27274,T,A,PASS,24,17,7,0.29,ORF6,missense_variant,c.73T>A,p.Ser25Thr,p.S25T,ivar,Unassigned +220663,NC_045512.2,27476,C,T,PASS,147,21,126,0.86,ORF7a,missense_variant,c.83C>T,p.Thr28Ile,p.T28I,ivar,Unassigned +220663,NC_045512.2,28271,A,T,PASS,150,0,150,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220663,NC_045512.2,28311,C,T,PASS,159,2,157,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220663,NC_045512.2,28361,GGAGAACGCA,G,PASS,171,171,136,0.8,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220663,NC_045512.2,28881,GG,AA,PASS,9147,39,9102,1.0,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned 
+220663,NC_045512.2,28883,G,C,PASS,9138,1,9105,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220663,NC_045512.2,29301,A,G,PASS,1729,4,1725,1.0,N,missense_variant,c.1028A>G,p.Asp343Gly,p.D343G,ivar,Unassigned +220663,NC_045512.2,3760,T,A,PASS,316,215,97,0.31,orf1ab,stop_gained,c.3495T>A,p.Cys1165*,p.C1165*,ivar,Unassigned +220663,NC_045512.2,8393,G,A,PASS,223,0,223,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220664,NC_045512.2,10029,C,T,PASS,25,0,25,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.1.1 +220664,NC_045512.2,10449,C,A,PASS,112,0,112,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.1.1 +220664,NC_045512.2,11282,AGTTTGTCTG,A,PASS,302,300,269,0.89,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.1.1 +220664,NC_045512.2,11537,A,G,PASS,300,0,300,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.1.1 +220664,NC_045512.2,13195,T,C,PASS,5521,11,5501,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.1.1 +220664,NC_045512.2,14408,C,T,PASS,214,4,210,0.98,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.1.1 +220664,NC_045512.2,15336,T,C,PASS,11,5,6,0.55,orf1ab,missense_variant,c.15071T>C,p.Leu5024Ser,p.L5024S,ivar,BA.1.1.1 +220664,NC_045512.2,15359,G,A,PASS,13,6,7,0.54,orf1ab,missense_variant,c.15094G>A,p.Ala5032Thr,p.A5032T,ivar,BA.1.1.1 +220664,NC_045512.2,16064,A,G,PASS,251,1,250,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,BA.1.1.1 +220664,NC_045512.2,18163,A,G,PASS,24,0,24,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.1.1 +220664,NC_045512.2,21762,C,T,PASS,261,0,261,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.1.1 +220664,NC_045512.2,21764,ATACATG,A,PASS,266,261,232,0.87,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.1.1 
+220664,NC_045512.2,21846,C,T,PASS,297,0,297,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.1.1 +220664,NC_045512.2,22578,G,A,PASS,12,0,12,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.1.1 +220664,NC_045512.2,22599,G,A,PASS,13,0,13,1.0,S,missense_variant,c.1037G>A,p.Arg346Lys,p.R346K,ivar,BA.1.1.1 +220664,NC_045512.2,22673,TC,CT,PASS,11,0,11,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.1.1 +220664,NC_045512.2,22679,T,C,PASS,14,0,14,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.1.1 +220664,NC_045512.2,22686,C,T,PASS,12,0,12,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.1.1 +220664,NC_045512.2,22992,G,A,PASS,10,0,10,1.0,S,missense_variant,c.1430G>A,p.Ser477Asn,p.S477N,ivar,BA.1.1.1 +220664,NC_045512.2,23013,A,C,PASS,10,0,10,1.0,S,missense_variant,c.1451A>C,p.Glu484Ala,p.E484A,ivar,BA.1.1.1 +220664,NC_045512.2,23040,A,G,PASS,11,0,11,1.0,S,missense_variant,c.1478A>G,p.Gln493Arg,p.Q493R,ivar,BA.1.1.1 +220664,NC_045512.2,23048,G,A,PASS,15,0,15,1.0,S,missense_variant,c.1486G>A,p.Gly496Ser,p.G496S,ivar,BA.1.1.1 +220664,NC_045512.2,23055,A,G,PASS,15,0,15,1.0,S,missense_variant,c.1493A>G,p.Gln498Arg,p.Q498R,ivar,BA.1.1.1 +220664,NC_045512.2,23063,A,T,PASS,16,0,14,0.88,S,missense_variant,c.1501A>T,p.Asn501Tyr,p.N501Y,ivar,BA.1.1.1 +220664,NC_045512.2,23075,T,C,PASS,17,0,17,1.0,S,missense_variant,c.1513T>C,p.Tyr505His,p.Y505H,ivar,BA.1.1.1 +220664,NC_045512.2,23202,C,A,PASS,25,0,25,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.1.1 +220664,NC_045512.2,23403,A,G,PASS,2654,0,2654,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.1.1 +220664,NC_045512.2,23525,C,T,PASS,2542,8,2530,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.1.1 +220664,NC_045512.2,23599,T,G,PASS,936,0,935,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.1.1 
+220664,NC_045512.2,23604,C,A,PASS,896,0,889,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.1.1 +220664,NC_045512.2,23854,C,A,PASS,55,0,54,0.98,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.1.1 +220664,NC_045512.2,23948,G,T,PASS,895,0,891,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.1.1 +220664,NC_045512.2,24130,C,A,PASS,1211,0,1198,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.1.1 +220664,NC_045512.2,24424,A,T,PASS,268,0,268,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.1.1 +220664,NC_045512.2,24469,T,A,PASS,1053,0,1053,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.1.1 +220664,NC_045512.2,24503,C,T,PASS,1182,13,1169,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.1.1 +220664,NC_045512.2,25000,C,T,PASS,80,0,80,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.1.1 +220664,NC_045512.2,25584,C,T,PASS,152,0,152,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.1.1 +220664,NC_045512.2,26270,C,T,PASS,636,2,634,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.1.1 +220664,NC_045512.2,26530,A,G,PASS,177,0,177,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.1.1 +220664,NC_045512.2,26577,C,G,PASS,203,0,203,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.1.1 +220664,NC_045512.2,26709,G,A,PASS,205,0,205,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.1.1 +220664,NC_045512.2,27259,A,C,PASS,346,0,340,0.98,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.1.1 +220664,NC_045512.2,27807,C,T,PASS,64,0,64,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.1.1 +220664,NC_045512.2,28271,A,T,PASS,1831,9,1819,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.1.1 +220664,NC_045512.2,28311,C,T,PASS,1835,8,1827,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.1.1 
+220664,NC_045512.2,2832,A,G,PASS,167,1,166,0.99,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.1.1 +220664,NC_045512.2,28361,GGAGAACGCA,G,PASS,1291,1282,743,0.58,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.1.1 +220664,NC_045512.2,28881,GG,AA,PASS,2695,46,2646,0.98,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.1.1 +220664,NC_045512.2,28883,G,C,PASS,2663,6,2655,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.1.1 +220664,NC_045512.2,3037,C,T,PASS,50,0,50,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.1.1 +220664,NC_045512.2,5386,T,G,PASS,15,0,15,1.0,orf1ab,synonymous_variant,c.5121T>G,p.Ala1707Ala,p.A1707A,ivar,BA.1.1.1 +220664,NC_045512.2,8393,G,A,PASS,2721,8,2712,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.1.1 +220665,NC_045512.2,10449,C,A,PASS,14,0,14,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.17 +220665,NC_045512.2,11074,CT,C,ft,15,15,4,0.27,orf1ab,frameshift_variant,c.10817delT,p.Leu3606fs,p.L3606fs,ivar,BA.1.17 +220665,NC_045512.2,11186,T,C,PASS,26,18,8,0.31,orf1ab,synonymous_variant,c.10921T>C,p.Leu3641Leu,p.L3641L,ivar,BA.1.17 +220665,NC_045512.2,11282,AGTTTGTCTG,A,PASS,49,49,41,0.84,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.17 +220665,NC_045512.2,11537,A,G,PASS,41,0,41,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.17 +220665,NC_045512.2,13195,T,C,PASS,2411,5,2405,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.17 +220665,NC_045512.2,14408,C,T,PASS,27,0,27,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.17 +220665,NC_045512.2,19162,G,T,PASS,872,0,868,1.0,orf1ab,synonymous_variant,c.18897G>T,p.Leu6299Leu,p.L6299L,ivar,BA.1.17 +220665,NC_045512.2,21762,C,T,PASS,52,0,52,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.17 
+220665,NC_045512.2,21764,ATACATG,A,PASS,52,52,47,0.9,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.17 +220665,NC_045512.2,21846,C,T,PASS,44,0,44,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.17 +220665,NC_045512.2,23202,C,A,PASS,16,0,16,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.17 +220665,NC_045512.2,23235,A,G,PASS,19,12,7,0.37,S,missense_variant,c.1673A>G,p.Lys558Arg,p.K558R,ivar,BA.1.17 +220665,NC_045512.2,23403,A,G,PASS,1508,0,1508,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.17 +220665,NC_045512.2,23525,C,T,PASS,1423,6,1417,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.17 +220665,NC_045512.2,23599,T,G,PASS,648,0,648,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.17 +220665,NC_045512.2,23604,C,A,PASS,624,0,621,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.17 +220665,NC_045512.2,23948,G,T,PASS,71,0,71,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.17 +220665,NC_045512.2,24130,C,A,PASS,140,0,140,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.17 +220665,NC_045512.2,24424,A,T,PASS,37,0,37,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.17 +220665,NC_045512.2,24469,T,A,PASS,384,0,383,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.17 +220665,NC_045512.2,24503,C,T,PASS,449,11,438,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.17 +220665,NC_045512.2,25000,C,T,PASS,17,0,17,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.17 +220665,NC_045512.2,25584,C,T,PASS,35,0,35,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.17 +220665,NC_045512.2,26270,C,T,PASS,646,0,646,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.17 +220665,NC_045512.2,26530,A,G,PASS,242,0,242,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.17 
+220665,NC_045512.2,26577,C,G,PASS,271,0,269,0.99,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.17 +220665,NC_045512.2,26709,G,A,PASS,262,1,261,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.17 +220665,NC_045512.2,27259,A,C,PASS,149,0,149,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.17 +220665,NC_045512.2,27670,G,T,PASS,32,0,32,1.0,ORF7a,missense_variant,c.277G>T,p.Val93Phe,p.V93F,ivar,BA.1.17 +220665,NC_045512.2,27676,G,C,PASS,31,23,8,0.26,ORF7a,missense_variant,c.283G>C,p.Glu95Gln,p.E95Q,ivar,BA.1.17 +220665,NC_045512.2,27807,C,T,PASS,55,0,55,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.17 +220665,NC_045512.2,28271,A,T,PASS,837,0,837,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.17 +220665,NC_045512.2,28311,C,T,PASS,921,0,919,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.17 +220665,NC_045512.2,2832,A,G,PASS,88,0,88,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.17 +220665,NC_045512.2,28361,GGAGAACGCA,G,PASS,702,694,428,0.61,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.17 +220665,NC_045512.2,28881,GG,AA,PASS,8854,85,8762,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.17 +220665,NC_045512.2,28883,G,C,PASS,8819,2,8808,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.17 +220665,NC_045512.2,3037,C,T,PASS,11,0,11,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.17 +220665,NC_045512.2,4087,T,C,ft,15,11,4,0.27,orf1ab,synonymous_variant,c.3822T>C,p.Ile1274Ile,p.I1274I,ivar,BA.1.17 +220665,NC_045512.2,5672,C,T,PASS,49,0,49,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,BA.1.17 +220665,NC_045512.2,5924,G,A,PASS,20,0,20,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,BA.1.17 +220665,NC_045512.2,76,T,A,PASS,17,5,12,0.71,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,BA.1.17 
+220665,NC_045512.2,78,T,G,PASS,17,5,12,0.71,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,BA.1.17 +220665,NC_045512.2,8393,G,A,PASS,2963,3,2957,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.17 +220666,NC_045512.2,11267,GAT,G,PASS,28,24,10,0.36,orf1ab,frameshift_variant,c.11005_11006delAT,p.Met3669fs,p.M3669fs,ivar,B.1.617.2 +220666,NC_045512.2,11332,A,G,PASS,31,0,31,1.0,orf1ab,synonymous_variant,c.11067A>G,p.Val3689Val,p.V3689V,ivar,B.1.617.2 +220666,NC_045512.2,11387,G,A,ft,14,10,4,0.29,orf1ab,missense_variant,c.11122G>A,p.Val3708Met,p.V3708M,ivar,B.1.617.2 +220666,NC_045512.2,13432,C,T,PASS,486,0,486,1.0,orf1ab,synonymous_variant,c.13167C>T,p.Pro4389Pro,p.P4389P,ivar,B.1.617.2 +220666,NC_045512.2,21456,A,G,ft,10,7,3,0.3,orf1ab,missense_variant,c.21191A>G,p.Lys7064Arg,p.K7064R,ivar,B.1.617.2 +220666,NC_045512.2,21618,C,G,PASS,18,0,18,1.0,S,missense_variant,c.56C>G,p.Thr19Arg,p.T19R,ivar,B.1.617.2 +220666,NC_045512.2,23403,A,G,PASS,319,0,319,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,B.1.617.2 +220666,NC_045512.2,23604,C,G,PASS,103,0,103,1.0,S,missense_variant,c.2042C>G,p.Pro681Arg,p.P681R,ivar,B.1.617.2 +220666,NC_045512.2,26767,T,C,PASS,24,0,24,1.0,M,missense_variant,c.245T>C,p.Ile82Thr,p.I82T,ivar,B.1.617.2 +220666,NC_045512.2,27874,C,T,PASS,13,0,13,1.0,ORF7b,missense_variant,c.119C>T,p.Thr40Ile,p.T40I,ivar,B.1.617.2 +220666,NC_045512.2,27916,G,A,PASS,12,0,12,1.0,ORF8,missense_variant,c.23G>A,p.Gly8Glu,p.G8E,ivar,B.1.617.2 +220666,NC_045512.2,27943,A,G,PASS,10,3,7,0.7,ORF8,missense_variant,c.50A>G,p.His17Arg,p.H17R,ivar,B.1.617.2 +220666,NC_045512.2,28055,A,G,PASS,15,8,7,0.47,ORF8,synonymous_variant,c.162A>G,p.Ser54Ser,p.S54S,ivar,B.1.617.2 +220666,NC_045512.2,28089,GGTTCTA,G,PASS,12,12,12,1.0,ORF8,disruptive_inframe_deletion,c.197_202delGTTCTA,p.Gly66_Lys68delinsGlu,p.G66_K68delinsE,ivar,B.1.617.2 +220666,NC_045512.2,28179,G,T,PASS,18,0,18,1.0,ORF8,missense_variant,c.286G>T,p.Gly96Cys,p.G96C,ivar,B.1.617.2 
+220666,NC_045512.2,28247,AGATTTC,A,PASS,90,90,86,0.96,ORF8,conservative_inframe_deletion,c.355_360delGATTTC,p.Asp119_Phe120del,p.D119_F120del,ivar,B.1.617.2 +220666,NC_045512.2,28270,TA,T,PASS,118,118,105,0.89,N,upstream_gene_variant,c.-3delA,.,.,ivar,B.1.617.2 +220666,NC_045512.2,28367,C,T,PASS,96,0,96,1.0,N,missense_variant,c.94C>T,p.Arg32Cys,p.R32C,ivar,B.1.617.2 +220666,NC_045512.2,28378,G,T,PASS,78,0,78,1.0,N,synonymous_variant,c.105G>T,p.Ala35Ala,p.A35A,ivar,B.1.617.2 +220666,NC_045512.2,28461,A,G,PASS,16,0,16,1.0,N,missense_variant,c.188A>G,p.Asp63Gly,p.D63G,ivar,B.1.617.2 +220666,NC_045512.2,28881,G,T,PASS,5069,16,5029,0.99,N,missense_variant,c.608G>T,p.Arg203Met,p.R203M,ivar,B.1.617.2 +220666,NC_045512.2,28916,G,T,PASS,56,0,56,1.0,N,missense_variant,c.643G>T,p.Gly215Cys,p.G215C,ivar,B.1.617.2 +220666,NC_045512.2,29402,G,T,PASS,1865,0,1860,1.0,N,missense_variant,c.1129G>T,p.Asp377Tyr,p.D377Y,ivar,B.1.617.2 +220666,NC_045512.2,3903,C,T,PASS,82,0,82,1.0,orf1ab,missense_variant,c.3638C>T,p.Pro1213Leu,p.P1213L,ivar,B.1.617.2 +220666,NC_045512.2,3987,C,T,PASS,90,0,90,1.0,orf1ab,missense_variant,c.3722C>T,p.Thr1241Ile,p.T1241I,ivar,B.1.617.2 +220667,NC_045512.2,10029,C,T,PASS,30,0,30,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.17 +220667,NC_045512.2,10449,C,A,PASS,76,0,74,0.97,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.17 +220667,NC_045512.2,11282,AGTTTGTCTG,A,PASS,206,202,182,0.88,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.17 +220667,NC_045512.2,11537,A,G,PASS,96,0,96,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.17 +220667,NC_045512.2,13195,T,C,PASS,3626,4,3622,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.17 +220667,NC_045512.2,14408,C,T,PASS,117,0,117,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.17 
+220667,NC_045512.2,18163,A,G,PASS,13,0,13,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.17 +220667,NC_045512.2,21762,C,T,PASS,157,2,155,0.99,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.17 +220667,NC_045512.2,21764,ATACATG,A,PASS,159,157,139,0.87,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.17 +220667,NC_045512.2,21846,C,T,PASS,153,1,152,0.99,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.17 +220667,NC_045512.2,21,C,T,PASS,10,0,10,1.0,orf1ab,upstream_gene_variant,c.-245C>T,.,.,ivar,BA.1.17 +220667,NC_045512.2,22578,G,A,PASS,18,0,18,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.17 +220667,NC_045512.2,22673,TC,CT,PASS,10,0,10,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.17 +220667,NC_045512.2,22679,T,C,PASS,20,0,20,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.17 +220667,NC_045512.2,22686,C,T,PASS,20,0,20,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.17 +220667,NC_045512.2,23202,C,A,PASS,16,0,16,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.17 +220667,NC_045512.2,23403,A,G,PASS,3381,6,3375,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.17 +220667,NC_045512.2,23525,C,T,PASS,3427,9,3416,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.17 +220667,NC_045512.2,23599,T,G,PASS,1504,0,1502,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.17 +220667,NC_045512.2,23604,C,A,PASS,1436,0,1431,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.17 +220667,NC_045512.2,23854,C,A,PASS,19,0,19,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.17 +220667,NC_045512.2,23948,G,T,PASS,846,0,845,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.17 +220667,NC_045512.2,24130,C,A,PASS,1170,0,1164,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.17 
+220667,NC_045512.2,24424,A,T,PASS,128,2,124,0.97,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.17 +220667,NC_045512.2,24469,T,A,PASS,798,2,795,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.17 +220667,NC_045512.2,24503,C,T,PASS,930,23,905,0.97,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.17 +220667,NC_045512.2,25000,C,T,PASS,67,0,67,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.17 +220667,NC_045512.2,25584,C,T,PASS,100,2,98,0.98,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.17 +220667,NC_045512.2,26270,C,T,PASS,1358,2,1355,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.17 +220667,NC_045512.2,26361,G,A,ft,11,8,3,0.27,E,synonymous_variant,c.117G>A,p.Leu39Leu,p.L39L,ivar,BA.1.17 +220667,NC_045512.2,26530,A,G,PASS,438,0,436,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.17 +220667,NC_045512.2,26577,C,G,PASS,513,0,513,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.17 +220667,NC_045512.2,26709,G,A,PASS,473,1,472,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.17 +220667,NC_045512.2,27259,A,C,PASS,618,0,618,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.17 +220667,NC_045512.2,27384,T,C,PASS,764,0,764,1.0,ORF6,synonymous_variant,c.183T>C,p.Asp61Asp,p.D61D,ivar,BA.1.17 +220667,NC_045512.2,27807,C,T,PASS,112,0,112,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.17 +220667,NC_045512.2,28271,A,T,PASS,1976,8,1963,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.17 +220667,NC_045512.2,28311,C,T,PASS,2173,5,2164,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.17 +220667,NC_045512.2,2832,A,G,PASS,80,0,80,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.17 +220667,NC_045512.2,28361,GGAGAACGCA,G,PASS,1572,1560,679,0.43,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.17 
+220667,NC_045512.2,28881,GG,AA,PASS,3593,32,3558,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.17 +220667,NC_045512.2,28883,G,C,PASS,3570,4,3559,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.17 +220667,NC_045512.2,3037,C,T,PASS,19,0,19,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.17 +220667,NC_045512.2,3040,C,T,PASS,19,0,19,1.0,orf1ab,synonymous_variant,c.2775C>T,p.Tyr925Tyr,p.Y925Y,ivar,BA.1.17 +220667,NC_045512.2,5672,C,T,PASS,165,0,165,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,BA.1.17 +220667,NC_045512.2,5924,G,A,PASS,77,0,77,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,BA.1.17 +220667,NC_045512.2,76,T,A,PASS,45,9,36,0.8,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,BA.1.17 +220667,NC_045512.2,78,T,G,PASS,45,9,36,0.8,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,BA.1.17 +220667,NC_045512.2,7984,T,C,PASS,627,0,627,1.0,orf1ab,synonymous_variant,c.7719T>C,p.Asp2573Asp,p.D2573D,ivar,BA.1.17 +220667,NC_045512.2,8393,G,A,PASS,4302,15,4285,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.17 +220668,NC_045512.2,10449,C,A,PASS,11,0,11,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220668,NC_045512.2,11282,AGTTTGTCTG,A,PASS,33,33,29,0.88,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220668,NC_045512.2,13195,T,C,PASS,418,0,418,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220668,NC_045512.2,206,C,A,PASS,20,13,7,0.35,orf1ab,upstream_gene_variant,c.-60C>A,.,.,ivar,Unassigned +220668,NC_045512.2,21762,C,T,PASS,11,0,11,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220668,NC_045512.2,21764,ATACATG,A,ft,11,11,7,0.64,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned 
+220668,NC_045512.2,21846,C,T,PASS,16,0,16,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220668,NC_045512.2,23403,A,G,PASS,734,0,734,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220668,NC_045512.2,23525,C,T,PASS,748,0,748,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220668,NC_045512.2,23599,T,G,PASS,379,0,377,0.99,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220668,NC_045512.2,23604,C,A,PASS,358,0,358,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220668,NC_045512.2,23948,G,T,PASS,25,0,25,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220668,NC_045512.2,24130,C,A,PASS,19,0,19,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220668,NC_045512.2,24424,A,T,PASS,30,0,30,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220668,NC_045512.2,24469,T,A,PASS,94,0,94,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220668,NC_045512.2,24503,C,T,PASS,95,0,95,1.0,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220668,NC_045512.2,25000,C,T,PASS,17,0,17,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220668,NC_045512.2,25584,C,T,PASS,28,0,28,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220668,NC_045512.2,26270,C,T,PASS,475,2,473,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220668,NC_045512.2,26530,A,G,PASS,90,0,90,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220668,NC_045512.2,26577,C,G,PASS,87,0,87,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220668,NC_045512.2,26709,G,A,PASS,96,0,96,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220668,NC_045512.2,27259,A,C,PASS,68,0,68,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned 
+220668,NC_045512.2,27384,T,C,PASS,82,0,82,1.0,ORF6,synonymous_variant,c.183T>C,p.Asp61Asp,p.D61D,ivar,Unassigned +220668,NC_045512.2,27807,C,T,PASS,11,0,11,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220668,NC_045512.2,28271,A,T,PASS,260,2,258,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220668,NC_045512.2,28311,C,T,PASS,297,0,297,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220668,NC_045512.2,2832,A,G,PASS,10,0,10,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220668,NC_045512.2,28361,GGAGAACGCA,G,PASS,176,176,105,0.6,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220668,NC_045512.2,28881,GG,AA,PASS,8461,46,8410,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220668,NC_045512.2,28883,G,C,PASS,8448,1,8431,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220668,NC_045512.2,5672,C,T,PASS,10,0,10,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned +220668,NC_045512.2,5924,G,A,PASS,10,0,10,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,Unassigned +220668,NC_045512.2,76,T,A,PASS,18,6,12,0.67,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220668,NC_045512.2,78,T,G,PASS,18,6,12,0.67,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,Unassigned +220668,NC_045512.2,7984,T,C,PASS,85,0,85,1.0,orf1ab,synonymous_variant,c.7719T>C,p.Asp2573Asp,p.D2573D,ivar,Unassigned +220668,NC_045512.2,8393,G,A,PASS,1330,2,1325,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220669,NC_045512.2,10449,C,A,PASS,19,0,19,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220669,NC_045512.2,11282,AGTTTGTCTG,A,PASS,35,35,33,0.94,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned 
+220669,NC_045512.2,11537,A,G,PASS,38,0,38,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220669,NC_045512.2,13195,T,C,PASS,1499,4,1495,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220669,NC_045512.2,14408,C,T,PASS,27,0,27,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220669,NC_045512.2,14466,T,C,PASS,28,20,8,0.29,orf1ab,missense_variant,c.14201T>C,p.Phe4734Ser,p.F4734S,ivar,Unassigned +220669,NC_045512.2,14533,A,G,ft,16,12,4,0.25,orf1ab,synonymous_variant,c.14268A>G,p.Leu4756Leu,p.L4756L,ivar,Unassigned +220669,NC_045512.2,15572,A,G,PASS,2032,731,1301,0.64,orf1ab,missense_variant,c.15307A>G,p.Met5103Val,p.M5103V,ivar,Unassigned +220669,NC_045512.2,21762,C,T,PASS,37,0,37,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220669,NC_045512.2,21764,ATACATG,A,PASS,38,37,29,0.76,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220669,NC_045512.2,21846,C,T,PASS,45,0,45,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220669,NC_045512.2,22578,G,A,PASS,12,0,12,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,Unassigned +220669,NC_045512.2,23403,A,G,PASS,1002,0,1002,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220669,NC_045512.2,23525,C,T,PASS,1030,4,1026,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220669,NC_045512.2,23599,T,G,PASS,490,0,488,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220669,NC_045512.2,23604,C,A,PASS,473,0,471,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220669,NC_045512.2,23948,G,T,PASS,113,0,113,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220669,NC_045512.2,24130,C,A,PASS,208,0,204,0.98,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned 
+220669,NC_045512.2,24424,A,T,PASS,42,0,40,0.95,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220669,NC_045512.2,24469,T,A,PASS,239,0,239,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220669,NC_045512.2,24503,C,T,PASS,281,6,275,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220669,NC_045512.2,25000,C,T,PASS,27,0,27,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220669,NC_045512.2,25584,C,T,PASS,26,0,26,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220669,NC_045512.2,26270,C,T,PASS,1060,2,1056,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220669,NC_045512.2,26530,A,G,PASS,516,0,516,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220669,NC_045512.2,26577,C,G,PASS,508,1,505,0.99,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220669,NC_045512.2,26709,G,A,PASS,474,1,472,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220669,NC_045512.2,27259,A,C,PASS,247,0,247,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220669,NC_045512.2,27807,C,T,PASS,93,0,92,0.99,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220669,NC_045512.2,28271,A,T,PASS,1304,1,1302,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220669,NC_045512.2,28311,C,T,PASS,1395,0,1393,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220669,NC_045512.2,2832,A,G,PASS,11,0,11,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220669,NC_045512.2,28361,GGAGAACGCA,G,PASS,1092,1088,734,0.67,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220669,NC_045512.2,28363,A,T,PASS,16,10,6,0.38,N,synonymous_variant,c.90A>T,p.Gly30Gly,p.G30G,ivar,Unassigned +220669,NC_045512.2,28881,GG,AA,PASS,10404,75,10319,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned 
+220669,NC_045512.2,28883,G,C,PASS,10380,2,10364,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220669,NC_045512.2,5672,C,T,PASS,37,2,35,0.95,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned +220669,NC_045512.2,5924,G,A,PASS,19,0,19,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,Unassigned +220669,NC_045512.2,76,T,A,PASS,44,7,37,0.84,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220669,NC_045512.2,8393,G,A,PASS,2083,5,2078,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220670,NC_045512.2,10449,C,A,PASS,39,0,38,0.97,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.17 +220670,NC_045512.2,11282,AGTTTGTCTG,A,PASS,101,101,83,0.82,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.17 +220670,NC_045512.2,11537,A,G,PASS,81,0,81,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.17 +220670,NC_045512.2,13195,T,C,PASS,4671,5,4664,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.17 +220670,NC_045512.2,14408,C,T,PASS,72,0,72,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.17 +220670,NC_045512.2,21762,C,T,PASS,121,0,121,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.17 +220670,NC_045512.2,21764,ATACATG,A,PASS,121,121,114,0.94,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.17 +220670,NC_045512.2,21846,C,T,PASS,117,0,117,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.17 +220670,NC_045512.2,22673,TC,CT,PASS,11,0,11,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.17 +220670,NC_045512.2,22679,T,C,PASS,11,0,11,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.17 +220670,NC_045512.2,22686,C,T,PASS,11,0,11,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.17 
+220670,NC_045512.2,23202,C,A,PASS,14,0,14,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.17 +220670,NC_045512.2,23403,A,G,PASS,2094,0,2093,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.17 +220670,NC_045512.2,23525,C,T,PASS,2060,6,2053,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.17 +220670,NC_045512.2,23599,T,G,PASS,847,0,845,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.17 +220670,NC_045512.2,23604,C,A,PASS,817,0,815,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.17 +220670,NC_045512.2,23854,C,A,PASS,25,0,25,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.17 +220670,NC_045512.2,23948,G,T,PASS,328,0,327,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.17 +220670,NC_045512.2,24130,C,A,PASS,478,0,477,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.17 +220670,NC_045512.2,24424,A,T,PASS,93,0,93,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.17 +220670,NC_045512.2,24469,T,A,PASS,520,0,518,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.17 +220670,NC_045512.2,24503,C,T,PASS,628,4,624,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.17 +220670,NC_045512.2,25000,C,T,PASS,33,0,33,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.17 +220670,NC_045512.2,25584,C,T,PASS,58,0,58,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.17 +220670,NC_045512.2,26270,C,T,PASS,372,2,370,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.17 +220670,NC_045512.2,26530,A,G,PASS,296,0,296,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.17 +220670,NC_045512.2,26577,C,G,PASS,354,0,354,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.17 +220670,NC_045512.2,26709,G,A,PASS,361,0,361,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.17 +220670,NC_045512.2,27259,A,C,PASS,268,0,268,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.17 
+220670,NC_045512.2,27632,G,A,PASS,42,28,14,0.33,ORF7a,missense_variant,c.239G>A,p.Arg80Lys,p.R80K,ivar,BA.1.17 +220670,NC_045512.2,27807,C,T,PASS,65,0,65,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.17 +220670,NC_045512.2,28253,CA,C,PASS,482,480,479,0.99,ORF8,frameshift_variant,c.361delA,p.Ile121fs,p.I121fs,ivar,BA.1.17 +220670,NC_045512.2,28271,A,T,PASS,569,0,566,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.17 +220670,NC_045512.2,28311,C,T,PASS,572,4,568,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.17 +220670,NC_045512.2,2832,A,G,PASS,78,0,78,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.17 +220670,NC_045512.2,28361,GGAGAACGCA,G,PASS,386,382,243,0.63,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.17 +220670,NC_045512.2,28881,GG,AA,PASS,7323,72,7246,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.17 +220670,NC_045512.2,28883,G,C,PASS,7288,2,7274,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.17 +220670,NC_045512.2,29399,G,T,PASS,4164,2,4135,0.99,N,missense_variant,c.1126G>T,p.Ala376Ser,p.A376S,ivar,BA.1.17 +220670,NC_045512.2,3037,C,T,PASS,14,0,14,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.17 +220670,NC_045512.2,5672,C,T,PASS,57,2,55,0.96,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,BA.1.17 +220670,NC_045512.2,5924,G,A,PASS,19,0,19,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,BA.1.17 +220670,NC_045512.2,8393,G,A,PASS,3256,13,3242,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.17 +220670,NC_045512.2,8652,T,C,PASS,2794,8,2786,1.0,orf1ab,missense_variant,c.8387T>C,p.Met2796Thr,p.M2796T,ivar,BA.1.17 +220671,NC_045512.2,10029,C,T,PASS,28,0,28,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,BA.1.17 +220671,NC_045512.2,10449,C,A,PASS,72,0,72,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.17 
+220671,NC_045512.2,11282,AGTTTGTCTG,A,PASS,205,204,183,0.89,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.17 +220671,NC_045512.2,11537,A,G,PASS,98,0,98,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.17 +220671,NC_045512.2,12008,C,T,PASS,232,1,230,0.99,orf1ab,missense_variant,c.11743C>T,p.Leu3915Phe,p.L3915F,ivar,BA.1.17 +220671,NC_045512.2,13195,T,C,PASS,3794,10,3783,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.17 +220671,NC_045512.2,14408,C,T,PASS,62,1,61,0.98,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.17 +220671,NC_045512.2,18163,A,G,PASS,15,0,15,1.0,orf1ab,synonymous_variant,c.17898A>G,p.Thr5966Thr,p.T5966T,ivar,BA.1.17 +220671,NC_045512.2,21762,C,T,PASS,95,0,95,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.17 +220671,NC_045512.2,21764,ATACATG,A,PASS,97,95,87,0.9,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.17 +220671,NC_045512.2,21846,C,T,PASS,108,0,108,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.17 +220671,NC_045512.2,22578,G,A,PASS,23,0,23,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.17 +220671,NC_045512.2,22673,TC,CT,PASS,12,0,12,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.17 +220671,NC_045512.2,22679,T,C,PASS,20,0,20,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.17 +220671,NC_045512.2,22686,C,T,PASS,20,0,20,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.17 +220671,NC_045512.2,23202,C,A,PASS,22,0,20,0.91,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.17 +220671,NC_045512.2,23403,A,G,PASS,2394,0,2394,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.17 +220671,NC_045512.2,23525,C,T,PASS,2316,2,2309,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.17 
+220671,NC_045512.2,23599,T,G,PASS,848,0,848,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.17 +220671,NC_045512.2,23604,C,A,PASS,798,0,798,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.17 +220671,NC_045512.2,23854,C,A,PASS,59,0,59,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.17 +220671,NC_045512.2,23948,G,T,PASS,554,0,553,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.17 +220671,NC_045512.2,24130,C,A,PASS,893,0,890,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.17 +220671,NC_045512.2,24424,A,T,PASS,239,0,239,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.17 +220671,NC_045512.2,24469,T,A,PASS,691,0,687,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.17 +220671,NC_045512.2,24503,C,T,PASS,756,12,744,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.17 +220671,NC_045512.2,25000,C,T,PASS,56,0,56,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.17 +220671,NC_045512.2,25584,C,T,PASS,115,1,113,0.98,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.17 +220671,NC_045512.2,26270,C,T,PASS,1039,7,1032,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.17 +220671,NC_045512.2,26530,A,G,PASS,611,1,610,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.17 +220671,NC_045512.2,26577,C,G,PASS,638,0,637,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.17 +220671,NC_045512.2,26709,G,A,PASS,596,5,588,0.99,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.17 +220671,NC_045512.2,27259,A,C,PASS,630,0,630,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.17 +220671,NC_045512.2,27670,G,T,PASS,211,0,211,1.0,ORF7a,missense_variant,c.277G>T,p.Val93Phe,p.V93F,ivar,BA.1.17 +220671,NC_045512.2,27807,C,T,PASS,253,4,248,0.98,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.17 +220671,NC_045512.2,28271,A,T,PASS,1608,3,1605,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.17 
+220671,NC_045512.2,28311,C,T,PASS,1682,3,1673,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.17 +220671,NC_045512.2,2832,A,G,PASS,92,0,92,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.17 +220671,NC_045512.2,28361,GGAGAACGCA,G,PASS,1336,1327,801,0.6,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.17 +220671,NC_045512.2,28881,GG,AA,PASS,2721,26,2694,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.17 +220671,NC_045512.2,28883,G,C,PASS,2714,2,2706,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.17 +220671,NC_045512.2,3037,C,T,PASS,31,1,30,0.97,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.17 +220671,NC_045512.2,5672,C,T,PASS,183,0,183,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,BA.1.17 +220671,NC_045512.2,5924,G,A,PASS,61,0,61,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,BA.1.17 +220671,NC_045512.2,76,T,A,PASS,44,27,17,0.39,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,BA.1.17 +220671,NC_045512.2,78,T,G,PASS,44,27,17,0.39,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,BA.1.17 +220671,NC_045512.2,8393,G,A,PASS,3239,22,3215,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.17 +220672,NC_045512.2,10029,C,T,PASS,15,0,15,1.0,orf1ab,missense_variant,c.9764C>T,p.Thr3255Ile,p.T3255I,ivar,Unassigned +220672,NC_045512.2,11282,AGTTTGTCTG,A,PASS,48,48,44,0.92,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220672,NC_045512.2,11537,A,G,PASS,64,0,64,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220672,NC_045512.2,13195,T,C,PASS,1572,4,1568,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220672,NC_045512.2,14408,C,T,PASS,61,0,61,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned 
+220672,NC_045512.2,20776,A,G,PASS,29,0,29,1.0,orf1ab,synonymous_variant,c.20511A>G,p.Ala6837Ala,p.A6837A,ivar,Unassigned +220672,NC_045512.2,21762,C,T,PASS,41,0,41,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220672,NC_045512.2,21764,ATACATG,A,PASS,41,41,37,0.9,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220672,NC_045512.2,21846,C,T,PASS,53,0,53,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220672,NC_045512.2,21,C,T,PASS,10,0,10,1.0,orf1ab,upstream_gene_variant,c.-245C>T,.,.,ivar,Unassigned +220672,NC_045512.2,23323,T,C,PASS,173,2,171,0.99,S,synonymous_variant,c.1761T>C,p.Ile587Ile,p.I587I,ivar,Unassigned +220672,NC_045512.2,23403,A,G,PASS,197,0,197,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220672,NC_045512.2,23525,C,T,PASS,191,0,191,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220672,NC_045512.2,23599,T,G,PASS,81,0,81,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220672,NC_045512.2,23604,C,A,PASS,79,0,79,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220672,NC_045512.2,23948,G,T,PASS,46,0,46,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220672,NC_045512.2,24130,C,A,PASS,60,0,60,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220672,NC_045512.2,24424,A,T,PASS,82,0,82,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220672,NC_045512.2,24469,T,A,PASS,575,2,571,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220672,NC_045512.2,24503,C,T,PASS,689,5,684,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220672,NC_045512.2,25000,C,T,PASS,31,0,31,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220672,NC_045512.2,25584,C,T,PASS,27,2,25,0.93,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned 
+220672,NC_045512.2,26270,C,T,PASS,1197,0,1197,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220672,NC_045512.2,26530,A,G,PASS,248,0,248,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220672,NC_045512.2,26577,C,G,PASS,225,0,225,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220672,NC_045512.2,26709,G,A,PASS,228,0,228,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220672,NC_045512.2,27259,A,C,PASS,103,0,103,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220672,NC_045512.2,27807,C,T,PASS,18,0,18,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220672,NC_045512.2,28271,A,T,PASS,1324,4,1318,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220672,NC_045512.2,28311,C,T,PASS,1356,0,1352,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220672,NC_045512.2,2832,A,G,PASS,19,0,19,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220672,NC_045512.2,28361,GGAGAACGCA,G,PASS,932,929,481,0.52,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220672,NC_045512.2,28881,GG,AA,PASS,9896,69,9823,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220672,NC_045512.2,28883,G,C,PASS,9878,3,9869,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220672,NC_045512.2,3800,G,A,PASS,80,2,78,0.98,orf1ab,missense_variant,c.3535G>A,p.Asp1179Asn,p.D1179N,ivar,Unassigned +220672,NC_045512.2,514,TGTTATG,T,PASS,11985,11766,9796,0.82,orf1ab,conservative_inframe_deletion,c.253_258delATGGTT,p.Met85_Val86del,p.M85_V86del,ivar,Unassigned +220672,NC_045512.2,520,G,C,PASS,87,44,43,0.49,orf1ab,missense_variant,c.255G>C,p.Met85Ile,p.M85I,ivar,Unassigned +220672,NC_045512.2,76,T,A,PASS,11,3,8,0.73,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned 
+220672,NC_045512.2,78,T,G,PASS,11,3,8,0.73,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,Unassigned +220672,NC_045512.2,8393,G,A,PASS,544,2,541,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220675,NC_045512.2,11537,A,G,PASS,17,0,17,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220675,NC_045512.2,13195,T,C,PASS,553,0,553,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220675,NC_045512.2,13908,T,C,ft,10,6,4,0.4,orf1ab,missense_variant,c.13643T>C,p.Ile4548Thr,p.I4548T,ivar,Unassigned +220675,NC_045512.2,14408,C,T,PASS,13,0,13,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220675,NC_045512.2,21762,C,T,PASS,16,0,16,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220675,NC_045512.2,21764,ATACATG,A,PASS,16,16,16,1.0,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220675,NC_045512.2,21846,C,T,PASS,30,0,30,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220675,NC_045512.2,23403,A,G,PASS,214,0,214,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220675,NC_045512.2,23525,C,T,PASS,221,3,218,0.99,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220675,NC_045512.2,23599,T,G,PASS,105,0,105,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220675,NC_045512.2,23604,C,A,PASS,103,0,103,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220675,NC_045512.2,23948,G,T,PASS,25,0,25,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220675,NC_045512.2,24130,C,A,PASS,50,0,50,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220675,NC_045512.2,24424,A,T,PASS,24,0,24,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220675,NC_045512.2,24469,T,A,PASS,131,1,130,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned 
+220675,NC_045512.2,24503,C,T,PASS,140,7,133,0.95,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220675,NC_045512.2,25000,C,T,PASS,12,0,12,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220675,NC_045512.2,25057,T,C,ft,13,8,5,0.38,S,synonymous_variant,c.3495T>C,p.Asp1165Asp,p.D1165D,ivar,Unassigned +220675,NC_045512.2,25584,C,T,PASS,24,2,22,0.92,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220675,NC_045512.2,26270,C,T,PASS,221,0,221,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220675,NC_045512.2,26530,A,G,PASS,72,0,72,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220675,NC_045512.2,26577,C,G,PASS,67,0,67,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220675,NC_045512.2,26709,G,A,PASS,75,2,73,0.97,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220675,NC_045512.2,27259,A,C,PASS,48,0,48,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220675,NC_045512.2,28271,A,T,PASS,187,0,187,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220675,NC_045512.2,28311,C,T,PASS,213,2,211,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220675,NC_045512.2,28361,GGAGAACGCA,G,PASS,185,185,129,0.7,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220675,NC_045512.2,28881,GG,AA,PASS,12043,60,11972,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220675,NC_045512.2,28883,G,C,PASS,12008,2,11991,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220675,NC_045512.2,5672,C,T,PASS,40,2,38,0.95,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned +220675,NC_045512.2,8393,G,A,PASS,307,0,307,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220675,NC_045512.2,8652,T,C,PASS,198,0,198,1.0,orf1ab,missense_variant,c.8387T>C,p.Met2796Thr,p.M2796T,ivar,Unassigned 
+220677,NC_045512.2,10449,C,A,PASS,19,0,19,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220677,NC_045512.2,10947,G,A,ft,10,7,3,0.3,orf1ab,missense_variant,c.10682G>A,p.Arg3561Lys,p.R3561K,ivar,Unassigned +220677,NC_045512.2,11282,AGTTTGTCTG,A,PASS,27,27,25,0.93,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220677,NC_045512.2,11537,A,G,PASS,43,0,43,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220677,NC_045512.2,12053,C,T,PASS,154,0,154,1.0,orf1ab,missense_variant,c.11788C>T,p.Leu3930Phe,p.L3930F,ivar,Unassigned +220677,NC_045512.2,13195,T,C,PASS,2302,4,2298,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220677,NC_045512.2,14408,C,T,PASS,27,0,27,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220677,NC_045512.2,21762,C,T,PASS,44,0,44,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220677,NC_045512.2,21764,ATACATG,A,PASS,44,44,39,0.89,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220677,NC_045512.2,21846,C,T,PASS,43,0,43,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220677,NC_045512.2,23403,A,G,PASS,1670,8,1662,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220677,NC_045512.2,23525,C,T,PASS,1615,3,1609,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220677,NC_045512.2,23599,T,G,PASS,575,0,575,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220677,NC_045512.2,23604,C,A,PASS,558,0,551,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220677,NC_045512.2,23948,G,T,PASS,147,0,147,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220677,NC_045512.2,24130,C,A,PASS,220,0,218,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned 
+220677,NC_045512.2,24424,A,T,PASS,24,0,24,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220677,NC_045512.2,24469,T,A,PASS,305,0,302,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220677,NC_045512.2,24503,C,T,PASS,348,7,341,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220677,NC_045512.2,25000,C,T,PASS,16,0,16,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220677,NC_045512.2,25032,A,G,ft,15,11,4,0.27,S,missense_variant,c.3470A>G,p.Lys1157Arg,p.K1157R,ivar,Unassigned +220677,NC_045512.2,25584,C,T,PASS,26,0,26,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220677,NC_045512.2,26270,C,T,PASS,328,2,326,0.99,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220677,NC_045512.2,26530,A,G,PASS,216,0,216,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220677,NC_045512.2,26577,C,G,PASS,231,0,230,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220677,NC_045512.2,26612,A,G,PASS,182,0,182,1.0,M,synonymous_variant,c.90A>G,p.Thr30Thr,p.T30T,ivar,Unassigned +220677,NC_045512.2,26709,G,A,PASS,223,0,223,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220677,NC_045512.2,27259,A,C,PASS,134,0,134,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220677,NC_045512.2,27670,G,T,PASS,60,0,60,1.0,ORF7a,missense_variant,c.277G>T,p.Val93Phe,p.V93F,ivar,Unassigned +220677,NC_045512.2,27807,C,T,PASS,87,0,87,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220677,NC_045512.2,28271,A,T,PASS,979,3,974,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220677,NC_045512.2,28311,C,T,PASS,971,0,967,1.0,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220677,NC_045512.2,2832,A,G,PASS,10,0,10,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned 
+220677,NC_045512.2,28361,GGAGAACGCA,G,PASS,710,708,439,0.62,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220677,NC_045512.2,28512,C,T,PASS,190,135,50,0.26,N,missense_variant,c.239C>T,p.Pro80Leu,p.P80L,ivar,Unassigned +220677,NC_045512.2,28881,GG,AA,PASS,6996,77,6913,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220677,NC_045512.2,28883,G,C,PASS,6957,1,6945,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220677,NC_045512.2,3037,C,T,PASS,21,0,21,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220677,NC_045512.2,5672,C,T,PASS,18,0,18,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned +220677,NC_045512.2,5924,G,A,PASS,13,0,13,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,Unassigned +220677,NC_045512.2,8393,G,A,PASS,2925,8,2915,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220678,NC_045512.2,10449,C,A,PASS,42,0,42,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.1.1 +220678,NC_045512.2,11282,AGTTTGTCTG,A,PASS,99,99,86,0.87,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.1.1 +220678,NC_045512.2,11537,A,G,PASS,164,0,164,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.1.1 +220678,NC_045512.2,13195,T,C,PASS,5198,9,5188,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.1.1 +220678,NC_045512.2,14408,C,T,PASS,150,0,150,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.1.1 +220678,NC_045512.2,16064,A,G,PASS,125,0,125,1.0,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,BA.1.1.1 +220678,NC_045512.2,21762,C,T,PASS,162,0,162,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.1.1 
+220678,NC_045512.2,21764,ATACATG,A,PASS,164,160,140,0.85,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.1.1 +220678,NC_045512.2,21846,C,T,PASS,187,0,187,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.1.1 +220678,NC_045512.2,22578,G,A,PASS,14,0,14,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.1.1 +220678,NC_045512.2,22599,G,A,PASS,16,0,16,1.0,S,missense_variant,c.1037G>A,p.Arg346Lys,p.R346K,ivar,BA.1.1.1 +220678,NC_045512.2,22673,TC,CT,PASS,14,0,14,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.1.1 +220678,NC_045512.2,22679,T,C,PASS,16,0,16,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.1.1 +220678,NC_045512.2,22686,C,T,PASS,15,0,15,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.1.1 +220678,NC_045512.2,23202,C,A,PASS,21,0,21,1.0,S,missense_variant,c.1640C>A,p.Thr547Lys,p.T547K,ivar,BA.1.1.1 +220678,NC_045512.2,23403,A,G,PASS,2029,0,2029,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.1.1 +220678,NC_045512.2,23525,C,T,PASS,1991,10,1981,0.99,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.1.1 +220678,NC_045512.2,23599,T,G,PASS,763,2,761,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.1.1 +220678,NC_045512.2,23604,C,A,PASS,732,0,724,0.99,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.1.1 +220678,NC_045512.2,23854,C,A,PASS,21,0,21,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.1.1 +220678,NC_045512.2,23948,G,T,PASS,280,0,278,0.99,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.1.1 +220678,NC_045512.2,24130,C,A,PASS,345,0,343,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.1.1 +220678,NC_045512.2,24424,A,T,PASS,95,0,95,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.1.1 +220678,NC_045512.2,24469,T,A,PASS,708,0,708,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.1.1 
+220678,NC_045512.2,24503,C,T,PASS,855,8,845,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.1.1 +220678,NC_045512.2,25000,C,T,PASS,37,0,37,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.1.1 +220678,NC_045512.2,25584,C,T,PASS,45,0,45,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.1.1 +220678,NC_045512.2,26270,C,T,PASS,305,0,305,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.1.1 +220678,NC_045512.2,26530,A,G,PASS,272,0,272,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.1.1 +220678,NC_045512.2,26577,C,G,PASS,290,0,290,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.1.1 +220678,NC_045512.2,26709,G,A,PASS,255,4,251,0.98,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.1.1 +220678,NC_045512.2,27259,A,C,PASS,129,0,129,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.1.1 +220678,NC_045512.2,27807,C,T,PASS,51,0,51,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.1.1 +220678,NC_045512.2,28271,A,T,PASS,1166,5,1159,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.1.1 +220678,NC_045512.2,28311,C,T,PASS,1184,7,1171,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.1.1 +220678,NC_045512.2,2832,A,G,PASS,89,0,89,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.1.1 +220678,NC_045512.2,28361,GGAGAACGCA,G,PASS,910,906,488,0.54,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.1.1 +220678,NC_045512.2,28881,GG,AA,PASS,5630,77,5551,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.1.1 +220678,NC_045512.2,28883,G,C,PASS,5585,3,5569,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.1.1 +220678,NC_045512.2,3037,C,T,PASS,18,0,18,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.1.1 +220678,NC_045512.2,8014,T,C,PASS,492,17,475,0.97,orf1ab,synonymous_variant,c.7749T>C,p.Ser2583Ser,p.S2583S,ivar,BA.1.1.1 
+220678,NC_045512.2,8393,G,A,PASS,2422,5,2413,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.1.1 +220679,NC_045512.2,10449,C,A,PASS,14,0,14,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220679,NC_045512.2,11282,AGTTTGTCTG,A,PASS,72,69,59,0.82,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220679,NC_045512.2,11537,A,G,PASS,42,0,42,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220679,NC_045512.2,13195,T,C,PASS,2959,4,2953,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220679,NC_045512.2,14408,C,T,PASS,35,0,35,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220679,NC_045512.2,19374,C,T,PASS,103,0,103,1.0,orf1ab,missense_variant,c.19109C>T,p.Ser6370Phe,p.S6370F,ivar,Unassigned +220679,NC_045512.2,20743,A,C,PASS,1266,0,1263,1.0,orf1ab,missense_variant,c.20478A>C,p.Lys6826Asn,p.K6826N,ivar,Unassigned +220679,NC_045512.2,21274,T,C,PASS,28,0,28,1.0,orf1ab,synonymous_variant,c.21009T>C,p.Phe7003Phe,p.F7003F,ivar,Unassigned +220679,NC_045512.2,21762,C,T,PASS,72,0,72,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220679,NC_045512.2,21764,ATACATG,A,PASS,72,72,63,0.88,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220679,NC_045512.2,21846,C,T,PASS,74,0,74,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220679,NC_045512.2,21,C,T,PASS,10,0,10,1.0,orf1ab,upstream_gene_variant,c.-245C>T,.,.,ivar,Unassigned +220679,NC_045512.2,23403,A,G,PASS,1977,2,1975,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220679,NC_045512.2,23525,C,T,PASS,1792,2,1789,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220679,NC_045512.2,23599,T,G,PASS,744,0,744,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned 
+220679,NC_045512.2,23604,C,A,PASS,722,0,719,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220679,NC_045512.2,23854,C,A,PASS,19,0,19,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned +220679,NC_045512.2,23948,G,T,PASS,98,0,96,0.98,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220679,NC_045512.2,24130,C,A,PASS,142,0,141,0.99,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220679,NC_045512.2,24424,A,T,PASS,60,0,60,1.0,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220679,NC_045512.2,24469,T,A,PASS,275,0,275,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220679,NC_045512.2,24503,C,T,PASS,350,4,346,0.99,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220679,NC_045512.2,25000,C,T,PASS,26,0,26,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220679,NC_045512.2,25584,C,T,PASS,71,0,71,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned +220679,NC_045512.2,2610,C,A,ft,14,10,4,0.29,orf1ab,missense_variant,c.2345C>A,p.Pro782Gln,p.P782Q,ivar,Unassigned +220679,NC_045512.2,26270,C,T,PASS,438,1,437,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220679,NC_045512.2,26530,A,G,PASS,320,0,320,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220679,NC_045512.2,26577,C,G,PASS,362,0,362,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220679,NC_045512.2,26709,G,A,PASS,358,0,358,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220679,NC_045512.2,27259,A,C,PASS,167,0,167,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220679,NC_045512.2,27807,C,T,PASS,55,0,55,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220679,NC_045512.2,28271,A,T,PASS,559,2,557,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned 
+220679,NC_045512.2,28311,C,T,PASS,631,3,626,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220679,NC_045512.2,2832,A,G,PASS,20,0,20,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220679,NC_045512.2,28361,GGAGAACGCA,G,PASS,514,508,361,0.7,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220679,NC_045512.2,28881,GG,AA,PASS,4183,51,4127,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220679,NC_045512.2,28883,G,C,PASS,4145,1,4137,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220679,NC_045512.2,3037,C,T,PASS,18,0,18,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,Unassigned +220679,NC_045512.2,5672,C,T,PASS,40,0,40,1.0,orf1ab,missense_variant,c.5407C>T,p.Pro1803Ser,p.P1803S,ivar,Unassigned +220679,NC_045512.2,5924,G,A,PASS,17,0,17,1.0,orf1ab,missense_variant,c.5659G>A,p.Val1887Ile,p.V1887I,ivar,Unassigned +220679,NC_045512.2,76,T,A,PASS,26,16,8,0.31,orf1ab,upstream_gene_variant,c.-190T>A,.,.,ivar,Unassigned +220679,NC_045512.2,78,T,G,PASS,26,18,8,0.31,orf1ab,upstream_gene_variant,c.-188T>G,.,.,ivar,Unassigned +220679,NC_045512.2,8393,G,A,PASS,2826,4,2818,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220680,NC_045512.2,11537,A,G,PASS,19,0,19,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220680,NC_045512.2,12368,A,G,ft,17,12,5,0.29,orf1ab,missense_variant,c.12103A>G,p.Thr4035Ala,p.T4035A,ivar,Unassigned +220680,NC_045512.2,12749,G,T,PASS,36,21,15,0.42,orf1ab,missense_variant,c.12484G>T,p.Ala4162Ser,p.A4162S,ivar,Unassigned +220680,NC_045512.2,13195,T,C,PASS,665,2,663,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220680,NC_045512.2,21762,C,T,PASS,14,0,14,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned 
+220680,NC_045512.2,21764,ATACATG,A,ft,14,14,8,0.57,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220680,NC_045512.2,21846,C,T,PASS,18,0,18,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220680,NC_045512.2,23403,A,G,PASS,246,0,246,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220680,NC_045512.2,23525,C,T,PASS,216,0,216,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220680,NC_045512.2,23599,T,G,PASS,95,0,95,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220680,NC_045512.2,23604,C,A,PASS,90,0,90,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220680,NC_045512.2,23948,G,T,PASS,13,0,13,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220680,NC_045512.2,24130,C,A,PASS,22,2,20,0.91,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220680,NC_045512.2,24469,T,A,PASS,67,0,65,0.97,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220680,NC_045512.2,24503,C,T,PASS,76,0,76,1.0,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220680,NC_045512.2,26270,C,T,PASS,259,0,259,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220680,NC_045512.2,26530,A,G,PASS,39,0,39,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220680,NC_045512.2,26577,C,G,PASS,46,0,46,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220680,NC_045512.2,26709,G,A,PASS,38,0,38,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220680,NC_045512.2,27259,A,C,PASS,33,0,33,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220680,NC_045512.2,28253,CA,C,PASS,575,575,572,0.99,ORF8,frameshift_variant,c.361delA,p.Ile121fs,p.I121fs,ivar,Unassigned +220680,NC_045512.2,28271,A,T,PASS,728,2,724,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned 
+220680,NC_045512.2,28311,C,T,PASS,721,2,713,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220680,NC_045512.2,28361,GGAGAACGCA,G,PASS,501,498,352,0.7,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220680,NC_045512.2,28881,GG,AA,PASS,4386,36,4344,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220680,NC_045512.2,28883,G,C,PASS,4380,2,4371,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220680,NC_045512.2,29399,G,T,PASS,2419,4,2405,0.99,N,missense_variant,c.1126G>T,p.Ala376Ser,p.A376S,ivar,Unassigned +220680,NC_045512.2,4221,A,G,ft,16,12,4,0.25,orf1ab,missense_variant,c.3956A>G,p.Lys1319Arg,p.K1319R,ivar,Unassigned +220680,NC_045512.2,8256,A,G,PASS,77,53,24,0.31,orf1ab,missense_variant,c.7991A>G,p.Glu2664Gly,p.E2664G,ivar,Unassigned +220680,NC_045512.2,8393,G,A,PASS,606,2,604,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220680,NC_045512.2,8652,T,C,PASS,427,1,426,1.0,orf1ab,missense_variant,c.8387T>C,p.Met2796Thr,p.M2796T,ivar,Unassigned +220684,NC_045512.2,10449,C,A,PASS,23,0,23,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,Unassigned +220684,NC_045512.2,11282,AGTTTGTCTG,A,PASS,71,71,67,0.94,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,Unassigned +220684,NC_045512.2,11537,A,G,PASS,25,0,25,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,Unassigned +220684,NC_045512.2,13195,T,C,PASS,4647,1,4644,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,Unassigned +220684,NC_045512.2,14408,C,T,PASS,61,3,58,0.95,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,Unassigned +220684,NC_045512.2,16064,A,G,PASS,80,2,78,0.98,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,Unassigned 
+220684,NC_045512.2,19937,A,G,ft,14,10,4,0.29,orf1ab,missense_variant,c.19672A>G,p.Thr6558Ala,p.T6558A,ivar,Unassigned +220684,NC_045512.2,21458,T,C,PASS,263,0,263,1.0,orf1ab,missense_variant,c.21193T>C,p.Ser7065Pro,p.S7065P,ivar,Unassigned +220684,NC_045512.2,21762,C,T,PASS,70,0,70,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,Unassigned +220684,NC_045512.2,21764,ATACATG,A,PASS,70,69,63,0.9,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,Unassigned +220684,NC_045512.2,21846,C,T,PASS,72,0,72,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,Unassigned +220684,NC_045512.2,23403,A,G,PASS,1549,3,1546,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,Unassigned +220684,NC_045512.2,23525,C,T,PASS,1462,0,1461,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,Unassigned +220684,NC_045512.2,23599,T,G,PASS,609,0,609,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,Unassigned +220684,NC_045512.2,23604,C,A,PASS,592,0,592,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,Unassigned +220684,NC_045512.2,23854,C,A,PASS,23,0,23,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,Unassigned +220684,NC_045512.2,23948,G,T,PASS,255,0,255,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,Unassigned +220684,NC_045512.2,24130,C,A,PASS,379,0,379,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,Unassigned +220684,NC_045512.2,24424,A,T,PASS,47,0,46,0.98,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,Unassigned +220684,NC_045512.2,24469,T,A,PASS,463,0,460,0.99,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,Unassigned +220684,NC_045512.2,24503,C,T,PASS,595,9,586,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,Unassigned +220684,NC_045512.2,25000,C,T,PASS,14,0,14,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,Unassigned +220684,NC_045512.2,25584,C,T,PASS,65,0,65,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,Unassigned 
+220684,NC_045512.2,26270,C,T,PASS,205,0,205,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,Unassigned +220684,NC_045512.2,26530,A,G,PASS,60,0,60,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,Unassigned +220684,NC_045512.2,26577,C,G,PASS,68,0,68,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,Unassigned +220684,NC_045512.2,26709,G,A,PASS,70,0,70,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,Unassigned +220684,NC_045512.2,27259,A,C,PASS,120,0,119,0.99,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,Unassigned +220684,NC_045512.2,27807,C,T,PASS,30,0,30,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,Unassigned +220684,NC_045512.2,28271,A,T,PASS,400,0,399,1.0,N,upstream_gene_variant,c.-3A>T,.,.,ivar,Unassigned +220684,NC_045512.2,28311,C,T,PASS,415,1,412,0.99,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,Unassigned +220684,NC_045512.2,2832,A,G,PASS,133,0,133,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,Unassigned +220684,NC_045512.2,28361,GGAGAACGCA,G,PASS,319,317,227,0.71,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,Unassigned +220684,NC_045512.2,28881,GG,AA,PASS,5519,59,5456,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,Unassigned +220684,NC_045512.2,28883,G,C,PASS,5487,2,5473,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,Unassigned +220684,NC_045512.2,8393,G,A,PASS,2672,20,2646,0.99,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,Unassigned +220685,NC_045512.2,10449,C,A,PASS,20,0,20,1.0,orf1ab,missense_variant,c.10184C>A,p.Pro3395His,p.P3395H,ivar,BA.1.1.1 +220685,NC_045512.2,11282,AGTTTGTCTG,A,PASS,110,109,100,0.91,orf1ab,disruptive_inframe_deletion,c.11022_11030delGTCTGGTTT,p.Leu3674_Gly3676del,p.L3674_G3676del,ivar,BA.1.1.1 +220685,NC_045512.2,11537,A,G,PASS,62,0,62,1.0,orf1ab,missense_variant,c.11272A>G,p.Ile3758Val,p.I3758V,ivar,BA.1.1.1 
+220685,NC_045512.2,13195,T,C,PASS,3349,12,3337,1.0,orf1ab,synonymous_variant,c.12930T>C,p.Val4310Val,p.V4310V,ivar,BA.1.1.1 +220685,NC_045512.2,14408,C,T,PASS,64,0,64,1.0,orf1ab,synonymous_variant,c.14143C>T,p.Leu4715Leu,p.L4715L,ivar,BA.1.1.1 +220685,NC_045512.2,16064,A,G,PASS,72,2,70,0.97,orf1ab,missense_variant,c.15799A>G,p.Arg5267Gly,p.R5267G,ivar,BA.1.1.1 +220685,NC_045512.2,21762,C,T,PASS,47,0,47,1.0,S,missense_variant,c.200C>T,p.Ala67Val,p.A67V,ivar,BA.1.1.1 +220685,NC_045512.2,21764,ATACATG,A,PASS,47,47,43,0.91,S,disruptive_inframe_deletion,c.204_209delACATGT,p.His69_Val70del,p.H69_V70del,ivar,BA.1.1.1 +220685,NC_045512.2,21846,C,T,PASS,42,0,42,1.0,S,missense_variant,c.284C>T,p.Thr95Ile,p.T95I,ivar,BA.1.1.1 +220685,NC_045512.2,22578,G,A,PASS,12,0,12,1.0,S,missense_variant,c.1016G>A,p.Gly339Asp,p.G339D,ivar,BA.1.1.1 +220685,NC_045512.2,22599,G,A,PASS,14,0,14,1.0,S,missense_variant,c.1037G>A,p.Arg346Lys,p.R346K,ivar,BA.1.1.1 +220685,NC_045512.2,22673,TC,CT,PASS,10,0,10,1.0,S,missense_variant,c.1111_1112delTCinsCT,p.Ser371Leu,p.S371L,ivar,BA.1.1.1 +220685,NC_045512.2,22679,T,C,PASS,14,0,14,1.0,S,missense_variant,c.1117T>C,p.Ser373Pro,p.S373P,ivar,BA.1.1.1 +220685,NC_045512.2,22686,C,T,PASS,14,0,14,1.0,S,missense_variant,c.1124C>T,p.Ser375Phe,p.S375F,ivar,BA.1.1.1 +220685,NC_045512.2,23403,A,G,PASS,1274,2,1272,1.0,S,missense_variant,c.1841A>G,p.Asp614Gly,p.D614G,ivar,BA.1.1.1 +220685,NC_045512.2,23525,C,T,PASS,1276,1,1275,1.0,S,missense_variant,c.1963C>T,p.His655Tyr,p.H655Y,ivar,BA.1.1.1 +220685,NC_045512.2,23599,T,G,PASS,519,0,519,1.0,S,missense_variant,c.2037T>G,p.Asn679Lys,p.N679K,ivar,BA.1.1.1 +220685,NC_045512.2,23604,C,A,PASS,510,0,508,1.0,S,missense_variant,c.2042C>A,p.Pro681His,p.P681H,ivar,BA.1.1.1 +220685,NC_045512.2,23854,C,A,PASS,29,0,29,1.0,S,missense_variant,c.2292C>A,p.Asn764Lys,p.N764K,ivar,BA.1.1.1 +220685,NC_045512.2,23948,G,T,PASS,169,0,169,1.0,S,missense_variant,c.2386G>T,p.Asp796Tyr,p.D796Y,ivar,BA.1.1.1 
+220685,NC_045512.2,24130,C,A,PASS,171,0,171,1.0,S,missense_variant,c.2568C>A,p.Asn856Lys,p.N856K,ivar,BA.1.1.1 +220685,NC_045512.2,24424,A,T,PASS,101,3,98,0.97,S,missense_variant,c.2862A>T,p.Gln954His,p.Q954H,ivar,BA.1.1.1 +220685,NC_045512.2,24469,T,A,PASS,402,1,400,1.0,S,missense_variant,c.2907T>A,p.Asn969Lys,p.N969K,ivar,BA.1.1.1 +220685,NC_045512.2,24503,C,T,PASS,469,11,458,0.98,S,missense_variant,c.2941C>T,p.Leu981Phe,p.L981F,ivar,BA.1.1.1 +220685,NC_045512.2,25000,C,T,PASS,19,0,19,1.0,S,synonymous_variant,c.3438C>T,p.Asp1146Asp,p.D1146D,ivar,BA.1.1.1 +220685,NC_045512.2,25584,C,T,PASS,50,0,50,1.0,ORF3a,synonymous_variant,c.192C>T,p.Thr64Thr,p.T64T,ivar,BA.1.1.1 +220685,NC_045512.2,26270,C,T,PASS,114,0,114,1.0,E,missense_variant,c.26C>T,p.Thr9Ile,p.T9I,ivar,BA.1.1.1 +220685,NC_045512.2,26530,A,G,PASS,48,0,48,1.0,M,missense_variant,c.8A>G,p.Asp3Gly,p.D3G,ivar,BA.1.1.1 +220685,NC_045512.2,26577,C,G,PASS,36,0,36,1.0,M,missense_variant,c.55C>G,p.Gln19Glu,p.Q19E,ivar,BA.1.1.1 +220685,NC_045512.2,26709,G,A,PASS,37,0,37,1.0,M,missense_variant,c.187G>A,p.Ala63Thr,p.A63T,ivar,BA.1.1.1 +220685,NC_045512.2,27259,A,C,PASS,82,0,82,1.0,ORF6,synonymous_variant,c.58A>C,p.Arg20Arg,p.R20R,ivar,BA.1.1.1 +220685,NC_045512.2,27807,C,T,PASS,23,0,23,1.0,ORF7b,synonymous_variant,c.52C>T,p.Leu18Leu,p.L18L,ivar,BA.1.1.1 +220685,NC_045512.2,28271,A,T,PASS,315,0,313,0.99,N,upstream_gene_variant,c.-3A>T,.,.,ivar,BA.1.1.1 +220685,NC_045512.2,28311,C,T,PASS,318,8,308,0.97,N,missense_variant,c.38C>T,p.Pro13Leu,p.P13L,ivar,BA.1.1.1 +220685,NC_045512.2,2832,A,G,PASS,19,0,19,1.0,orf1ab,missense_variant,c.2567A>G,p.Lys856Arg,p.K856R,ivar,BA.1.1.1 +220685,NC_045512.2,28333,C,T,PASS,307,4,303,0.99,N,synonymous_variant,c.60C>T,p.Pro20Pro,p.P20P,ivar,BA.1.1.1 +220685,NC_045512.2,28361,GGAGAACGCA,G,PASS,258,256,188,0.73,N,disruptive_inframe_deletion,c.90_98delAGAACGCAG,p.Glu31_Ser33del,p.E31_S33del,ivar,BA.1.1.1 
+220685,NC_045512.2,28512,C,T,PASS,186,124,56,0.3,N,missense_variant,c.239C>T,p.Pro80Leu,p.P80L,ivar,BA.1.1.1 +220685,NC_045512.2,28881,GG,AA,PASS,3917,51,3864,0.99,N,missense_variant,c.608_609delGGinsAA,p.Arg203Lys,p.R203K,ivar,BA.1.1.1 +220685,NC_045512.2,28883,G,C,PASS,3900,4,3892,1.0,N,missense_variant,c.610G>C,p.Gly204Arg,p.G204R,ivar,BA.1.1.1 +220685,NC_045512.2,3037,C,T,PASS,13,0,13,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,BA.1.1.1 +220685,NC_045512.2,4710,C,A,ft,12,7,5,0.42,orf1ab,missense_variant,c.4445C>A,p.Ala1482Asp,p.A1482D,ivar,BA.1.1.1 +220685,NC_045512.2,8393,G,A,PASS,2105,5,2100,1.0,orf1ab,missense_variant,c.8128G>A,p.Ala2710Thr,p.A2710T,ivar,BA.1.1.1 diff --git a/relecov_tools/example_data/upload_bioinfo_metadata/bioinfo_metadata.json b/relecov_tools/example_data/upload_bioinfo_metadata/bioinfo_metadata.json new file mode 100644 index 00000000..01c8ef58 --- /dev/null +++ b/relecov_tools/example_data/upload_bioinfo_metadata/bioinfo_metadata.json @@ -0,0 +1,5322 @@ +{ + "214821": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30396", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "dbcc703ccb7da3002fee6c0486199009", + "consensus_sequence_R1_name": "214821_S12_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "b76fba963664b532004c4ce7153ae14f", + "consensus_sequence_R2_name": "214821_S12_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": 
"287.0", + "fastq_r1": "214821_S12_R1_001.fastq.gz", + "fastq_r2": "214821_S12_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "B.1.177", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "8442.32", + "number_of_base_pairs_sequenced": "633884", + "number_of_variants_AF_greater_75percent": "19", + "number_of_variants_with_effect": "9", + "per_Ns": "8,44", + "per_genome_greater_10x": "92,0", + "per_reads_host": "1,08", + "per_reads_virus": "98,54", + "per_unmapped": "0,380247", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "285604", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214821", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "19" + }, + "214822": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30396", + "consensus_params": "-p vcf -f", + 
"consensus_sequence_R1_md5": "4837ac15de51cfeda4596b09f5876ac8", + "consensus_sequence_R1_name": "214822_S13_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "f0bf2700ae3eadc9a3f01c515546bf4a", + "consensus_sequence_R2_name": "214822_S13_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "6.0", + "fastq_r1": "214822_S13_R1_001.fastq.gz", + "fastq_r2": "214822_S13_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "58831.99", + "number_of_base_pairs_sequenced": "765444", + "number_of_variants_AF_greater_75percent": "8", + "number_of_variants_with_effect": "3", + "per_Ns": "58,83", + "per_genome_greater_10x": "41,0", + "per_reads_host": "63,33", + "per_reads_virus": "20,09", + "per_unmapped": "16,5778", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "306868", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214822", + 
"variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "8" + }, + "214823": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "0", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "213a150461737cb0af48fddd370f4ceb", + "consensus_sequence_R1_name": "214823_S1_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "1b58f58f861c81ed94401a319c7ae9ce", + "consensus_sequence_R2_name": "214823_S1_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "nan", + "fastq_r1": "214823_S1_R1_001.fastq.gz", + "fastq_r2": "214823_S1_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "nan", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "nan", + "number_of_base_pairs_sequenced": "597696", + 
"number_of_variants_AF_greater_75percent": "0", + "number_of_variants_with_effect": "0", + "per_Ns": "nan", + "per_genome_greater_10x": "nan", + "per_reads_host": "58,03", + "per_reads_virus": "0,1", + "per_unmapped": "41,8663", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "196898", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214823", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "0" + }, + "214824": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "0", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "cf00a386d54734ee7a60932f487d946c", + "consensus_sequence_R1_name": "214824_S14_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "eebdabee75e327f49d9e1741d22c40c3", + "consensus_sequence_R2_name": "214824_S14_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "nan", + "fastq_r1": "214824_S14_R1_001.fastq.gz", + "fastq_r2": "214824_S14_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + 
"if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "nan", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "nan", + "number_of_base_pairs_sequenced": "728", + "number_of_variants_AF_greater_75percent": "0", + "number_of_variants_with_effect": "0", + "per_Ns": "nan", + "per_genome_greater_10x": "nan", + "per_reads_host": "33,06", + "per_reads_virus": "55,65", + "per_unmapped": "11,2903", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "248", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214824", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "0" + }, + "214825": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "c4b42c9ca78676b244f6fd3356a39bbd", + "consensus_sequence_R1_name": "214825_S15_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "7356129837732007e08e94a7699c5f7e", + 
"consensus_sequence_R2_name": "214825_S15_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "217.0", + "fastq_r1": "214825_S15_R1_001.fastq.gz", + "fastq_r2": "214825_S15_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "B.1.177", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "9102.77", + "number_of_base_pairs_sequenced": "613424", + "number_of_variants_AF_greater_75percent": "17", + "number_of_variants_with_effect": "9", + "per_Ns": "9,10", + "per_genome_greater_10x": "91,0", + "per_reads_host": "0,54", + "per_reads_virus": "97,74", + "per_unmapped": "1,71652", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "277306", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214825", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + 
"variant_calling_software_version": "1.3.1", + "variant_designation": "17" + }, + "214826": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "19c93d83eb76748429c4f466ee094314", + "consensus_sequence_R1_name": "214826_S16_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "3c245df1ecc9456ab121337847a8282d", + "consensus_sequence_R2_name": "214826_S16_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "239.0", + "fastq_r1": "214826_S16_R1_001.fastq.gz", + "fastq_r2": "214826_S16_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "B.1.177", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "7868.78", + "number_of_base_pairs_sequenced": "760124", + "number_of_variants_AF_greater_75percent": "17", + "number_of_variants_with_effect": "9", + "per_Ns": "7,87", + "per_genome_greater_10x": "92,0", + "per_reads_host": 
"0,25", + "per_reads_virus": "99,28", + "per_unmapped": "0,473221", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "337052", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214826", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "17" + }, + "214827": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "43205fbe2b44e0bb2aaa3c67a4825c6b", + "consensus_sequence_R1_name": "214827_S17_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "b95ddabc2cb0337a225c7998ec3a11fd", + "consensus_sequence_R2_name": "214827_S17_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "10.0", + "fastq_r1": "214827_S17_R1_001.fastq.gz", + "fastq_r2": "214827_S17_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + 
"lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "49724.11", + "number_of_base_pairs_sequenced": "987912", + "number_of_variants_AF_greater_75percent": "15", + "number_of_variants_with_effect": "7", + "per_Ns": "49,72", + "per_genome_greater_10x": "50,0", + "per_reads_host": "22,70", + "per_reads_virus": "32,19", + "per_unmapped": "45,1113", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "246530", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214827", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "15" + }, + "214828": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "7523b3b8f6129259f6004ef85b479926", + "consensus_sequence_R1_name": "214828_S18_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "fa80eea72d21d5d686af19b779e00cd7", + "consensus_sequence_R2_name": "214828_S18_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + 
"consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "33.0", + "fastq_r1": "214828_S18_R1_001.fastq.gz", + "fastq_r2": "214828_S18_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "32314.48", + "number_of_base_pairs_sequenced": "773992", + "number_of_variants_AF_greater_75percent": "14", + "number_of_variants_with_effect": "7", + "per_Ns": "32,31", + "per_genome_greater_10x": "68,0", + "per_reads_host": "18,16", + "per_reads_virus": "69,75", + "per_unmapped": "12093", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "312802", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214828", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "14" + }, + "214829": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + 
"assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "ccb040c582e6c68c9efe84fca0e51c95", + "consensus_sequence_R1_name": "214829_S19_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "047dd05f8008dc7fa0c9f2c5d7f51677", + "consensus_sequence_R2_name": "214829_S19_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "249.0", + "fastq_r1": "214829_S19_R1_001.fastq.gz", + "fastq_r2": "214829_S19_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "B.1.177", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "10654.45", + "number_of_base_pairs_sequenced": "864220", + "number_of_variants_AF_greater_75percent": "20", + "number_of_variants_with_effect": "8", + "per_Ns": "10,65", + "per_genome_greater_10x": "89,0", + "per_reads_host": "1,80", + "per_reads_virus": "97,51", + "per_unmapped": "0,687833", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 
--qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "379598", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214829", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "20" + }, + "214830": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "f440d83caf0456f3af2bba389dbe9663", + "consensus_sequence_R1_name": "214830_S20_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "345fadf48d0f1b4e14951d82145f985a", + "consensus_sequence_R2_name": "214830_S20_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "169.0", + "fastq_r1": "214830_S20_R1_001.fastq.gz", + "fastq_r2": "214830_S20_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + 
"lineage_name": "B.1.177", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "12145.94", + "number_of_base_pairs_sequenced": "799104", + "number_of_variants_AF_greater_75percent": "22", + "number_of_variants_with_effect": "10", + "per_Ns": "12,15", + "per_genome_greater_10x": "88,0", + "per_reads_host": "14,21", + "per_reads_virus": "84,72", + "per_unmapped": "1,06959", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "354342", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214830", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "22" + }, + "214831": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "8b674c46bded0f8e5e9b89cff4a7c1de", + "consensus_sequence_R1_name": "214831_S21_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "5414313cf3edb5bc6ab9c74a81237277", + "consensus_sequence_R2_name": "214831_S21_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + 
"dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "nan", + "fastq_r1": "214831_S21_R1_001.fastq.gz", + "fastq_r2": "214831_S21_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "77513.96", + "number_of_base_pairs_sequenced": "639128", + "number_of_variants_AF_greater_75percent": "3", + "number_of_variants_with_effect": "2", + "per_Ns": "77,51", + "per_genome_greater_10x": "22,0", + "per_reads_host": "84,93", + "per_reads_virus": "4,43", + "per_unmapped": "10,6459", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "253440", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214831", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "3" + }, + "214832": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + 
"commercial_open_source_both": "open-source", + "consensus_genome_length": "0", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "a118e8522bf3b2cf34020fe4f44536a0", + "consensus_sequence_R1_name": "214832_S22_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "7e8ee7de84d4a0c83da2fa4d86744795", + "consensus_sequence_R2_name": "214832_S22_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "nan", + "fastq_r1": "214832_S22_R1_001.fastq.gz", + "fastq_r2": "214832_S22_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "nan", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "nan", + "number_of_base_pairs_sequenced": "24072", + "number_of_variants_AF_greater_75percent": "0", + "number_of_variants_with_effect": "0", + "per_Ns": "nan", + "per_genome_greater_10x": "nan", + "per_reads_host": "73,00", + "per_reads_virus": "7,21", + "per_unmapped": "19,7941", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + 
"qc_filtered": "5052", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214832", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "0" + }, + "214833": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "13a584fb7800fa9d900d848555bec36f", + "consensus_sequence_R1_name": "214833_S23_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "634d553b612db4a272537b445827c3ae", + "consensus_sequence_R2_name": "214833_S23_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "16.0", + "fastq_r1": "214833_S23_R1_001.fastq.gz", + "fastq_r2": "214833_S23_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + 
"mapping_software_version": "2.4.4", + "ns_per_100_kbp": "44935.29", + "number_of_base_pairs_sequenced": "901584", + "number_of_variants_AF_greater_75percent": "12", + "number_of_variants_with_effect": "5", + "per_Ns": "44,94", + "per_genome_greater_10x": "55,00000000000001", + "per_reads_host": "37,82", + "per_reads_virus": "26,96", + "per_unmapped": "35,2283", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "326700", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214833", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "12" + }, + "214834": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "a93fc6c69a31da28cbbde419f704d2e3", + "consensus_sequence_R1_name": "214834_S24_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "71b565731008044ca6115528230b1bcc", + "consensus_sequence_R2_name": "214834_S24_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "85.0", + "fastq_r1": 
"214834_S24_R1_001.fastq.gz", + "fastq_r2": "214834_S24_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "B.1.177", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "17289.24", + "number_of_base_pairs_sequenced": "887704", + "number_of_variants_AF_greater_75percent": "17", + "number_of_variants_with_effect": "8", + "per_Ns": "17,29", + "per_genome_greater_10x": "83,0", + "per_reads_host": "18,96", + "per_reads_virus": "77,18", + "per_unmapped": "3,85588", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "384218", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214834", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "17" + }, + "214835": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30399", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": 
"548177db795b4cb098bd27d7e8182ce5", + "consensus_sequence_R1_name": "214835_S25_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "8208d8f05c4dd2dedc7f8b017d1f00b4", + "consensus_sequence_R2_name": "214835_S25_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "32.0", + "fastq_r1": "214835_S25_R1_001.fastq.gz", + "fastq_r2": "214835_S25_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "32555.18", + "number_of_base_pairs_sequenced": "965352", + "number_of_variants_AF_greater_75percent": "17", + "number_of_variants_with_effect": "7", + "per_Ns": "32,56", + "per_genome_greater_10x": "67,0", + "per_reads_host": "24,23", + "per_reads_virus": "46,94", + "per_unmapped": "28,8362", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "347078", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214835", + "variant_calling_params": 
"--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "17" + }, + "214836": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "e6c4dd1c4cb471b3c4dd09ee40f10591", + "consensus_sequence_R1_name": "214836_S26_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "5addc2c98ff7d6458a4b319e25731654", + "consensus_sequence_R2_name": "214836_S26_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "9.0", + "fastq_r1": "214836_S26_R1_001.fastq.gz", + "fastq_r2": "214836_S26_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "50530.05", + "number_of_base_pairs_sequenced": "959712", + 
"number_of_variants_AF_greater_75percent": "8", + "number_of_variants_with_effect": "4", + "per_Ns": "50,53", + "per_genome_greater_10x": "49,0", + "per_reads_host": "41,06", + "per_reads_virus": "31,73", + "per_unmapped": "27,2183", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "362840", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214836", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "8" + }, + "214837": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "c1765535970d038196f72ece4a9b8624", + "consensus_sequence_R1_name": "214837_S27_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "17670b749961800dfdc995cf3c4c6faf", + "consensus_sequence_R2_name": "214837_S27_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "52.0", + "fastq_r1": "214837_S27_R1_001.fastq.gz", + "fastq_r2": "214837_S27_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": 
"None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "B.1.177", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "25351.97", + "number_of_base_pairs_sequenced": "939160", + "number_of_variants_AF_greater_75percent": "17", + "number_of_variants_with_effect": "7", + "per_Ns": "25,35", + "per_genome_greater_10x": "75,0", + "per_reads_host": "28,24", + "per_reads_virus": "55,88", + "per_unmapped": "15,8814", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "400430", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214837", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "17" + }, + "214838": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "72480d9c23afea096a89ed456c9efdb9", + "consensus_sequence_R1_name": "214838_S2_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "6cddef3197226f00bd7eb8c437e94900", 
+ "consensus_sequence_R2_name": "214838_S2_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "1.0", + "fastq_r1": "214838_S2_R1_001.fastq.gz", + "fastq_r2": "214838_S2_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "73340.47", + "number_of_base_pairs_sequenced": "312196", + "number_of_variants_AF_greater_75percent": "5", + "number_of_variants_with_effect": "3", + "per_Ns": "73,34", + "per_genome_greater_10x": "27,0", + "per_reads_host": "45,15", + "per_reads_virus": "5,49", + "per_unmapped": "49,3622", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "113674", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214838", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + 
"variant_calling_software_version": "1.3.1", + "variant_designation": "5" + }, + "220338": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30377", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "c9f6715f0fe6933126aa9a36d8190102", + "consensus_sequence_R1_name": "220338_S3_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "6ab5c2b6fa4005479da188e7bb7d584b", + "consensus_sequence_R2_name": "220338_S3_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "28.0", + "fastq_r1": "220338_S3_R1_001.fastq.gz", + "fastq_r2": "220338_S3_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "31881.92", + "number_of_base_pairs_sequenced": "531888", + "number_of_variants_AF_greater_75percent": "35", + "number_of_variants_with_effect": "23", + "per_Ns": "31,88", + "per_genome_greater_10x": "68,0", + "per_reads_host": 
"34,56", + "per_reads_virus": "49,6", + "per_unmapped": "15,8376", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "214610", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220338", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "35" + }, + "220339": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "eea39c299ca945536c2d0583bc358d41", + "consensus_sequence_R1_name": "220339_S4_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "391b790d0d105fd9e8c6c037df8a18d4", + "consensus_sequence_R2_name": "220339_S4_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "401.0", + "fastq_r1": "220339_S4_R1_001.fastq.gz", + "fastq_r2": "220339_S4_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + 
"lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.17", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "8766.06", + "number_of_base_pairs_sequenced": "693024", + "number_of_variants_AF_greater_75percent": "49", + "number_of_variants_with_effect": "35", + "per_Ns": "8,77", + "per_genome_greater_10x": "91,0", + "per_reads_host": "4,33", + "per_reads_virus": "95,17", + "per_unmapped": "0,494074", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "310682", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220339", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "49" + }, + "220407": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "ed9a7efa18f6a228c246953e1cd2aa64", + "consensus_sequence_R1_name": "220407_S5_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "1400d0c52ad2dc2231c1fe87e0bd1dde", + "consensus_sequence_R2_name": "220407_S5_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + 
"consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "nan", + "fastq_r1": "220407_S5_R1_001.fastq.gz", + "fastq_r2": "220407_S5_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "80175.9", + "number_of_base_pairs_sequenced": "467076", + "number_of_variants_AF_greater_75percent": "9", + "number_of_variants_with_effect": "7", + "per_Ns": "80,18", + "per_genome_greater_10x": "20,0", + "per_reads_host": "77,29", + "per_reads_virus": "2,12", + "per_unmapped": "20,5887", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "188574", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220407", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "9" + }, + "220433": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + 
"assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "1044ad9179937f00d76aa07b7c3f8ddc", + "consensus_sequence_R1_name": "220433_S6_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "3d39eeffbd2983251f7153b1792cb865", + "consensus_sequence_R2_name": "220433_S6_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "3.0", + "fastq_r1": "220433_S6_R1_001.fastq.gz", + "fastq_r2": "220433_S6_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "64030.36", + "number_of_base_pairs_sequenced": "358868", + "number_of_variants_AF_greater_75percent": "16", + "number_of_variants_with_effect": "13", + "per_Ns": "64,03", + "per_genome_greater_10x": "36,0", + "per_reads_host": "20,19", + "per_reads_virus": "23,34", + "per_unmapped": "56,4768", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 
--qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "69680", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220433", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "16" + }, + "220518": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30396", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "52d22b2aa4ad47f110cdd5527cc46e58", + "consensus_sequence_R1_name": "220518_S7_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "9884025a080d74f187017475c2e140f7", + "consensus_sequence_R2_name": "220518_S7_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "2.0", + "fastq_r1": "220518_S7_R1_001.fastq.gz", + "fastq_r2": "220518_S7_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": 
"Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "63283.94", + "number_of_base_pairs_sequenced": "327884", + "number_of_variants_AF_greater_75percent": "18", + "number_of_variants_with_effect": "13", + "per_Ns": "63,28", + "per_genome_greater_10x": "37,0", + "per_reads_host": "34,32", + "per_reads_virus": "31,01", + "per_unmapped": "34,6713", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "87796", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220518", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "18" + }, + "220529": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "2b7815a5748ed76de208cac55e9265da", + "consensus_sequence_R1_name": "220529_S28_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "38ce949c2a98a1bf4dbcd444483860e1", + "consensus_sequence_R2_name": "220529_S28_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + 
"dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "86.0", + "fastq_r1": "220529_S28_R1_001.fastq.gz", + "fastq_r2": "220529_S28_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "21148.96", + "number_of_base_pairs_sequenced": "556432", + "number_of_variants_AF_greater_75percent": "42", + "number_of_variants_with_effect": "30", + "per_Ns": "21,15", + "per_genome_greater_10x": "79,0", + "per_reads_host": "19,97", + "per_reads_virus": "79,09", + "per_unmapped": "0,943396", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "243588", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220529", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "42" + }, + "220530": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + 
"commercial_open_source_both": "open-source", + "consensus_genome_length": "30377", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "d9337ab6c46b757b0cf76e725b1fed45", + "consensus_sequence_R1_name": "220530_S29_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "f474a72ba984af48d96b95ee3d9201a6", + "consensus_sequence_R2_name": "220530_S29_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "27.0", + "fastq_r1": "220530_S29_R1_001.fastq.gz", + "fastq_r2": "220530_S29_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "32758.79", + "number_of_base_pairs_sequenced": "605520", + "number_of_variants_AF_greater_75percent": "38", + "number_of_variants_with_effect": "27", + "per_Ns": "32,76", + "per_genome_greater_10x": "67,0", + "per_reads_host": "15,61", + "per_reads_virus": "63,63", + "per_unmapped": "20,7631", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + 
"preprocessing_software_version": "0.23.2", + "qc_filtered": "258694", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220530", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "38" + }, + "220531": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "ffe0db11dd065f384e13c6be26599e84", + "consensus_sequence_R1_name": "220531_S30_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "e0b1d1d6557f03cfca04005721244270", + "consensus_sequence_R2_name": "220531_S30_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "317.0", + "fastq_r1": "220531_S30_R1_001.fastq.gz", + "fastq_r2": "220531_S30_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.18", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + 
"mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "9950.48", + "number_of_base_pairs_sequenced": "722540", + "number_of_variants_AF_greater_75percent": "57", + "number_of_variants_with_effect": "44", + "per_Ns": "9,95", + "per_genome_greater_10x": "90,0", + "per_reads_host": "4,66", + "per_reads_virus": "95,17", + "per_unmapped": "0,174275", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "322478", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220531", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "57" + }, + "220532": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "98c6011d1e0537c3f02bec11cd3c2aef", + "consensus_sequence_R1_name": "220532_S31_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "fc770dc2431972bf859d64b3fb7b24cd", + "consensus_sequence_R2_name": "220532_S31_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "29.0", + 
"fastq_r1": "220532_S31_R1_001.fastq.gz", + "fastq_r2": "220532_S31_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "31517.67", + "number_of_base_pairs_sequenced": "564332", + "number_of_variants_AF_greater_75percent": "35", + "number_of_variants_with_effect": "25", + "per_Ns": "31,52", + "per_genome_greater_10x": "68,0", + "per_reads_host": "37,18", + "per_reads_virus": "60,51", + "per_unmapped": "2,31521", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "240540", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220532", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "35" + }, + "220533": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + 
"consensus_sequence_R1_md5": "6dd04393363653aa94aadb887dcfbb5a", + "consensus_sequence_R1_name": "220533_S32_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "e752ffb157d139701ecf2e765ce4642f", + "consensus_sequence_R2_name": "220533_S32_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "nan", + "fastq_r1": "220533_S32_R1_001.fastq.gz", + "fastq_r2": "220533_S32_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "85292.45", + "number_of_base_pairs_sequenced": "344700", + "number_of_variants_AF_greater_75percent": "7", + "number_of_variants_with_effect": "5", + "per_Ns": "85,29", + "per_genome_greater_10x": "15,0", + "per_reads_host": "92,17", + "per_reads_virus": "5,2", + "per_unmapped": "2,63632", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "146454", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220533", + 
"variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "7" + }, + "220534": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30393", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "003a1179f4afd449ba2f04f65d5bc071", + "consensus_sequence_R1_name": "220534_S33_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "632355ba64f7de8c471d98028394a792", + "consensus_sequence_R2_name": "220534_S33_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "505.0", + "fastq_r1": "220534_S33_R1_001.fastq.gz", + "fastq_r2": "220534_S33_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.17", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "8908.14", + "number_of_base_pairs_sequenced": 
"849756", + "number_of_variants_AF_greater_75percent": "58", + "number_of_variants_with_effect": "43", + "per_Ns": "8,91", + "per_genome_greater_10x": "91,0", + "per_reads_host": "5,02", + "per_reads_virus": "94,39", + "per_unmapped": "0,588441", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "383046", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220534", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "58" + }, + "220535": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "abd5dea7f0ebb92d4dc44b6777aa14df", + "consensus_sequence_R1_name": "220535_S34_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "38ad2aaf11d71d43e8c9e2996419c9c7", + "consensus_sequence_R2_name": "220535_S34_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "39.0", + "fastq_r1": "220535_S34_R1_001.fastq.gz", + "fastq_r2": "220535_S34_R2_001.fastq.gz", + "if_assembly_other": "None", + 
"if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "28359.21", + "number_of_base_pairs_sequenced": "862996", + "number_of_variants_AF_greater_75percent": "36", + "number_of_variants_with_effect": "25", + "per_Ns": "28,36", + "per_genome_greater_10x": "72,0", + "per_reads_host": "31,88", + "per_reads_virus": "66,5", + "per_unmapped": "1,61188", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "372112", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220535", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "36" + }, + "220536": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "943d099043ef8d374519526972cb1e10", + "consensus_sequence_R1_name": "220536_S35_R1_001.fastq.gz", + 
"consensus_sequence_R2_md5": "95540b3c187f2be5b390e028a53744f0", + "consensus_sequence_R2_name": "220536_S35_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "27.0", + "fastq_r1": "220536_S35_R1_001.fastq.gz", + "fastq_r2": "220536_S35_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "33555.27", + "number_of_base_pairs_sequenced": "800800", + "number_of_variants_AF_greater_75percent": "35", + "number_of_variants_with_effect": "23", + "per_Ns": "33,56", + "per_genome_greater_10x": "66,0", + "per_reads_host": "40,80", + "per_reads_virus": "57,41", + "per_unmapped": "1,79007", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "342222", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220536", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + 
"variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "35" + }, + "220537": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "4f8c1abacae760cc659902491acf01be", + "consensus_sequence_R1_name": "220537_S36_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "ba90ceea113af1b1154cfc88791130f4", + "consensus_sequence_R2_name": "220537_S36_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "171.0", + "fastq_r1": "220537_S36_R1_001.fastq.gz", + "fastq_r2": "220537_S36_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.17", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "14735.01", + "number_of_base_pairs_sequenced": "672896", + "number_of_variants_AF_greater_75percent": "51", + "number_of_variants_with_effect": "36", + "per_Ns": "14,74", 
+ "per_genome_greater_10x": "85,0", + "per_reads_host": "5,27", + "per_reads_virus": "94,05", + "per_unmapped": "0,676578", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "296640", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220537", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "51" + }, + "220538": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "262b2daea97f27716b45e4e42769fac2", + "consensus_sequence_R1_name": "220538_S37_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "001cde855df87b0bd1a53ee2f2b6b3ef", + "consensus_sequence_R2_name": "220538_S37_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "154.0", + "fastq_r1": "220538_S37_R1_001.fastq.gz", + "fastq_r2": "220538_S37_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": 
"None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.18", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "16056.61", + "number_of_base_pairs_sequenced": "714756", + "number_of_variants_AF_greater_75percent": "52", + "number_of_variants_with_effect": "38", + "per_Ns": "16,06", + "per_genome_greater_10x": "84,0", + "per_reads_host": "18,18", + "per_reads_virus": "81,11", + "per_unmapped": "0,707308", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "316128", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220538", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "52" + }, + "220539": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "0da2a117841ac93bf98b592f80d0701b", + "consensus_sequence_R1_name": "220539_S38_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "3b9d109620122fe4f8d0395474cffa73", + "consensus_sequence_R2_name": "220539_S38_R2_001.fastq.gz", + "consensus_sequence_filepath": 
"/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "50.0", + "fastq_r1": "220539_S38_R1_001.fastq.gz", + "fastq_r2": "220539_S38_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "26759.9", + "number_of_base_pairs_sequenced": "809356", + "number_of_variants_AF_greater_75percent": "36", + "number_of_variants_with_effect": "24", + "per_Ns": "26,76", + "per_genome_greater_10x": "73,0", + "per_reads_host": "13,69", + "per_reads_virus": "84,19", + "per_unmapped": "2,12075", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "350584", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220539", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "36" + }, + "220540": { + "analysis_date": 
"2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30384", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "b7b0f6e63aa5f35bfed0c8e0af929346", + "consensus_sequence_R1_name": "220540_S39_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "ad64eff1e7994f31758f3539a68ee8f4", + "consensus_sequence_R2_name": "220540_S39_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "3.0", + "fastq_r1": "220540_S39_R1_001.fastq.gz", + "fastq_r2": "220540_S39_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "67334.78", + "number_of_base_pairs_sequenced": "825928", + "number_of_variants_AF_greater_75percent": "18", + "number_of_variants_with_effect": "13", + "per_Ns": "67,33", + "per_genome_greater_10x": "33,0", + "per_reads_host": "75,36", + "per_reads_virus": "21,13", + "per_unmapped": "3,50609", + "preprocessing_params": "--cut_front 
--cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "349706", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220540", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "18" + }, + "220541": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "07d0690f4a552688557f5dff824ee164", + "consensus_sequence_R1_name": "220541_S40_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "79ec4ae16bfedc05b0ed0d2ab1cf5dc3", + "consensus_sequence_R2_name": "220541_S40_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "131.0", + "fastq_r1": "220541_S40_R1_001.fastq.gz", + "fastq_r2": "220541_S40_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + 
"lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.1.1", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "18378.61", + "number_of_base_pairs_sequenced": "758264", + "number_of_variants_AF_greater_75percent": "44", + "number_of_variants_with_effect": "31", + "per_Ns": "18,38", + "per_genome_greater_10x": "82,0", + "per_reads_host": "21,89", + "per_reads_virus": "77,32", + "per_unmapped": "0,790127", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "314886", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220541", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "44" + }, + "220542": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30396", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "9df1b3c139ed67e0b47ae0bc2055f3d2", + "consensus_sequence_R1_name": "220542_S41_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "11e101b892f6429d4762f7e8acb8f8fa", + "consensus_sequence_R2_name": "220542_S41_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": 
"1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "17.0", + "fastq_r1": "220542_S41_R1_001.fastq.gz", + "fastq_r2": "220542_S41_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "43636.49", + "number_of_base_pairs_sequenced": "664240", + "number_of_variants_AF_greater_75percent": "29", + "number_of_variants_with_effect": "21", + "per_Ns": "43,64", + "per_genome_greater_10x": "56,00000000000001", + "per_reads_host": "16,82", + "per_reads_virus": "79,65", + "per_unmapped": "3,52348", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "290650", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220542", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "29" + }, + "220543": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": 
"nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "781a331bd3c3083fc068928fa9e62b61", + "consensus_sequence_R1_name": "220543_S42_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "761f5cf6ec9e70a3c6052e6717181987", + "consensus_sequence_R2_name": "220543_S42_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "340.0", + "fastq_r1": "220543_S42_R1_001.fastq.gz", + "fastq_r2": "220543_S42_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.1.1", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "11098.1", + "number_of_base_pairs_sequenced": "878380", + "number_of_variants_AF_greater_75percent": "53", + "number_of_variants_with_effect": "40", + "per_Ns": "11,10", + "per_genome_greater_10x": "89,0", + "per_reads_host": "1,61", + "per_reads_virus": "98,18", + "per_unmapped": "0,202193", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 
50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "393188", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220543", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "53" + }, + "220544": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "3a7d06d1d0eeb293377fbc40b8331a78", + "consensus_sequence_R1_name": "220544_S43_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "354eb8ebd7da78d0e4061ff45510b247", + "consensus_sequence_R2_name": "220544_S43_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "44.0", + "fastq_r1": "220544_S43_R1_001.fastq.gz", + "fastq_r2": "220544_S43_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.1.1", + "long_table_path": 
"/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "28081.5", + "number_of_base_pairs_sequenced": "688892", + "number_of_variants_AF_greater_75percent": "43", + "number_of_variants_with_effect": "33", + "per_Ns": "28,08", + "per_genome_greater_10x": "72,0", + "per_reads_host": "11,17", + "per_reads_virus": "87,53", + "per_unmapped": "1,30092", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "301018", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220544", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "43" + }, + "220545": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "58cbb67f568d4f59d0a11d4df132412c", + "consensus_sequence_R1_name": "220545_S44_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "45f3f30a76c6c9099cc10a54a55326ac", + "consensus_sequence_R2_name": "220545_S44_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + 
"depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "50.0", + "fastq_r1": "220545_S44_R1_001.fastq.gz", + "fastq_r2": "220545_S44_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "24270.61", + "number_of_base_pairs_sequenced": "603932", + "number_of_variants_AF_greater_75percent": "38", + "number_of_variants_with_effect": "25", + "per_Ns": "24,27", + "per_genome_greater_10x": "76,0", + "per_reads_host": "4,82", + "per_reads_virus": "90,72", + "per_unmapped": "4,46695", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "265640", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220545", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "38" + }, + "220546": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + 
"consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "54dfda3d4b9ec99067f1b57166c733f0", + "consensus_sequence_R1_name": "220546_S45_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "e03a1e702b7b0ff54bb57f95e47e99a2", + "consensus_sequence_R2_name": "220546_S45_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "127.0", + "fastq_r1": "220546_S45_R1_001.fastq.gz", + "fastq_r2": "220546_S45_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.1.1", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "19218.42", + "number_of_base_pairs_sequenced": "582380", + "number_of_variants_AF_greater_75percent": "47", + "number_of_variants_with_effect": "33", + "per_Ns": "19,22", + "per_genome_greater_10x": "81,0", + "per_reads_host": "13,78", + "per_reads_virus": "85,43", + "per_unmapped": "0,783104", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "257054", + 
"reference_genome_accession": "NC_045512.2", + "sample_name": "220546", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "47" + }, + "220599": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30402", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "5a476df56ca10e519da0937245b5f843", + "consensus_sequence_R1_name": "220599_S8_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "b0bf94bbd7290a0b2193025cda14dc02", + "consensus_sequence_R2_name": "220599_S8_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "6.0", + "fastq_r1": "220599_S8_R1_001.fastq.gz", + "fastq_r2": "220599_S8_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + 
"ns_per_100_kbp": "56967.53", + "number_of_base_pairs_sequenced": "455576", + "number_of_variants_AF_greater_75percent": "19", + "number_of_variants_with_effect": "17", + "per_Ns": "56,97", + "per_genome_greater_10x": "43,0", + "per_reads_host": "78,98", + "per_reads_virus": "12,17", + "per_unmapped": "8,8507", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "189680", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220599", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "19" + }, + "220600": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "0", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "721c349a644be2b184dc599c70b9d286", + "consensus_sequence_R1_name": "220600_S9_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "ba22af6782ec196a43e76cec80a36741", + "consensus_sequence_R2_name": "220600_S9_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "nan", + "fastq_r1": "220600_S9_R1_001.fastq.gz", + "fastq_r2": "220600_S9_R2_001.fastq.gz", + 
"if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "nan", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "nan", + "number_of_base_pairs_sequenced": "593296", + "number_of_variants_AF_greater_75percent": "0", + "number_of_variants_with_effect": "0", + "per_Ns": "nan", + "per_genome_greater_10x": "nan", + "per_reads_host": "90,28", + "per_reads_virus": "0,03", + "per_unmapped": "9,68751", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "245316", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220600", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "0" + }, + "220601": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30374", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "d6bd2082fb4fe550b1627c5b534776ec", + "consensus_sequence_R1_name": "220601_S10_R1_001.fastq.gz", + 
"consensus_sequence_R2_md5": "f395c916a68376183577b7810af918bc", + "consensus_sequence_R2_name": "220601_S10_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "798.0", + "fastq_r1": "220601_S10_R1_001.fastq.gz", + "fastq_r2": "220601_S10_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.1", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "2972.29", + "number_of_base_pairs_sequenced": "689656", + "number_of_variants_AF_greater_75percent": "59", + "number_of_variants_with_effect": "42", + "per_Ns": "2,97", + "per_genome_greater_10x": "97,0", + "per_reads_host": "9,11", + "per_reads_virus": "90,49", + "per_unmapped": "0,394747", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "296392", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220601", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + 
"variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "59" + }, + "220624": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "0", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "a997404b1926f1bd2330d95e4092bf06", + "consensus_sequence_R1_name": "220603_S11_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "aed63e4ee67de18ce6ff50c0a2fa3674", + "consensus_sequence_R2_name": "220603_S11_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "nan", + "fastq_r1": "220624_S46_R1_001.fastq.gz", + "fastq_r2": "220624_S46_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "nan", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "nan", + "number_of_base_pairs_sequenced": "600", + "number_of_variants_AF_greater_75percent": "0", + "number_of_variants_with_effect": "0", + "per_Ns": "nan", + 
"per_genome_greater_10x": "nan", + "per_reads_host": "13,64", + "per_reads_virus": "77,27", + "per_unmapped": "9,09091", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "132", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220624", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "0" + }, + "220625": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "f867c8e39ff543445b4bbf1ca774e1ac", + "consensus_sequence_R1_name": "220624_S46_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "d0cd564bec34963ef961abfc7e9206fa", + "consensus_sequence_R2_name": "220624_S46_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "25.0", + "fastq_r1": "220625_S47_R1_001.fastq.gz", + "fastq_r2": "220625_S47_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + 
"if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "33264.19", + "number_of_base_pairs_sequenced": "700960", + "number_of_variants_AF_greater_75percent": "35", + "number_of_variants_with_effect": "25", + "per_Ns": "33,26", + "per_genome_greater_10x": "67,0", + "per_reads_host": "29,79", + "per_reads_virus": "61,09", + "per_unmapped": "9,12288", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "295422", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220625", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "35" + }, + "220626": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "e71e817a639c9a688ee281aff5d958e2", + "consensus_sequence_R1_name": "220625_S47_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "a2f2cd43085caa4b198b5b7ffa0b0a09", + "consensus_sequence_R2_name": "220625_S47_R2_001.fastq.gz", + "consensus_sequence_filepath": 
"/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "19.0", + "fastq_r1": "220626_S48_R1_001.fastq.gz", + "fastq_r2": "220626_S48_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "41391.19", + "number_of_base_pairs_sequenced": "650276", + "number_of_variants_AF_greater_75percent": "30", + "number_of_variants_with_effect": "21", + "per_Ns": "41,39", + "per_genome_greater_10x": "59,0", + "per_reads_host": "30,45", + "per_reads_virus": "65,49", + "per_unmapped": "4,05793", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "280118", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220626", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "30" + }, + "220627": { + "analysis_date": 
"2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "6f8e8886752788c05b779ab4c2dae7f7", + "consensus_sequence_R1_name": "220626_S48_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "1c57c19841c6d25daa917135696dfabb", + "consensus_sequence_R2_name": "220626_S48_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "62.0", + "fastq_r1": "220627_S49_R1_001.fastq.gz", + "fastq_r2": "220627_S49_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "24468.01", + "number_of_base_pairs_sequenced": "645692", + "number_of_variants_AF_greater_75percent": "35", + "number_of_variants_with_effect": "22", + "per_Ns": "24,47", + "per_genome_greater_10x": "76,0", + "per_reads_host": "3,45", + "per_reads_virus": "95,74", + "per_unmapped": "0,804571", + "preprocessing_params": "--cut_front 
--cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "284748", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220627", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "35" + }, + "220628": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30393", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "8b56a2a1d181eefc5469790c16f5d175", + "consensus_sequence_R1_name": "220627_S49_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "e4fbdcea92c7e7e762c494bdec1216dc", + "consensus_sequence_R2_name": "220627_S49_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "303.0", + "fastq_r1": "220628_S50_R1_001.fastq.gz", + "fastq_r2": "220628_S50_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + 
"lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "10406.77", + "number_of_base_pairs_sequenced": "781184", + "number_of_variants_AF_greater_75percent": "49", + "number_of_variants_with_effect": "33", + "per_Ns": "10,41", + "per_genome_greater_10x": "90,0", + "per_reads_host": "1,54", + "per_reads_virus": "98,18", + "per_unmapped": "0,275407", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "346396", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220628", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "49" + }, + "220629": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "4c712e5ae5a5097aef16ab14123bff84", + "consensus_sequence_R1_name": "220628_S50_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "5ecbf9332f8f085bca8ff71095161d27", + "consensus_sequence_R2_name": "220628_S50_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": 
"1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "270.0", + "fastq_r1": "220629_S51_R1_001.fastq.gz", + "fastq_r2": "220629_S51_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.15.1", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "11175.05", + "number_of_base_pairs_sequenced": "737532", + "number_of_variants_AF_greater_75percent": "55", + "number_of_variants_with_effect": "40", + "per_Ns": "11,18", + "per_genome_greater_10x": "89,0", + "per_reads_host": "2,90", + "per_reads_virus": "96,75", + "per_unmapped": "0,349633", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "322910", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220629", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "55" + }, + "220630": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + 
"bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "5f7f51e074fc61cc678ef706634dc2f2", + "consensus_sequence_R1_name": "220629_S51_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "c243d5d71e4dab63d4b5e5399c9336ae", + "consensus_sequence_R2_name": "220629_S51_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "25.0", + "fastq_r1": "220630_S52_R1_001.fastq.gz", + "fastq_r2": "220630_S52_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "34692.85", + "number_of_base_pairs_sequenced": "972160", + "number_of_variants_AF_greater_75percent": "34", + "number_of_variants_with_effect": "25", + "per_Ns": "34,69", + "per_genome_greater_10x": "65,0", + "per_reads_host": "22,45", + "per_reads_virus": "73,86", + "per_unmapped": "3,69838", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + 
"preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "415182", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220630", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "34" + }, + "220631": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "bfaa0e4aef054c0bb369f935278e8364", + "consensus_sequence_R1_name": "220630_S52_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "df070c9a8e583211f198bc67bb16b3bc", + "consensus_sequence_R2_name": "220630_S52_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "81.0", + "fastq_r1": "220631_S53_R1_001.fastq.gz", + "fastq_r2": "220631_S53_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": 
"/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "28007.9", + "number_of_base_pairs_sequenced": "672640", + "number_of_variants_AF_greater_75percent": "34", + "number_of_variants_with_effect": "23", + "per_Ns": "28,01", + "per_genome_greater_10x": "72,0", + "per_reads_host": "41,16", + "per_reads_virus": "57,7", + "per_unmapped": "1,14487", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "295142", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220631", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "34" + }, + "220633": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30395", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "3286b5c58719476e8832de107c366bc4", + "consensus_sequence_R1_name": "220631_S53_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "ca6a1e4503d3892e60f86b24ac26e7c1", + "consensus_sequence_R2_name": "220631_S53_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + 
"depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "122.0", + "fastq_r1": "220633_S54_R1_001.fastq.gz", + "fastq_r2": "220633_S54_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "AY.127", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "18918.92", + "number_of_base_pairs_sequenced": "619768", + "number_of_variants_AF_greater_75percent": "47", + "number_of_variants_with_effect": "31", + "per_Ns": "18,92", + "per_genome_greater_10x": "81,0", + "per_reads_host": "26,51", + "per_reads_virus": "72,86", + "per_unmapped": "0,632422", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "274184", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220633", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "47" + }, + "220634": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + 
"consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "893f9aa89c1cb03e02b7caf66c71ca80", + "consensus_sequence_R1_name": "220633_S54_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "33c3e32357450b511a5657b0c22aa9fb", + "consensus_sequence_R2_name": "220633_S54_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "168.0", + "fastq_r1": "220634_S55_R1_001.fastq.gz", + "fastq_r2": "220634_S55_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.17", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "17170.77", + "number_of_base_pairs_sequenced": "993232", + "number_of_variants_AF_greater_75percent": "43", + "number_of_variants_with_effect": "31", + "per_Ns": "17,17", + "per_genome_greater_10x": "83,0", + "per_reads_host": "3,26", + "per_reads_virus": "96,03", + "per_unmapped": "0,711273", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "440478", + 
"reference_genome_accession": "NC_045512.2", + "sample_name": "220634", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "43" + }, + "220635": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "53382ddf2ccadc39d6022e073a293c61", + "consensus_sequence_R1_name": "220634_S55_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "a49c009e9eeb68c46b8dafd0da05eadb", + "consensus_sequence_R2_name": "220634_S55_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "28.0", + "fastq_r1": "220635_S56_R1_001.fastq.gz", + "fastq_r2": "220635_S56_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": 
"2.4.4", + "ns_per_100_kbp": "34870.18", + "number_of_base_pairs_sequenced": "872228", + "number_of_variants_AF_greater_75percent": "33", + "number_of_variants_with_effect": "23", + "per_Ns": "34,87", + "per_genome_greater_10x": "65,0", + "per_reads_host": "32,49", + "per_reads_virus": "61,03", + "per_unmapped": "6,48049", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "373722", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220635", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "33" + }, + "220636": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "211658e701aaf7fb483a9a2b64a0f917", + "consensus_sequence_R1_name": "220635_S56_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "51bf5c329fc240c8e8f1baefc37d5c1b", + "consensus_sequence_R2_name": "220635_S56_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "36.0", + "fastq_r1": "220636_S57_R1_001.fastq.gz", + "fastq_r2": 
"220636_S57_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "29841.41", + "number_of_base_pairs_sequenced": "778600", + "number_of_variants_AF_greater_75percent": "40", + "number_of_variants_with_effect": "29", + "per_Ns": "29,84", + "per_genome_greater_10x": "70,0", + "per_reads_host": "20,28", + "per_reads_virus": "74,28", + "per_unmapped": "5,43445", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "333686", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220636", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "40" + }, + "220637": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "1284ba20e1bee6a584d3f10042ffc38b", + 
"consensus_sequence_R1_name": "220636_S57_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "72334caba85e114c076aef467e6987e1", + "consensus_sequence_R2_name": "220636_S57_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "29.0", + "fastq_r1": "220637_S58_R1_001.fastq.gz", + "fastq_r2": "220637_S58_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "33481.66", + "number_of_base_pairs_sequenced": "766164", + "number_of_variants_AF_greater_75percent": "37", + "number_of_variants_with_effect": "27", + "per_Ns": "33,48", + "per_genome_greater_10x": "67,0", + "per_reads_host": "5,43", + "per_reads_virus": "74,84", + "per_unmapped": "19,7223", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "329688", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220637", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ 
--max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "37" + }, + "220638": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "a36aa48dd53b5938a160ac094fd753cb", + "consensus_sequence_R1_name": "220637_S58_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "1c7c1bb51da20c0e8c9661436f598cc2", + "consensus_sequence_R2_name": "220637_S58_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "245.0", + "fastq_r1": "220638_S59_R1_001.fastq.gz", + "fastq_r2": "220638_S59_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.17", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "13828.29", + "number_of_base_pairs_sequenced": "930584", + "number_of_variants_AF_greater_75percent": "46", + 
"number_of_variants_with_effect": "32", + "per_Ns": "13,83", + "per_genome_greater_10x": "86,0", + "per_reads_host": "2,46", + "per_reads_virus": "96,27", + "per_unmapped": "1,26541", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "412358", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220638", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "46" + }, + "220639": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30393", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "b6ecf898c0b6e15c7cded228718da4f8", + "consensus_sequence_R1_name": "220638_S59_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "d4d3b6325ef1ca17f6d36953b050f532", + "consensus_sequence_R2_name": "220638_S59_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "195.0", + "fastq_r1": "220639_S60_R1_001.fastq.gz", + "fastq_r2": "220639_S60_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + 
"if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.17", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "14942.8", + "number_of_base_pairs_sequenced": "630980", + "number_of_variants_AF_greater_75percent": "56", + "number_of_variants_with_effect": "38", + "per_Ns": "14,94", + "per_genome_greater_10x": "85,0", + "per_reads_host": "2,28", + "per_reads_virus": "97,46", + "per_unmapped": "0,266295", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "280140", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220639", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "56" + }, + "220640": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "fa2af2a995a99809496fd34602a16542", + "consensus_sequence_R1_name": "220639_S60_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "a89de5a74985de324670f41ef730b29b", + "consensus_sequence_R2_name": 
"220639_S60_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "22.0", + "fastq_r1": "220640_S61_R1_001.fastq.gz", + "fastq_r2": "220640_S61_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "35726.71", + "number_of_base_pairs_sequenced": "684808", + "number_of_variants_AF_greater_75percent": "31", + "number_of_variants_with_effect": "22", + "per_Ns": "35,73", + "per_genome_greater_10x": "64,0", + "per_reads_host": "10,69", + "per_reads_virus": "82,8", + "per_unmapped": "6,50954", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "293692", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220640", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + 
"variant_designation": "31" + }, + "220642": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "55f36a57fd7e621483b6653a84e6edad", + "consensus_sequence_R1_name": "220640_S61_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "40da6eac77dcd3c82f9531e3bc5f4e9c", + "consensus_sequence_R2_name": "220640_S61_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "41.0", + "fastq_r1": "220642_S62_R1_001.fastq.gz", + "fastq_r2": "220642_S62_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "28827.62", + "number_of_base_pairs_sequenced": "654296", + "number_of_variants_AF_greater_75percent": "35", + "number_of_variants_with_effect": "23", + "per_Ns": "28,83", + "per_genome_greater_10x": "71,0", + "per_reads_host": "10,65", + "per_reads_virus": "85,33", + 
"per_unmapped": "4,02172", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "283734", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220642", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "35" + }, + "220644": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30395", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "424e46a2427c6c8f7c80aae322f22bc3", + "consensus_sequence_R1_name": "220642_S62_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "cd7a5f03488794eeec2b42e3c5dd73dd", + "consensus_sequence_R2_name": "220642_S62_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "15.0", + "fastq_r1": "220644_S63_R1_001.fastq.gz", + "fastq_r2": "220644_S63_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + 
"lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "B.1.617.2", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "42353.49", + "number_of_base_pairs_sequenced": "675724", + "number_of_variants_AF_greater_75percent": "29", + "number_of_variants_with_effect": "17", + "per_Ns": "42,35", + "per_genome_greater_10x": "57,99999999999999", + "per_reads_host": "37,11", + "per_reads_virus": "58,29", + "per_unmapped": "4,60138", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "287392", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220644", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "29" + }, + "220646": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "727dcb0ceeef62ce68f9482188180ed8", + "consensus_sequence_R1_name": "220644_S63_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "68173f24d15e48a74b7b46a0937a5436", + "consensus_sequence_R2_name": "220644_S63_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": 
"BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "31.0", + "fastq_r1": "220646_S64_R1_001.fastq.gz", + "fastq_r2": "220646_S64_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "33424.79", + "number_of_base_pairs_sequenced": "771276", + "number_of_variants_AF_greater_75percent": "34", + "number_of_variants_with_effect": "25", + "per_Ns": "33,42", + "per_genome_greater_10x": "67,0", + "per_reads_host": "17,94", + "per_reads_virus": "78,47", + "per_unmapped": "3,58353", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "329228", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220646", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "34" + }, + "220647": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + 
"bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "ea407b7488895db5d210dcd5fa2e8e07", + "consensus_sequence_R1_name": "220646_S64_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "425f1969ebcdd626729c94668f6c9985", + "consensus_sequence_R2_name": "220646_S64_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "64.0", + "fastq_r1": "220647_S65_R1_001.fastq.gz", + "fastq_r2": "220647_S65_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "22356.8", + "number_of_base_pairs_sequenced": "854180", + "number_of_variants_AF_greater_75percent": "38", + "number_of_variants_with_effect": "26", + "per_Ns": "22,36", + "per_genome_greater_10x": "78,0", + "per_reads_host": "11,77", + "per_reads_virus": "86,52", + "per_unmapped": "1,70902", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 
--unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "372318", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220647", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "38" + }, + "220648": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "93db59751dfb1685e149bfea5d1fa40d", + "consensus_sequence_R1_name": "220647_S65_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "dc234549b290f2649e7ae285aee6ff5b", + "consensus_sequence_R2_name": "220647_S65_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "170.0", + "fastq_r1": "220648_S66_R1_001.fastq.gz", + "fastq_r2": "220648_S66_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1", + 
"long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "14413.81", + "number_of_base_pairs_sequenced": "988352", + "number_of_variants_AF_greater_75percent": "51", + "number_of_variants_with_effect": "36", + "per_Ns": "14,41", + "per_genome_greater_10x": "86,0", + "per_reads_host": "3,72", + "per_reads_virus": "91,43", + "per_unmapped": "4,85191", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "433376", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220648", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "51" + }, + "220649": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "86dc347a67cbc91692e8a44be43a1c49", + "consensus_sequence_R1_name": "220648_S66_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "ca841d76f56036e00b663be25fe32313", + "consensus_sequence_R2_name": "220648_S66_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + 
"dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "13.0", + "fastq_r1": "220649_S67_R1_001.fastq.gz", + "fastq_r2": "220649_S67_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "45717.34", + "number_of_base_pairs_sequenced": "828056", + "number_of_variants_AF_greater_75percent": "27", + "number_of_variants_with_effect": "19", + "per_Ns": "45,72", + "per_genome_greater_10x": "54,0", + "per_reads_host": "50,23", + "per_reads_virus": "43,11", + "per_unmapped": "6,65581", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "343850", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220649", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "27" + }, + "220650": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + 
"commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "d26d54d5f3771b92405cd0bed11e22f4", + "consensus_sequence_R1_name": "220649_S67_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "d820aca88a3637fb44bb648d9a6a03c1", + "consensus_sequence_R2_name": "220649_S67_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "31.0", + "fastq_r1": "220650_S68_R1_001.fastq.gz", + "fastq_r2": "220650_S68_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "32119.91", + "number_of_base_pairs_sequenced": "845436", + "number_of_variants_AF_greater_75percent": "39", + "number_of_variants_with_effect": "28", + "per_Ns": "32,12", + "per_genome_greater_10x": "68,0", + "per_reads_host": "14,41", + "per_reads_virus": "83,7", + "per_unmapped": "1,88494", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": 
"0.23.2", + "qc_filtered": "357146", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220650", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "39" + }, + "220651": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "bfda7de91c9f4a1b938e2fd8809cb37b", + "consensus_sequence_R1_name": "220650_S68_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "2b596c41ad86057f9b45c60e1e551139", + "consensus_sequence_R2_name": "220650_S68_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "66.0", + "fastq_r1": "220651_S69_R1_001.fastq.gz", + "fastq_r2": "220651_S69_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": 
"BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "27495.99", + "number_of_base_pairs_sequenced": "797860", + "number_of_variants_AF_greater_75percent": "36", + "number_of_variants_with_effect": "26", + "per_Ns": "27,50", + "per_genome_greater_10x": "73,0", + "per_reads_host": "6,10", + "per_reads_virus": "92,77", + "per_unmapped": "1,13609", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "345044", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220651", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "36" + }, + "220652": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "762264514323d8ba65ba3cf1b9ad7947", + "consensus_sequence_R1_name": "220651_S69_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "f9f7118d4eacdcff059f92a352e8f832", + "consensus_sequence_R2_name": "220651_S69_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "24.0", + "fastq_r1": 
"220652_S70_R1_001.fastq.gz", + "fastq_r2": "220652_S70_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "34803.27", + "number_of_base_pairs_sequenced": "915916", + "number_of_variants_AF_greater_75percent": "33", + "number_of_variants_with_effect": "24", + "per_Ns": "34,80", + "per_genome_greater_10x": "65,0", + "per_reads_host": "24,15", + "per_reads_virus": "72,05", + "per_unmapped": "3,80244", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "375890", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220652", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "33" + }, + "220653": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": 
"73e730f342ec9788d715639c982d34be", + "consensus_sequence_R1_name": "220652_S70_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "aa16ec3d36d7b457696f81ddd489340b", + "consensus_sequence_R2_name": "220652_S70_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "44.0", + "fastq_r1": "220653_S71_R1_001.fastq.gz", + "fastq_r2": "220653_S71_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "27857.33", + "number_of_base_pairs_sequenced": "751236", + "number_of_variants_AF_greater_75percent": "39", + "number_of_variants_with_effect": "27", + "per_Ns": "27,86", + "per_genome_greater_10x": "72,0", + "per_reads_host": "7,98", + "per_reads_virus": "90,63", + "per_unmapped": "1,3858", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "324144", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220653", + "variant_calling_params": 
"--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "39" + }, + "220654": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30393", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "899d9eedeb862ed2b766b2ef38cdef89", + "consensus_sequence_R1_name": "220653_S71_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "d0613e93a129a7f935f44e9f8fe6dad7", + "consensus_sequence_R2_name": "220653_S71_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "8.0", + "fastq_r1": "220654_S72_R1_001.fastq.gz", + "fastq_r2": "220654_S72_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "52462.03", + "number_of_base_pairs_sequenced": "664256", + 
"number_of_variants_AF_greater_75percent": "25", + "number_of_variants_with_effect": "19", + "per_Ns": "52,46", + "per_genome_greater_10x": "48,0", + "per_reads_host": "43,90", + "per_reads_virus": "42,63", + "per_unmapped": "13,4672", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "281982", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220654", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "25" + }, + "220655": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "1cd9636649fce7e6f8172fc4696362b6", + "consensus_sequence_R1_name": "220654_S72_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "43eabe0ccbc64a6762f07691b6cb9b78", + "consensus_sequence_R2_name": "220654_S72_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "57.0", + "fastq_r1": "220655_S73_R1_001.fastq.gz", + "fastq_r2": "220655_S73_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": 
"None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "23705.17", + "number_of_base_pairs_sequenced": "600872", + "number_of_variants_AF_greater_75percent": "39", + "number_of_variants_with_effect": "27", + "per_Ns": "23,71", + "per_genome_greater_10x": "76,0", + "per_reads_host": "7,34", + "per_reads_virus": "91,34", + "per_unmapped": "1,32183", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "264482", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220655", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "39" + }, + "220656": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "cb1756e84c0c854814aa8ccfa6ef9965", + "consensus_sequence_R1_name": "220655_S73_R1_001.fastq.gz", + "consensus_sequence_R2_md5": 
"c1c5d439d06d28839122cee8ec2a8a26", + "consensus_sequence_R2_name": "220655_S73_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "144.0", + "fastq_r1": "220656_S74_R1_001.fastq.gz", + "fastq_r2": "220656_S74_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.17", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "15919.43", + "number_of_base_pairs_sequenced": "786352", + "number_of_variants_AF_greater_75percent": "50", + "number_of_variants_with_effect": "36", + "per_Ns": "15,92", + "per_genome_greater_10x": "84,0", + "per_reads_host": "2,45", + "per_reads_virus": "97,21", + "per_unmapped": "0,340416", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "342522", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220656", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": 
"IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "50" + }, + "220657": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30396", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "c582439856aaa10e58cde21b07323dbe", + "consensus_sequence_R1_name": "220656_S74_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "d4645048ab66627ec855d1231b4207a6", + "consensus_sequence_R2_name": "220656_S74_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "7.0", + "fastq_r1": "220657_S75_R1_001.fastq.gz", + "fastq_r2": "220657_S75_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "55263.07", + "number_of_base_pairs_sequenced": "818228", + "number_of_variants_AF_greater_75percent": "24", + "number_of_variants_with_effect": "17", + "per_Ns": "55,26", + "per_genome_greater_10x": 
"45,0", + "per_reads_host": "41,91", + "per_reads_virus": "54,61", + "per_unmapped": "3,4863", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "342082", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220657", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "24" + }, + "220658": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30396", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "2cd60ea9dc7358578f2b11f3e2712898", + "consensus_sequence_R1_name": "220657_S75_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "4de125f0f89a02caf060259ff7ba97fd", + "consensus_sequence_R2_name": "220657_S75_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "18.0", + "fastq_r1": "220658_S76_R1_001.fastq.gz", + "fastq_r2": "220658_S76_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + 
"if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "41141.25", + "number_of_base_pairs_sequenced": "675544", + "number_of_variants_AF_greater_75percent": "28", + "number_of_variants_with_effect": "21", + "per_Ns": "41,14", + "per_genome_greater_10x": "59,0", + "per_reads_host": "16,85", + "per_reads_virus": "80,7", + "per_unmapped": "2,44736", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "289618", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220658", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "28" + }, + "220659": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30377", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "b3c9abf04e41fb878ebde52383f27cd1", + "consensus_sequence_R1_name": "220658_S76_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "b8d3fb213057c4d1f909e187789ad8f1", + "consensus_sequence_R2_name": "220658_S76_R2_001.fastq.gz", + "consensus_sequence_filepath": 
"/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "26.0", + "fastq_r1": "220659_S77_R1_001.fastq.gz", + "fastq_r2": "220659_S77_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "32347.13", + "number_of_base_pairs_sequenced": "668900", + "number_of_variants_AF_greater_75percent": "36", + "number_of_variants_with_effect": "26", + "per_Ns": "32,35", + "per_genome_greater_10x": "68,0", + "per_reads_host": "22,53", + "per_reads_virus": "75,32", + "per_unmapped": "2,15929", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "288798", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220659", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "36" + }, + "220660": { + "analysis_date": 
"2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "efbc2790b3cf0ff305a2c6c949c3101c", + "consensus_sequence_R1_name": "220659_S77_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "12d26a620109b24172e517a03edca796", + "consensus_sequence_R2_name": "220659_S77_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "127.0", + "fastq_r1": "220660_S78_R1_001.fastq.gz", + "fastq_r2": "220660_S78_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.17", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "18810.22", + "number_of_base_pairs_sequenced": "750556", + "number_of_variants_AF_greater_75percent": "43", + "number_of_variants_with_effect": "30", + "per_Ns": "18,81", + "per_genome_greater_10x": "81,0", + "per_reads_host": "4,75", + "per_reads_virus": "94,72", + "per_unmapped": "0,530311", + "preprocessing_params": "--cut_front 
--cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "325092", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220660", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "43" + }, + "220661": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "5da90c106b6c5791dc1058da6fb88d58", + "consensus_sequence_R1_name": "220660_S78_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "fae8c6af23e3fac5a9a448712c4613b6", + "consensus_sequence_R2_name": "220660_S78_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "129.0", + "fastq_r1": "220661_S79_R1_001.fastq.gz", + "fastq_r2": "220661_S79_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + 
"lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.1", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "16464.8", + "number_of_base_pairs_sequenced": "711320", + "number_of_variants_AF_greater_75percent": "43", + "number_of_variants_with_effect": "28", + "per_Ns": "16,46", + "per_genome_greater_10x": "84,0", + "per_reads_host": "8,35", + "per_reads_virus": "90,28", + "per_unmapped": "1,36789", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "314206", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220661", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "43" + }, + "220663": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "507dba14ebad10c2c2e697bd506b3b17", + "consensus_sequence_R1_name": "220661_S79_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "00762bb19883db46289ef9047e9aa662", + "consensus_sequence_R2_name": "220661_S79_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": 
"1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "146.0", + "fastq_r1": "220663_S80_R1_001.fastq.gz", + "fastq_r2": "220663_S80_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.1", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "18592.75", + "number_of_base_pairs_sequenced": "870740", + "number_of_variants_AF_greater_75percent": "47", + "number_of_variants_with_effect": "32", + "per_Ns": "18,59", + "per_genome_greater_10x": "81,0", + "per_reads_host": "4,62", + "per_reads_virus": "94,78", + "per_unmapped": "0,600517", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "377508", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220663", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "47" + }, + "220664": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + 
"bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30393", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "b2f14cba5c1a9c8b2ed589afb69238f3", + "consensus_sequence_R1_name": "220663_S80_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "8325daa2cb930e554c1a576ae529555b", + "consensus_sequence_R2_name": "220663_S80_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "4.0", + "fastq_r1": "220664_S81_R1_001.fastq.gz", + "fastq_r2": "220664_S81_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "58981.74", + "number_of_base_pairs_sequenced": "897896", + "number_of_variants_AF_greater_75percent": "28", + "number_of_variants_with_effect": "23", + "per_Ns": "58,98", + "per_genome_greater_10x": "41,0", + "per_reads_host": "61,73", + "per_reads_virus": "28,58", + "per_unmapped": "9,68748", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + 
"preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "367268", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220664", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "28" + }, + "220665": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "2b7fca4ad9e1cf5b4d719337538b2ece", + "consensus_sequence_R1_name": "220664_S81_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "43d5cb91ffc5575a15a276c22eaf8db8", + "consensus_sequence_R2_name": "220664_S81_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "230.0", + "fastq_r1": "220665_S82_R1_001.fastq.gz", + "fastq_r2": "220665_S82_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.1.1", + "long_table_path": 
"/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "13497.06", + "number_of_base_pairs_sequenced": "708956", + "number_of_variants_AF_greater_75percent": "51", + "number_of_variants_with_effect": "38", + "per_Ns": "13,50", + "per_genome_greater_10x": "87,0", + "per_reads_host": "2,17", + "per_reads_virus": "97,48", + "per_unmapped": "0,349256", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "310374", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220665", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "51" + }, + "220666": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "c79e735d65c1122bcb54e63adf003e98", + "consensus_sequence_R1_name": "220665_S82_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "a4cf0e9a3dbdefd46a928cf18acf5f08", + "consensus_sequence_R2_name": "220665_S82_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + 
"depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "47.0", + "fastq_r1": "220666_S83_R1_001.fastq.gz", + "fastq_r2": "220666_S83_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.17", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "28329.1", + "number_of_base_pairs_sequenced": "787408", + "number_of_variants_AF_greater_75percent": "38", + "number_of_variants_with_effect": "26", + "per_Ns": "28,33", + "per_genome_greater_10x": "72,0", + "per_reads_host": "7,00", + "per_reads_virus": "86,58", + "per_unmapped": "6,41453", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "335192", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220666", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "38" + }, + "220667": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + 
"consensus_genome_length": "30389", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "9a60ceb8c32a4d67947c1ffcb3334223", + "consensus_sequence_R1_name": "220666_S83_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "f11417d562a870b154d8034c60cb997f", + "consensus_sequence_R2_name": "220666_S83_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "4.0", + "fastq_r1": "220667_S84_R1_001.fastq.gz", + "fastq_r2": "220667_S84_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "B.1.617.2", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "59956.51", + "number_of_base_pairs_sequenced": "597444", + "number_of_variants_AF_greater_75percent": "21", + "number_of_variants_with_effect": "14", + "per_Ns": "59,96", + "per_genome_greater_10x": "40,0", + "per_reads_host": "56,16", + "per_reads_virus": "38,52", + "per_unmapped": "5,31613", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "240438", + 
"reference_genome_accession": "NC_045512.2", + "sample_name": "220667", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "21" + }, + "220668": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "2791ff16b7e94f806b2c00442890112c", + "consensus_sequence_R1_name": "220667_S84_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "fe3a55379689e22cf111e1287c32f45f", + "consensus_sequence_R2_name": "220667_S84_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "127.0", + "fastq_r1": "220668_S85_R1_001.fastq.gz", + "fastq_r2": "220668_S85_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.17", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": 
"2.4.4", + "ns_per_100_kbp": "18867.1", + "number_of_base_pairs_sequenced": "713060", + "number_of_variants_AF_greater_75percent": "49", + "number_of_variants_with_effect": "31", + "per_Ns": "18,87", + "per_genome_greater_10x": "81,0", + "per_reads_host": "2,04", + "per_reads_virus": "97,56", + "per_unmapped": "0,400449", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "310152", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220668", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "49" + }, + "220669": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30393", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "87479e96b34106748b1cb342dce3d24e", + "consensus_sequence_R1_name": "220668_S85_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "01fc8610639039fae3a5039aa0f8ce3e", + "consensus_sequence_R2_name": "220668_S85_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "15.0", + "fastq_r1": "220669_S86_R1_001.fastq.gz", + "fastq_r2": 
"220669_S86_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "42413.19", + "number_of_base_pairs_sequenced": "644744", + "number_of_variants_AF_greater_75percent": "33", + "number_of_variants_with_effect": "23", + "per_Ns": "42,41", + "per_genome_greater_10x": "57,99999999999999", + "per_reads_host": "25,63", + "per_reads_virus": "68,56", + "per_unmapped": "5,8085", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "259344", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220669", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "33" + }, + "220670": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "0374abb4d11b9bc701faf6e20e0b39b9", + 
"consensus_sequence_R1_name": "220669_S86_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "866f493a5fca69d1219839e25a2c59c1", + "consensus_sequence_R2_name": "220669_S86_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "32.0", + "fastq_r1": "220670_S87_R1_001.fastq.gz", + "fastq_r2": "220670_S87_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "32002.81", + "number_of_base_pairs_sequenced": "754608", + "number_of_variants_AF_greater_75percent": "36", + "number_of_variants_with_effect": "25", + "per_Ns": "32,00", + "per_genome_greater_10x": "68,0", + "per_reads_host": "16,25", + "per_reads_virus": "81,76", + "per_unmapped": "1,99641", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "310808", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220670", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ 
--max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "36" + }, + "220671": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30386", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "ce29b2a53257a6682ca4d29590fb1397", + "consensus_sequence_R1_name": "220670_S87_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "2cb2bfa6bdd8d37e073ce386a51eb222", + "consensus_sequence_R2_name": "220670_S87_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "90.0", + "fastq_r1": "220671_S88_R1_001.fastq.gz", + "fastq_r2": "220671_S88_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.17", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "21514.37", + "number_of_base_pairs_sequenced": "933080", + "number_of_variants_AF_greater_75percent": "43", + 
"number_of_variants_with_effect": "31", + "per_Ns": "21,51", + "per_genome_greater_10x": "78,0", + "per_reads_host": "10,42", + "per_reads_virus": "88,29", + "per_unmapped": "1,29158", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "410196", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220671", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "43" + }, + "220672": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "1901c8ee421c8ff3a461e73b9990548d", + "consensus_sequence_R1_name": "220671_S88_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "63075cf1ff8543ed1f685c34f145af53", + "consensus_sequence_R2_name": "220671_S88_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "161.0", + "fastq_r1": "220672_S89_R1_001.fastq.gz", + "fastq_r2": "220672_S89_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + 
"if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.17", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "17224.3", + "number_of_base_pairs_sequenced": "670560", + "number_of_variants_AF_greater_75percent": "45", + "number_of_variants_with_effect": "33", + "per_Ns": "17,22", + "per_genome_greater_10x": "83,0", + "per_reads_host": "3,41", + "per_reads_virus": "96,0", + "per_unmapped": "0,594324", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "281328", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220672", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "45" + }, + "220675": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30381", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "f853f727c6c77062f9f0eb388b975cac", + "consensus_sequence_R1_name": "220672_S89_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "9508309f8b2d201cb8fcd9a96e4592b2", + "consensus_sequence_R2_name": 
"220672_S89_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "35.0", + "fastq_r1": "220675_S90_R1_001.fastq.gz", + "fastq_r2": "220675_S90_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "30192.09", + "number_of_base_pairs_sequenced": "689132", + "number_of_variants_AF_greater_75percent": "37", + "number_of_variants_with_effect": "23", + "per_Ns": "30,19", + "per_genome_greater_10x": "70,0", + "per_reads_host": "11,63", + "per_reads_virus": "84,1", + "per_unmapped": "4,26784", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "286140", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220675", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + 
"variant_designation": "37" + }, + "220677": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30396", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "21c8ef96c84e40fecfbb3749e28b759c", + "consensus_sequence_R1_name": "220675_S90_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "dad21b92354af748bacf678f6ad1c6a6", + "consensus_sequence_R2_name": "220675_S90_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "12.0", + "fastq_r1": "220677_S91_R1_001.fastq.gz", + "fastq_r2": "220677_S91_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "46409.34", + "number_of_base_pairs_sequenced": "756360", + "number_of_variants_AF_greater_75percent": "30", + "number_of_variants_with_effect": "22", + "per_Ns": "46,41", + "per_genome_greater_10x": "54,0", + "per_reads_host": "33,44", + "per_reads_virus": "60,25", + 
"per_unmapped": "6,3085", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "310914", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220677", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "30" + }, + "220678": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "9a56b40a023af7395a3610284425d3d2", + "consensus_sequence_R1_name": "220677_S91_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "0bcc1af3d0663e0cfa0edfedd533155e", + "consensus_sequence_R2_name": "220677_S91_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "47.0", + "fastq_r1": "220678_S92_R1_001.fastq.gz", + "fastq_r2": "220678_S92_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + 
"lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "29038.41", + "number_of_base_pairs_sequenced": "714064", + "number_of_variants_AF_greater_75percent": "38", + "number_of_variants_with_effect": "26", + "per_Ns": "29,04", + "per_genome_greater_10x": "71,0", + "per_reads_host": "11,96", + "per_reads_virus": "86,75", + "per_unmapped": "1,29275", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "305936", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220678", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "38" + }, + "220679": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "bf9a3a7ec821fcf98a3e7383150175ed", + "consensus_sequence_R1_name": "220678_S92_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "a2bd2aafc26b8ff21e95dfea74c4e38f", + "consensus_sequence_R2_name": "220678_S92_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": 
"BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "98.0", + "fastq_r1": "220679_S93_R1_001.fastq.gz", + "fastq_r2": "220679_S93_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "BA.1.1.1", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "19021.01", + "number_of_base_pairs_sequenced": "814408", + "number_of_variants_AF_greater_75percent": "42", + "number_of_variants_with_effect": "30", + "per_Ns": "19,02", + "per_genome_greater_10x": "81,0", + "per_reads_host": "5,73", + "per_reads_virus": "93,41", + "per_unmapped": "0,859727", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "346738", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220679", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "42" + }, + "220680": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + 
"bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "4f253c4b5432f2ab3e446190703da03b", + "consensus_sequence_R1_name": "220679_S93_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "f4b182052d1fb7f0690719572d5e37d3", + "consensus_sequence_R2_name": "220679_S93_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "63.0", + "fastq_r1": "220680_S94_R1_001.fastq.gz", + "fastq_r2": "220680_S94_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "24712.26", + "number_of_base_pairs_sequenced": "632376", + "number_of_variants_AF_greater_75percent": "40", + "number_of_variants_with_effect": "27", + "per_Ns": "24,71", + "per_genome_greater_10x": "75,0", + "per_reads_host": "12,07", + "per_reads_virus": "84,28", + "per_unmapped": "3,65682", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 
--unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "276880", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220680", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "40" + }, + "220684": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30401", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "10528a48df6f6310d5b53fa402a9446f", + "consensus_sequence_R1_name": "220680_S94_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "994e273a5f941151c52727fa27732e03", + "consensus_sequence_R2_name": "220680_S94_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "11.0", + "fastq_r1": "220684_S95_R1_001.fastq.gz", + "fastq_r2": "220684_S95_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "0", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + 
"long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "49070.3", + "number_of_base_pairs_sequenced": "889808", + "number_of_variants_AF_greater_75percent": "26", + "number_of_variants_with_effect": "21", + "per_Ns": "49,07", + "per_genome_greater_10x": "51,0", + "per_reads_host": "56,51", + "per_reads_virus": "38,03", + "per_unmapped": "5,45281", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "363996", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220684", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "26" + }, + "220685": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30387", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "87a281cb1cc7a5f246aeaf5ed33b2d3f", + "consensus_sequence_R1_name": "220684_S95_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "be0059af911542a8329249cc6fd4b50e", + "consensus_sequence_R2_name": "220684_S95_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + 
"dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "63.0", + "fastq_r1": "220685_S96_R1_001.fastq.gz", + "fastq_r2": "220685_S96_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "0", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "Unassigned", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "25046.84", + "number_of_base_pairs_sequenced": "756012", + "number_of_variants_AF_greater_75percent": "35", + "number_of_variants_with_effect": "25", + "per_Ns": "25,05", + "per_genome_greater_10x": "75,0", + "per_reads_host": "9,21", + "per_reads_virus": "89,28", + "per_unmapped": "1,51439", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "327920", + "reference_genome_accession": "NC_045512.2", + "sample_name": "220685", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "35" + } +} \ No newline at end of file diff --git a/relecov_tools/example_data/upload_bioinfo_metadata/bioinfo_metadata_test.json b/relecov_tools/example_data/upload_bioinfo_metadata/bioinfo_metadata_test.json new file mode 100644 index 00000000..a0aa0203 --- 
/dev/null +++ b/relecov_tools/example_data/upload_bioinfo_metadata/bioinfo_metadata_test.json @@ -0,0 +1,58 @@ +{ + "214821": { + "analysis_date": "2022-05-12 14:55:38.607792", + "assembly": "None", + "assembly_params": "None", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "bioinformatics_protocol_software_version": "2.4.1", + "commercial_open_source_both": "open-source", + "consensus_genome_length": "30396", + "consensus_params": "-p vcf -f", + "consensus_sequence_R1_md5": "dbcc703ccb7da3002fee6c0486199009", + "consensus_sequence_R1_name": "214821_S12_R1_001.fastq.gz", + "consensus_sequence_R2_md5": "b76fba963664b532004c4ce7153ae14f", + "consensus_sequence_R2_name": "214821_S12_R2_001.fastq.gz", + "consensus_sequence_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "consensus_sequence_software_version": "1.14", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "dehosting_method_software_version": "2.1.2", + "depth_of_coverage_threshold": ">10x", + "depth_of_coverage_value": "287.0", + "fastq_r1": "214821_S12_R1_001.fastq.gz", + "fastq_r2": "214821_S12_R2_001.fastq.gz", + "if_assembly_other": "None", + "if_bioinformatic_protocol_is_other_specify": "None", + "if_consensus_other": "None", + "if_lineage_identification_other": "None", + "if_mapping_other": "None", + "if_preprocessing_other": "None", + "lineage_analysis_software_name": "pangolin", + "lineage_analysis_software_version": "4.0.6", + "lineage_identification_date": "2022-05-12 14:55:38.607792", + "lineage_name": "B.1.177", + "long_table_path": "/data/UCCT_Relecov/COD-2100-MAD-CNM/20220208", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "mapping_software_version": "2.4.4", + "ns_per_100_kbp": "8442.32", + "number_of_base_pairs_sequenced": "633884", + "number_of_variants_AF_greater_75percent": "19", + "number_of_variants_with_effect": "9", + "per_Ns": "8,44", + 
"per_genome_greater_10x": "92,0", + "per_reads_host": "1,08", + "per_reads_virus": "98,54", + "per_unmapped": "0,380247", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "preprocessing_software_version": "0.23.2", + "qc_filtered": "285604", + "reference_genome_accession": "NC_045512.2", + "sample_name": "214821", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS", + "variant_calling_software_version": "1.3.1", + "variant_designation": "19" + } +} diff --git a/relecov_tools/example_data/upload_bioinfo_metadata/metadata_lab_test.json b/relecov_tools/example_data/upload_bioinfo_metadata/metadata_lab_test.json new file mode 100644 index 00000000..01bc3b8f --- /dev/null +++ b/relecov_tools/example_data/upload_bioinfo_metadata/metadata_lab_test.json @@ -0,0 +1,84 @@ +[ + { + "amplicon_protocol": "ARTIC", + "amplicon_version": "ARTIC v3", + "analysis_authors": "", + "anatomical_material": "Nasopharyngeal exudate", + "author_submitter": "", + "authors": "A. Monzón; F. Casas; I. Jiménez; M. Camarero; P. Zaballos; ROMAN SOTO; S. Cuesta, I.; S. Iglesias-Caballero; S. Pozo; S. Varona; SERGIO; Sandonís; V. Vázquez-Morón", + "biosample_accession_ENA": "", + "collecting_institution": "Hospital Clínico Universitario Virgen de la Arrixaca", + "collecting_institution_address": "Ctra. 
Madrid-Cartagena, s/n, El Palmar", + "collecting_institution_email": "", + "collecting_lab_sample_id": "16065902", + "collection_device": "", + "collector_name": "", + "common_name": "Severe acute respiratory syndrome", + "design_description": "Design Description", + "diagnostic_pcr_Ct_value_1": "18", + "diagnostic_pcr_Ct_value_2": "", + "enrichment_protocol": "Amplicon", + "environmental_material": "", + "environmental_site": "Swab", + "experiment_alias": "NOT_FOUND", + "experiment_title": "Example project for ENA submission RELECOV", + "fastq_r1": "2018086_R1.fastq.gz", + "fastq_r1_md5": "eab8b05ef27f4f5cba5cddf6ad627de2", + "fastq_r2": "2018086_R2.fastq.gz", + "fastq_r2_md5": "d82a37aa970df2b8bf8f547ca7c18ac8", + "flowcell_kit": "", + "gene_name_1": "ORF E", + "gene_name_2": "", + "geo_loc_city": "Murcia", + "geo_loc_country": "Spain", + "geo_loc_latitude": "37.9861", + "geo_loc_longitude": "-1.1303", + "geo_loc_state": "Murcia", + "gisaid_id": "EPI_ISL_862545", + "host_age": "26", + "host_common_name": "Human", + "host_disease": "COVID-19", + "host_gender": "Hombre", + "host_scientific_name": "Homo Sapiens", + "if_amplicon_protocol_if_other_especify": "", + "if_enrichment_protocol_is_other_specify": "", + "isolate_sample_id": "16065902", + "library_kit": "", + "library_layout": "Paired", + "library_preparation_kit": "Illumina DNA Prep", + "library_selection": "PCR", + "library_source": "Viral RNA", + "library_strategy": "Amplicon", + "microbiology_lab_sample_id": "2018086", + "number_of_samples_in_run": "60", + "organism": "Severe acute respiratory syndrome coronavirus 2", + "public_health_sample_id_sivies": "No consta", + "purpose_sampling": "Surveillance", + "r1_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "r2_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720", + "rna_extraction_protocol": "RT-PCR", + "runID": "MiSeaq_GN_195", + "run_alias": "NOT_FOUND.fastq.gz", 
+ "sample_collection_date": "2020-12-20", + "sample_description": "Sample for surveillance", + "sample_received_date": "2020-12-22", + "sample_storage_conditions": "-80 C", + "sequence_file_R1_fastq": "2018086_R1.fastq.gz", + "sequence_file_R2_fastq": "2018086_R2.fastq.gz", + "sequencing_date": "2021-01-13", + "sequencing_instrument_model": "Illumina MiSeq", + "sequencing_instrument_platform": "Illumina", + "sequencing_sample_id": "2018086", + "study_alias": "RELECOV", + "study_id": "ERP137164", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N,", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "202074288", + "tax_id": "2697049", + "type": "betacoronavirus", + "virus_name": "" + } +] diff --git a/relecov_tools/gisaid_upload.py b/relecov_tools/gisaid_upload.py new file mode 100644 index 00000000..6e4fd346 --- /dev/null +++ b/relecov_tools/gisaid_upload.py @@ -0,0 +1,297 @@ +import logging + +# from pyparsing import col +import rich.console +import sys +import pandas as pd +import os + +import relecov_tools.utils +from Bio import SeqIO +from relecov_tools.config_json import ConfigJson + + +# import site + + +log = logging.getLogger(__name__) +stderr = rich.console.Console( + stderr=True, + style="dim", + highlight=False, + force_terminal=relecov_tools.utils.rich_force_colors(), +) + + +class GisaidUpload: + def __init__( + self, + user=None, + passwd=None, + client_id=None, + token=None, + gisaid_json=None, + fasta_path=None, + output_path=None, + frameshift=None, + proxy_config=None, + single=False, + gzip=False, + ): + if ( + token is None + ): # borrar comentario: solo si no existe el token necesita user, passwd y client_id + self.token = None + print("Token is not introduced, creating a new one...") + if user is None: + self.user = relecov_tools.utils.prompt_text( 
+ msg="Enter your username defined in GISAID" + ) + else: + self.user = user + if passwd is None: + self.passwd = relecov_tools.utils.prompt_password( + msg="Enter your password to GISAID" + ) + else: + self.passwd = passwd + if client_id is None: + self.client_id = relecov_tools.utils.prompt_password( + msg="Enter your client-ID to GISAID. Email clisupport@gisaid.org to request client-ID" + ) + else: + self.client_id = client_id + else: + self.token = token + if gisaid_json is None: + self.gisaid_json = relecov_tools.utils.prompt_path( + msg="Select the GISAID json file to upload" + ) + else: + self.gisaid_json = gisaid_json + if output_path is None: + self.output_path = relecov_tools.utils.prompt_path( + msg="Select the folder to store the log files" + ) + else: + self.output_path = output_path + if fasta_path is None: + self.fasta_path = relecov_tools.utils.prompt_path( + msg="Select path to fasta file/s" + ) + else: + self.fasta_path = fasta_path + if frameshift is None: + self.frameshift = relecov_tools.utils.prompt_selection( + msg="Select frameshift notification", + choices=["catch_all", "catch_novel", "catch_none"], + ) + else: + self.frameshift = frameshift + # Add proxy settings: username:password@proxy:port (optional) + if proxy_config is None: + # borrar comentario: este mensaje no me convence + self.proxy_config = None + print("Proxy configuration is not set") + else: + self.proxy_config = proxy_config + self.single = single + self.gzip = gzip + + # Metadatos + + def complete_mand_fields(self, dataframe): + """Complete mandatory empty fields with 'unknown'""" + dataframe.loc[dataframe["covv_gender"] == "", "covv_gender"] = "unknown" + dataframe.loc[dataframe["covv_patient_age"] == "", "covv_patient_age"] = ( + "unknown" + ) + + authors = [authors_field for authors_field in dataframe["covv_authors"]] + if "" in authors or "unknown" in authors: + log.error("Invalid value for author. 
This field is required in full") + stderr.print( + "[red] Invalid value for authors. This field is required in full, 'unknown' is not allowed" + ) + sys.exit(1) + + dataframe.loc[dataframe["covv_subm_lab_addr"] == "", "covv_subm_lab_addr"] = ( + "unknown" + ) + dataframe.loc[dataframe["covv_subm_lab"] == "", "covv_subm_lab"] = "unknown" + dataframe.loc[dataframe["covv_orig_lab_addr"] == "", "covv_orig_lab_addr"] = ( + "unknown" + ) + dataframe.loc[dataframe["covv_orig_lab"] == "", "covv_orig_lab"] = "unknown" + dataframe.loc[dataframe["covv_patient_status"] == "", "covv_patient_status"] = ( + "unknown" + ) + dataframe.loc[dataframe["covv_type"] == "", "covv_type"] = "betacoronavirus" + dataframe.loc[dataframe["covv_passage"] == "", "covv_passage"] = "Original" + + config_json = ConfigJson() + gisaid_config = config_json.get_configuration("GISAID_configuration") + submitter_id = gisaid_config["submitter"] + dataframe.loc[dataframe["submitter"] == "", "submitter"] = submitter_id + + bioinfo_config = config_json.get_configuration("bioinfo_analysis") + assembly_method = bioinfo_config["fixed_values"][ + "bioinformatics_protocol_software_name" + ] + dataframe.loc[ + dataframe["covv_assembly_method"] == "", "covv_assembly_method" + ] = assembly_method + + return dataframe + + def metadata_to_csv(self): + """Transform metadata json to csv""" + data = relecov_tools.utils.read_json_file(self.gisaid_json) + df_data = pd.DataFrame(data) + + config_json = ConfigJson() + fields = config_json.get_configuration("gisaid_csv_headers") + col_df = list(df_data.columns) + for field in fields: + if field not in col_df: + df_data.insert(4, field, "") + + config_lab_json = ConfigJson() + lab_json_conf = config_lab_json.get_topic_data( + "lab_metadata", "laboratory_data" + ) + lab_json_file = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "conf", lab_json_conf["file"] + ) + lab_json = relecov_tools.utils.read_json_file(lab_json_file) + for lab in lab_json: + for i in 
range(len(df_data)): + if lab["collecting_institution"] == df_data["covv_orig_lab"][i]: + df_data["covv_location"][i] = " / ".join( + [ + "Europe", + lab["geo_loc_country"], + lab["geo_loc_state"], + lab["geo_loc_city"], + ] + ) + + df_data.replace("not provided", "unknown", inplace=True) + df_data_comp = self.complete_mand_fields(df_data) + df_data_path = os.path.join(self.output_path, "meta_gisaid.csv") + if not os.path.exists(self.output_path): + os.mkdir(self.output_path) + df_data_comp.to_csv(df_data_path, index=False) + return df_data_path + + # generar template con cli3 + # ADD TOKEN WARNING and file token .authtoken + # add bash from cli3 + """ + os.system( + "cli3 upload --database EpiCoV --token ./gisaid.authtoken --metadata gisaid_template.csv --fasta multi.fasta --frameshift (OPTIONAL, default: catch_all) --failed --proxy --log" + ) + cli3 upload + --database EpiCoV + --token ./gisaid.authtoken + --metadata gisaid_template.csv + --fasta multi.fasta + --frameshift (OPTIONAL, default: catch_all) + --failed default creates file failed.out where the failed records will be + --proxy + --log default creates file failed.out where the log will be ) + + """ + + def create_multifasta(self): + """Create multifasta from single fastas (if --single)""" + if self.single: + gather_fastas_path = os.path.join(self.fasta_path, "*.fa*") + if self.gzip: + os.system( + "zcat %s > %s/multifasta.fasta" + % (gather_fastas_path, self.output_path) + ) + else: + os.system( + "cat %s > %s/multifasta.fasta" + % (gather_fastas_path, self.output_path) + ) + multifasta = "%s/multifasta.fasta" % self.output_path + + else: + if self.gzip: + os.system( + "zcat %s > %s/multifasta.fasta" + % (self.fasta_path, self.output_path) + ) + multifasta = "%s/multifasta.fasta" % self.output_path + else: + multifasta = self.fasta_path + return multifasta + + def change_headers(self, multifasta): + """Transform multifasta ids/headers to GISAID format""" + data = 
def cli3_upload(self):
    """Upload the metadata CSV and the processed multifasta to GISAID.

    The metadata CSV is generated with ``self.metadata_to_csv()`` and the
    fasta with ``self.change_headers(self.create_multifasta())``, in that
    order. ``--proxy`` is appended only when ``self.proxy_config`` is set.
    The ``cli3`` exit status is not inspected.
    """
    # Local import: only this method needs it.
    import shlex

    metadata_csv = self.metadata_to_csv()
    gisaid_fasta = self.change_headers(self.create_multifasta())

    # Every value is quoted so paths with spaces or shell metacharacters are
    # passed to cli3 as a single, literal argument.
    command = "cli3 upload --token %s --metadata %s --fasta %s --frameshift %s" % (
        shlex.quote(str(self.token)),
        shlex.quote(str(metadata_csv)),
        shlex.quote(str(gisaid_fasta)),
        shlex.quote(str(self.frameshift)),
    )
    if self.proxy_config is not None:
        command += " --proxy %s" % shlex.quote(str(self.proxy_config))
    os.system(command)
def added_seq_inst_model(metadata, f_data, mapped_fields, heading):
    """Fill the sequencing instrument model from the run name of each sample.

    Args:
        metadata (list[list]): rows of metadata; the first row is skipped.
        f_data (dict): per-sample data keyed by the sample id as a string.
        mapped_fields (dict): metadata column name -> field of ``f_data``
            holding the run name.
        heading (list): column names used to locate cells in each row.

    Returns:
        list[list]: the same ``metadata`` object, updated in place.
    """
    # Ordered pairs: the first token found in the run name wins.
    instrument_by_token = (
        ("nextseq", "Illumina NextSeq 500"),
        ("next_seq", "Illumina NextSeq 500"),
        ("miseq", "Illumina MiSeq"),
        ("miseaq", "Illumina MiSeq"),
        ("novaseq", "Illumina NovaSeq 6000"),
    )
    sample_col = heading.index("Sample ID given for sequencing")
    for record in metadata[1:]:
        for column_name, run_field in mapped_fields.items():
            target_col = heading.index(column_name)
            try:
                run_name = f_data[str(record[sample_col])][run_field].lower()
            except KeyError as e:
                log.error("Value %s does not exist ", e)
                stderr.print(f"[red] Value {e} does not exist")
                sys.exit(1)
            model = next(
                (name for token, name in instrument_by_token if token in run_name),
                None,
            )
            if model is None:
                log.error("Value %s is not defined in the mapping ", run_name)
                stderr.print(f"[red] Value {run_name} is not defined in the mapping")
                sys.exit(1)
            record[target_col] = model
    return metadata
def translate_purpose_seq_to_english(metadata, f_data, mapped_fields, heading):
    """Translate the Spanish purpose-of-sequencing text to the schema value.

    The lower-cased cell is first looked up as an exact key; otherwise it is
    accepted when it contains "brote", "viaje" or "posible variante" (checked
    in that order). ``None`` and empty cells become "Not Provided". Any other
    value is logged and the program exits with status 1.

    Args:
        metadata (list[list]): rows of metadata; the first row is skipped.
        f_data: unused, kept for the common signature of these helpers.
        mapped_fields (dict): its keys are the column names to translate.
        heading (list): column names used to locate cells in each row.

    Returns:
        list[list]: the same ``metadata`` object, updated in place.
    """
    map_dict = {
        "estudio variante": "Targeted surveillance (non-random sampling)",
        "trabajador/a granja visones": "Targeted surveillance (non-random sampling)",
        "sospecha reinfección": "Re-infection surveillance",
        "i-move-covid": "Research",
        "irag": "Research",
        "muestreo aleatorio": "Baseline surveillance (random sampling)",
        "paciente vacunado": "Vaccine escape surveillance",
        "posible variante": "Sample has epidemiological link to Variant of Concern (VoC)",
        "no consta": "Not Collected",
        "brote": "Cluster/Outbreak investigation",
        "viaje": "Surveillance of international border crossing by air travel or ground transport",
    }
    # Keys also accepted when they only appear inside a longer free text.
    partial_keys = ("brote", "viaje", "posible variante")
    # Column positions do not change between rows: resolve them once.
    columns = [heading.index(key) for key in mapped_fields]
    for row in metadata[1:]:
        for m_idx in columns:
            if row[m_idx] is None or row[m_idx] == "":
                row[m_idx] = "Not Provided"
                continue
            item = row[m_idx].lower()
            if item in map_dict:
                row[m_idx] = map_dict[item]
                continue
            partial = next((key for key in partial_keys if key in item), None)
            if partial is not None:
                row[m_idx] = map_dict[partial]
                continue
            log.error("The '%s' is not a valid data for translation", row[m_idx])
            # The f prefix must sit outside the quotes, otherwise the
            # placeholder is printed verbatim.
            stderr.print(f"[red] The {row[m_idx]} is not a valid data for translation")
            sys.exit(1)
    return metadata
class PhagePlusSchema:
    """Index a JSON schema by the ``ontology`` value of its properties."""

    def __init__(self, schema):
        """Store the schema and build the ontology -> property name index.

        Args:
            schema (dict): JSON schema with a "properties" mapping.
        """
        self.schema = schema
        self.ontology = {}
        for key, values in schema["properties"].items():
            ontology = values.get("ontology")
            # A property without ontology cannot be mapped, so it is left
            # out of the index instead of aborting construction.
            if ontology is None:
                continue
            self.ontology[ontology] = key
        self.properties = list(schema["properties"].keys())

    def get_gontology(self, property_item):
        """Return the ontology of a property, or None when it is not defined."""
        try:
            return self.schema["properties"][property_item]["ontology"]
        except KeyError as e:
            log.error("geontology value %s %s", property_item, e)
            return None

    def maping_schemas_based_on_geontology(self, mapped_to_schema):
        """Map the properties of another schema onto this one by ontology.

        Args:
            mapped_to_schema (dict): JSON schema with a "properties" mapping.

        Returns:
            OrderedDict: property name in ``mapped_to_schema`` -> property
            name in this schema sharing the same ontology. Properties with
            no exact ontology match are logged and skipped.
        """
        mapped_dict = OrderedDict()
        for key, values in mapped_to_schema["properties"].items():
            try:
                mapped_dict[key] = self.ontology[values["ontology"]]
            except KeyError as e:
                # There is no exact match on ontology. Searching the parent
                # ontology is to be implemented later.
                log.error("Unable to map schema, because of %s is not defined", e)
        return mapped_dict

    def get_schema_properties(self):
        """Return the list of property names defined in the schema."""
        return self.properties
def get_sample_id_field(self):
    """Return the name of the schema property used to track the samples.

    The property is the first one in ``self.json_schema["properties"]`` whose
    "ontology" equals the sample id ontology.

    Raises:
        ValueError: when no property carries that ontology.
    """
    # TODO: Include this field in configuration.json
    sample_id_ontology = "GENEPIO:0000079"
    for field_name, field_def in self.json_schema["properties"].items():
        if field_def.get("ontology") == sample_id_ontology:
            return field_name
    raise ValueError(f"No valid sample ID field ({sample_id_ontology}) in schema")
def create_invalid_metadata(self, invalid_json, metadata, out_folder):
    """Create a copy of the metadata excel keeping only the invalid samples.

    The sample id of every entry in ``invalid_json`` is read from
    ``self.sample_id_field``. Rows of sheet "METADATA_LAB", from row 5 on,
    whose third cell (index 2) is not one of those ids are deleted, and the
    workbook is saved as ``invalid_<original name>`` inside ``out_folder``.
    Nothing is created when the schema has no sample id field.

    Args:
        invalid_json (list[dict]): samples that failed validation.
        metadata (str): path to the metadata excel file; prompted when None.
        out_folder (str): folder where the new excel file is saved.
    """
    if self.sample_id_field is None:
        log_text = f"Invalid excel file won't be created: {self.SAMPLE_FIELD_ERROR}"
        self.logsum.add_error(entry=log_text)
        return
    log.error("Some of the samples in json metadata were not validated")
    stderr.print("[red] Some of the Samples are not validate")
    if metadata is None:
        metadata = relecov_tools.utils.prompt_path(
            msg="Select the metadata file to select those not-validated samples."
        )
    if not os.path.isfile(metadata):
        log.error("Metadata file %s does not exist", metadata)
        stderr.print(
            "[red] Unable to create excel file for invalid samples. Metadata file ",
            metadata,
            " does not exist",
        )
        sys.exit(1)
    stderr.print("Start preparation of invalid samples")
    sample_list = [str(row[self.sample_id_field]) for row in invalid_json]
    wb = openpyxl.load_workbook(metadata)
    # TODO: Include this as a key in configuration.json
    ws_sheet = wb["METADATA_LAB"]
    row_to_del = []
    for row in ws_sheet.iter_rows(min_row=5, max_row=ws_sheet.max_row):
        # Two empty cells (index 1 and 2) are taken as the end of the data.
        if not row[2].value and not row[1].value:
            break
        if str(row[2].value) not in sample_list:
            row_to_del.append(row[0].row)
    stderr.print("Collected rows to create the excel file")
    if row_to_del:
        # Delete bottom-up so earlier deletions do not shift pending indexes.
        row_to_del.sort(reverse=True)
        for idx in row_to_del:
            try:
                ws_sheet.delete_rows(idx)
            except TypeError as e:
                # One placeholder per argument; a missing one makes logging
                # fail to format the record.
                log.error(
                    "Unable to delete row %s from metadata file because of %s",
                    idx,
                    e,
                )
                stderr.print(f"[red] Unable to delete row {idx} becuase of {e}")
                sys.exit(1)
    os.makedirs(out_folder, exist_ok=True)
    new_name = "invalid_" + os.path.basename(metadata)
    m_file = os.path.join(out_folder, new_name)
    stderr.print("Saving excel file with the invalid samples")
    wb.save(m_file)
    return
create_validated_json(self, valid_json_data, out_folder): + """Create a copy of the input json file, keeping only the validated samples + + Args: + valid_json_data (list(dict)): List of samples metadata as dictionaries + out_folder (str): path to folder where file will be created + """ + file_name = "_".join(["validated", os.path.basename(self.json_data_file)]) + file_path = os.path.join(out_folder, file_name) + log.info("Saving Json file with the validated samples as %s", file_name) + relecov_tools.utils.write_json_fo_file(valid_json_data, file_path) + return + + def validate(self): + """Validate samples from metadata, create an excel with invalid samples, + and a json file with the validated ones + """ + self.validate_schema() + valid_json_data, invalid_json = self.validate_instances() + if not invalid_json: + stderr.print("[green]Sucessful validation, no invalid file created!!") + else: + log_text = "Summary: %s valid and %s invalid samples" + self.logsum.add_warning( + entry=log_text % (len(valid_json_data), len(invalid_json)) + ) + self.create_invalid_metadata(invalid_json, self.metadata, self.out_folder) + if valid_json_data: + self.create_validated_json(valid_json_data, self.out_folder) + else: + log_text = "All the samples were invalid. 
def __init__(
    self,
    input_folder=None,
    template=None,
    output_folder=None,
    pipeline_conf_file=None,
):
    """Resolve and check every path needed to launch the pipeline.

    Missing arguments are asked for interactively, except
    ``pipeline_conf_file``, which defaults to ``conf/configuration.json``
    next to this module. The program exits with status 1 when a path does
    not exist, the configuration lacks a required key, or the user declines
    to delete an already existing analysis folder.

    Args:
        input_folder (str): folder which contains the fastq files.
        template (str): folder with the template structure to copy.
        output_folder (str): folder where the analysis folder is created.
        pipeline_conf_file (str): json file with a "pipeline_launch" section.
    """
    current_date = datetime.date.today().strftime("%Y%m%d")
    if input_folder is None:
        self.input_folder = relecov_tools.utils.prompt_path(
            msg="Select the folder which contains the fastq file of samples"
        )
    else:
        self.input_folder = input_folder
    if not os.path.exists(self.input_folder):
        log.error("Input folder %s does not exist ", self.input_folder)
        stderr.print("[red] Input folder " + self.input_folder + " does not exist")
        sys.exit(1)
    if template is None:
        self.template = relecov_tools.utils.prompt_path(
            msg="Select the path which contains the template structure"
        )
    else:
        self.template = template
    if not os.path.exists(self.template):
        log.error("Template folder %s does not exist ", self.template)
        stderr.print("[red] Template folder " + self.template + " does not exist")
        sys.exit(1)
    if pipeline_conf_file is None:
        pipeline_conf_file = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "conf",
            "configuration.json",
        )
    # Stored on the instance; the error messages below use the same value.
    self.pipeline_conf_file = pipeline_conf_file
    if not os.path.exists(pipeline_conf_file):
        log.error("Pipeline config file %s does not exist ", pipeline_conf_file)
        stderr.print(
            "[red] Pipeline config file " + pipeline_conf_file + " does not exist"
        )
        sys.exit(1)
    conf_settings = relecov_tools.utils.read_json_file(pipeline_conf_file)
    data = conf_settings["pipeline_launch"]
    # Every key read further down must be present, "analysis_folder" included.
    required_keys = (
        "analysis_name",
        "analysis_folder",
        "sample_stored_folder",
        "sample_link_folder",
    )
    if any(key not in data for key in required_keys):
        log.error("Invalid pipeline config file %s ", pipeline_conf_file)
        stderr.print("[red] Invalid pipeline config file " + pipeline_conf_file)
        sys.exit(1)

    if output_folder is None:
        output_folder = relecov_tools.utils.prompt_path(
            msg="Select the output folder"
        )
    # Create the output folder if not exists
    try:
        os.makedirs(output_folder, exist_ok=True)
    except OSError as e:
        # FileExistsError is a subclass of OSError, so it is covered too.
        log.error("Unable to create output folder %s ", e)
        stderr.print("[red] Unable to create output folder ", e)
        sys.exit(1)

    # Update the output folder with the current date and analysis name
    self.output_folder = os.path.join(
        output_folder, current_date + "_" + data["analysis_name"]
    )
    if os.path.exists(self.output_folder):
        msg = "Analysis folder already exists and it will be deleted. Do you want to continue? Y/N"
        confirmation = relecov_tools.utils.prompt_yn_question(msg)
        if confirmation is False:
            sys.exit(1)
        shutil.rmtree(self.output_folder)

    self.analysis_folder = os.path.join(self.output_folder, data["analysis_folder"])
    self.copied_sample_folder = os.path.join(
        self.output_folder, data["sample_stored_folder"]
    )
    self.linked_sample_folder = os.path.join(
        self.analysis_folder, data["sample_link_folder"]
    )
latest_date: + continue + # fetch the validate file and get sample id and r1 and r2 file path + validate_files = [ + os.path.join(data_folder["path"], f) + for f in os.listdir(data_folder["path"]) + if f.startswith("validated_processed_metadata") and f.endswith(".json") + ] + if not validate_files: + continue + for validate_file in validate_files: + validate_file_path = os.path.join(data_folder["path"], validate_file) + with open(validate_file_path) as fh: + data = json.load(fh) + for item in data: + sample = {} + sample["sequencing_sample_id"] = item["sequencing_sample_id"] + sample["r1_fastq_file_path"] = os.path.join( + item["r1_fastq_file_path"], item["sequence_file_R1_fastq"] + ) + if "r2_fastq_file_path" in item: + sample["r2_fastq_file_path"] = os.path.join( + item["r2_fastq_file_path"], item["sequence_file_R2_fastq"] + ) + samples_data.append(sample) + lab_code = lab.split("/")[-1] + log.info("Collecting samples for %s", lab_code) + stderr.print("[blue] Collecting samples for ", lab_code) + return samples_data + + def pipeline_exc(self): + # copy template folder and subfolders in output folder + shutil.copytree(self.template, self.output_folder) + # create the 00_reads folder + os.makedirs(self.linked_sample_folder, exist_ok=True) + # collect json with all validated samples + samples_data = self.join_valid_items() + + # iterate over the sample_data to copy the fastq files in the output folder + file_errors = [] + copied_samples = 0 + if len(samples_data) == 0: + stderr.print("[yellow] No samples were found. Deleting analysis folder") + shutil.rmtree(self.analysis_folder) + sys.exit(0) + for item in samples_data: + # fetch the file extension + ext_found = re.match(r".*(fastq.*|bam)", item["r1_fastq_file_path"]) + ext = ext_found.group(1) + sequencing_r1_sample_id = item["sequencing_sample_id"] + "_R1." 
+ ext + # copy r1 sequencing file into the output folder + raw_folder = os.path.join(self.analysis_folder, self.copied_sample_folder) + try: + shutil.copy(item["r1_fastq_file_path"], raw_folder) + # create simlink for the r1 + sample_r1_link_path = os.path.join( + self.linked_sample_folder, sequencing_r1_sample_id + ) + os.symlink(item["r1_fastq_file_path"], sample_r1_link_path) + except FileNotFoundError as e: + log.error("File not found %s", e) + file_errors.append(item["r1_fastq_file_path"]) + continue + copied_samples += 1 + # check if there is a r2 file + if "r2_fastq_file_path" in item: + sequencing_r2_sample_id = item["sequencing_sample_id"] + "_R2." + ext + try: + shutil.copy(item["r2_fastq_file_path"], raw_folder) + sample_r2_link_path = os.path.join( + self.linked_sample_folder, sequencing_r2_sample_id + ) + os.symlink(item["r2_fastq_file_path"], sample_r2_link_path) + except FileNotFoundError as e: + log.error("File not found %s", e) + file_errors.append(item["r2_fastq_file_path"]) + continue + if len(file_errors) > 0: + stderr.print( + "[red] Files do not found. Unable to copy", + "[red] " + str(len(file_errors)), + "[red]sample files", + ) + msg = "Do you want to delete analysis folder? 
class LogSum:
    """Collect errors and warnings per key and per sample, and dump them as json."""

    def __init__(
        self,
        output_location: str = None,
        only_samples: bool = False,
        unique_key: str = None,
        path: str = None,
    ):
        """Set up an empty log summary.

        Args:
            output_location (str): existing folder where the summary is saved.
            only_samples (bool): if set, no "samples" key is added to logs.
            unique_key (str): if given, all entries are saved inside that key.
            path (str): if given, every key includes a field "path" with it.

        Raises:
            FileNotFoundError: when output_location is None or does not exist.
        """
        # os.path.exists(None) raises TypeError, so None is rejected here
        # with the same exception used for a missing folder.
        if output_location is None or not os.path.exists(output_location):
            raise FileNotFoundError("Output location does not exist")
        self.output_location = output_location
        if only_samples and unique_key:
            stderr.print("[red]LogSum only_samples and unique_key are incompatible")
            sys.exit(1)
        self.only_samples = bool(only_samples)
        self.unique_key = unique_key if unique_key else None
        self.path = path if path else None
        self.logs = {}

    def feed_key(self, key=None, sample=None):
        """Run update_summary() with no entry nor log_type. Add a new empty key"""
        if self.unique_key:
            key = self.unique_key
        self.update_summary(log_type=None, key=key, entry=None, sample=sample)

    def add_error(self, entry, key=None, sample=None):
        """Log the entry as an error and run update_summary() with log_type errors"""
        if self.unique_key:
            key = self.unique_key
        log.error(entry)
        self.update_summary(log_type="errors", key=key, entry=entry, sample=sample)

    def add_warning(self, entry, key=None, sample=None):
        """Log the entry as a warning and run update_summary() with log_type warnings"""
        if self.unique_key:
            key = self.unique_key
        log.warning(entry)
        self.update_summary(log_type="warnings", key=key, entry=entry, sample=sample)

    def update_summary(self, log_type, key, entry, sample=None):
        """Create the structure for a new key and add the entry to it.

        Args:
            log_type (str): 'errors', 'warnings', or None to only create the
                key (and the sample) without adding any entry.
            key (str): Name of the key holding the logs. "./" is removed.
            entry (str): Content message of the log.
            sample (str, optional): Name of a sample within key, when the log
                belongs to that sample instead of the whole key.
        """
        feed_dict = OrderedDict({"valid": True, "errors": [], "warnings": []})
        # Removing strange characters
        current_key = str(key).replace("./", "")
        if self.only_samples and sample is not None:
            log.warning(
                "No samples record can be added if only_samples is set to True. "
                + f"Record will be added to {current_key}"
            )
            sample = None
        # From here on a missing sample is the string "None".
        entry, sample = (str(entry), str(sample))
        if current_key not in self.logs:
            self.logs[current_key] = copy.deepcopy(feed_dict)
            if self.path and "path" not in self.logs[current_key]:
                self.logs[current_key].update({"path": self.path})
            if not self.only_samples:
                self.logs[current_key]["samples"] = OrderedDict()
        key_logs = self.logs[current_key]
        if log_type is None:
            if sample != "None" and sample not in key_logs["samples"]:
                key_logs["samples"][sample] = copy.deepcopy(feed_dict)
            return
        if sample == "None":
            key_logs[log_type].append(entry)
            return
        if sample not in key_logs["samples"]:
            key_logs["samples"][sample] = copy.deepcopy(feed_dict)
        key_logs["samples"][sample][log_type].append(entry)

    def create_error_summary(self, called_module=None, filename=None):
        """Dump the log summary dictionary into a file with json format.

        Every key or sample with a non-empty 'errors' list gets its 'valid'
        field set to False before writing.

        Args:
            called_module (str, optional): Name included in the default
                filename. When empty it is taken from the stack frame of
                __main__.py, or left empty if there is none.
            filename (str, optional): Name of the output file. Defaults to
                <date>_<called_module>_log_summary.json
        """
        for key_logs in self.logs.values():
            if key_logs["errors"]:
                key_logs["valid"] = False
            if not self.only_samples:
                for sample_logs in key_logs["samples"].values():
                    if sample_logs["errors"]:
                        sample_logs["valid"] = False
        if not called_module:
            try:
                called_module = [
                    f.function for f in inspect.stack() if "__main__.py" in f.filename
                ][0]
            except IndexError:
                called_module = ""
        if not filename:
            # %H instead of the platform-specific %-H: portable and keeps
            # the timestamp at a fixed width.
            date = datetime.today().strftime("%Y%m%d%H%M%S")
            filename = "_".join([date, called_module, "log_summary.json"])
        summary_path = os.path.join(self.output_location, filename)
        with open(summary_path, "w", encoding="utf-8") as f:
            f.write(
                json.dumps(self.logs, indent=4, sort_keys=False, ensure_ascii=False)
            )
        stderr.print(f"Process log summary saved in {summary_path}")
class Email:
    """E-mail with a plain-text body and an optional HTML alternative.

    The message is delivered through the SMTP server listening on localhost.
    """

    def __init__(self, receiver, sender, password, subject):
        """Store the envelope data; the body starts empty and without HTML part.

        Args:
            receiver (str): Destination address.
            sender (str): Origin address.
            password (str): Stored on the instance; not used by send_message.
            subject (str): Subject header of the message.
        """
        self.receiver = receiver
        self.sender = sender
        self.password = password
        self.subject = subject
        self.text = ""
        self.html = False

    def write_message(self, text):
        """Set the plain-text body that send_message will deliver."""
        # send_message() reads self.text; the body used to be stored only in
        # self.message, so every mail went out empty.
        self.text = text
        # Kept for callers that read the attribute written by older versions.
        self.message = text
        return

    def generate_HTML(self):
        """Placeholder: no HTML body is generated yet."""
        pass
        return

    def send_message(self):
        """Build the MIME message and send it through the local SMTP server."""
        msg = MIMEMultipart("alternative")
        msg["To"] = self.receiver
        msg["From"] = self.sender
        msg["Subject"] = self.subject

        msg.attach(MIMEText(self.text, "plain"))

        if self.html:
            msg.attach(MIMEText(self.html, "html"))

        # The context manager closes the connection even if sendmail raises.
        with smtplib.SMTP("localhost") as server:
            server.sendmail(self.sender, self.receiver, msg.as_string())
self.config_json = config_json + if relecov_schema is None: + relecov_schema = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "schema", + config_json.get_topic_data("json_schemas", "relecov_schema"), + ) + else: + if not os.path.isfile(relecov_schema): + log.error("Relecov schema file %s does not exist", relecov_schema) + stderr.print( + "[red] Relecov schema " + relecov_schema + " does not exist" + ) + exit(1) + rel_schema_json = relecov_tools.utils.read_json_file(relecov_schema) + try: + Draft202012Validator.check_schema(rel_schema_json) + except jsonschema.ValidationError: + log.error("Relecov schema does not fulfill Draft 202012 Validation ") + stderr.print( + "[red] Relecov schema does not fulfill Draft 202012 Validation" + ) + sys.exit(1) + self.relecov_schema = rel_schema_json + + if json_file is None: + json_file = relecov_tools.utils.prompt_path( + msg="Select the json file which have the data to map" + ) + if not os.path.isfile(json_file): + log.error("json data file %s does not exist ", json_file) + stderr.print(f"[red] json data file {json_file} does not exist") + sys.exit(1) + self.json_data = relecov_tools.utils.read_json_file(json_file) + self.json_file = json_file + if destination_schema is None: + self.destination_schema = relecov_tools.utils.prompt_selection( + msg="Select ENA, GISAID for already defined schemas or other for custom", + choices=["ENA", "GISAID", "other"], + ) + else: + self.destination_schema = destination_schema + if self.destination_schema == "other": + if schema_file is None: + self.schema_file = relecov_tools.utils.prompt_path( + msg="Select the json schema file to map your data" + ) + else: + self.schema_file = schema_file + if not os.path.exists(self.schema_file): + log.error( + "Schema file %s to map your data does not exist ", + self.metadata_file, + ) + sys.exit(1) + with open(self.schema_file, "r") as fh: + json_schema = json.load(fh) + try: + Draft202012Validator.check_schema(json_schema) + except 
jsonschema.ValidationError: + stderr.print( + "[red] Json schema does not fulfill Draft 202012 Validation" + ) + sys.exit(1) + elif self.destination_schema == "ENA": + self.schema_file = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "schema", + config_json.get_topic_data("json_schemas", "ena_schema"), + ) + elif self.destination_schema == "GISAID": + self.schema_file = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "schema", + config_json.get_topic_data("json_schemas", "gisaid_schema"), + ) + else: + stderr.print("[red] Invalid option for mapping to schena") + sys.exit(1) + with open(self.schema_file, "r") as fh: + self.mapped_to_schema = json.load(fh) + + self.ontology = {} + for key, values in self.relecov_schema["properties"].items(): + if values["ontology"] == "0": + continue + self.ontology[values["ontology"]] = key + self.output_folder = output_folder + + if os.path.exists(os.path.join(output_folder, "mapping_errors.log")): + os.remove(os.path.join(output_folder, "mapping_errors.log")) + + def maping_schemas_based_on_geontology(self): + """Return a dictionary with the properties of the mapped_to_schema as key and + properties of Relecov Schema as value + """ + mapped_dict = OrderedDict() + errors = {} + required_fields = self.mapped_to_schema["required"] + + for key, values in self.mapped_to_schema["properties"].items(): + if values["ontology"] == "0": + continue + try: + mapped_dict[key] = self.ontology[values["ontology"]] + except KeyError as e: + if key in required_fields: + stderr.print( + f"[red]Required field {key} ontology missing in relecov schema" + ) + sys.exit(1) + else: + errors[key] = str(e) + if len(errors) >= 1: + output_errs = "\n".join(f"{field}:{info}" for field, info in errors.items()) + invalid_ontologies = str([field for field in errors.keys()]).strip("[]") + log.error("Invalid ontology for: " + invalid_ontologies) + stderr.print("[yellow]\nGot unmapped ontologies. 
def additional_formating(self, mapped_json_data):
    """Update data that needs additional formating such as
    word splitting and include fields with fixed values.

    For the ENA schema every record receives the configured fixed fields and
    the configured merged fields. For GISAID every record receives
    ``covv_type``. Finally a trailing " strategy" is removed from
    ``library_strategy`` in every record.

    Args:
        mapped_json_data (list[dict]): Mapped records, one per entry of
            ``self.json_data`` and in the same order. Modified in place.

    Returns:
        list[dict]: The same ``mapped_json_data`` list.
    """
    additional_data = self.config_json.get_topic_data(
        "ENA_fields", "additional_formating"
    )
    fixed_fields = self.config_json.get_topic_data("ENA_fields", "ena_fixed_fields")

    if self.destination_schema == "ENA":
        for idx, source in enumerate(self.json_data):
            target = mapped_json_data[idx]
            target.update(fixed_fields)
            # Some fields in ENA need special formatting such as sample_id+date.
            # Instead of directly merging them, -- is used as delimiter.
            # Also, the Not Provided fields are skipped in this process.
            for key, source_fields in additional_data.items():
                # Drop the " [ontology]" tail of each source value.
                parts = (source.get(f, "").split(" [", 1)[0] for f in source_fields)
                merged = "--".join(p for p in parts if "Not Provided" not in p)
                if "fastq_filepath" in key:
                    merged = merged.replace("--", "/")
                target[key] = merged
    elif self.destination_schema == "GISAID":
        for idx in range(len(self.json_data)):
            mapped_json_data[idx]["covv_type"] = "betacoronavirus"

    # This is a temporal solution for library_strategy. Once the values are also
    # mapped by the ontology (not only the fields) this should not be necessary.
    # str.strip(" strategy") removed any of those *characters* from both ends
    # ("Targeted-Capture strategy" -> "Targeted-Captu"); drop the suffix only.
    suffix = " strategy"
    for sample in mapped_json_data:
        strategy = sample.get("library_strategy")
        if strategy and strategy.endswith(suffix):
            sample["library_strategy"] = strategy[: -len(suffix)]

    return mapped_json_data
def map_to_data_to_new_schema(self):
    """Run the whole mapping: build the field mapping, convert the records,
    apply the extra formatting, check the required fields and write the result.
    """
    field_map = self.maping_schemas_based_on_geontology()
    records = self.mapping_json_data(field_map)
    formatted = self.additional_formating(records)
    # The required-field check receives the records returned by mapping_json_data.
    self.check_required_fields(records, self.destination_schema)
    self.write_json_fo_file(formatted)
    stderr.print(f"[green]Finished mapping to {self.destination_schema} schema")
    return
if directory is None: + directory = relecov_tools.utils.prompt_path( + msg="Select the directory which contains additional files for metadata" + ) + if not os.path.exists(directory): + log.error("Folder for additional files %s does not exist ", directory) + stderr.print( + "[red] Folder for additional files " + directory + " does not exist" + ) + sys.exit(1) + + try: + lab_metadata = self.mapping_json_data["required_files"]["metadata_file"][ + "file_name" + ] + except KeyError: + log.error("Metadata File is not defined in schema") + stderr.print("[red] Metadata File is not defined in schema") + sys.exit(1) + + metadata_path = os.path.join(directory, lab_metadata) + + if not os.path.isfile(metadata_path): + log.error("Metadata File %s does not exists", metadata_path) + stderr.print("[red] Metadata File " + metadata_path + "does not exists") + sys.exit(1) + self.lab_metadata = self.mapping_json_data["required_files"]["metadata_file"] + self.lab_metadata["file_name"] = metadata_path + + self.additional_files = [] + + if len(self.mapping_json_data["required_files"]) > 1: + for key, values in self.mapping_json_data["required_files"].items(): + if key == "metadata_file": + continue + if values["file_name"] == "": + self.additional_files.append(values) + continue + f_path = os.path.join(directory, values["file_name"]) + if not os.path.isfile(f_path): + log.error("Additional file %s does not exist ", f_path) + stderr.print("[red] Additional file " + f_path + " does not exist") + sys.exit(1) + values["file_name"] = f_path + self.additional_files.append(values) + + # Check if python file is defined + function_file = self.mapping_json_data["python_file"] + + if function_file == "": + self.function_file = None + else: + self.function_file = os.path.join( + os.path.dirname(__file__), "institution_scripts", function_file + ) + if not os.path.isfile(self.function_file): + log.error("File with functions %s does not exist ", self.function_file) + stderr.print( + "[red] File with 
def add_fixed_fields(self, mapped_data):
    """Build the output table: the heading followed by one row per record.

    Each cell takes the record's own value for that heading field when present,
    otherwise the value configured under "fixed_fields", otherwise "".

    Args:
        mapped_data (iterable of dict): Records keyed by heading field.

    Returns:
        list: ``self.heading`` followed by one list of cell values per record.
    """
    columns = self.heading
    constants = self.mapping_json_data["fixed_fields"]

    def cell(record, column):
        # The record's own value wins over the fixed value.
        if column in record:
            return record[column]
        return constants[column] if column in constants else ""

    table = [columns]
    table.extend([cell(record, column) for column in columns] for record in mapped_data)
    return table
+ and return the list that is going to be used later for adding/modifing + information + """ + if file_data["file_name"] != "": + f_name = file_data["file_name"] + stderr.print("[blue] Starting processing file " + f_name) + if f_name.endswith(".json"): + data = relecov_tools.utils.read_json_file(f_name) + elif f_name.endswith(".tsv"): + data = relecov_tools.utils.read_csv_file_return_dict(f_name, "\t") + elif f_name.endswith(".csv"): + data = relecov_tools.utils.read_csv_file_return_dict(f_name, ",") + elif f_name.endswith(".xlsx"): + header_flag = self.metadata_processing.get("header_flag") + data = relecov_tools.utils.read_excel_file( + f_name, "Sheet", header_flag, leave_empty=True + ) + else: + log.error("Additional file extension %s is not supported ", f_name) + stderr.print( + "[red] Additional file extension " + f_name + " is not supported" + ) + sys.exit(1) + else: + data = "" + if not self.processed_metadata: + self.processed_metadata = True + return self.mapping_metadata(data) + + if file_data["function"] == "None": + mapping_idx = self.heading.index(file_data["mapped_key"]) + for row in data_to_add[1:]: + # new_row_data = [] + s_value = str(row[mapping_idx]) + try: + item_data = data[s_value] + except KeyError: + log.info( + "Additional file %s does not have the information for %s ", + f_name, + s_value, + ) + stderr.print( + "[yellow] Additional file " + + f_name + + " does not have information for " + + str(s_value) + ) + continue + # sys.exit(1) + for m_field, f_field in file_data["mapped_fields"].items(): + try: + meta_idx = self.heading.index(m_field) + except ValueError as e: + log.error("Field %s does not exist in Metadata ", e) + stderr.print(f"[red] Field {e} does not exist") + sys.exit(1) + row[meta_idx] = item_data[f_field] + + else: + func_name = file_data["function"] + stderr.print("[yellow] Start processing function " + func_name) + exec( + "from relecov_tools.institution_scripts." 
def __init__(
    self,
    input_folder=None,
    template=None,
    output_folder=None,
    pipeline_conf_file=None,
):
    """Validate the inputs and work out the folders used to launch the pipeline.

    Missing arguments are requested interactively, except the configuration
    file, which defaults to conf/configuration.json next to this module. The
    process exits with status 1 when a path does not exist, when the
    configuration lacks the "launch_pipeline" section or one of its required
    keys, or when the output folder cannot be created.

    Args:
        input_folder (str, optional): Folder holding the fastq files of samples.
        template (str, optional): Path of the template structure.
        output_folder (str, optional): Folder under which the dated analysis
            folder is placed; created when missing.
        pipeline_conf_file (str, optional): JSON configuration file.
    """
    current_date = datetime.date.today().strftime("%Y%m%d")
    if input_folder is None:
        self.input_folder = relecov_tools.utils.prompt_path(
            msg="Select the folder which contains the fastq file of samples"
        )
    else:
        self.input_folder = input_folder
    if not os.path.exists(self.input_folder):
        log.error("Input folder %s does not exist ", self.input_folder)
        stderr.print("[red] Input folder " + self.input_folder + " does not exist")
        sys.exit(1)
    if template is None:
        self.template = relecov_tools.utils.prompt_path(
            msg="Select the path which contains the template structure"
        )
    else:
        self.template = template
    if not os.path.exists(self.template):
        log.error("Template folder %s does not exist ", self.template)
        stderr.print("[red] Template folder " + self.template + " does not exist")
        sys.exit(1)
    if pipeline_conf_file is None:
        pipeline_conf_file = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "conf",
            "configuration.json",
        )
    if not os.path.exists(pipeline_conf_file):
        log.error("Pipeline config file %s does not exist ", pipeline_conf_file)
        stderr.print(
            "[red] Pipeline config file " + pipeline_conf_file + " does not exist"
        )
        sys.exit(1)
    conf_settings = relecov_tools.utils.read_json_file(pipeline_conf_file)
    # Every key read further down is checked here, so a broken configuration
    # ends with a clear message instead of a NameError/KeyError later on.
    required_keys = (
        "analysis_name",
        "analysis_folder",
        "sample_stored_folder",
        "sample_link_folder",
    )
    try:
        data = conf_settings["launch_pipeline"]
    except KeyError:
        data = None
    if data is None or any(key not in data for key in required_keys):
        # The path lives in the local variable; it is never stored on self.
        log.error("Invalid pipeline config file %s ", pipeline_conf_file)
        stderr.print("[red] Invalid pipeline config file " + pipeline_conf_file)
        sys.exit(1)

    if output_folder is None:
        output_folder = relecov_tools.utils.prompt_path(
            msg="Select the output folder"
        )
    # Create the output folder if not exists
    try:
        os.makedirs(output_folder, exist_ok=True)
    except OSError as e:  # FileExistsError is a subclass of OSError
        log.error("Unable to create output folder %s ", e)
        stderr.print("[red] Unable to create output folder ", e)
        sys.exit(1)

    # Update the output folder with the current date and analysis name
    self.output_folder = os.path.join(
        output_folder, current_date + "_" + data["analysis_name"]
    )
    if os.path.exists(self.output_folder):
        msg = "Analysis folder already exists and it will be deleted. Do you want to continue? Y/N"
        confirmation = relecov_tools.utils.prompt_yn_question(msg)
        if confirmation is False:
            sys.exit(1)
        shutil.rmtree(self.output_folder)

    self.analysis_folder = os.path.join(self.output_folder, data["analysis_folder"])
    self.copied_sample_folder = os.path.join(
        self.output_folder, data["sample_stored_folder"]
    )
    self.linked_sample_folder = os.path.join(
        self.analysis_folder, data["sample_link_folder"]
    )
lab_sub_folder + existing_upload_folders = True + if existing_upload_folders: + lab_latest_folders[lab_folder] = { + "path": latest_folder_name, + "date": last_folder_date, + } + if last_folder_date > latest_date: + latest_date = last_folder_date + log.info("Latest date to process is %s", latest_date) + stderr.print("[blue] Collecting samples from ", latest_date) + return lab_latest_folders, latest_date + + upload_lab_folders, latest_date = get_latest_lab_folder(self) + samples_data = [] + for lab, data_folder in upload_lab_folders.items(): + # check if laboratory folder is the latest date to process + if data_folder["date"] != latest_date: + continue + # fetch the validate file and get sample id and r1 and r2 file path + validate_files = [ + os.path.join(data_folder["path"], f) + for f in os.listdir(data_folder["path"]) + if f.startswith("validated_lab_metadata") and f.endswith(".json") + ] + if not validate_files: + continue + for validate_file in validate_files: + validate_file_path = os.path.join(data_folder["path"], validate_file) + with open(validate_file_path) as fh: + data = json.load(fh) + for item in data: + sample = {} + sample["sequencing_sample_id"] = item["sequencing_sample_id"] + sample["r1_fastq_file_path"] = os.path.join( + item["r1_fastq_filepath"], item["sequence_file_R1_fastq"] + ) + if "r2_fastq_file_path" in item: + sample["r2_fastq_file_path"] = os.path.join( + item["r1_fastq_filepath"], item["sequence_file_R2_fastq"] + ) + samples_data.append(sample) + lab_code = lab.split("/")[-1] + log.info("Collecting samples for %s", lab_code) + stderr.print("[blue] Collecting samples for ", lab_code) + return samples_data + + def pipeline_exc(self): + # copy template folder and subfolders in output folder + shutil.copytree(self.template, self.output_folder) + # create the 00_reads folder + os.makedirs(self.linked_sample_folder, exist_ok=True) + # collect json with all validated samples + samples_data = self.join_valid_items() + + # iterate over the 
sample_data to copy the fastq files in the output folder + file_errors = [] + copied_samples = 0 + if len(samples_data) == 0: + stderr.print("[yellow] No samples were found. Deleting analysis folder") + shutil.rmtree(self.analysis_folder) + sys.exit(0) + for item in samples_data: + # fetch the file extension + ext_found = re.match(r".*(fastq.*|bam)", item["r1_fastq_file_path"]) + ext = ext_found.group(1) + sequencing_r1_sample_id = item["sequencing_sample_id"] + "_R1." + ext + # copy r1 sequencing file into the output folder + raw_folder = os.path.join(self.analysis_folder, self.copied_sample_folder) + + try: + shutil.copy(item["r1_fastq_file_path"], raw_folder) + # create simlink for the r1 + sample_r1_link_path = os.path.join( + self.linked_sample_folder, sequencing_r1_sample_id + ) + os.symlink(item["r1_fastq_file_path"], sample_r1_link_path) + except FileNotFoundError as e: + log.error("File not found %s", e) + file_errors.append(item["r1_fastq_file_path"]) + continue + copied_samples += 1 + # check if there is a r2 file + if "r2_fastq_file_path" in item: + sequencing_r2_sample_id = item["sequencing_sample_id"] + "_R2." + ext + try: + shutil.copy(item["r2_fastq_file_path"], raw_folder) + sample_r2_link_path = os.path.join( + self.linked_sample_folder, sequencing_r2_sample_id + ) + os.symlink(item["r2_fastq_file_path"], sample_r2_link_path) + except FileNotFoundError as e: + log.error("File not found %s", e) + file_errors.append(item["r2_fastq_file_path"]) + continue + if len(file_errors) > 0: + stderr.print( + "[red] Files do not found. Unable to copy", + "[red] " + str(len(file_errors)), + "[red]sample files", + ) + msg = "Do you want to delete analysis folder? 
class ResultUpload:
    """Checks the results folder and loads the relecov pipeline configuration."""

    def __init__(self, input_folder=None, conf_file=None):
        """Validate the input folder and read the pipeline configuration.

        The process exits with status 1 when the input folder or the
        configuration file does not exist, or when the configuration has no
        ["pipelines"]["relecov"] section.

        Args:
            input_folder (str, optional): Folder which contains the results.
                Requested interactively when omitted.
            conf_file (str, optional): JSON configuration file. Defaults to
                conf/configuration.json next to this module.
        """
        if input_folder is None:
            self.input_folder = relecov_tools.utils.prompt_path(
                msg="Select the folder which contains the results"
            )
        else:
            self.input_folder = input_folder
        if not os.path.exists(self.input_folder):
            log.error("Input folder %s does not exist ", self.input_folder)
            stderr.print("[red] Input folder " + self.input_folder + " does not exist")
            sys.exit(1)

        # Honour the caller's file; it used to be overwritten unconditionally.
        if conf_file is None:
            conf_file = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                "conf",
                "configuration.json",
            )
        if not os.path.exists(conf_file):
            # Report the local path: no conf-file attribute is set on self.
            log.error("Configuration file %s does not exist ", conf_file)
            stderr.print("[red] Pipeline config file " + conf_file + " does not exist")
            sys.exit(1)
        conf_settings = relecov_tools.utils.read_json_file(conf_file)
        try:
            data = conf_settings["pipelines"]["relecov"]
        except KeyError:
            log.error("Invalid pipeline config file %s ", conf_file)
            stderr.print("[red] Invalid pipeline config file " + conf_file)
            # Without the section there is nothing to load; stop here instead
            # of failing on the undefined 'data' below.
            sys.exit(1)
        stderr.print(f"[blue] Configuration file loaded {data}")
class BioinfoReportLog:
    """Collects messages per method name under 'error', 'valid' and 'warning'."""

    def __init__(self, log_report=None):
        """Start from ``log_report`` when it is truthy, else from an empty report."""
        self.report = (
            log_report if log_report else {"error": {}, "valid": {}, "warning": {}}
        )

    def update_log_report(self, method_name, status, message):
        """Append a message to the report section selected by ``status``.

        Args:
            method_name (str): The name of the method being logged.
            status (str): One of 'valid', 'error' or 'warning'.
            message (str): The message to be logged.

        Returns:
            dict: The updated progress log report.

        Raises:
            ValueError: If an invalid status is provided.
        """
        if status not in ("valid", "error", "warning"):
            raise ValueError("Invalid status provided.")
        section = self.report[status]
        if method_name not in section:
            section[method_name] = []
        section[method_name].append(message)
        return self.report

    def print_log_report(self, name, sections):
        """Print the log report through the shared utils helper.

        Args:
            name (str): The name of the log report.
            sections (list of str): The sections of the log report to be printed.

        Returns:
            None
        """
        relecov_tools.utils.print_log_report(self.report, name, sections)
class BioinfoMetadata(BioinfoReportLog):
    """Add bioinformatics analysis results to the read-lab-metadata JSON.

    The software scope read from ``conf/bioinfo_config.json`` drives every
    step: which files are searched for, which of them are mandatory, how they
    are parsed and which fields are copied into each sample entry.
    """

    # Placeholder written when a value or a file path cannot be resolved.
    _NOT_PROVIDED = "Not Provided [GENEPIO:0001668]"

    def __init__(
        self,
        readlabmeta_json_file=None,
        input_folder=None,
        output_folder=None,
        software=None,
    ):
        """Collect the inputs (prompting for the missing ones) and load the configuration.

        Args:
            readlabmeta_json_file (str, optional): JSON file created by read-lab-metadata.
            input_folder (str, optional): Folder holding the bioinformatics results.
            output_folder (str, optional): Folder where the output JSON is written.
            software (str, optional): Key of the software scope in bioinfo_config.json.

        Raises:
            SystemExit: With status 1 when the lab metadata file does not exist
                or no configuration is available for the requested software.
        """
        # Init process log
        super().__init__()
        self.log_report = BioinfoReportLog()
        init_name = self.__init__.__name__

        # Parse read-lab-meta-data
        if readlabmeta_json_file is None:
            readlabmeta_json_file = relecov_tools.utils.prompt_path(
                msg="Select the json file that was created by the read-lab-metadata"
            )
        if not os.path.isfile(readlabmeta_json_file):
            self.log_report.update_log_report(
                init_name,
                "error",
                f"file {readlabmeta_json_file} does not exist",
            )
            self._exit_with_error(init_name)
        self.readlabmeta_json_file = readlabmeta_json_file

        # Initialize j_data object
        stderr.print("[blue]Reading lab metadata json")
        self.j_data = self.collect_info_from_lab_json()

        # Parse input/output folder
        if input_folder is None:
            self.input_folder = relecov_tools.utils.prompt_path(
                msg="Select the input folder"
            )
        else:
            self.input_folder = input_folder
        if output_folder is None:
            self.output_folder = relecov_tools.utils.prompt_path(
                msg="Select the output folder"
            )
        else:
            self.output_folder = output_folder

        # Parse bioinfo configuration
        self.bioinfo_json_file = os.path.join(
            os.path.dirname(__file__), "conf", "bioinfo_config.json"
        )
        if software is None:
            # NOTE(review): a path prompt is used to ask for a software name;
            # confirm whether a plain-text prompt helper should be used instead.
            software = relecov_tools.utils.prompt_path(
                msg="Select the software, pipeline or tool use in the bioinformatic analysis: "
            )
        self.software_name = software
        available_software = self.get_available_software(self.bioinfo_json_file)
        bioinfo_config = ConfigJson(self.bioinfo_json_file)
        if self.software_name in available_software:
            self.software_config = bioinfo_config.get_configuration(self.software_name)
        else:
            self.log_report.update_log_report(
                init_name,
                "error",
                f"No configuration available for '{self.software_name}'. Currently, the only available software options are:: {', '.join(available_software)}",
            )
            self._exit_with_error(init_name)

    def _exit_with_error(self, method_name):
        """Print the 'error' section logged for method_name and exit with status 1.

        print_log_report returns None, so handing its result to sys.exit
        would end the process with a success status.
        """
        self.log_report.print_log_report(method_name, ["error"])
        sys.exit(1)

    def _get_sample_name(self, row, method_name):
        """Return the text before the first '_' of the sample's R1 fastq file name.

        Logs a warning under method_name and returns None when nothing matches.
        """
        sample_match = re.match(r"^[^_]+", row["sequence_file_R1_fastq"])
        if sample_match:
            return sample_match.group()
        self.log_report.update_log_report(
            method_name,
            "warning",
            f'Regex failed to find extract sample name from: {row["sequence_file_R1_fastq"]}. Skipping...',
        )
        return None

    def get_available_software(self, json):
        """Get list of available software in configuration

        Args:
            json (str): Path to bioinfo configuration json file.

        Returns:
            available_software: List containing available software defined in json.
        """
        config = relecov_tools.utils.read_json_file(json)
        return list(config)

    def scann_directory(self):
        """Scans bioinfo analysis directory and identifies files according to the file name patterns defined in the software configuration json.

        Returns:
            files_found: A dictionary containing file paths found based on the definitions provided in the bioinformatic JSON file within the software scope (self.software_config).

        Raises:
            SystemExit: With status 1 when no file matches any pattern.
        """
        method_name = f"{self.scann_directory.__name__}"
        # Walk the tree once and reuse the listing for every configured pattern.
        walked = [
            (root, files)
            for root, _, files in os.walk(self.input_folder, topdown=True)
        ]
        total_files = sum(len(files) for _, files in walked)
        files_found = {}

        for topic_key, topic_scope in self.software_config.items():
            if "fn" not in topic_scope:
                self.log_report.update_log_report(
                    method_name,
                    "warning",
                    f"No 'fn' (file pattern) found in '{self.software_name}.{topic_key}'.",
                )
                continue
            for root, files in walked:
                matching_files = [
                    os.path.join(root, file_name)
                    for file_name in files
                    if re.search(topic_scope["fn"], file_name)
                ]
                if matching_files:
                    # NOTE(review): matches from a later directory replace the
                    # ones found earlier for the same topic; confirm this is
                    # intended before accumulating them instead.
                    files_found[topic_key] = matching_files
        if not files_found:
            self.log_report.update_log_report(
                method_name,
                "error",
                f"No files found in '{self.input_folder}' according to '{os.path.basename(self.bioinfo_json_file)}' file name patterns.",
            )
            self._exit_with_error(method_name)
        self.log_report.update_log_report(
            method_name,
            "valid",
            f"Scannig process succeed. Scanned {total_files} files.",
        )
        self.log_report.print_log_report(method_name, ["valid", "warning"])
        return files_found

    def validate_software_mandatory_files(self, files_dict):
        """Validates the presence of all mandatory files as defined in the software configuration JSON.

        Args:
            files_dict (dict{str:str}): A dictionary containing file paths found based on the definitions provided in the bioinformatic JSON file within the software scope (self.software_config).

        Raises:
            SystemExit: With status 1 when a scope flagged as required has no file.
        """
        method_name = f"{self.validate_software_mandatory_files.__name__}"
        missing_required = [
            key
            for key, scope in self.software_config.items()
            if scope.get("required") is True and key not in files_dict
        ]
        if missing_required:
            # The scopes are listed explicitly; the loop variable left over
            # from the scan is unrelated to what is actually missing.
            self.log_report.update_log_report(
                method_name,
                "error",
                f"Missing mandatory files in {self.software_name}: {', '.join(missing_required)}",
            )
            self._exit_with_error(method_name)
        self.log_report.update_log_report(
            method_name, "valid", "Successfull validation of mandatory files."
        )
        self.log_report.print_log_report(method_name, ["valid", "warning"])
        return

    def add_bioinfo_results_metadata(self, files_dict, j_data):
        """Adds metadata from bioinformatics results to j_data.
        It first calls file_handlers and then maps the handled
        data into j_data.

        Args:
            files_dict (dict{str:str}): A dictionary containing file paths found based on the definitions provided in the bioinformatic JSON file within the software scope (self.software_config).

            j_data (list(dict{str:str}): A list of dictionaries containing metadata lab (list item per sample).

        Returns:
            j_data_mapped: A list of dictionaries with bioinformatics metadata mapped into j_data.
        """
        method_name = f"{self.add_bioinfo_results_metadata.__name__}"
        # Start from the input so something is returned even when no scope
        # could be processed.
        j_data_mapped = j_data
        for key in self.software_config:
            # Update bioinfo configuration key/scope
            self.current_config_key = key
            # These scopes are parsed by dedicated methods
            if key in ("workflow_summary", "fixed_values"):
                continue
            if key not in files_dict:
                self.log_report.update_log_report(
                    method_name,
                    "warning",
                    f"No file path found for '{self.software_name}.{key}'",
                )
                continue
            stderr.print(f"[blue]Start processing {self.software_name}.{key}")
            # Handling files
            data_to_map = self.handling_files(files_dict[key])
            if not data_to_map:
                self.log_report.update_log_report(
                    method_name,
                    "warning",
                    f"No metadata found to perform standard mapping when processing '{self.software_name}.{key}'",
                )
                continue
            # Mapping data to j_data
            j_data_mapped = self.mapping_over_table(
                j_data=j_data,
                map_data=data_to_map,
                mapping_fields=self.software_config[key]["content"],
                table_name=files_dict[key],
            )
        self.log_report.print_log_report(method_name, ["valid", "warning"])
        return j_data_mapped

    def handling_files(self, file_list):
        """Handles different file formats to extract data regardless of their structure. The goal is to extract the data contained in files specified in ${file_list}, using either 'standard' handlers defined in this class or pipeline-specific file handlers.
        (inspired from ./metadata_homogenizer.py)

        A file handler method must generate a data structure as follow:
        {
            'SAMPLE1': {
                'field1': 'value1'
                'field2': 'value2'
                'field3': 'value3'
            },
            'SAMPLE2': {
                'field1': 'value1'
                'field2': 'value2'
                'field3': 'value3'
            },
            ...
        }
        Note: ensure that 'field1','field2','field3' correspond with the values specified in the 'content' section of each software configuration scope (see: conf/bioinfo_config.json).

        Args:
            file_list (list): A list of file path/s to be processed.

        Returns:
            data: A dictionary containing bioinfo metadata handled for each sample.

        Raises:
            SystemExit: With status 1 when the configured file extension is not
                recognized or the pipeline-specific handler fails.
        """
        import importlib

        method_name = f"{self.add_bioinfo_results_metadata.__name__}:{self.handling_files.__name__}"
        scope = self.software_config[self.current_config_key]
        file_name = scope.get("fn")
        file_extension = os.path.splitext(file_name)[1]
        # Parsing key position (configured 1-based, handed over 0-based)
        try:
            sample_idx_possition = scope["sample_col_idx"] - 1
        except KeyError:
            sample_idx_possition = None
            self.log_report.update_log_report(
                method_name,
                "warning",
                f"No sample-index-column defined in '{self.software_name}.{self.current_config_key}'. Using default instead.",
            )
        # Parsing files
        func_name = scope.get("function")
        if func_name is None:
            # Standard handlers: only the first file of the list is read.
            if file_name.endswith(".csv"):
                separator = ","
            elif file_name.endswith((".tsv", ".tab")):
                separator = "\t"
            else:
                self.log_report.update_log_report(
                    method_name,
                    "error",
                    f"Unrecognized defined file name extension '{file_extension}' in '{file_name}'.",
                )
                self._exit_with_error(method_name)
            return relecov_tools.utils.read_csv_file_return_dict(
                file_name=file_list[0], sep=separator, key_position=sample_idx_possition
            )
        try:
            # Resolve the pipeline-specific handler by name. importlib avoids
            # exec/eval, whose import only reached eval through locals() side
            # effects.
            utils_name = f"relecov_tools.assets.pipeline_utils.{self.software_name}"
            handler = getattr(importlib.import_module(utils_name), func_name)
            data = handler(file_list)
        except Exception as e:
            # Logged under the same name that is printed right after.
            self.log_report.update_log_report(
                method_name,
                "error",
                f"Error occurred while parsing '{func_name}': {e}.",
            )
            self._exit_with_error(method_name)
        return data

    def mapping_over_table(self, j_data, map_data, mapping_fields, table_name):
        """Maps bioinformatics metadata from map_data to j_data based on the mapping_fields.

        Args:
            j_data (list(dict{str:str}): A list of dictionaries containing metadata lab (one item per sample).
            map_data (dict(dict{str:str})): A dictionary containing bioinfo metadata handled by the method handling_files().
            mapping_fields (dict{str:str}): A dictionary of mapping fields defined in the 'content' definition under each software scope (see conf/bioinfo.config).
            table_name (list): Path/s of the file/s the data was read from.

        Returns:
            j_data: updated j_data with bioinformatic metadata mapped in it.
        """
        method_name = f"{self.mapping_over_table.__name__}:{self.software_name}.{self.current_config_key}"
        errors = []
        field_errors = {}
        field_valid = {}
        for row in j_data:
            # TODO: We should consider an independent module that verifies that sample's name matches this pattern.
            #       If we add warnings within this module, every time mapping_over_table is invoked it will print redundant warnings
            sample_name = self._get_sample_name(row, method_name)
            if sample_name is None:
                continue
            if sample_name not in map_data:
                errors.append(sample_name)
                for field in mapping_fields:
                    row[field] = self._NOT_PROVIDED
                continue
            for field, value in mapping_fields.items():
                try:
                    # FIXME: we have to allow more than one data type to make json validation module work.
                    row[field] = str(map_data[sample_name][value])
                    field_valid.setdefault(sample_name, {})[field] = value
                except KeyError as e:
                    # Keep every failing field of the sample, not just the last one.
                    field_errors.setdefault(sample_name, {})[field] = e
                    row[field] = self._NOT_PROVIDED
        # work around when map_data comes from several per-sample tables/files instead of single table
        if len(table_name) > 2:
            table_label = os.path.dirname(table_name[0])
        else:
            table_label = table_name[0]
        # Parse missing sample errors
        if errors:
            lenerrs = len(errors)
            self.log_report.update_log_report(
                method_name,
                "warning",
                f"{lenerrs} samples missing in '{table_label}': {', '.join(errors)}.",
            )
        else:
            self.log_report.update_log_report(
                method_name,
                "valid",
                f"All samples were successfully found in {table_label}.",
            )
        # Parse missing fields errors
        # TODO: this stdout can be improved
        if field_errors:
            self.log_report.update_log_report(
                method_name,
                "warning",
                f"Missing fields in {table_label}:\n\t{field_errors}",
            )
        else:
            self.log_report.update_log_report(
                method_name,
                "valid",
                f"Successfully mapped fields in {', '.join(field_valid.keys())}.",
            )
        # Print report
        self.log_report.print_log_report(method_name, ["valid", "warning"])
        return j_data

    def get_multiqc_software_versions(self, file_list, j_data):
        """Reads multiqc html file, finds table containing software version info, and map it to j_data

        Args:
            file_list (list): A list containing the path to file multiqc_report.html.
            j_data (list(dict{str:str}): A list of dictionaries containing metadata lab (one item per sample).

        Returns:
            j_data: updated j_data with software details mapped in it.

        Raises:
            SystemExit: With status 1 when the software versions section, its
                table, or a three-column row cannot be found in the report.
        """
        method_name = f"{self.get_multiqc_software_versions.__name__}"
        # Handle multiqc_report.html
        f_path = file_list[0]
        program_versions = {}

        with open(f_path, "r") as html_file:
            html_content = html_file.read()
        soup = BeautifulSoup(html_content, features="lxml")
        div_id = "mqc-module-section-software_versions"
        versions_div = soup.find("div", id=div_id)
        if not versions_div:
            self.log_report.update_log_report(
                method_name,
                "error",
                f"Failed to locate the required '{div_id}' div section in the '{f_path}' file.",
            )
            self._exit_with_error(method_name)
        table = versions_div.find("table", class_="table")
        if not table:
            self.log_report.update_log_report(
                method_name,
                "error",
                f"Unable to locate the table containing software versions in file {f_path} under div section {div_id}.",
            )
            self._exit_with_error(method_name)
        for table_row in table.find_all("tr")[1:]:  # skipping header
            columns = table_row.find_all("td")
            if len(columns) != 3:
                self.log_report.update_log_report(
                    method_name,
                    "error",
                    f"HTML entry error in {columns}. HTML table expected format should be \n Process Name\n\n Software \n.",
                )
                self._exit_with_error(method_name)
            # Second cell holds the program name, third one its version.
            program_versions[columns[1].text.strip()] = columns[2].text.strip()

        # Mapping multiqc software versions to j_data
        field_errors = {}
        for row in j_data:
            # Get sample name to track whether version assignment was successful or not.
            sample_name = self._get_sample_name(row, method_name)
            if sample_name is None:
                continue

            # Append software version and name
            software_content_details = self.software_config["workflow_summary"].get(
                "content"
            )
            for content_key, content_value in software_content_details.items():
                for key, value in content_value.items():
                    if "software_version" in content_key:
                        # Add software versions
                        try:
                            row[key] = program_versions[value]
                        except KeyError as e:
                            field_errors.setdefault(sample_name, {})[value] = e
                            row[key] = self._NOT_PROVIDED
                    elif "software_name" in content_key:
                        # Add software name (a plain assignment, it cannot fail)
                        row[key] = value

        # update progress log
        if field_errors:
            self.log_report.update_log_report(
                method_name,
                "warning",
                f"Encountered field errors while mapping data: {field_errors}",
            )
        else:
            self.log_report.update_log_report(
                method_name, "valid", "Successfully field mapped data."
            )
        self.log_report.print_log_report(method_name, ["valid", "warning"])
        return j_data

    def add_fixed_values(self, j_data):
        """Add fixed values to j_data as defined in the bioinformatics configuration (definition: "fixed values")

        Args:
            j_data (list(dict{str:str}): A list of dictionaries containing metadata lab (one item per sample).

        Returns:
            j_data: updated j_data with fixed values added in it.
        """
        method_name = f"{self.add_fixed_values.__name__}"
        try:
            f_values = self.software_config["fixed_values"]
        except KeyError as e:
            # A scope without fixed values is reported, not treated as fatal.
            self.log_report.update_log_report(
                method_name, "warning", f"Error found while adding fixed values: {e}"
            )
        else:
            for row in j_data:
                for field, value in f_values.items():
                    row[field] = value
            self.log_report.update_log_report(
                method_name, "valid", "Fields added successfully."
            )
        self.log_report.print_log_report(method_name, ["valid", "warning"])
        return j_data

    def add_bioinfo_files_path(self, files_found_dict, j_data):
        """Adds file paths essential for handling and mapping bioinformatics metadata to the j_data.
        For each sample in j_data, the function assigns the corresponding file path based on the identified files in files_found_dict.

        If multiple files are identified per configuration item (e.g., viralrecon.mapping_consensus → *.consensus.fa), each sample in j_data receives the path that contains its sample name.
        If none of the paths contains the sample name, the first path of the item is used; an item without any path gets "Not Provided [GENEPIO:0001668]".

        Args:
            files_found_dict (dict): A dictionary containing file paths identified for each configuration item.
            j_data (list(dict{str:str}): A list of dictionaries containing metadata lab (one item per sample).

        Returns:
            j_data: Updated j_data with file paths mapped for bioinformatic metadata.
        """
        method_name = f"{self.add_bioinfo_files_path.__name__}"
        sample_name_error = 0
        for row in j_data:
            sample_name = self._get_sample_name(row, method_name)
            if sample_name is None:
                # Counted so the final "valid" entry is only logged when every
                # sample name could be extracted.
                sample_name_error += 1
                continue
            for key, value in files_found_dict.items():
                file_path = self._NOT_PROVIDED
                if value:  # Check if value is not empty
                    file_path = next(
                        (file for file in value if sample_name in file), value[0]
                    )
                row[f"{self.software_name}_filepath_{key}"] = file_path
        self.log_report.print_log_report(method_name, ["warning"])
        if sample_name_error == 0:
            self.log_report.update_log_report(
                method_name, "valid", "File paths added successfully."
            )
        self.log_report.print_log_report(method_name, ["valid", "warning"])
        return j_data

    def collect_info_from_lab_json(self):
        """Reads lab metadata from a JSON file and creates a list of dictionaries.
        Reads lab metadata from the specified JSON file and converts it into a list of dictionaries.
        This list is used to add the rest of the fields.

        Returns:
            json_lab_data: A list of dictionaries containing lab metadata (aka j_data).

        Raises:
            SystemExit: With status 1 when reading the file raises ValueError.
        """
        method_name = f"{self.collect_info_from_lab_json.__name__}"
        try:
            json_lab_data = relecov_tools.utils.read_json_file(
                self.readlabmeta_json_file
            )
        except ValueError:
            self.log_report.update_log_report(
                method_name,
                "error",
                f"Invalid lab-metadata json file: {self.readlabmeta_json_file}",
            )
            self._exit_with_error(method_name)
        return json_lab_data

    def create_bioinfo_file(self):
        """Create the bioinfodata json with collecting information from lab
        metadata json, mapping_stats, and more information from the files
        inside input directory.

        Returns:
            bool: True if the bioinfo file creation process was successful.
        """
        # Find and validate bioinfo files
        stderr.print("[blue]Sanning input directory...")
        files_found_dict = self.scann_directory()
        stderr.print("[blue]Validating required files...")
        self.validate_software_mandatory_files(files_found_dict)
        # Add bioinfo metadata to j_data
        stderr.print("[blue]Adding bioinfo metadata to read lab metadata...")
        self.j_data = self.add_bioinfo_results_metadata(files_found_dict, self.j_data)
        stderr.print("[blue]Adding software versions to read lab metadata...")
        self.j_data = self.get_multiqc_software_versions(
            files_found_dict["workflow_summary"], self.j_data
        )
        stderr.print("[blue]Adding fixed values")
        self.j_data = self.add_fixed_values(self.j_data)
        # Adding files path
        stderr.print("[blue]Adding files path to read lab metadata")
        self.j_data = self.add_bioinfo_files_path(files_found_dict, self.j_data)
        # Generate readlab + bioinfolab processed metadata.
        file_name = (
            "bioinfo_"
            + os.path.splitext(os.path.basename(self.readlabmeta_json_file))[0]
            + ".json"
        )
        stderr.print("[blue]Writting output json file")
        os.makedirs(self.output_folder, exist_ok=True)
        file_path = os.path.join(self.output_folder, file_name)
        relecov_tools.utils.write_json_fo_file(self.j_data, file_path)
        stderr.print("[green]Sucessful creation of bioinfo analyis file")
        return True
class RelecovMetadata:
    """Turn the laboratory metadata Excel file into a schema-shaped JSON file.

    Rows are read from the Excel sheet, filtered against the samples JSON
    file, extended with data from configuration JSON files, and written to
    the output folder.
    """

    # Placeholder used to auto-complete fields that were not provided.
    _NOT_PROVIDED = "Not Provided [GENEPIO:0001668]"

    def __init__(self, metadata_file=None, sample_list_file=None, output_folder=None):
        """Collect the inputs (prompting for the missing ones) and load schema and configuration.

        Args:
            metadata_file (str, optional): Excel file which contains the metadata.
            sample_list_file (str, optional): JSON file with the sample information.
            output_folder (str, optional): Folder where the output is written.

        Raises:
            SystemExit: With status 1 when either input file does not exist.
        """
        if metadata_file is None:
            self.metadata_file = relecov_tools.utils.prompt_path(
                msg="Select the excel file which contains metadata"
            )
        else:
            self.metadata_file = metadata_file

        if not os.path.exists(self.metadata_file):
            log.error("Metadata file %s does not exist ", self.metadata_file)
            stderr.print(
                "[red] Metadata file " + self.metadata_file + " does not exist"
            )
            sys.exit(1)

        if sample_list_file is None:
            self.sample_list_file = relecov_tools.utils.prompt_path(
                msg="Select the file which contains the sample information"
            )
        else:
            self.sample_list_file = sample_list_file

        if not os.path.exists(self.sample_list_file):
            log.error(
                "Sample information file %s does not exist ", self.sample_list_file
            )
            stderr.print(
                "[red] Sample information " + self.sample_list_file + " does not exist"
            )
            sys.exit(1)

        if output_folder is None:
            self.output_folder = relecov_tools.utils.prompt_path(
                msg="Select the output folder"
            )
        else:
            self.output_folder = output_folder
        out_path = os.path.realpath(self.output_folder)
        # The lab code is the name of the folder that contains the output
        # folder; os.path keeps this independent of the path separator.
        self.lab_code = os.path.basename(os.path.dirname(out_path))
        self.logsum = LogSum(
            output_location=self.output_folder, unique_key=self.lab_code, path=out_path
        )
        config_json = ConfigJson()
        # TODO: remove hardcoded schema selection
        relecov_schema = config_json.get_topic_data("json_schemas", "relecov_schema")
        relecov_sch_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "schema", relecov_schema
        )
        self.configuration = config_json

        with open(relecov_sch_path, "r") as fh:
            self.relecov_sch_json = json.load(fh)

        # Maps each schema 'label' (the Excel heading) to its property name.
        self.label_prop_dict = {}
        for prop, values in self.relecov_sch_json["properties"].items():
            try:
                self.label_prop_dict[values["label"]] = prop
            except KeyError:
                log.warning("Property %s does not have 'label' attribute", prop)
                stderr.print(
                    "[orange]Property " + prop + " does not have 'label' attribute"
                )

        self.json_req_files = config_json.get_topic_data(
            "lab_metadata", "lab_metadata_req_json"
        )
        self.schema_name = self.relecov_sch_json["title"]
        self.schema_version = self.relecov_sch_json["version"]
        self.metadata_processing = config_json.get_topic_data(
            "sftp_handle", "metadata_processing"
        )
        self.samples_json_fields = config_json.get_topic_data(
            "lab_metadata", "samples_json_fields"
        )

    def match_to_json(self, valid_metadata_rows):
        """Keep only the rows from samples present in the input file samples.json

        Args:
            valid_metadata_rows (list(dict)): List of rows from metadata_lab.xlsx file

        Returns:
            clean_metadata_rows(list(dict)): Rows whose sample id is a key of the samples json.
            missing_samples(list(str)): Sample ids that are not in the samples json.
        """
        samples_json = relecov_tools.utils.read_json_file(self.sample_list_file)
        clean_metadata_rows = []
        missing_samples = []
        for row in valid_metadata_rows:
            sample_id = str(row["sequencing_sample_id"]).strip()
            self.logsum.feed_key(sample=sample_id)
            if sample_id in samples_json:
                clean_metadata_rows.append(row)
                continue
            log_text = "Sample missing in samples data Json file"
            self.logsum.add_error(sample=sample_id, entry=log_text)
            missing_samples.append(sample_id)
        return clean_metadata_rows, missing_samples

    def adding_fixed_fields(self, m_data):
        """Include fixed data that are always the same for every sample"""
        p_data = self.configuration.get_topic_data("lab_metadata", "fixed_fields")
        for row in m_data:
            for key, value in p_data.items():
                row[key] = value
            row["schema_name"] = self.schema_name
            row["schema_version"] = self.schema_version
            row["submitting_institution_id"] = self.lab_code
        return m_data

    def adding_copy_from_other_field(self, m_data):
        """Add a new field with information based in another field."""
        p_data = self.configuration.get_topic_data(
            "lab_metadata", "required_copy_from_other_field"
        )
        for row in m_data:
            for key, value in p_data.items():
                row[key] = row[value]
        return m_data

    def adding_post_processing(self, m_data):
        """Add fields which values require post processing

        Each configured entry maps a value of a source field to a
        "target_field::target_value" string. An exact match is used when
        available; otherwise every configured key contained in the value is
        applied.
        """
        p_data = self.configuration.get_topic_data(
            "lab_metadata", "required_post_processing"
        )
        for row in m_data:
            for key, p_values in p_data.items():
                value = row.get(key)
                if not value:
                    continue
                if value in p_values:
                    p_field, p_set = p_values[value].split("::")
                    row[p_field] = p_set
                    continue
                # Check if key p_values should match only part of the value
                for reg_key, reg_value in p_values.items():
                    if reg_key in value:
                        p_field, p_set = reg_value.split("::")
                        row[p_field] = p_set
        return m_data

    def adding_ontology_to_enum(self, m_data):
        """Read the schema to get the properties enum and, for those fields
        which have an enum property value, replace the value for the one
        that is defined in the schema.
        """
        # property -> {label without ontology term: full enum value}
        enum_dict = {}
        for prop, values in self.relecov_sch_json["properties"].items():
            if "enum" not in values:
                continue
            enum_dict[prop] = {}
            for enum in values["enum"]:
                go_match = re.search(r"(.+) \[\w+:.*", enum)
                label = go_match.group(1) if go_match else enum
                enum_dict[prop][label] = enum

        ontology_errors = {}
        for row in m_data:
            for key, e_values in enum_dict.items():
                if key not in row:
                    continue
                if row[key] in e_values:
                    row[key] = e_values[row[key]]
                    continue
                sample_id = row["sequencing_sample_id"]
                log_text = f"No ontology found for {row[key]} in {key}"
                self.logsum.add_warning(sample=sample_id, entry=log_text)
                ontology_errors[key] = ontology_errors.get(key, 0) + 1
        if ontology_errors:
            # A list keeps the summary in a stable order (first failure first).
            stderr.print(
                "[red] No ontology could be added in:\n",
                "\n".join([f"{x} - {y} samples" for x, y in ontology_errors.items()]),
            )
        return m_data

    def process_from_json(self, m_data, json_fields):
        """Find the labels that are missing in the file to match the given schema.

        Args:
            m_data (list(dict)): Metadata rows, updated in place.
            json_fields (dict): Holds 'map_field' (row field used as lookup
                key), 'j_data' (lookup data) and 'adding_fields' (fields
                auto-completed when the lookup key is "Not Provided").

        Returns:
            m_data: The same list with the looked-up fields added.
        """
        map_field = json_fields["map_field"]
        json_data = json_fields["j_data"]
        for row in m_data:
            sample_id = str(row.get("sequencing_sample_id"))
            if not row.get(map_field):
                continue
            try:
                row.update(json_data[row[map_field]])
            except KeyError as error:
                # Drop the ontology term in brackets before comparing.
                clean_error = re.sub(r"[\[].*?[\]]", "", str(error.args[0]))
                if str(clean_error).lower().strip() != "not provided":
                    log_text = (
                        f"Unknown map_field value {error} for json data: "
                        + f"{str(map_field)} in sample {sample_id}. Skipped"
                    )
                    self.logsum.add_warning(sample=sample_id, entry=log_text)
                    continue
                log_text = (
                    f"Label {map_field} was not provided in sample "
                    + f"{sample_id}, auto-completing with Not Provided"
                )
                self.logsum.add_warning(sample=sample_id, entry=log_text)
                # TODO: Include Not Provided as a configuration field
                row.update(
                    {x: self._NOT_PROVIDED for x in json_fields["adding_fields"]}
                )
        return m_data

    def adding_fields(self, metadata):
        """Add information located inside various json file as fields"""

        for key, values in self.json_req_files.items():
            stderr.print(f"[blue]Processing {key}")
            f_path = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "conf", values["file"]
            )
            values["j_data"] = relecov_tools.utils.read_json_file(f_path)
            metadata = self.process_from_json(metadata, values)
            stderr.print(f"[green]Processed {key}")

        # Include Sample information data from sample json file
        stderr.print("[blue]Processing sample data file")
        # TODO: Change sequencing_sample_id for some unique ID used in RELECOV database
        s_json = {
            "map_field": "sequencing_sample_id",
            "adding_fields": self.samples_json_fields,
            "j_data": relecov_tools.utils.read_json_file(self.sample_list_file),
        }
        metadata = self.process_from_json(metadata, s_json)
        stderr.print("[green]Processed sample data file.")
        return metadata

    def read_configuration_json_files(self):
        """Read json files defined in configuration lab_metadata_req_json
        property

        Returns:
            c_files (dict): Content of each configured json file, keyed by its
                configuration item name.
        """
        conf_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "conf")
        c_files = {}
        # The configured files live in self.json_req_files; each entry keeps
        # its file name under the "file" key.
        for item, value in self.json_req_files.items():
            f_path = os.path.join(conf_dir, value["file"])
            c_files[item] = relecov_tools.utils.read_json_file(f_path)
        return c_files

    def read_metadata_file(self):
        """Reads the input metadata file from header row, changes the metadata heading
        with their property name values defined in schema. Convert the date columns
        value to the yyyy-mm-dd format. Return list of dicts with data
        """
        meta_sheet = self.metadata_processing.get("excel_sheet")
        header_flag = self.metadata_processing.get("header_flag")
        ws_metadata_lab, heading_row_number = relecov_tools.utils.read_excel_file(
            self.metadata_file, meta_sheet, header_flag, leave_empty=False
        )
        # Dates given as text: YYYY/MM/DD, YYYY.MM.DD or YYYY-MM-DD prefix.
        date_pattern = re.compile(r"^\d{4}[-/.]\d{2}[-/.]\d{2}")
        valid_metadata_rows = []
        row_number = heading_row_number
        for row in ws_metadata_lab:
            row_number += 1
            property_row = {}
            try:
                sample_id = str(row["Sample ID given for sequencing"]).strip()
            except KeyError:
                log_text = f"Sample ID given for sequencing empty in row {row_number}"
                log.error(log_text)
                stderr.print(f"[red]{log_text}")
                continue
            for key, value in row.items():
                # skip the first column of the Metadata lab file
                if header_flag in key:
                    continue
                if value is None or "not provided" in str(value).lower():
                    log_text = f"{key} not provided for sample {sample_id}"
                    self.logsum.add_warning(sample=sample_id, entry=log_text)
                    continue
                lowered_key = key.lower()
                if "date" in lowered_key:
                    if isinstance(value, dtime):
                        value = str(value.date())
                    else:
                        # Work on the text form: the cell may hold an object
                        # (e.g. a date) whose text matches the pattern but that
                        # has no str-like replace().
                        date_match = date_pattern.match(str(value))
                        if date_match:
                            value = (
                                date_match.group(0).replace("/", "-").replace(".", "-")
                            )
                        else:
                            try:
                                value = str(int(float(str(value))))
                                log.info(
                                    "Date given as an integer. Understood as a year"
                                )
                            except (ValueError, TypeError):
                                log_text = f"Invalid date format in {key}: {value}"
                                self.logsum.add_error(sample=sample_id, entry=log_text)
                                stderr.print(f"[red]{log_text} for sample {sample_id}")
                                continue
                elif "sample id" in lowered_key:
                    if isinstance(value, (float, int)):
                        value = str(int(value))
                elif isinstance(value, (float, int)):
                    value = str(value)
                try:
                    property_row[self.label_prop_dict[key]] = value
                except KeyError as e:
                    log_text = f"Error when mapping the label {str(e)}"
                    self.logsum.add_error(sample=sample_id, entry=log_text)
                    stderr.print(f"[red]{log_text}")

            valid_metadata_rows.append(property_row)
        return valid_metadata_rows

    def create_metadata_json(self):
        """Run the whole process and write the lab metadata json file.

        Returns:
            bool: True once the output file has been written.

        Raises:
            SystemExit: With status 1 when no metadata is left to write.
        """
        stderr.print("[blue]Reading Lab Metadata Excel File")
        valid_metadata_rows = self.read_metadata_file()
        clean_metadata_rows, missing_samples = self.match_to_json(valid_metadata_rows)
        if missing_samples:
            num_miss = len(missing_samples)
            log.warning(
                "%s samples from metadata were not found: %s", num_miss, missing_samples
            )
            stderr.print(f"[yellow]{num_miss} samples missing:\n{missing_samples}")
        # Continue by adding extra information
        stderr.print("[blue]Including additional information")

        extended_metadata = self.adding_fields(clean_metadata_rows)
        stderr.print("[blue]Including post processing information")
        extended_metadata = self.adding_post_processing(extended_metadata)
        extended_metadata = self.adding_copy_from_other_field(extended_metadata)
        extended_metadata = self.adding_fixed_fields(extended_metadata)
        completed_metadata = self.adding_ontology_to_enum(extended_metadata)
        if not completed_metadata:
            stderr.print("Metadata was completely empty. No output file generated")
            sys.exit(1)
        file_code = "lab_metadata_" + self.lab_code + "_"
        file_name = file_code + dtime.now().strftime("%Y%m%d%H%M%S") + ".json"
        stderr.print("[blue]Writting output json file")
        os.makedirs(self.output_folder, exist_ok=True)
        self.logsum.create_error_summary(called_module="read-lab-metadata")
        file_path = os.path.join(self.output_folder, file_name)
        relecov_tools.utils.write_json_fo_file(completed_metadata, file_path)
        return True
Received error ", req.status_code + ) + return {"ERROR": req.status_code} + return {"DATA": json.loads(req.text)} + except requests.ConnectionError: + log.error("Unable to open connection towards %s", self.request_url) + stderr.print("[red] Unable to open connection towards ", self.request_url) + return {"ERROR": "Server not available"} + + def put_request(self, data, credentials, url): + if isinstance(credentials, dict): + auth = (credentials["user"], credentials["pass"]) + url_http = str(self.request_url + url) + try: + req = requests.put(url_http, data=data, auth=auth) + except requests.ConnectionError: + log.error("Unable to open connection towards %s", self.request_url) + stderr.print("[red] Unable to open connection towards ", self.request_url) + return {"ERROR": "Server not available"} + if req.status_code != 201: + log.error( + "Unable to post parameters. Received error code %s", + req.status_code, + ) + if req.status_code != 500: + stderr.print(f"[red] Unable to put data because {req.text}") + stderr.print(f"[red] Received error {req.status_code}") + return {"ERROR": req.status_code} + return {"Success": req.text} + + def post_request(self, data, credentials, url, file=None): + if isinstance(credentials, dict): + auth = (credentials["user"], credentials["pass"]) + url_http = str(self.request_url + url) + try: + if file: + files = {"upload_file": open(file, "rb")} + req = requests.post( + url_http, files=files, data=data, headers=self.headers, auth=auth + ) + else: + req = requests.post( + url_http, data=data, headers=self.headers, auth=auth + ) + if req.status_code != 201: + log.error( + "Unable to post parameters. 
Received error code %s", + req.status_code, + ) + stderr.print(f"[red] Received error {req.status_code}") + if req.status_code != 500: + stderr.print(f"[red] Unable to post data because {req.text}") + return {"ERROR": req.status_code, "ERROR_TEST": req.text} + else: + return {"ERROR": req.status_code, "ERROR_TEST": ""} + return {"Success": req.text} + except requests.ConnectionError: + log.error("Unable to open connection towards %s", self.request_url) + stderr.print("[red] Unable to open connection towards ", self.request_url) + return {"ERROR": "Server not available"} diff --git a/relecov_tools/schema/ena_schema.json b/relecov_tools/schema/ena_schema.json new file mode 100644 index 00000000..048d8d05 --- /dev/null +++ b/relecov_tools/schema/ena_schema.json @@ -0,0 +1,1352 @@ +{ + "schema": "ENA", + "version": "2.0.0", + "required": [ + "sample_title", + "collecting institution", + "geographic location (country and/or sea)", + "isolate", + "collector name", + "host scientific name", + "host common name", + "instrument_model", + "host subject id", + "host health state", + "host sex", + "host scientific name", + "scientific_name", + "sample_description", + "library_source", + "library_selection", + "library_strategy", + "library_layout", + "sample_alias", + "study_alias", + "experiment_alias", + "run_alias", + "experiment_title", + "study_title", + "study_type", + "platform", + "file_format", + "file_name" + ], + "type": "object", + "properties": { + "virus identifier": { + "examples": [ + "" + ], + "ontology": "GENEPIO:0001123", + "type": "string", + "description": "Unique laboratory identifier assigned to the virus by the investigator. Strain name is not sufficient since it might not be unique due to various passages of the same virus. 
Format: up to 50 alphanumeric characters", + "clasification": "Database Identifiers", + "label": "Sample ID given by originating laboratory", + "table": [ + "sample", + "experiments" + ] + }, + "collecting institution": { + "examples": [ + "Public Health Agency of Canada" + ], + "ontology": "GENEPIO:0001153", + "type": "string", + "description": "Name of the institution to which the person collecting the specimen belongs. Format: Institute Name, Institute Address", + "label": "Originating Laboratory", + "table": "sample" + }, + "collector name": { + "examples": [ + "John Smith, unknown" + ], + "ontology": "GENEPIO:0001797", + "type": "string", + "description": "Name of the person who collected the specimen", + "clasification": "Sample collection and processing", + "label": "Sample collector name", + "table": "sample" + }, + "collection date": { + "examples": [ + "3/19/2020" + ], + "ontology": "GENEPIO:0001174", + "type": "string", + "description": "The date on which the sample was collected.", + "format": "date", + "classification": "Sample collection and processing", + "label": "Sample Collection Date", + "table": "sample" + }, + "receipt date": { + "examples": [ + "3/21/2020" + ], + "ontology": "NCIT:C93644", + "type": "string", + "description": "The date on which the sample was received.", + "format": "date", + "clasification": "Sample collection and processing", + "label": "Sample Received Date" + }, + "design_description": { + "examples": [ + "design_description_1" + ], + "ontology": "NCIT:C147139", + "type": "string", + "description": "", + "clasification": "", + "label": "Design description", + "table": [ + "experiments" + ] + }, + "geographic location (country and/or sea)": { + "enum": [ + "Afghanistan [GAZ:00006882]", + "Albania [GAZ:00002953]", + "Algeria [GAZ:00000563]", + "American Samoa [GAZ:00003957]", + "Andorra [GAZ:00002948]", + "Angola [GAZ:00001095]", + "Anguilla [GAZ:00009159]", + "Antarctica [GAZ:00000462]", + "Antigua and Barbuda 
[GAZ:00006883]", + "Argentina [GAZ:00002928]", + "Armenia [GAZ:00004094]", + "Aruba [GAZ:00004025]", + "Ashmore and Cartier Islands [GAZ:00005901]", + "Australia [GAZ:00000463]", + "Austria [GAZ:00002942]", + "Azerbaijan [GAZ:00004941]", + "Bahamas [GAZ:00002733]", + "Bahrain [GAZ:00005281]", + "Baker Island [GAZ:00007117]", + "Bangladesh [GAZ:00003750]", + "Barbados [GAZ:00001251]", + "Bassas da India [GAZ:00005810]", + "Belarus [GAZ:00006886]", + "Belgium [GAZ:00002938]", + "Belize [GAZ:00002934]", + "Benin [GAZ:00000904]", + "Bermuda [GAZ:00001264]", + "Bhutan [GAZ:00003920]", + "Bolivia [GAZ:00002511]", + "Borneo [GAZ:00025355]", + "Bosnia and Herzegovina [GAZ:00006887]", + "Botswana [GAZ:00001097]", + "Bouvet Island [GAZ:00001453]", + "Brazil [GAZ:00002828]", + "British Virgin Islands [GAZ:00003961]", + "Brunei [GAZ:00003901]", + "Bulgaria [GAZ:00002950]", + "Burkina Faso [GAZ:00000905]", + "Burundi [GAZ:00001090]", + "Cambodia [GAZ:00006888]", + "Cameroon [GAZ:00001093]", + "Canada [GAZ:00002560]", + "Cape Verde [GAZ:00001227]", + "Cayman Islands [GAZ:00003986]", + "Central African Republic [GAZ:00001089]", + "Chad [GAZ:00000586]", + "Chile [GAZ:00002825]", + "China [GAZ:00002845]", + "Christmas Island [GAZ:00005915]", + "Clipperton Island [GAZ:00005838]", + "Cocos Islands [GAZ:00009721]", + "Colombia [GAZ:00002929]", + "Comoros [GAZ:00005820]", + "Cook Islands [GAZ:00053798]", + "Coral Sea Islands [GAZ:00005917]", + "Costa Rica [GAZ:00002901]", + "Cote d'Ivoire [GAZ:00000906]", + "Croatia [GAZ:00002719]", + "Cuba [GAZ:00003762]", + "Curacao [GAZ:00012582]", + "Cyprus [GAZ:00004006]", + "Czech Republic [GAZ:00002954]", + "Democratic Republic of the Congo [GAZ:00001086]", + "Denmark [GAZ:00005852]", + "Djibouti [GAZ:00000582]", + "Dominica [GAZ:00006890]", + "Dominican Republic [GAZ:00003952]", + "Ecuador [GAZ:00002912]", + "Egypt [GAZ:00003934]", + "El Salvador [GAZ:00002935]", + "Equatorial Guinea [GAZ:00001091]", + "Eritrea [GAZ:00000581]", + "Estonia 
[GAZ:00002959]", + "Eswatini [GAZ:00001099]", + "Ethiopia [GAZ:00000567]", + "Europa Island [GAZ:00005811]", + "Falkland Islands (Islas Malvinas) [GAZ:00001412]", + "Faroe Islands [GAZ:00059206]", + "Fiji [GAZ:00006891]", + "Finland [GAZ:00002937]", + "France [GAZ:00003940]", + "French Guiana [GAZ:00002516]", + "French Polynesia [GAZ:00002918]", + "French Southern and Antarctic Lands [GAZ:00003753]", + "Gabon [GAZ:00001092]", + "Gambia [GAZ:00000907]", + "Gaza Strip [GAZ:00009571]", + "Georgia [GAZ:00004942]", + "Germany [GAZ:00002646]", + "Ghana [GAZ:00000908]", + "Gibraltar [GAZ:00003987]", + "Glorioso Islands [GAZ:00005808]", + "Greece [GAZ:00002945]", + "Greenland [GAZ:00001507]", + "Grenada [GAZ:02000573]", + "Guadeloupe [GAZ:00067142]", + "Guam [GAZ:00003706]", + "Guatemala [GAZ:00002936]", + "Guernsey [GAZ:00001550]", + "Guinea [GAZ:00000909]", + "Guinea-Bissau [GAZ:00000910]", + "Guyana [GAZ:00002522]", + "Haiti [GAZ:00003953]", + "Heard Island and McDonald Islands [GAZ:00009718]", + "Honduras [GAZ:00002894]", + "Hong Kong [GAZ:00003203]", + "Howland Island [GAZ:00007120]", + "Hungary [GAZ:00002952]", + "Iceland [GAZ:00000843]", + "India [GAZ:00002839]", + "Indonesia [GAZ:00003727]", + "Iran [GAZ:00004474]", + "Iraq [GAZ:00004483]", + "Ireland [GAZ:00002943]", + "Isle of Man [GAZ:00052477]", + "Israel [GAZ:00002476]", + "Italy [GAZ:00002650]", + "Jamaica [GAZ:00003781]", + "Jan Mayen [GAZ:00005853]", + "Japan [GAZ:00002747]", + "Jarvis Island [GAZ:00007118]", + "Jersey [GAZ:00001551]", + "Johnston Atoll [GAZ:00007114]", + "Jordan [GAZ:00002473]", + "Juan de Nova Island [GAZ:00005809]", + "Kazakhstan [GAZ:00004999]", + "Kenya [GAZ:00001101]", + "Kerguelen Archipelago [GAZ:00005682]", + "Kingman Reef [GAZ:00007116]", + "Kiribati [GAZ:00006894]", + "Kosovo [GAZ:00011337]", + "Kuwait [GAZ:00005285]", + "Kyrgyzstan [GAZ:00006893]", + "Laos [GAZ:00006889]", + "Latvia [GAZ:00002958]", + "Lebanon [GAZ:00002478]", + "Lesotho [GAZ:00001098]", + "Liberia 
[GAZ:00000911]", + "Libya [GAZ:00000566]", + "Liechtenstein [GAZ:00003858]", + "Line Islands [GAZ:00007144]", + "Lithuania [GAZ:00002960]", + "Luxembourg [GAZ:00002947]", + "Macau [GAZ:00003202]", + "Madagascar [GAZ:00001108]", + "Malawi [GAZ:00001105]", + "Malaysia [GAZ:00003902]", + "Maldives [GAZ:00006924]", + "Mali [GAZ:00000584]", + "Malta [GAZ:00004017]", + "Marshall Islands [GAZ:00007161]", + "Martinique [GAZ:00067143]", + "Mauritania [GAZ:00000583]", + "Mauritius [GAZ:00003745]", + "Mayotte [GAZ:00003943]", + "Mexico [GAZ:00002852]", + "Micronesia [GAZ:00005862]", + "Midway Islands [GAZ:00007112]", + "Moldova [GAZ:00003897]", + "Monaco [GAZ:00003857]", + "Mongolia [GAZ:00008744]", + "Montenegro [GAZ:00006898]", + "Montserrat [GAZ:00003988]", + "Morocco [GAZ:00000565]", + "Mozambique [GAZ:00001100]", + "Myanmar [GAZ:00006899]", + "Namibia [GAZ:00001096]", + "Nauru [GAZ:00006900]", + "Navassa Island [GAZ:00007119]", + "Nepal [GAZ:00004399]", + "Netherlands [GAZ:00002946]", + "New Caledonia [GAZ:00005206]", + "New Zealand [GAZ:00000469]", + "Nicaragua [GAZ:00002978]", + "Niger [GAZ:00000585]", + "Nigeria [GAZ:00000912]", + "Niue [GAZ:00006902]", + "Norfolk Island [GAZ:00005908]", + "North Korea [GAZ:00002801]", + "North Macedonia [GAZ:00006895]", + "North Sea [GAZ:00002284]", + "Northern Mariana Islands [GAZ:00003958]", + "Norway [GAZ:00002699]", + "Oman [GAZ:00005283]", + "Pakistan [GAZ:00005246]", + "Palau [GAZ:00006905]", + "Panama [GAZ:00002892]", + "Papua New Guinea [GAZ:00003922]", + "Paracel Islands [GAZ:00010832]", + "Paraguay [GAZ:00002933]", + "Peru [GAZ:00002932]", + "Philippines [GAZ:00004525]", + "Pitcairn Islands [GAZ:00005867]", + "Poland [GAZ:00002939]", + "Portugal [GAZ:00004126]", + "Puerto Rico [GAZ:00006935]", + "Qatar [GAZ:00005286]", + "Republic of the Congo [GAZ:00001088]", + "Reunion [GAZ:00003945]", + "Romania [GAZ:00002951]", + "Ross Sea [GAZ:00023304]", + "Russia [GAZ:00002721]", + "Rwanda [GAZ:00001087]", + "Saint Helena 
[GAZ:00000849]", + "Saint Kitts and Nevis [GAZ:00006906]", + "Saint Lucia [GAZ:00006909]", + "Saint Pierre and Miquelon [GAZ:00003942]", + "Saint Martin [GAZ:00005841]", + "Saint Vincent and the Grenadines [GAZ:02000565]", + "Samoa [GAZ:00006910]", + "San Marino [GAZ:00003102]", + "Sao Tome and Principe [GAZ:00006927]", + "Saudi Arabia [GAZ:00005279]", + "Senegal [GAZ:00000913]", + "Serbia [GAZ:00002957]", + "Seychelles [GAZ:00006922]", + "Sierra Leone [GAZ:00000914]", + "Singapore [GAZ:00003923]", + "Sint Maarten [GAZ:00012579]", + "Slovakia [GAZ:00002956]", + "Slovenia [GAZ:00002955]", + "Solomon Islands [GAZ:00005275]", + "Somalia [GAZ:00001104]", + "South Africa [GAZ:00001094]", + "South Georgia and the South Sandwich Islands [GAZ:00003990]", + "South Korea [GAZ:00002802]", + "South Sudan [GAZ:00233439]", + "Spain [GAZ:00003936]", + "Spratly Islands [GAZ:00010831]", + "Sri Lanka [GAZ:00003924]", + "State of Palestine [GAZ:00002475]", + "Sudan [GAZ:00000560]", + "Suriname [GAZ:00002525]", + "Svalbard [GAZ:00005396]", + "Swaziland [GAZ:00001099]", + "Sweden [GAZ:00002729]", + "Switzerland [GAZ:00002941]", + "Syria [GAZ:00002474]", + "Taiwan [GAZ:00005341]", + "Tajikistan [GAZ:00006912]", + "Tanzania [GAZ:00001103]", + "Thailand [GAZ:00003744]", + "Timor-Leste [GAZ:00006913]", + "Togo [GAZ:00000915]", + "Tokelau [GAZ:00260188]", + "Tonga [GAZ:00006916]", + "Trinidad and Tobago [GAZ:00003767]", + "Tromelin Island [GAZ:00005812]", + "Tunisia [GAZ:00000562]", + "Turkey [GAZ:00000558]", + "Turkmenistan [GAZ:00005018]", + "Turks and Caicos Islands [GAZ:00003955]", + "Tuvalu [GAZ:00009715]", + "USA [GAZ:00002459]", + "Uganda [GAZ:00001102]", + "Ukraine [GAZ:00002724]", + "United Arab Emirates [GAZ:00005282]", + "United Kingdom [GAZ:00002637]", + "Uruguay [GAZ:00002930]", + "Uzbekistan [GAZ:00004979]", + "Vanuatu [GAZ:00006918]", + "Venezuela [GAZ:00002931]", + "Viet Nam [GAZ:00003756]", + "Virgin Islands [GAZ:00003959]", + "Wake Island [GAZ:00007111]", + "Wallis and 
Futuna [GAZ:00007191]", + "West Bank [GAZ:00009572]", + "Western Sahara [GAZ:00000564]", + "Yemen [GAZ:00005284]", + "Zambia [GAZ:00001107]", + "Zimbabwe [GAZ:00001106]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001181", + "type": "string", + "description": "The country of origin of the sample.", + "examples": [ + "South Africa [GAZ:00001094]" + ], + "clasification": "Sample collection and processing", + "label": "Geo Loc Autonomic Country", + "table": "sample" + }, + "geographic location (region and locality)": { + "ontology": "NCIT:C87189", + "type": "string", + "description": "The county/region of origin of the sample.", + "examples": [ + "Derbyshire" + ], + "clasification": "Sample collection and processing", + "label": "Geo Loc Province" + }, + "geographic location (latitude)": { + "ontology": "OBI:0001620", + "type": "string", + "description": "The latitude coordinates of the geographical location of sample collection.", + "examples": [ + "38.98 N" + ], + "clasification": "Sample collection and processing", + "label": "Geo Loc Latitude" + }, + "geographic location (longitude)": { + "ontology": "OBI:0001621", + "type": "string", + "description": "The longitude coordinates of the geographical location of sample collection.", + "examples": [ + "77.11 W" + ], + "clasification": "Sample collection and processing", + "label": "Geo Loc Longitude" + }, + "isolate": { + "ontology": "GENEPIO:0001644", + "type": "string", + "description": "Identifier of the specific isolate.", + "examples": [ + "SARS-CoV-2/human/USA/CA-CDPH-001/2020" + ], + "genepio_label": "isolate identifier", + "clasification": "Sample collection and processing", + "label": "Sample ID given by originating laboratory", + "table": "sample" + }, + "sample capture status": { + "examples": [ + "Diagnostic testing" + ], + "enum": [ + 
"Cluster/Outbreak Investigation [GENEPIO:0100001]", + "Diagnostic Testing [GENEPIO:0100002]", + "Research [GENEPIO:0100003]", + "Protocol Testing [GENEPIO:0100024]", + "Surveillance [GENEPIO:0100004]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001198", + "type": "string", + "description": "The reason that the sample was collected.", + "clasification": "Sample collection and processing" + }, + "isolation source host-associated": { + "enum": [ + "Blood [UBERON:0000178]", + "Fluid [UBERON:0006314]", + "Fluid (Cerebrospinal (CSF)) [UBERON:0001359]", + "Fluid (Pericardial) [UBERON:0002409]", + "Fluid (Pleural) [UBERON:0001087]", + "Fluid (Vaginal) [UBERON:0036243]", + "Fluid (Amniotic) [UBERON:0000173]", + "Saliva [UBERON:0001836]", + "Tissue [UBERON:0000479]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001211", + "type": "string", + "description": "A substance obtained from an anatomical part of an organism e.g. 
tissue, blood.", + "examples": [ + "Blood [UBERON:0000178]" + ], + "classification": "Sample collection and processing", + "label": "Specimen source" + }, + "isolation source non-host-associated": { + "enum": [ + "Air vent [ENVO:03501208]", + "Banknote [ENVO:00003896]", + "Bed rail [ENVO:03501209]", + "Building Floor [ENVO:01000486]", + "Cloth [ENVO:02000058]", + "Control Panel [ENVO:03501210]", + "Door [ENVO:03501220]", + "Door Handle [ENVO:03501211]", + "Face Mask [OBI:0002787]", + "Face Shield [OBI:0002791]", + "Food [FOODON:00002403]", + "Food Packaging [FOODON:03490100]", + "Glass [ENVO:01000481]", + "Handrail [ENVO:03501212]", + "Hospital Gown [OBI:0002796]", + "Light Switch [ENVO:03501213]", + "Locker [ENVO:03501214]", + "N95 Mask [OBI:0002790]", + "Nurse Call Button [ENVO:03501215]", + "Paper [ENVO:03501256]", + "Particulate Matter [ENVO:01000060]", + "Plastic [ENVO:01000404]", + "PPE Gown [GENEPIO:0100025]", + "Sewage [ENVO:00002018]", + "Sink [ENVO:01000990]", + "Soil [ENVO:00001998]", + "Stainless Steel [ENVO:03501216]", + "Tissue Paper [ENVO:03501217]", + "Toilet Bowl [ENVO:03501218]", + "Water [ENVO:00002006]", + "Wastewater [ENVO:00002001]", + "Window [ENVO:03501219]", + "Wood [ENVO:00002040]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001223", + "type": "string", + "description": "A substance obtained from the natural or man-made environment e.g. 
soil, water, sewage, door handle, bed handrail, face mask.", + "examples": [ + "Face Mask [OBI:0002787]" + ], + "clasification": "Sample collection and processing", + "label": "Environmental Material" + }, + "host common name": { + "enum": [ + "Human [NCBITaxon:9606]", + "Bat [NCBITaxon:9397]", + "Cat [NCBITaxon:9685]", + "Chicken [NCBITaxon:9031]", + "Civet [NCBITaxon:9673]", + "Cow [NCBITaxon:9913]", + "Dog [NCBITaxon:9615]", + "Lion [NCBITaxon:9689]", + "Mink [NCBITaxon:452646]", + "Pangolin [NCBITaxon:9973]", + "Pig [NCBITaxon:9825]", + "Pigeon [NCBITaxon:8930]", + "Tiger [NCBITaxon:9694]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001386", + "type": "string", + "description": "The commonly used name of the host.", + "examples": [ + "Human [NCBITaxon:9606]" + ], + "classification": "Host information", + "label": "Host Common Name", + "table": "sample" + }, + "host scientific name": { + "enum": [ + "Bos taurus [NCBITaxon:9913]", + "Canis lupus familiaris [NCBITaxon:9615]", + "Chiroptera [NCBITaxon:9397]", + "Columbidae [NCBITaxon:8930]", + "Felis catus [NCBITaxon:9685]", + "Gallus gallus [NCBITaxon:9031]", + "Homo sapiens [NCBITaxon:9606]", + "Manis [NCBITaxon:9973]", + "Manis javanica [NCBITaxon:9974]", + "Neovison vison [NCBITaxon:452646]", + "Panthera leo [NCBITaxon:9689]", + "Panthera tigris [NCBITaxon:9694]", + "Rhinolophidae [NCBITaxon:58055]", + "Rhinolophus affinis [NCBITaxon:59477]", + "Sus scrofa domesticus [NCBITaxon:9825]", + "Viverridae [NCBITaxon:9673]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001387", + "type": "string", + "description": "The taxonomic, or scientific name of the host.", + "examples": [ + "Homo 
sapiens [NCBITaxon:9606]" + ], + "clasification": "Host information", + "label": "Host Scientific Name", + "table": [ + "runs" + ] + }, + "host subject id": { + "examples": [ + "e.g. #131" + ], + "ontology": "GENEPIO:0000079", + "type": "string", + "description": "a unique identifier by which each subject can be referred to, de-identified.", + "clasification": "Host information", + "label": "Host Subject Id", + "table": [ + "sample" + ] + }, + "taxon_id": { + "examples": [ + "2697049" + ], + "ontology": "NCIT:C164641", + "type": "string", + "description": "The NCBITaxon identifier for the organism being sequenced.", + "classification": "Sample collection and processing", + "label": "Tax ID", + "fill_mode": "batch", + "minLenght": "1" + }, + "definition for seropositive sample": { + "examples": [ + "" + ], + "ontology": "NCIT:C159692", + "type": "string", + "description": "The cut off value used by an investigator in determining that a sample was seropositive.", + "clasification": "Sample collection and processing", + "label": "Definition for seropositive sample", + "table": [ + "sample" + ] + }, + "serotype (required for a seropositive sample)": { + "examples": [ + "h1n1" + ], + "ontology": "NCIT:C88894", + "type": "string", + "description": "Serological variety of a species characterised by its antigenic properties. For influenza, ha subtype should be the letter h followed by a number between 1-16 unless novel subtype is identified and the na subtype should be the letter n followed by a number between 1-9 unless novel subtype is identified. 
If only one of the subtypes has been tested then use the format h5nx or hxn1.", + "clasification": "Sample collection and processing", + "label": "Serotype (required for a seropositive sample)", + "table": [ + "sample" + ] + }, + "host habitat": { + "examples": [ + "" + ], + "ontology": "ENVO:00002036", + "type": "string", + "description": "Natural habitat of the avian or mammalian host.", + "clasification": "Sample collection and processing", + "label": "Host habitat", + "table": [ + "sample" + ] + }, + "host description": { + "examples": [ + "" + ], + "ontology": "GENEPIO:0000009", + "type": "string", + "description": "Other descriptive information relating to the host.", + "clasification": "Host information", + "label": "Host description", + "table": [ + "sample" + ] + }, + "gravidity": { + "examples": [ + "" + ], + "ontology": "OBI:0002456", + "type": "string", + "description": "Whether or not the subject is gravid. If so, report date due or date post-conception and specify which of these two dates is being reported.", + "clasification": "Host information", + "label": "Gravidity", + "table": [ + "sample" + ] + }, + "strain": { + "examples": [ + "" + ], + "ontology": "GENEPIO:0001123", + "type": "string", + "description": "Name of the strain from which the sample was obtained.", + "clasification": "Sample collection and processing", + "label": "Strain", + "table": [ + "sample" + ] + }, + "host_age": { + "ontology": "GENEPIO:0001392", + "type": "string", + "description": "Age of host at the time of sampling.", + "examples": [ + "79" + ], + "classification": "Host information", + "label": "Host Age", + "fill_mode": "sample" + }, + "host disease outcome": { + "examples": [ + "" + ], + "ontology": "GENEPIO:0001390", + "type": "string", + "description": "Disease outcome in the host.", + "clasification": "Host information", + "label": "Host disease outcome", + "table": [ + "sample" + ] + }, + "host health state": { + "examples": [ + "" + ], + "ontology": 
"GENEPIO:0001388", + "type": "string", + "description": " Status of the host", + "clasification": "Host information", + "label": "Host health state", + "table": [ + "sample" + ] + }, + "type exposure": { + "examples": [ + "" + ], + "ontology": "GENEPIO:0001417", + "type": "string", + "description": " Date, Location e.g. type of gathering, Family cluster, etc.", + "clasification": "Host information", + "label": "Outbreak Exposure Event Location", + "table": [ + "sample" + ] + }, + "personal protective equipment": { + "examples": [ + "" + ], + "ontology": "NCIT:C173748", + "type": "string", + "description": "Use of personal protective equipment, such as gloves, gowns, during any type of exposure. Example: mask", + "clasification": "Host information", + "label": "Personal protective equipment", + "table": [ + "sample" + ] + }, + "host sex": { + "enum": [ + "Female [NCIT:C46110]", + "Male [NCIT:C46109]", + "Non-binary Gender [GSSO:000132]", + "Transgender (assigned male at birth) [GSSO:004004]", + "Transgender (assigned female at birth) [GSSO:004005]", + "Undeclared [NCIT:C110959]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001395", + "type": "string", + "description": "The gender of the host at the time of sample collection.", + "examples": [ + "Male [NCIT:C46109]" + ], + "label": "Host Gender", + "table": [ + "sample" + ] + }, + "subject exposure duration": { + "examples": [ + "e.g. Patient infected while traveling in …." 
+ ], + "ontology": "NCIT:C83280", + "type": "string", + "description": "If the information is unknown or can not be shared, leave blank.", + "clasification": "Host information", + "label": "Additional Host Information" + }, + "instrument_model": { + "enum": [ + "Illumina sequencing instrument [GENEPIO:0100105]", + "Illumina Genome Analyzer [GENEPIO:0100106]", + "Illumina Genome Analyzer II [GENEPIO:0100107]", + "Illumina Genome Analyzer IIx [GENEPIO:0100108]", + "Illumina HiScanSQ [GENEPIO:0100109]", + "Illumina HiSeq [GENEPIO:0100110]", + "Illumina HiSeq X [GENEPIO:0100111]", + "Illumina HiSeq X Five [GENEPIO:0100112]", + "Illumina HiSeq X Ten [GENEPIO:0100113]", + "Illumina HiSeq 1000 [GENEPIO:0100114]", + "Illumina HiSeq 1500 [GENEPIO:0100115]", + "Illumina HiSeq 2000 [GENEPIO:0100116]", + "Illumina HiSeq 2500 [GENEPIO:0100117]", + "Illumina HiSeq 3000 [GENEPIO:0100118]", + "Illumina HiSeq 4000 [GENEPIO:0100119]", + "Illumina iSeq [GENEPIO:0100120]", + "Illumina iSeq 100 [GENEPIO:0100121]", + "Illumina NovaSeq [GENEPIO:0100122]", + "Illumina NovaSeq 6000 [GENEPIO:0100123]", + "Illumina MiniSeq [GENEPIO:0100124]", + "Illumina MiSeq [GENEPIO:0100125]", + "Illumina NextSeq [GENEPIO:0100126]", + "Illumina NextSeq 500 [GENEPIO:0100127]", + "Illumina NextSeq 550 [GENEPIO:0100128]", + "Illumina NextSeq 2000 [GENEPIO:0100129]", + "Pacific Biosciences sequencing instrument [GENEPIO:0100130]", + "PacBio RS [GENEPIO:0100131]", + "PacBio RS II [GENEPIO:0100132]", + "PacBio Sequel [GENEPIO:0100133]", + "PacBio Sequel II [GENEPIO:0100134]", + "Ion Torrent sequencing instrument [GENEPIO:0100135]", + "Ion Torrent PGM [GENEPIO:0100136]", + "Ion Torrent Proton [GENEPIO:0100137]", + "Ion Torrent S5 XL [GENEPIO:0100138]", + "Ion Torrent S5 [GENEPIO:0100139]", + "Oxford Nanopore sequencing instrument [GENEPIO:0100140]", + "Oxford Nanopore GridION [GENEPIO:0100141]", + "Oxford Nanopore MinION [GENEPIO:0100142]", + "Oxford Nanopore PromethION [GENEPIO:0100143]", + "BGI Genomics 
sequencing instrument [GENEPIO:0100144]", + "BGI SEQ-500 [GENEPIO:0100145]", + "MGI sequencing instrument [GENEPIO:0100146]", + "MGI DNBSEQ-T7 [GENEPIO:0100147]", + "MGI DNBSEQ-G400 [GENEPIO:0100148]", + "MGI DNBSEQ-G400RS FAST [GENEPIO:0100149]", + "MGI DNBSEQ-G50 [GENEPIO:0100150]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001452", + "type": "string", + "description": "The model of the sequencing instrument used.", + "examples": [ + "Oxford Nanopore MinION [GENEPIO:0100142]" + ], + "classification": "Sequencing", + "label": "Sequencing Instrument Model", + "table": [ + "experiments" + ] + }, + "platform": { + "enum": [ + "Oxford Nanopore [OBI:0002750]", + "Illumina [OBI:0000759]", + "Ion Torrent [GENEPIO:0002683]", + "PacBio [GENEPIO:0001927]", + "BGI", + "MGI", + "Other" + ], + "examples": [ + "Illumina" + ], + "ontology": "GENEPIO_0000071", + "type": "string", + "description": "", + "classification": "Sequencing", + "label": "Sequencing Instrument Platform", + "fill_mode": "batch", + "minLenght": "1" + }, + "sequence_file_R1_fastq": { + "examples": [ + "ABC123_S1_L001_R1_001.fastq.gz" + ], + "ontology": "GENEPIO:0001476", + "type": "string", + "description": "The user-specified filename of the r1 FASTQ file.", + "clasification": "Bioinformatics and QC metrics", + "label": "Sequence file R1 fastq" + }, + "sequence_file_R2_fastq": { + "examples": [ + "ABC123_S1_L001_R2_001.fastq.gz" + ], + "ontology": "GENEPIO:0001477", + "type": "string", + "description": "The user-specified filename of the r2 FASTQ file.", + "clasification": "Bioinformatics and QC metrics", + "label": "Sequence file R2 fastq" + }, + "r1_fastq_filepath": { + "examples": [ + "/User/Documents/RespLab/Data/" + ], + "ontology": "GENEPIO:0001478", + "type": "string", + "description": "The filepath of the r1 FASTQ file.", + 
"clasification": "Bioinformatics and QC metrics", + "label": "Filepath R1 fastq" + }, + "r2_fastq_filepath": { + "examples": [ + "/User/Documents/RespLab/Data/" + ], + "ontology": "GENEPIO:0001479", + "type": "string", + "description": "The filepath of the r2 FASTQ file.", + "clasification": "Bioinformatics and QC metrics", + "label": "Filepath R2 fastq" + }, + "sequence_file_R2_md5": { + "examples": [ + "3e69af1f875fab020aed82f5edbc1f03" + ], + "ontology": "MS_1000569", + "type": "string", + "description": "", + "clasification": "Bioinformatics and QC metrics", + "label": "file checksum", + "table": [ + "runs" + ] + }, + "scientific_name": { + "enum": [ + "Coronaviridae [NCBITaxon:11118]", + "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "examples": [ + "" + ], + "ontology": "NCIT:C43459", + "type": "string", + "description": "The taxonomic name of the organism.", + "clasification": "Sample collection and processing", + "classification": "Sample collection and processing", + "label": "Organism", + "table": "sample" + }, + "hospitalisation": { + "examples": [], + "ontology": "NCIT:C25179", + "type": "string", + "description": " Was the subject confined to a hospital as a result of virus infection or problems occurring secondary to virus infection?", + "clasification": "Sample collection and processing", + "label": "Hospitalisation", + "table": "sample" + }, + "illness symptoms": { + "examples": [ + "cough, diarrhea, fever, headache, malaise, myalgia" + ], + "ontology": "GENEPIO:0001523", + "type": "string", + "description": "The symptoms that have been reported in relation to the illness. 
If multiple symptoms are applicable, please state them separated by semicolon.", + "clasification": "Sample collection and processing", + "label": "Illness symptoms", + "table": "sample" + }, + "sample_alias": { + "examples": [ + "" + ], + "ontology": "GENEPIO:0001148", + "type": "string", + "description": "Unique Identifier for the sample", + "clasification": "Sample collection and processing", + "label": "Sample ID given by submitting laboratory", + "table": "sample" + }, + "sample_description": { + "examples": [ + "Sample from Belgian Covid-19 patient. Sample was obtained at the Hospital AZ Rivierenland, in Antwerp, Belgium." + ], + "ontology": "sep:00196", + "type": "string", + "description": "Free text description of the sample.", + "clasification": "Sample collection and processing", + "label": "Sample Description", + "table": "sample" + }, + "sample storage conditions": { + "examples": [ + "24 degrees celsius" + ], + "ontology": "NCIT:C115535", + "type": "string", + "description": "The name and version of a particular protocol used for sampling.", + "clasification": "Sample collection and processing", + "label": "Biological Sample Storage Condition" + }, + "library_source": { + "enum": [ + "genomic", + "genomic single cell", + "transcriptomic", + "transcriptomic single cell", + "metagenomic", + "metatranscriptomic", + "synthetic", + "viral rna", + "other" + ], + "examples": [ + "METAGENOMIC" + ], + "ontology": "GENEPIO_0001965", + "type": "string", + "description": "Molecule type used to make the library.", + "clasification": "Sequencing", + "label": "Source material", + "table": [ + "experiments" + ] + }, + "library_selection": { + "enum": [ + "RANDOM [NCIT:C60702]", + "PCR [GENEPIO:0001955]", + "RANDOM PCR [GENEPIO:0001957]", + "RT-PCR [GENEPIO:0001959]", + "HMPR [GENEPIO:0001949]", + "MF [GENEPIO:0001952]", + "repeat fractionation", + "size fractionation [GENEPIO:0001963]", + "MSLL [GENEPIO:0001954]", + "cDNA [GENEPIO:0001962]", + "ChIP 
[GENEPIO:0001947]", + "MNase [GENEPIO:0001953]", + "DNase [GENEPIO:0001948]", + "Hybrid Selection [GENEPIO:0001950]", + "Reduced Representation [GENEPIO:0001960]", + "Restriction Digest [GENEPIO:0001961]", + "5-methylcytidine antibody [GENEPIO:0001941]", + "MBD2 protein methyl-CpG binding domain [GENEPIO:0001951]", + "CAGE [GENEPIO:0001942]", + "RACE [GENEPIO:0001956]", + "MDA", + "padlock probes capture method", + "Oligo-dT", + "Inverse rRNA selection", + "ChIP-Seq [GENEPIO:0001947]", + "Other" + ], + "examples": [ + "RANDOM PCR" + ], + "ontology": "GENEPIO_0001940", + "type": "string", + "description": "Library capture method.", + "classification": "Sequencing", + "label": "Capture method", + "fill_mode": "batch" + }, + "library_construction_protocol": { + "examples": [ + "library_construction_protocol_1" + ], + "ontology": "GENEPIO:0001450", + "type": "string", + "description": "", + "clasification": "Sequencing", + "label": "Library Construction protocol", + "table": [ + "experiments" + ] + }, + "library_strategy": { + "enum": [ + "Bisultife-Seq strategy [GENEPIO:0001975]", + "CTS strategy [GENEPIO:0001978]", + "ChIP-Seq strategy [GENEPIO:0001979]", + "DNase-Hypersensitivity strategy [GENEPIO:0001980]", + "EST strategy [GENEPIO:0001981]", + "FL-cDNA strategy [GENEPIO:0001983]", + "MB-Seq strategy [GENEPIO:0001984]", + "MNase-Seq strategy [GENEPIO:0001985]", + "MRE-Seq strategy [GENEPIO:0001986]", + "MeDIP-Seq strategy [GENEPIO:0001987]", + "RNA-Seq strategy [GENEPIO:0001990]", + "WCS strategy [GENEPIO:0001991]", + "WGS strategy [GENEPIO:0001992]", + "WXS strategy [GENEPIO:0001993]", + "Amplicon [GENEPIO:0001974]", + "Clone end strategy [GENEPIO:0001976]", + "Clone strategy [GENEPIO:0001977]", + "Finishing strategy [GENEPIO:0001982]", + "Other library strategy [GENEPIO:0001988]", + "Pool clone strategy [GENEPIO:0001989]" + ], + "examples": [ + "WGS" + ], + "ontology": "GENEPIO_0001973", + "type": "string", + "description": "Overall sequencing strategy or 
approach.", + "classification": "Sequencing", + "label": "Sequencing technique", + "fill_mode": "batch" + }, + "library_layout": { + "examples": [ + "CTS strategy" + ], + "ontology": "NCIT:C175894", + "type": "string", + "description": "Single or paired.", + "clasification": "Sequencing", + "label": "Library Layout", + "table": [ + "experiments" + ] + }, + "library_name": { + "examples": [ + "e.g PAIRED" + ], + "ontology": "GENEPIO:0001450", + "type": "string", + "description": "The submitter's name for this library.", + "clasification": "Sequencing", + "label": "Library Name", + "table": [ + "experiments" + ] + }, + "insert_size": { + "examples": [ + "350" + ], + "ontology": "GENEPIO_0000076", + "type": "string", + "description": "", + "clasification": "Sequencing", + "label": "Insert size" + }, + "experiment_title": { + "examples": [ + "experiment_title_1" + ], + "ontology": "ORNASEQ_0000004", + "type": "string", + "description": "", + "clasification": "Submission ENA", + "label": "Experiment title", + "table": [ + "experiments" + ] + }, + "sample_title": { + "examples": [ + "s_20201007_026" + ], + "ontology": "GENEPIO:0000079", + "type": "string", + "description": "", + "clasification": "Submission ENA", + "label": "Sample title", + "table": [ + "sample" + ] + }, + "study_title": { + "examples": [ + "e.g SARS-CoV-2 genomes from late April in Stockholm" + ], + "ontology": "OPMI_0000380", + "type": "string", + "description": "", + "clasification": "Submission ENA", + "label": "Study title", + "table": [ + "study" + ] + }, + "study_type": { + "examples": [ + "" + ], + "enum": [ + "Whole Genome Sequencing [NCIT:C101294]", + "Metagenomics [NCIT:C153191]", + "Transcriptome Analysis [GENEPIO:0001111]", + "Resequencing [NCIT:C41254]", + "Epigenetics [OMIT:0027036] ", + "Synthetic Genomics [NCIT:C84343]", + "Forensic or Paleo-genomics [topic:3943]", + "Gene Regulation Study [topic:0204]", + "Cancer Genomics [NCIT:C18247]", + "Population Genomics [topic:3796]", + "RNASeq 
[OBI:0001177]", + "Exome Sequencing [OBI:0002118]", + "Pooled Clone Sequencing [OBI:2100402]", + "Transcriptome Sequencing [NCIT:C124261]", + "Other [NCIT:C124261]" + ], + "ontology": "GENEPIO:0000156", + "type": "string", + "description": "", + "clasification": "Submission ENA", + "label": "Study type", + "table": "study" + }, + "study_abstract": { + "examples": [ + "Whole-genome sequences of SARS-CoV-2 from oro-pharyngeal swabs obtained from Belgian Covid-19 patients." + ], + "ontology": "OPMI:0000380", + "type": "string", + "description": " Briefly describes the goals, purpose, and scope of the Study. This need not be listed if it can be inherited from a referenced publication.", + "clasification": "Submission ENA", + "label": "Study abstract", + "table": [ + "study" + ] + }, + "study_alias": { + "examples": [ + "e.g Sweden" + ], + "ontology": "SIO_001066", + "type": "string", + "description": "", + "clasification": "Submission ENA", + "label": "Study alias", + "table": [ + "study", + "experiments" + ] + }, + "experiment_alias": { + "examples": [ + "experiment_alias_7a" + ], + "ontology": "NCIT_C42790", + "type": "string", + "description": "", + "clasification": "Submission ENA", + "label": "Experiment alias", + "table": [ + "experiments" + ] + }, + "file_format": { + "examples": [ + "BAM,CRAM,FASTQ" + ], + "enum": [ + "BAM [format:2572]", + "CRAM [format:3462]", + "FASTQ [format:1930]" + ], + "ontology": "NMR:1001459", + "type": "string", + "description": "The run data file model.", + "clasification": "Submission ENA", + "label": "File format", + "table": [ + "experiments" + ] + }, + "run_alias": { + "examples": [ + "e.g ena-EXPERIMENT-KAROLINSKA INSITUTET-29-07-2020-14:50:07:151-1" + ], + "ontology": "NCIT_C47911", + "type": "string", + "description": "", + "clasification": "Submission ENA", + "label": "Run Alias", + "table": [ + "runs" + ] + }, + "sequence_file_R1_md5": { + "examples": [ + "" + ], + "ontology": "MS_1000568", + "type": "string", + 
"description": "", + "clasification": "Submission ENA", + "label": "Fastq md5 r1" + }, + "broker_name": { + "examples": [ + "P17157_1007" + ], + "ontology": "BU_ISCIII:045", + "type": "string", + "description": "", + "clasification": "Submission ENA", + "label": "Broker Name" + }, + "center_name": { + "examples": [ + "P17157_1007" + ], + "ontology": "GENEPIO:0001153", + "type": "string", + "description": "", + "clasification": "Submission ENA", + "label": "Originating Laboratory" + }, + "authors": { + "examples": [ + "" + ], + "ontology": "GENEPIO:0001517", + "type": "string", + "description": "", + "classification": "Public databases", + "label": "Authors", + "fill_mode": "batch" + }, + "address": { + "examples": [ + "655 Lab St, Vancouver, British Columbia, V5N 2A2, Canada" + ], + "ontology": "GENEPIO:0001158", + "type": "string", + "description": "The mailing address of the agency submitting the sample.", + "classification": "Sample collection and processing", + "label": "Originating Laboratory Address", + "fill_mode": "batch" + } + } +} diff --git a/relecov_tools/schema/gisaid_schema.json b/relecov_tools/schema/gisaid_schema.json new file mode 100644 index 00000000..5e84e4e7 --- /dev/null +++ b/relecov_tools/schema/gisaid_schema.json @@ -0,0 +1,270 @@ +{ + "schema": "GISAID#", + "required": [ + "covv_type", + "covv_virus_name", + "submitter", + "covv_orig_lab", + "covv_orig_lab_addr", + "covv_collection_date", + "covv_location", + "covv_host", + "covv_patient_age", + "covv_gender", + "covv_seq_technology", + "covv_subm_lab", + "covv_subm_lab_addr", + "covv_authors" + ], + "type": "object", + "properties": { + "covv_type": { + "examples": ["betacoronavirus"], + "ontology": "NCIT:C25284", + "type": "string", + "description": "default must remain 'betacoronavirus'", + "clasification": "Database Identifiers" + }, + "sample_name": { + "examples": ["prov_rona_99"], + "ontology": "0", + "type": "string", + "description": "The user-defined name for the sample.", + 
"clasification": "Database Identifiers", + "label": "Collecting Sample id" + }, + "covv_virus_name": { + "examples": ["hCoV-19/Canada/prov_rona_99/2020"], + "ontology": "GENEPIO:0100282", + "type": "string", + "description": "The user-defined GISAID virus name assigned to the sequence.", + "clasification": "Database Identifiers", + "label": "GISAID Virus Name" + }, + "submitter": { + "examples": [""], + "ontology": "NCIT:C54269", + "type": "string", + "description": "", + "clasification": "enter your GISAID-Username", + "label": "GISAID Id" + }, + "covv_orig_lab": { + "examples": ["Public Health Agency of Canada"], + "ontology": "GENEPIO:0001153", + "type": "string", + "description": "The name of the agency that collected the original sample.", + "label": "Originating Laboratory" + }, + "covv_orig_lab_addr": { + "examples": ["655 Lab St, Vancouver, British Columbia, V5N 2A2, Canada"], + "ontology": "GENEPIO:0001158", + "type": "string", + "description": "The mailing address of the agency submitting the sample.", + "clasification": "Sample collection and processing", + "label": "Originating Laboratory Address" + }, + "covv_subm_lab_addr": { + "examples": ["123 Sunnybrooke St, Toronto, Ontario, M4P 1L6, Canada"], + "ontology": "GENEPIO:0001167", + "type": "string", + "description": "The mailing address of the agency submitting the sequence.", + "clasification": "Sample collection and processing", + "label": "Submitting Institution Address" + }, + "covv_collection_date": { + "examples": ["3/19/2020"], + "ontology": "GENEPIO:0001174", + "type": "string", + "description": "The date on which the sample was collected.", + "format": "date", + "classification": "Sample collection and processing", + "label": "Sample Collection Date" + }, + "covv_location": { + "examples": ["e.g. 
Europe / Germany / Bavaria / Munich"], + "ontology": "GENEPIO:0001181", + "type": "string", + "description": "The country of origin of the sample.", + "clasification": "Sample collection and processing", + "label": "Geo Loc Autonomic Country" + }, + "covv_host": { + "examples": [ + "e.g. Human, Environment, Canine, Manis javanica, Rhinolophus affinis, etc" + ], + "ontology": "GENEPIO:0001387", + "type": "string", + "description": "The taxonomic, or scientific name of the host.", + "clasification": "Host information", + "label": "Host Scientific Name" + }, + "covv_patient_age": { + "ontology": "GENEPIO:0001392", + "anyOf": [{"type": "integer"}, {"pattern": "\\d+-\\d+", "type": "string"}], + "description": "Age of host at the time of sampling.", + "examples": [79], + "label": "Host Age" + }, + "covv_specimen": { + "enum": [ + "Blood [UBERON:0000178]", + "Fluid [UBERON:0006314]", + "Fluid (Cerebrospinal (CSF)) [UBERON:0001359]", + "Fluid (Pericardial) [UBERON:0002409]", + "Fluid (Pleural) [UBERON:0001087]", + "Fluid (Vaginal) [UBERON:0036243]", + "Fluid (Amniotic) [UBERON:0000173]", + "Saliva [UBERON:0001836]", + "Tissue [UBERON:0000479]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001211", + "type": "string", + "description": "A substance obtained from an anatomical part of an organism e.g. 
tissue, blood.", + "examples": ["Blood [UBERON:0000178]"], + "classification": "Sample collection and processing", + "label": "Organism Substance" + }, + "covv_assembly_method": { + "examples": ["Ivar"], + "ontology": "GENEPIO:0001463", + "type": "string", + "description": "The name of software used to generate the consensus sequence.", + "clasification": "Bioinformatics and QC metrics", + "label": "Consensus sequence software name" + }, + "covv_gender": { + "enum": [ + "Female [NCIT:C46110]", + "Male [NCIT:C46109]", + "Non-binary Gender [GSSO:000132]", + "Transgender (assigned male at birth) [GSSO:004004]", + "Transgender (assigned female at birth) [GSSO:004005]", + "Undeclared [NCIT:C110959]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "examples": ["Male [NCIT:C46109]"], + "ontology": "GENEPIO:0001395", + "type": "string", + "description": "The gender of the host at the time of sample collection.", + "clasification": "Host information" + }, + "covv_seq_technology": { + "examples": [ + "e.g. Illumina Miseq, Sanger, Nanopore MinION, Ion Torrent, etc." 
+ ], + "ontology": "GENEPIO:0001452", + "type": "string", + "description": "The model of the sequencing instrument used.", + "classification": "Sequencing", + "label": "Sequencing Instrument Model" + }, + "covv_authors": { + "examples": [""], + "ontology": "GENEPIO:0001517", + "type": "string", + "description": "", + "clasification": "Contributor Acknowledgement", + "label": "Authors" + }, + "covv_coverage": { + "examples": ["400x"], + "ontology": "GENEPIO:0001474", + "type": "string", + "description": "The average number of reads representing a given nucleotide in the reconstructed sequence.", + "clasification": "Bioinformatics and QC metrics", + "label": "Depth of coverage value " + }, + "covv_fasta_filename": { + "examples": [""], + "ontology": "GENEPIO:0001460", + "type": "string", + "description": "Filename that contains the sequence without path", + "clasification": "Bioinformatics and QC metrics" + }, + "covv_add_host_info": { + "examples": ["e.g. Patient infected while traveling in …."], + "ontology": "GENEPIO_0001268", + "type": "string", + "description": "If the information is unknown or can not be shared, leave blank.", + "clasification": "Host information", + "label": "Additional Host Information" + }, + "covv_last_vaccinated": { + "examples": ["e.g. 04/09/2021"], + "ontology": "0", + "type": "string", + "description": "The date the host received their last dose of vaccine.", + "format": "date", + "clasification": "Host information" + }, + "covv_outbreak": { + "examples": ["Date, Location e.g. type of gathering, Family cluster, etc."], + "ontology": "GENEPIO:0001417", + "type": "string", + "description": "If the information is unknown or can not be shared, leave blank.", + "clasification": "Host information", + "label": "Outbreak Exposure Event Location" + }, + "covv_sampling_strategy": { + "examples": [ + "e.g. 
Sentinel surveillance (ILI), Sentinel surveillance (ARI), Sentinel surveillance (SARI), Non-sentinel-surveillance (hospital), Non-sentinel-surveillance (GP network), Longitudinal sampling on same patient(s), S gene dropout" + ], + "ontology": "GENEPIO:0001445", + "type": "string", + "description": "The reason that the sample was sequenced.", + "classification": "Sequencing", + "label": "Purpose of Sequencing" + }, + "submitting_lab_sequence_id": { + "examples": [""], + "ontology": "0", + "type": "string", + "description": "Sample ID given by the submitting laboratory", + "clasification": "Database Identifiers" + }, + "covv_subm_lab": { + "examples": ["Centers for Disease Control and Prevention"], + "ontology": "GENEPIO:0001159", + "type": "string", + "description": "The name of the agency that generated the sequence.", + "label": "Submitting Institution" + }, + "covv_subm_sample_id": { + "examples": "", + "ontology": "GENEPIO:0001148", + "type": "string", + "description": "Sample ID given by the submitting laboratory", + "classification": "Database Identifiers", + "label": "Sample ID given by submitting laboratory", + "fill_mode": "sample" + }, + "covv_provider_sample_id": { + "examples": ["prov_rona_99"], + "ontology": "GENEPIO:0001123", + "type": "string", + "description": "The user-defined name for the sample.", + "classification": "Database Identifiers", + "label": "Sample ID given by originating laboratory", + "fill_mode": "sample" + }, + "covv_patient_status": { + "examples": ["Hospitalized", "Released", "Live", "Deceased"], + "ontology": "GENEPIO:0001388", + "type": "string", + "description": " Status of the host", + "classification": "Host information", + "label": "Host health state", + "fill_mode": "sample" + } + } +} diff --git a/relecov_tools/schema/institution_schemas/HUGTiP.json b/relecov_tools/schema/institution_schemas/HUGTiP.json new file mode 100644 index 00000000..b83a10cc --- /dev/null +++ b/relecov_tools/schema/institution_schemas/HUGTiP.json @@ 
-0,0 +1,53 @@ + +{ + "equivalences" : { + "Sample ID given by the submitting laboratory": "sample_id", + "Sample ID given in the microbiology lab": "sample_id", + "Sample ID given for sequencing": "sample_id", + "Sample Collection Date": "collection_date", + "Originating Laboratory": "OriginatingLab", + "Purpose of sampling": "sequencing_strategy", + "Specimen source": "source", + "Host": "host", + "Host Age": "age", + "Host Gender": "gender", + "Authors": "OriginatingLabAuthors" + }, + "constants" : { + "Public Health sample id (SIVIES)": "", + "Sample ID given if multiple rna-extraction or passages": "", + "ENA Sample ID": "", + "GISAID Virus Name": "", + "GISAID id": "", + "Submitting Institution": "HUGTiP", + "Sample Received Date": "", + "Biological Sample Storage Condition": "-80ºC", + "Environmental Material": "", + "Environmental System": "", + "Collection Device": "", + "Sequencing Date": "", + "Rna Extraction Protocol": "Seegene", + "Commercial All-in-one library kit": "", + "Library Preparation Kit": "", + "Enrichment Protocol": "", + "If Enrichment Protocol. 
If Other,Specify": "", + "Enrichment panel/assay": "", + "Number Of Samples In Run": "", + "Runid": "", + "Sequencing Instrument Model": "", + "Flowcell Kit": "", + "Source material": "", + "Capture method": "", + "Sequencing technique": "", + "Library Layout": "", + "Gene Name 1": "", + "Diagnostic Pcr Ct Value 1": "", + "Gene Name 2": "", + "Diagnostic Pcr Ct Value-2": "", + "Analysis Authors":"Marc Noguera-Julian", + "Author Submitter": "Elisa Martró", + "Sequence file R1 fastq": "", + "Sequence file R2 fastq": "" + }, + "outer" : {} +} diff --git a/relecov_tools/schema/institution_schemas/HUNSC-ITER.json b/relecov_tools/schema/institution_schemas/HUNSC-ITER.json new file mode 100644 index 00000000..f3614518 --- /dev/null +++ b/relecov_tools/schema/institution_schemas/HUNSC-ITER.json @@ -0,0 +1,56 @@ +{ + "equivalence": { + "Sample ID given by originating laboratory": "Sample ID given by originating laboratory", + "Sample ID given by the submitting laboratory": "Sample ID given by the submitting laboratory", + "Originating Laboratory": "Originating lab", + "Submitting Institution": "Submitting lab", + "Sample Collection Date": "Collection date", + "Specimen source": "Specimen source", + "Host": "Host", + "Host Age": "Patient age", + "Host Gender": "Gender", + "Library Preparation Kit": "Assembly method", + "Enrichment panel/assay": "Enrichment panel/assay", + "Enrichment panel/assay version": "Enrichment panel/assay version", + "Sequencing Instrument Model": "Sequencing technology", + "Diagnostic Pcr Ct Value 1": "Diagnostic Pcr Ct Value 1", + "Analysis Authors": "Authors", + "Author Submitter": "Submitter", + "Authors": "Authors", + "Sequence file R1 fastq": "FASTQ R1", + "Sequence file R2 fastq": "FASTQ R2" + }, + "constants": { + "Public Health sample id (SIVIES)": "NA", + "Sample ID given in the microbiology lab": "NA", + "Sample ID given if multiple rna-extraction or passages": "NA", + "Sample ID given for sequencing": "NA", + "ENA Sample ID": "NA", + 
"GISAID Virus Name": "NA", + "GISAID id": "NA", + "Sample Received Date": "NA", + "Purpose of sampling": "NA", + "Biological Sample Storage Condition": "-80ºC", + "Environmental Material": "NA", + "Environmental System": "NA", + "Collection Device": "Swab", + "Sequencing Date": "NA", + "Rna Extraction Protocol": "NA", + "Commercial All-in-one library kit": "NA", + "Enrichment Protocol": "Amplicon", + "If Enrichment Protocol. If Other,Specify": "NA", + "If Enrichment panel/assay. If Other, Specify": "NA", + "Enrichment panel/assay version": "NA", + "Number Of Samples In Run": "NA", + "Runid": "NA", + "Flowcell Kit": "NA", + "Source material": "VIRAL RNA", + "Capture method": "NA", + "Sequencing technique": "NA", + "Library Layout": "NA", + "Gene Name 1": "NA", + "Gene Name 2": "NA", + "Diagnostic Pcr Ct Value-2": "NA" + }, + "outer": {} +} \ No newline at end of file diff --git a/relecov_tools/schema/institution_schemas/ISCIII.json b/relecov_tools/schema/institution_schemas/ISCIII.json new file mode 100644 index 00000000..1d228ea0 --- /dev/null +++ b/relecov_tools/schema/institution_schemas/ISCIII.json @@ -0,0 +1,149 @@ +{ + "python_file": "ISCIII.py", + "required_files": { + "metadata_file": { + "file_name": "new_lab_metadata.xlsx", + "mapped_fields": { + "Public Health sample id (SIVIES)": "Código SiViEs", + "Sample ID given by originating laboratory": "Ref Hospital", + "Sample ID given by the submitting laboratory": "ID CNM", + "Sample ID given in the microbiology lab": "ID VI-VRP", + "Sample ID given if multiple rna-extraction or passages": "ID VI-VRP", + "Sample ID given for sequencing": "ID VI-VRP", + "GISAID id": "ID GISAID", + "Originating Laboratory": "Hospital", + "Sample Collection Date": "Fecha de toma", + "Sample Received Date": "Fecha recepción", + "Host Age": "Edad (años)", + "Host Gender": "Sexo", + "Diagnostic Pcr Ct Value 1": "PCR genE", + "Specimen source": "Muestra", + "Purpose of Sequencing": "Contexto", + "Nucleic acid extraction protocol": 
"Extracción" + }, + "function": "None" + }, + "sample_file": { + "file_name": "sample_data.json", + "mapped_fields": { + "Sequence file R1 fastq": "sequence_file_R1_fastq", + "Sequence file R2 fastq": "sequence_file_R2_fastq" + }, + "function": "None", + "mapped_key": "Sample ID given for sequencing" + }, + "run_id_file": { + "file_name": "samples_run_services_length.tsv", + "mapped_fields": { + "Runid": "run_name" + }, + "function": "None", + "mapped_key": "Sample ID given for sequencing" + }, + "org_lab_file": { + "file_name": "laboratory_mapping_list.tsv", + "mapped_fields": { + "Originating Laboratory": "mapped_org_lab" + }, + "mapped_key": "", + "function": "replace_originating_lab" + }, + "sequencer_file": { + "file_name": "samples_run_services_length.tsv", + "mapped_fields": { + "Sequencing Instrument Model": "run_name" + }, + "mapped_key": "", + "function": "added_seq_inst_model" + }, + "host_gender_file": { + "file_name": "", + "mapped_fields": { + "Host Gender": "" + }, + "mapped_key": "", + "function": "translate_gender_to_english" + }, + "specimen_source_file": { + "file_name": "", + "mapped_fields": { + "Specimen source": "" + }, + "mapped_key": "", + "function": "translate_specimen_source" + }, + "authors_file": { + "file_name": "gisaid_authors.tsv", + "mapped_fields": { + "Authors": "authors" + }, + "mapped_key": "GISAID id", + "function": "None" + }, + "run_date": { + "file_name": "runName_runDate.tsv", + "mapped_fields": { + "Sequencing Date": "run_date" + }, + "mapped_key": "Runid", + "function": "None" + }, + "read_length": { + "file_name": "samples_run_services_length.tsv", + "mapped_fields": { + "Read Length" : "read1_cycles" + }, + "mapped_key": "Sample ID given for sequencing", + "function": "None" + }, + "samples_in_run": { + "file_name": "run_and_num_of_samples.csv", + "mapped_fields": { + "Number Of Samples In Run": "num_samples" + }, + "mapped_key": "Runid", + "function": "None" + }, + "artic_version": { + "file_name": 
"samples_artic_version.tsv", + "mapped_fields": { + "Enrichment panel/assay version": "artic_version" + }, + "mapped_key": "Sample ID given for sequencing", + "function": "None" + }, + "purpose_of_sequencing": { + "file_name": "", + "mapped_fields" : {"Purpose of Sequencing" : "" }, + "mapped_key" : "", + "function": "translate_purpose_seq_to_english" + }, + "nucleic_acid_extraction_protocol": { + "file_name": "", + "mapped_fields" : {"Nucleic acid extraction protocol" : "" }, + "mapped_key" : "", + "function": "translate_nucleic_acid_extract_prot" + }, + "library_layout": { + "file_name": "samples_run_services_length.tsv", + "mapped_fields" : {"Library Layout" : "read2_cycles" }, + "mapped_key" : "sample_name", + "function": "findout_library_layout" + } + }, + "fixed_fields": { + "Submitting Institution": "Instituto de Salud Carlos III", + "Biological Sample Storage Condition": "-80 C", + "Purpose of sampling": "Surveillance", + "Specimen source": "Nasopharyngeal exudate", + "Host": "Human", + "Library Preparation Kit": "Illumina DNA Prep", + "Enrichment Protocol": "Amplicon", + "Enrichment panel/assay": "ARTIC", + "Source material": "viral rna", + "Capture method": "PCR", + "Sequencing technique": "Amplicon", + "Gene Name 1": "E gene", + "Diagnostic Pcr Ct Value 1": "" + } +} diff --git a/relecov_tools/schema/institution_schemas/template.json b/relecov_tools/schema/institution_schemas/template.json new file mode 100644 index 00000000..ba335aca --- /dev/null +++ b/relecov_tools/schema/institution_schemas/template.json @@ -0,0 +1,22 @@ +{ + "equivalence": { + "Final_term": "Initial term" + }, + "constants": { + "Final_term": "Constant value" + }, + "outer": { + "final term 1": { + "filename":"filename", + "sheet" : "in case of excel format, state sheet", + "column" : "name of the column where the needed value is", + "samplename_col" : "name of the column containing the samplenames" + }, + "final term 2" : { + "filename":"filename 2", + "sheet" : "in case of excel 
format, state sheet 2", + "column" : "name of the column where the needed value is 2", + "samplename_col" : "name of the column containing the samplenames 2" + } + } +} \ No newline at end of file diff --git a/relecov_tools/schema/institution_to_schema.json b/relecov_tools/schema/institution_to_schema.json new file mode 100644 index 00000000..eb0ff59c --- /dev/null +++ b/relecov_tools/schema/institution_to_schema.json @@ -0,0 +1,6 @@ +{ + "ITER" : "HUNSC-ITER.json", + "HUNSC-ITER" : "HUNSC-ITER.json", + "ISCIII" : "ISCIII.json", + "HUGTiP" : "HUGTiP.json" +} diff --git a/relecov_tools/schema/phage_schema.json b/relecov_tools/schema/phage_schema.json new file mode 100644 index 00000000..2524cf6c --- /dev/null +++ b/relecov_tools/schema/phage_schema.json @@ -0,0 +1,3551 @@ +{ + "schema": "PHAGE", + "required": [ + "sequence_submitted_by", + "sample_collected_by", + "sequencing_instrument", + "host_scientific_name", + "isolate", + "sample_collection_date", + "host_disease", + "consensus_sequence_software_name", + "geo_loc_name_state_province_territory", + "specimen_collector_sample_id", + "consensus_sequence_software_version", + "organism", + "geo_loc_name_country" + ], + "type": "object", + "properties": { + "sample_collector_contact_email": { + "examples": [ + "johnnyblogs@lab.ca" + ], + "ontology": "OBI:0001890", + "type": "string", + "description": "The email address of the contact responsible for follow-up regarding the sample.", + "clasification": "Sample collection and processing", + "label": "Originating Laboratory Email" + }, + "depth_of_coverage_value": { + "examples": [ + "400x" + ], + "ontology": "GENEPIO:0001474", + "type": "string", + "description": "The average number of reads representing a given nucleotide in the reconstructed sequence.", + "clasification": "Bioinformatics and QC metrics", + "label": "Depth of coverage value " + }, + "exposure_details": { + "ontology": "GENEPIO:0001431", + "type": "string", + "description": "Additional host exposure 
information.", + "examples": [ + "Host role - Other: Bus Driver" + ] + }, + "breadth_of_coverage_value": { + "examples": [ + "95%" + ], + "ontology": "GENEPIO:0001472", + "type": "string", + "description": "The percentage of the reference genome covered by the sequenced data, to a prescribed depth.", + "clasification": "Bioinformatics and QC metrics", + "label": "Breadth of coverage value" + }, + "most_recent_travel_departure_date": { + "examples": [ + "16/03/2020" + ], + "ontology": "GENEPIO:0001414", + "type": "string", + "description": "The date of a person's most recent departure from their primary residence (at that time) on a journey to one or more other locations.", + "format": "date" + }, + "sequencing_protocol": { + "examples": [ + "Genomes were generated through amplicon sequencing of 1200 bp amplicons with Freed schema primers. Libraries were created using Illumina DNA Prep kits, and sequence data was produced using Miseq Micro v2 (500 cycles) sequencing kits." + ], + "ontology": "GENEPIO:0001454", + "type": "string", + "description": "The protocol used to generate the sequence.", + "clasification": "Sequencing", + "label": "Sequencing Protocol" + }, + "%Ns": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Pathogen diagnostic testing", + "label": "%Ns" + }, + "shipping_date": { + "examples": [ + "3/20/2020" + ], + "ontology": "0", + "type": "string", + "description": "The date on which the sample was sent.", + "format": "date", + "clasification": "Sample collection and processing", + "label": "Shipping Date" + }, + "results_emission_date": { + "examples": [ + "3/23/2020" + ], + "ontology": "NCIT_C142672", + "type": "string", + "description": "The date on which the results were emitted.", + "format": "date", + "clasification": "Sample collection and processing", + "label": "Results Emission Date" + }, + "Number_of_variants_(AF_greater_75%)": { + "examples": [ + "" + ], + "ontology": "0", + 
"type": "string", + "description": "", + "clasification": "Pathogen diagnostic testing", + "label": "Number of variants (AF greater 75%)" + }, + "Numer_of_variants_with_effect": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Pathogen diagnostic testing", + "label": "Numer of variants with effect" + }, + "host_age_bin": { + "enum": [ + "0 - 9 [GENEPIO:0100049]", + "10 - 19 [GENEPIO:0100050]", + "20 - 29 [GENEPIO:0100051]", + "30 - 39 [GENEPIO:0100052]", + "40 - 49 [GENEPIO:0100053]", + "50 - 59 [GENEPIO:0100054]", + "60 - 69 [GENEPIO:0100055]", + "70 - 79 [GENEPIO:0100056]", + "80 - 89 [GENEPIO:0100057]", + "90 - 99 [GENEPIO:0100058]", + "100+ [GENEPIO:0100059]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001394", + "type": "string", + "description": "The age category of the host at the time of sampling.", + "examples": [ + "50 - 59 [GENEPIO:0100054]" + ] + }, + "anatomical_material": { + "enum": [ + "Blood [UBERON:0000178]", + "Fluid [UBERON:0006314]", + "Fluid (Cerebrospinal (CSF)) [UBERON:0001359]", + "Fluid (Pericardial) [UBERON:0002409]", + "Fluid (Pleural) [UBERON:0001087]", + "Fluid (Vaginal) [UBERON:0036243]", + "Fluid (Amniotic) [UBERON:0000173]", + "Saliva [UBERON:0001836]", + "Tissue [UBERON:0000479]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001211", + "type": "string", + "description": "A substance obtained from an anatomical part of an organism e.g. 
tissue, blood.", + "examples": [ + "Blood [UBERON:0000178]" + ], + "classification": "Sample collection and processing", + "label": "Organism Substance" + }, + "sample_collection_date": { + "examples": [ + "3/19/2020" + ], + "ontology": "GENEPIO:0001174", + "type": "string", + "description": "The date on which the sample was collected.", + "format": "date", + "classification": "Sample collection and processing", + "label": "Sample Collection Date" + }, + "diagnostic_pcr_Ct_value_2": { + "examples": [ + "36" + ], + "ontology": "GENEPIO:0001512", + "type": "string", + "description": "The cycle threshold (CT) value result from a diagnostic SARS-CoV-2 RT-PCR test.", + "clasification": "Pathogen diagnostic testing", + "label": "Diagnostic Pcr Ct Value-2" + }, + "number_of_vaccine_doses_received": { + "ontology": "GENEPIO:0001406", + "type": "integer", + "description": "The number of doses of the vaccine received by the host.", + "examples": [ + 2 + ] + }, + "lab_host": { + "enum": [ + "293/ACE2 Cell Line [GENEPIO:0100041]", + "Caco2 Cell Line [BTO:0000195]", + "Calu3 Cell Line [BTO:0002750]", + "EFK3B Cell Line [GENEPIO:0100042]", + "HEK293T Cell Line [BTO:0002181]", + "HRCE Cell Line [GENEPIO:0100043]", + "Huh7 Cell Line [BTO:0001950]", + "LLCMk2 Cell Line [CLO:0007330]", + "MDBK Cell Line [BTO:0000836]", + "NHBE Cell Line [BTO:0002924]", + "PK-15 Cell Line [BTO:0001865]", + "RK-13 Cell Line [BTO:0002909]", + "U251 Cell Line [BTO:0002035]", + "Vero Cell Line [BTO:0001444]", + "Vero E6 Cell Line [BTO:0004755]", + "Vero E6/TMPRSS2 Cell Line [GENEPIO:0100044]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001255", + "type": "string", + "description": "Name and description of the laboratory host used to propagate the source organism or material from which the sample was obtained.", + "examples": [ + "Vero E6
Cell Line [BTO:0004755]" + ], + "classification": "Sample collection and processing", + "label": "Lab passage Host" + }, + "anatomical_part": { + "enum": [ + "Anus [UBERON:0001245]", + "Duodenum [UBERON:0002114]", + "Eye [UBERON:0000970]", + "Intestine [UBERON:0000160]", + "Lower respiratory tract [UBERON:0001558]", + "Bronchus [UBERON:0002185]", + "Lung [UBERON:0002048]", + "Bronchiole [UBERON:0002186]", + "Alveolar sac [UBERON:0002169]", + "Pleural sac [UBERON:0009778]", + "Pleural cavity [UBERON:0002402]", + "Trachea [UBERON:0003126]", + "Rectum [UBERON:0001052]", + "Skin [UBERON:0001003]", + "Stomach [UBERON:0000945]", + "Upper respiratory tract [UBERON:0001557]", + "Anterior Nares [UBERON:2001427]", + "Esophagus [UBERON:0001043]", + "Ethmoid sinus [UBERON:0002453]", + "Nasal Cavity [UBERON:0001707]", + "Middle Nasal Turbinate [UBERON:0001762]", + "Inferior Nasal Turbinate [UBERON:0005922]", + "Nasopharynx (NP) [UBERON:0001728]", + "Oropharynx (OP) [UBERON:0001729]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001214", + "type": "string", + "description": "An anatomical part of an organism e.g. oropharynx. 
", + "examples": [ + "Nasopharynx (NP) [UBERON:0001728]" + ], + "classification": "Sample collection and processing", + "label": "Anatomical Structure" + }, + "host_health_outcome": { + "enum": [ + "Deceased [NCIT:C28554]", + "Deteriorating [NCIT:C25254]", + "Recovered [NCIT:C49498]", + "Stable [NCIT:C30103]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001390", + "type": "string", + "description": "Disease outcome in the host.", + "examples": [ + "Recovered [NCIT:C49498]" + ] + }, + "rna_extraction_protocol": { + "examples": [ + "" + ], + "ontology": "OBI_0302884", + "type": "string", + "description": "", + "clasification": "Sequencing", + "label": "Rna Extraction Protocol" + }, + "sequencing_date": { + "examples": [ + "22/06/2020" + ], + "ontology": "GENEPIO:0001447", + "type": "string", + "description": "The date the sample was sequenced.", + "format": "date", + "clasification": "Sequencing", + "label": "Sequencing date" + }, + "passage_number": { + "ontology": "GENEPIO:0001261", + "type": "integer", + "description": "Number of passages.", + "examples": [ + 3 + ], + "clasification": "Sample collection and processing", + "label": "Passage Number" + }, + "ns_per_100_kbp": { + "examples": [ + "300" + ], + "ontology": "GENEPIO:0001484", + "type": "string", + "description": "The number of N symbols present in the consensus fasta sequence, per 100kbp of sequence.", + "clasification": "Bioinformatics and QC metrics", + "label": "Ns per 100 kbp" + }, + "case_id": { + "ontology": "GENEPIO:0100281", + "type": "string", + "description": "The identifier used to specify an epidemiologically detected case of disease.", + "examples": [ + "ABCD1234" + ] + }, + "biosample_accession": { + "examples": [ + "SAMN14180202" + ], + "ontology": "GENEPIO:0001139", + "type": "string", + "description": "The identifier 
assigned to a BioSample in INSDC archives.", + "clasification": "Database Identifiers", + "label": "Biosample accession ENA" + }, + "last_dose_vacicnation_date": { + "examples": [ + "09/04/2021" + ], + "ontology": "GENEPIO:0001408", + "type": "string", + "description": "The date the host received their last dose of vaccine.", + "format": "date" + }, + "bioinformatics_protocol": { + "examples": [ + "https://www.protocols.io/groups/cphln-sarscov2-sequencing-consortium/members" + ], + "ontology": "GENEPIO:0001489", + "type": "string", + "description": "The name of the bioinformatics protocol used.", + "clasification": "Bioinformatics and QC metrics", + "label": "Bioinformatics protocol" + }, + "if_bioinformatic_protocol_is_other_specify": { + "examples": [ + "https://www.protocols.io/groups/cphln-sarscov2-sequencing-consortium/members" + ], + "ontology": "0", + "type": "string", + "description": "The name of the bioinformatics protocol used.", + "clasification": "Bioinformatics and QC metrics", + "label": "If bioinformatic protocol Is Other, Specify" + }, + "bioinformatic_protocol_version": { + "examples": [ + "https://www.protocols.io/groups/cphln-sarscov2-sequencing-consortium/members" + ], + "ontology": "NCIT:C93490", + "type": "string", + "description": "The version number of the bioinformatics protocol used.", + "clasification": "Bioinformatics and QC metrics", + "label": "bioinformatics protocol version" + }, + "preprocessing": { + "examples": [ + "" + ], + "ontology": "MS_1002386", + "type": "string", + "description": "", + "clasification": "Bioinformatics and QC metrics", + "label": "Preprocessing" + }, + "if_preprocessing_other": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Bioinformatics and QC metrics", + "label": "If preprocessing Is Other, Specify" + }, + "preprocessing_params": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": 
"Bioinformatics and QC metrics", + "label": "Preprocessing params" + }, + "mapping": { + "examples": [ + "" + ], + "ontology": "topic:0102", + "type": "string", + "description": "", + "clasification": "Lineage and Variant information", + "label": "Mapping" + }, + "mapping_params": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Lineage and Variant information", + "label": "Mapping params" + }, + "commercial/open-source/both": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Bioinformatics and QC metrics", + "label": "Commercial/Open-source/both" + }, + "prior_sars_cov_2_antiviral_treatment": { + "enum": [ + "Prior antiviral treatment [GENEPIO:0100037]", + "No prior antiviral treatment [GENEPIO:0100233]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001438", + "type": "string", + "description": "Whether there was prior SARS-CoV-2 treatment with an antiviral agent.", + "examples": [ + "Prior antiviral treatment [GENEPIO:0100037]" + ] + }, + "gene_name_2": { + "enum": [ + "E gene (orf4) [GENEPIO:0100151]", + "M gene (orf5) [GENEPIO:0100152]", + "N gene (orf9) [GENEPIO:0100153]", + "Spike gene (orf2) [GENEPIO:0100154]", + "orf1ab (rep) [GENEPIO:0100155]", + "orf1a (pp1a) [GENEPIO:0100156]", + "nsp11 [GENEPIO:0100157]", + "nsp1 [GENEPIO:0100158]", + "nsp2 [GENEPIO:0100159]", + "nsp3 [GENEPIO:0100160]", + "nsp4 [GENEPIO:0100161]", + "nsp5 [GENEPIO:0100162]", + "nsp6 [GENEPIO:0100163]", + "nsp7 [GENEPIO:0100164]", + "nsp8 [GENEPIO:0100165]", + "nsp9 [GENEPIO:0100166]", + "nsp10 [GENEPIO:0100167]", + "RdRp gene (nsp12) [GENEPIO:0100168]", + "hel gene (nsp13) [GENEPIO:0100169]", + "exoN gene (nsp14) [GENEPIO:0100170]", + "nsp15 [GENEPIO:0100171]", + "nsp16 [GENEPIO:0100172]", + "orf3a 
[GENEPIO:0100173]", + "orf3b [GENEPIO:0100174]", + "orf6 (ns6) [GENEPIO:0100175]", + "orf7a [GENEPIO:0100176]", + "orf7b (ns7b) [GENEPIO:0100177]", + "orf8 (ns8) [GENEPIO:0100178]", + "orf9b [GENEPIO:0100179]", + "orf9c [GENEPIO:0100180]", + "orf10 [GENEPIO:0100181]", + "orf14 [GENEPIO:0100182]", + "SARS-COV-2 5' UTR [GENEPIO:0100183]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001510", + "type": "string", + "description": "The name of the gene used in the diagnostic RT-PCR test.", + "examples": [ + "RdRp gene (nsp12) [GENEPIO:0100168]" + ], + "classification": "Pathogen diagnostic testing", + "label": "Gene Name 2" + }, + "body_product": { + "enum": [ + "Breast Milk [UBERON:0001913]", + "Feces [UBERON:0001988]", + "Mucus [UBERON:0000912]", + "Semen [UBERON:0006530]", + "Sputum [UBERON:0007311]", + "Sweat [UBERON:0001089]", + "Tear [UBERON:0001827]", + "Urine [UBERON:0001088]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001216", + "type": "string", + "description": "A substance excreted/secreted from an organism e.g. 
feces, urine, sweat.", + "examples": [ + "Feces [UBERON:0001988]" + ], + "classification": "Sample collection and processing", + "label": "Body product" + }, + "host_age": { + "ontology": "GENEPIO:0001392", + "anyOf": [ + { + "type": "integer" + }, + { + "pattern": "\\d+-\\d+", + "type": "string" + } + ], + "description": "Age of host at the time of sampling.", + "examples": [ + 79 + ] + }, + "host_ethnicity": { + "ontology": "GECKO:0000061", + "type": "string", + "description": "The self-identified ethnicity(ies) of the host.", + "examples": [ + "Indigenous, European" + ], + "label": "Host ethnicity" + }, + "collection_device": { + "enum": [ + "Air filter [ENVO:00003968]", + "Blood Collection Tube [NCIT:C113122]", + "Bronchoscope [OBI:0002826]", + "Collection Container [OBI:0002088]", + "Collection Cup [GENEPIO:0100026]", + "Fibrobronchoscope Brush [OBI:0002825]", + "Filter [GENEPIO:0100103]", + "Fine Needle [OBI:0002827]", + "Microcapillary tube [OBI:0002858]", + "Micropipette [OBI:0001128]", + "Needle [OBI:0000436]", + "Serum Collection Tube [OBI:0002860]", + "Sputum Collection Tube [OBI:0002861]", + "Suction Catheter [OBI:0002831]", + "Swab [GENEPIO:0100027]", + "Urine Collection Tube [OBI:0002862]", + "Virus Transport Medium [OBI:0002866]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001234", + "type": "string", + "description": "The instrument or container used to collect the sample e.g. 
swab.", + "examples": [ + "Swab [GENEPIO:0100027]" + ], + "clasification": "Sample collection and processing", + "label": "Collection Device" + }, + "purpose_of_sequencing": { + "enum": [ + "Baseline surveillance (random sampling) [GENEPIO:0100005]", + "Targeted surveillance (non-random sampling) [GENEPIO:0100006]", + "Priority surveillance projects [GENEPIO:0100007]", + "Screening for Variants of Concern (VOC) [GENEPIO:0100008]", + "Sample has epidemiological link to Variant of Concern (VoC) [GENEPIO:0100273]", + "Sample has epidemiological link to Omicron Variant [GENEPIO:0100274]", + "Longitudinal surveillance (repeat sampling of individuals) [GENEPIO:0100009]", + "Re-infection surveillance [GENEPIO:0100010]", + "Vaccine escape surveillance [GENEPIO:0100011]", + "Travel-associated surveillance [GENEPIO:0100012]", + "Domestic travel surveillance [GENEPIO:0100013]", + "Interstate/ interprovincial travel surveillance [GENEPIO:0100275]", + "Intra-state/ intra-provincial travel surveillance [GENEPIO:0100276]", + "International travel surveillance [GENEPIO:0100014]", + "Surveillance of international border crossing by air travel or ground transport [GENEPIO:0100015]", + "Surveillance of international border crossing by air travel [GENEPIO:0100016]", + "Surveillance of international border crossing by ground transport [GENEPIO:0100017]", + "Surveillance from international worker testing [GENEPIO:0100018]", + "Cluster/Outbreak investigation [GENEPIO:0100019]", + "Multi-jurisdictional outbreak investigation [GENEPIO:0100020]", + "Intra-jurisdictional outbreak investigation [GENEPIO:0100021]", + "Research [GENEPIO:0100022]", + "Viral passage experiment [GENEPIO:0100023]", + "Protocol testing [GENEPIO:0100024]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001445", + "type": "string", + "description": "The 
reason that the sample was sequenced.", + "examples": [ + "Baseline surveillance (random sampling) [GENEPIO:0100005]" + ], + "classification": "Sequencing", + "label": "Purpose of Sequencing" + }, + "raw_sequence_data_processing_method": { + "examples": [ + "Porechop 0.2.3" + ], + "ontology": "GENEPIO:0001458", + "type": "string", + "description": "The method used for raw data processing such as removing barcodes, adapter trimming, filtering etc.", + "clasification": "Bioinformatics and QC metrics", + "label": "Raw sequence data processin method" + }, + "sequence_submitter_contact_address": { + "examples": [ + "123 Sunnybrooke St, Toronto, Ontario, M4P 1L6, Canada" + ], + "ontology": "GENEPIO:0001167", + "type": "string", + "description": "The mailing address of the agency submitting the sequence.", + "clasification": "Sample collection and processing", + "label": "Submitting Institution Address" + }, + "depth_of_coverage_threshold": { + "examples": [ + "100x" + ], + "ontology": "GENEPIO:0001475", + "type": "string", + "description": "The threshold used as a cut-off for the depth of coverage.", + "clasification": "Bioinformatics and QC metrics", + "label": "Depth of coverage threshold" + }, + "specimen_processing": { + "enum": [ + "Virus Passage [GENEPIO:0100039]", + "RNA Re-Extraction (Post RT-PCR) [GENEPIO:0100040]", + "Specimens Pooled [OBI:0600016]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001253", + "type": "string", + "description": "Any processing applied to the sample during or after receiving the sample. 
", + "examples": [ + "Virus Passage [GENEPIO:0100039]" + ], + "classification": "Sample collection and processing", + "label": "Specimen Processing" + }, + "r1_fastq_filename": { + "examples": [ + "ABC123_S1_L001_R1_001.fastq.gz" + ], + "ontology": "GENEPIO:0001476", + "type": "string", + "description": "The user-specified filename of the r1 FASTQ file.", + "clasification": "Bioinformatics and QC metrics", + "label": "Sequence file R1 fastq" + }, + "destination_of_most_recent_travel_city": { + "ontology": "GENEPIO:0001411", + "type": "string", + "description": "The name of the city that was the destination of most recent travel.", + "examples": [ + "New York City" + ] + }, + "r1_fastq_filepath": { + "examples": [ + "/User/Documents/RespLab/Data/" + ], + "ontology": "GENEPIO:0001478", + "type": "string", + "description": "The filepath of the r1 FASTQ file.", + "clasification": "Bioinformatics and QC metrics", + "label": "Filepath R1 fastq" + }, + "library_id": { + "ontology": "GENEPIO:0001448", + "type": "string", + "description": "The user-specified identifier for the library prepared for sequencing.", + "examples": [ + "XYZ_123345" + ], + "clasification": "Sequencing", + "labe": "Library Id" + }, + "enrichment_protocol": { + "examples": [ + "" + ], + "ontology": "EFO_0009089", + "type": "string", + "description": "", + "clasification": "Sequencing", + "label": "Enrichment_protocol" + }, + "if_enrichment_protocol_is_other_specify": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Sequencing", + "label": "If Enrichment Protocol Is Other, Specify" + }, + "if_consensus_other": { + "examples": [ + "1.3" + ], + "ontology": "0", + "type": "string", + "description": "The version of the software used to generate the consensus sequence.", + "clasification": "Bioinformatics and QC metrics", + "label": "If consensus Is Other, Specify" + }, + "if_mapping_other": { + "examples": [ + "" + ], + "ontology": "0", + "type": 
"string", + "description": "", + "clasification": "Lineage and Variant information", + "label": "If mapping Is Other, Specify" + }, + "amplicon protocol": { + "examples": [ + "" + ], + "ontology": "EFO_0003747", + "type": "string", + "description": "", + "clasification": "Sequencing", + "label": "Amplicon Protocol" + }, + "if_amplicon_protocol_if_other_especify": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Sequencing", + "labe": "If Amplicon Protocol If Other, Especify" + }, + "amplicon_version": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Sequencing", + "label": "Amplicon Version" + }, + "host_vaccination_status": { + "enum": [ + "Fully Vaccinated [GENEPIO:0100100]", + "Partially Vaccinated [GENEPIO:0100101]", + "Not Vaccinated [GENEPIO:0100102]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001404", + "type": "string", + "description": "The vaccination status of the host (fully vaccinated, partially vaccinated, or not vaccinated).", + "examples": [ + "Fully Vaccinated [GENEPIO:0100100]" + ] + }, + "geo_loc_longitude": { + "ontology": "OBI:0001621", + "type": "string", + "description": "The longitude coordinates of the geographical location of sample collection.", + "examples": [ + "77.11 W" + ], + "clasification": "Sample collection and processing", + "label": "Geo Loc Longitude" + }, + "genbank_ena_ddbj_accession": { + "examples": [ + "MN908947.3" + ], + "ontology": "GENEPIO:0001145", + "type": "string", + "description": "The GenBank/ENA/DDBJ identifier assigned to the sequence in the INSDC archives.", + "clasification": "Database Identifiers", + "label": "GenBank/ENA/DDBJ accession" + }, + "bioproject_accession": { + "examples": [ + "PRJNA12345" + ], + "ontology": 
"GENEPIO:0001136", + "type": "string", + "description": "The INSDC accession number of the BioProject(s) to which the BioSample belongs.", + "clasification": "Database Identifiers", + "label": "Bioproject accession ENA" + }, + "lineage/clade_analysis_software_name": { + "examples": [ + "Pangolin" + ], + "ontology": "GENEPIO:0001501", + "type": "string", + "description": "The name of the software used to determine the lineage/clade.", + "clasification": "Lineage and Variant information", + "label": "lineage/clade analysis software name" + }, + "analysis_author": { + "examples": [ + "" + ], + "ontology": "NCIT:C42781", + "type": "string", + "description": "", + "clasification": "Contributor Acknowledgement", + "label": "Analysis Authors" + }, + "if_lineage_identification_other": { + "examples": [ + "Other than Pangolin" + ], + "ontology": "0", + "type": "string", + "description": "The name of the software used to determine the lineage/clade.", + "clasification": "Lineage and Variant information", + "label": "If lineage identification Is Other, Specify" + }, + "location_of_exposure_geo_loc_name_country": { + "enum": [ + "Afghanistan [GAZ:00006882]", + "Albania [GAZ:00002953]", + "Algeria [GAZ:00000563]", + "American Samoa [GAZ:00003957]", + "Andorra [GAZ:00002948]", + "Angola [GAZ:00001095]", + "Anguilla [GAZ:00009159]", + "Antarctica [GAZ:00000462]", + "Antigua and Barbuda [GAZ:00006883]", + "Argentina [GAZ:00002928]", + "Armenia [GAZ:00004094]", + "Aruba [GAZ:00004025]", + "Ashmore and Cartier Islands [GAZ:00005901]", + "Australia [GAZ:00000463]", + "Austria [GAZ:00002942]", + "Azerbaijan [GAZ:00004941]", + "Bahamas [GAZ:00002733]", + "Bahrain [GAZ:00005281]", + "Baker Island [GAZ:00007117]", + "Bangladesh [GAZ:00003750]", + "Barbados [GAZ:00001251]", + "Bassas da India [GAZ:00005810]", + "Belarus [GAZ:00006886]", + "Belgium [GAZ:00002938]", + "Belize [GAZ:00002934]", + "Benin [GAZ:00000904]", + "Bermuda [GAZ:00001264]", + "Bhutan [GAZ:00003920]", + "Bolivia 
[GAZ:00002511]", + "Borneo [GAZ:00025355]", + "Bosnia and Herzegovina [GAZ:00006887]", + "Botswana [GAZ:00001097]", + "Bouvet Island [GAZ:00001453]", + "Brazil [GAZ:00002828]", + "British Virgin Islands [GAZ:00003961]", + "Brunei [GAZ:00003901]", + "Bulgaria [GAZ:00002950]", + "Burkina Faso [GAZ:00000905]", + "Burundi [GAZ:00001090]", + "Cambodia [GAZ:00006888]", + "Cameroon [GAZ:00001093]", + "Canada [GAZ:00002560]", + "Cape Verde [GAZ:00001227]", + "Cayman Islands [GAZ:00003986]", + "Central African Republic [GAZ:00001089]", + "Chad [GAZ:00000586]", + "Chile [GAZ:00002825]", + "China [GAZ:00002845]", + "Christmas Island [GAZ:00005915]", + "Clipperton Island [GAZ:00005838]", + "Cocos Islands [GAZ:00009721]", + "Colombia [GAZ:00002929]", + "Comoros [GAZ:00005820]", + "Cook Islands [GAZ:00053798]", + "Coral Sea Islands [GAZ:00005917]", + "Costa Rica [GAZ:00002901]", + "Cote d'Ivoire [GAZ:00000906]", + "Croatia [GAZ:00002719]", + "Cuba [GAZ:00003762]", + "Curacao [GAZ:00012582]", + "Cyprus [GAZ:00004006]", + "Czech Republic [GAZ:00002954]", + "Democratic Republic of the Congo [GAZ:00001086]", + "Denmark [GAZ:00005852]", + "Djibouti [GAZ:00000582]", + "Dominica [GAZ:00006890]", + "Dominican Republic [GAZ:00003952]", + "Ecuador [GAZ:00002912]", + "Egypt [GAZ:00003934]", + "El Salvador [GAZ:00002935]", + "Equatorial Guinea [GAZ:00001091]", + "Eritrea [GAZ:00000581]", + "Estonia [GAZ:00002959]", + "Eswatini [GAZ:00001099]", + "Ethiopia [GAZ:00000567]", + "Europa Island [GAZ:00005811]", + "Falkland Islands (Islas Malvinas) [GAZ:00001412]", + "Faroe Islands [GAZ:00059206]", + "Fiji [GAZ:00006891]", + "Finland [GAZ:00002937]", + "France [GAZ:00003940]", + "French Guiana [GAZ:00002516]", + "French Polynesia [GAZ:00002918]", + "French Southern and Antarctic Lands [GAZ:00003753]", + "Gabon [GAZ:00001092]", + "Gambia [GAZ:00000907]", + "Gaza Strip [GAZ:00009571]", + "Georgia [GAZ:00004942]", + "Germany [GAZ:00002646]", + "Ghana [GAZ:00000908]", + "Gibraltar [GAZ:00003987]", + 
"Glorioso Islands [GAZ:00005808]", + "Greece [GAZ:00002945]", + "Greenland [GAZ:00001507]", + "Grenada [GAZ:02000573]", + "Guadeloupe [GAZ:00067142]", + "Guam [GAZ:00003706]", + "Guatemala [GAZ:00002936]", + "Guernsey [GAZ:00001550]", + "Guinea [GAZ:00000909]", + "Guinea-Bissau [GAZ:00000910]", + "Guyana [GAZ:00002522]", + "Haiti [GAZ:00003953]", + "Heard Island and McDonald Islands [GAZ:00009718]", + "Honduras [GAZ:00002894]", + "Hong Kong [GAZ:00003203]", + "Howland Island [GAZ:00007120]", + "Hungary [GAZ:00002952]", + "Iceland [GAZ:00000843]", + "India [GAZ:00002839]", + "Indonesia [GAZ:00003727]", + "Iran [GAZ:00004474]", + "Iraq [GAZ:00004483]", + "Ireland [GAZ:00002943]", + "Isle of Man [GAZ:00052477]", + "Israel [GAZ:00002476]", + "Italy [GAZ:00002650]", + "Jamaica [GAZ:00003781]", + "Jan Mayen [GAZ:00005853]", + "Japan [GAZ:00002747]", + "Jarvis Island [GAZ:00007118]", + "Jersey [GAZ:00001551]", + "Johnston Atoll [GAZ:00007114]", + "Jordan [GAZ:00002473]", + "Juan de Nova Island [GAZ:00005809]", + "Kazakhstan [GAZ:00004999]", + "Kenya [GAZ:00001101]", + "Kerguelen Archipelago [GAZ:00005682]", + "Kingman Reef [GAZ:00007116]", + "Kiribati [GAZ:00006894]", + "Kosovo [GAZ:00011337]", + "Kuwait [GAZ:00005285]", + "Kyrgyzstan [GAZ:00006893]", + "Laos [GAZ:00006889]", + "Latvia [GAZ:00002958]", + "Lebanon [GAZ:00002478]", + "Lesotho [GAZ:00001098]", + "Liberia [GAZ:00000911]", + "Libya [GAZ:00000566]", + "Liechtenstein [GAZ:00003858]", + "Line Islands [GAZ:00007144]", + "Lithuania [GAZ:00002960]", + "Luxembourg [GAZ:00002947]", + "Macau [GAZ:00003202]", + "Madagascar [GAZ:00001108]", + "Malawi [GAZ:00001105]", + "Malaysia [GAZ:00003902]", + "Maldives [GAZ:00006924]", + "Mali [GAZ:00000584]", + "Malta [GAZ:00004017]", + "Marshall Islands [GAZ:00007161]", + "Martinique [GAZ:00067143]", + "Mauritania [GAZ:00000583]", + "Mauritius [GAZ:00003745]", + "Mayotte [GAZ:00003943]", + "Mexico [GAZ:00002852]", + "Micronesia [GAZ:00005862]", + "Midway Islands [GAZ:00007112]", + 
"Moldova [GAZ:00003897]", + "Monaco [GAZ:00003857]", + "Mongolia [GAZ:00008744]", + "Montenegro [GAZ:00006898]", + "Montserrat [GAZ:00003988]", + "Morocco [GAZ:00000565]", + "Mozambique [GAZ:00001100]", + "Myanmar [GAZ:00006899]", + "Namibia [GAZ:00001096]", + "Nauru [GAZ:00006900]", + "Navassa Island [GAZ:00007119]", + "Nepal [GAZ:00004399]", + "Netherlands [GAZ:00002946]", + "New Caledonia [GAZ:00005206]", + "New Zealand [GAZ:00000469]", + "Nicaragua [GAZ:00002978]", + "Niger [GAZ:00000585]", + "Nigeria [GAZ:00000912]", + "Niue [GAZ:00006902]", + "Norfolk Island [GAZ:00005908]", + "North Korea [GAZ:00002801]", + "North Macedonia [GAZ:00006895]", + "North Sea [GAZ:00002284]", + "Northern Mariana Islands [GAZ:00003958]", + "Norway [GAZ:00002699]", + "Oman [GAZ:00005283]", + "Pakistan [GAZ:00005246]", + "Palau [GAZ:00006905]", + "Panama [GAZ:00002892]", + "Papua New Guinea [GAZ:00003922]", + "Paracel Islands [GAZ:00010832]", + "Paraguay [GAZ:00002933]", + "Peru [GAZ:00002932]", + "Philippines [GAZ:00004525]", + "Pitcairn Islands [GAZ:00005867]", + "Poland [GAZ:00002939]", + "Portugal [GAZ:00004126]", + "Puerto Rico [GAZ:00006935]", + "Qatar [GAZ:00005286]", + "Republic of the Congo [GAZ:00001088]", + "Reunion [GAZ:00003945]", + "Romania [GAZ:00002951]", + "Ross Sea [GAZ:00023304]", + "Russia [GAZ:00002721]", + "Rwanda [GAZ:00001087]", + "Saint Helena [GAZ:00000849]", + "Saint Kitts and Nevis [GAZ:00006906]", + "Saint Lucia [GAZ:00006909]", + "Saint Pierre and Miquelon [GAZ:00003942]", + "Saint Martin [GAZ:00005841]", + "Saint Vincent and the Grenadines [GAZ:02000565]", + "Samoa [GAZ:00006910]", + "San Marino [GAZ:00003102]", + "Sao Tome and Principe [GAZ:00006927]", + "Saudi Arabia [GAZ:00005279]", + "Senegal [GAZ:00000913]", + "Serbia [GAZ:00002957]", + "Seychelles [GAZ:00006922]", + "Sierra Leone [GAZ:00000914]", + "Singapore [GAZ:00003923]", + "Sint Maarten [GAZ:00012579]", + "Slovakia [GAZ:00002956]", + "Slovenia [GAZ:00002955]", + "Solomon Islands 
[GAZ:00005275]", + "Somalia [GAZ:00001104]", + "South Africa [GAZ:00001094]", + "South Georgia and the South Sandwich Islands [GAZ:00003990]", + "South Korea [GAZ:00002802]", + "South Sudan [GAZ:00233439]", + "Spain [GAZ:00003936]", + "Spratly Islands [GAZ:00010831]", + "Sri Lanka [GAZ:00003924]", + "State of Palestine [GAZ:00002475]", + "Sudan [GAZ:00000560]", + "Suriname [GAZ:00002525]", + "Svalbard [GAZ:00005396]", + "Swaziland [GAZ:00001099]", + "Sweden [GAZ:00002729]", + "Switzerland [GAZ:00002941]", + "Syria [GAZ:00002474]", + "Taiwan [GAZ:00005341]", + "Tajikistan [GAZ:00006912]", + "Tanzania [GAZ:00001103]", + "Thailand [GAZ:00003744]", + "Timor-Leste [GAZ:00006913]", + "Togo [GAZ:00000915]", + "Tokelau [GAZ:00260188]", + "Tonga [GAZ:00006916]", + "Trinidad and Tobago [GAZ:00003767]", + "Tromelin Island [GAZ:00005812]", + "Tunisia [GAZ:00000562]", + "Turkey [GAZ:00000558]", + "Turkmenistan [GAZ:00005018]", + "Turks and Caicos Islands [GAZ:00003955]", + "Tuvalu [GAZ:00009715]", + "USA [GAZ:00002459]", + "Uganda [GAZ:00001102]", + "Ukraine [GAZ:00002724]", + "United Arab Emirates [GAZ:00005282]", + "United Kingdom [GAZ:00002637]", + "Uruguay [GAZ:00002930]", + "Uzbekistan [GAZ:00004979]", + "Vanuatu [GAZ:00006918]", + "Venezuela [GAZ:00002931]", + "Viet Nam [GAZ:00003756]", + "Virgin Islands [GAZ:00003959]", + "Wake Island [GAZ:00007111]", + "Wallis and Futuna [GAZ:00007191]", + "West Bank [GAZ:00009572]", + "Western Sahara [GAZ:00000564]", + "Yemen [GAZ:00005284]", + "Zambia [GAZ:00001107]", + "Zimbabwe [GAZ:00001106]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001181", + "type": "string", + "description": "The country where the host was likely exposed to the causative agent of the illness.", + "examples": [ + "South Africa [GAZ:00001094]" + ] + }, + "sample_plan_name": { + "ontology": 
"GENEPIO:0100285", + "type": "string", + "description": "The name of the sample plan implemented for sample collection.", + "examples": [ + "CanCOGeN Sampling Strategy 1.0" + ], + "label": "Sample plan name" + }, + "treatment": { + "ontology": "OGMS:0000090", + "type": "string", + "description": "Include drug name, dosage", + "examples": [], + "label": "Treatment" + }, + "prior_sars_cov_2_antiviral_treatment_date": { + "examples": [ + "28/01/2021" + ], + "ontology": "GENEPIO:0001440", + "type": "string", + "description": "The date treatment was first administered during the prior SARS-CoV-2 infection.", + "format": "date" + }, + "library_preparation_kit": { + "examples": [ + "Nextera XT" + ], + "ontology": "GENEPIO:0001450", + "type": "string", + "description": "The name of the DNA library preparation kit used to generate the library being sequenced.", + "clasification": "Sequencing", + "label": "Library Preparation Kit" + }, + "flow_cell_barcode": { + "examples": [ + "FAB06069" + ], + "ontology": "GENEPIO:0001451", + "type": "string", + "description": "The barcode of the flow cell used for sequencing.", + "clasification": "Sequencing", + "label": "Flow Cell Barcode" + }, + "library_kit": { + "examples": [ + "" + ], + "ontology": "GENEPIO_0000085", + "type": "string", + "description": "", + "clasification": "Sequencing", + "label": "Library Kit" + }, + "fast5_filename": { + "examples": [ + "batch1a_sequences.fast5" + ], + "ontology": "GENEPIO:0001480", + "type": "string", + "description": "The user-specified filename of the FAST5 file.", + "clasification": "Bioinformatics and QC metrics", + "label": "Filename fast5" + }, + "diagnostic_pcr_Ct_value_1": { + "examples": [ + "21" + ], + "ontology": "GENEPIO:0001509", + "type": "string", + "description": "The Ct value result from a diagnostic SARS-CoV-2 RT-PCR test.", + "clasification": "Pathogen diagnostic testing", + "label": "Diagnostic Pcr Ct Value 1" + }, + "collection_method": { + "enum": [ + "Amniocentesis 
[NCIT:C52009]", + "Aspiration [NCIT:C15631]", + "Suprapubic Aspiration [GENEPIO:0100028]", + "Tracheal Aspiration [GENEPIO:0100029]", + "Vacuum Aspiration [GENEPIO:0100030]", + "Biopsy [OBI:0002650]", + "Needle Biopsy [OBI:0002651]", + "Filtration [OBI:0302885]", + "Air Filtration [GENEPIO:0100031]", + "Lavage [OBI:0600044]", + "Bronchoalveolar Lavage (BAL) [GENEPIO:0100032]", + "Gastric Lavage [GENEPIO:0100033]", + "Lumbar Puncture [NCIT:C15327]", + "Necropsy [MMO:0000344]", + "Phlebotomy [NCIT:C28221]", + "Rinsing [GENEPIO:0002116]", + "Saline gargle (mouth rinse and gargle) [GENEPIO:0100034]", + "Scraping [GENEPIO:0100035]", + "Swabbing [GENEPIO:0002117]", + "Finger Prick [GENEPIO:0100036]", + "Washout Tear Collection [GENEPIO:0100038]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001241", + "type": "string", + "description": "The process used to collect the sample e.g. 
phlebotomy, necropsy.", + "examples": [ + "Bronchoalveolar Lavage (BAL) [GENEPIO:0100032]" + ], + "clasification": "Sample collection and processing", + "label": "Collection Method" + }, + "geo_loc_name_county_region": { + "ontology": "GENEPIO:0100280", + "type": "string", + "description": "The county/region of origin of the sample.", + "examples": [ + "Derbyshire" + ] + }, + "consensus_genome_length": { + "examples": [ + "38677" + ], + "ontology": "GENEPIO:0001483", + "type": "string", + "description": "Size of the assembled genome described as the number of base pairs.", + "clasification": "Bioinformatics and QC metrics", + "label": "Consensus genome length" + }, + "r2_fastq_filepath": { + "examples": [ + "/User/Documents/RespLab/Data/" + ], + "ontology": "GENEPIO:0001479", + "type": "string", + "description": "The filepath of the r2 FASTQ file.", + "clasification": "Bioinformatics and QC metrics", + "label": "Filepath R2 fastq" + }, + "consensus_criteria": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Bioinformatics and QC metrics", + "label": "Consensus criteria" + }, + "destination_of_most_recent_travel_state_province_territory": { + "ontology": "GENEPIO:0001412", + "type": "string", + "description": "The name of the province that was the destination of most recent travel.", + "examples": [ + "California" + ] + }, + "reception_date": { + "examples": [ + "3/21/2020" + ], + "ontology": "NCIT:C93644", + "type": "string", + "description": "The date on which the sample was received.", + "format": "date", + "clasification": "Sample collection and processing", + "label": "Sample Received Date" + }, + "quality_control_metrics": { + "examples": [ + "" + ], + "ontology": "data_3914", + "type": "string", + "description": "", + "clasification": "Bioinformatics and QC metrics", + "label": "Quality control metrics " + }, + "host_common_name": { + "enum": [ + "Human [NCBITaxon:9606]", + "Bat [NCBITaxon:9397]", + 
"Cat [NCBITaxon:9685]", + "Chicken [NCBITaxon:9031]", + "Civet [NCBITaxon:9673]", + "Cow [NCBITaxon:9913]", + "Dog [NCBITaxon:9615]", + "Lion [NCBITaxon:9689]", + "Mink [NCBITaxon:452646]", + "Pangolin [NCBITaxon:9973]", + "Pig [NCBITaxon:9825]", + "Pigeon [NCBITaxon:8930]", + "Tiger [NCBITaxon:9694]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001386", + "type": "string", + "description": "The commonly used name of the host.", + "examples": [ + "Human [NCBITaxon:9606]" + ], + "classification": "Host information", + "label": "Host Common Name" + }, + "geo_loc_name_state_province_territory": { + "ontology": "GENEPIO:0001185", + "type": "string", + "description": "The state/province/territory of origin of the sample.", + "examples": [ + "Western Cape" + ], + "classification": "Sample collection and processing", + "label": "Geo Loc Autonomic Community" + }, + "lineage_clade_analysis_software_version": { + "ontology": "GENEPIO:0001502", + "type": "string", + "description": "The version of the software used to determine the lineage/clade.", + "examples": [ + "2.1.10" + ] + }, + "host_subject_id": { + "ontology": "GENEPIO:0001398", + "type": "string", + "description": "A unique identifier by which each host can be referred to e.g. 
#131", + "examples": [ + "BCxy123" + ] + }, + "sra_accession": { + "examples": [ + "SRR11177792" + ], + "ontology": "GENEPIO:0001142", + "type": "string", + "description": "The Sequence Read Archive (SRA), European Nucleotide Archive (ENA) or DDBJ Sequence Read Archive (DRA) identifier linking raw read data, methodological metadata and quality control metrics submitted to the INSDC.", + "clasification": "Database Identifiers", + "label": "SRA accession" + }, + "author_submitter": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Contributor Acknowledgement", + "label": "Author Submitter" + }, + "authors": { + "examples": [ + "" + ], + "ontology": "GENEPIO:0001517", + "type": "string", + "description": "", + "clasification": "Contributor Acknowledgement", + "label": "Authors" + }, + "host_role": { + "enum": [ + "Attendee [GENEPIO:0100249]", + "Student [OMRSE:00000058]", + "Patient [OMRSE:00000030]", + "Inpatient [NCIT:C25182]", + "Outpatient [NCIT:C28293]", + "Passenger [GENEPIO:0100250]", + "Resident [GENEPIO:0100251]", + "Visitor [GENEPIO:0100252]", + "Volunteer [GENEPIO:0100253]", + "Work [GENEPIO:0100254]", + "Administrator [GENEPIO:0100255]", + "First Responder [GENEPIO:0100256]", + "Firefighter [GENEPIO:0100257]", + "Paramedic [GENEPIO:0100258]", + "Police Officer [GENEPIO:0100259]", + "Housekeeper [GENEPIO:0100260]", + "Kitchen Worker [GENEPIO:0100261]", + "Laboratory Worker [GENEPIO:0100262]", + "Nurse [OMRSE:00000014]", + "Personal Care Aid [GENEPIO:0100263]", + "Pharmacist [GENEPIO:0100264]", + "Physician [OMRSE:00000013]", + "Vet [GENEPIO:0100265]", + "Social role [OMRSE:00000001]", + "Acquaintance of case [GENEPIO:0100266]", + "Relative of case [GENEPIO:0100267]", + "Child of case [GENEPIO:0100268]", + "Parent of case [GENEPIO:0100269]", + "Father of case [GENEPIO:0100270]", + "Mother of case [GENEPIO:0100271]", + "Spouse of case [GENEPIO:0100272]", + "Not Applicable [GENEPIO:0001619]", + "Not 
Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001419", + "type": "string", + "description": "The role of the host in relation to the exposure setting.", + "examples": [ + "Patient [OMRSE:00000030]" + ] + }, + "consensus_sequence_name": { + "ontology": "GENEPIO:0001460", + "type": "string", + "description": "The name of the consensus sequence.", + "examples": [ + "ncov123assembly3" + ], + "label": "Consensus sequence name" + }, + "assembly": { + "examples": [ + "" + ], + "ontology": "NCIT_C63548", + "type": "string", + "description": "", + "clasification": "Bioinformatics and QC metrics", + "label": "Assembly" + }, + "if_assembly_other": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Bioinformatics and QC metrics", + "label": "If assembly Is Other, Specify" + }, + "assembly_params": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Bioinformatics and QC metrics", + "label": "Assambly params" + }, + "gisaid_accession": { + "examples": [ + "hCoV-19/Canada/prov_rona_99/2020" + ], + "ontology": "GENEPIO:0100282", + "type": "string", + "description": "The user-defined GISAID virus name assigned to the sequence.", + "clasification": "Database Identifiers", + "label": "GISAID Virus Name" + }, + "geo_loc_name_country": { + "enum": [ + "Afghanistan [GAZ:00006882]", + "Albania [GAZ:00002953]", + "Algeria [GAZ:00000563]", + "American Samoa [GAZ:00003957]", + "Andorra [GAZ:00002948]", + "Angola [GAZ:00001095]", + "Anguilla [GAZ:00009159]", + "Antarctica [GAZ:00000462]", + "Antigua and Barbuda [GAZ:00006883]", + "Argentina [GAZ:00002928]", + "Armenia [GAZ:00004094]", + "Aruba [GAZ:00004025]", + "Ashmore and Cartier Islands [GAZ:00005901]", + "Australia [GAZ:00000463]", + "Austria [GAZ:00002942]", + "Azerbaijan [GAZ:00004941]", + "Bahamas 
[GAZ:00002733]", + "Bahrain [GAZ:00005281]", + "Baker Island [GAZ:00007117]", + "Bangladesh [GAZ:00003750]", + "Barbados [GAZ:00001251]", + "Bassas da India [GAZ:00005810]", + "Belarus [GAZ:00006886]", + "Belgium [GAZ:00002938]", + "Belize [GAZ:00002934]", + "Benin [GAZ:00000904]", + "Bermuda [GAZ:00001264]", + "Bhutan [GAZ:00003920]", + "Bolivia [GAZ:00002511]", + "Borneo [GAZ:00025355]", + "Bosnia and Herzegovina [GAZ:00006887]", + "Botswana [GAZ:00001097]", + "Bouvet Island [GAZ:00001453]", + "Brazil [GAZ:00002828]", + "British Virgin Islands [GAZ:00003961]", + "Brunei [GAZ:00003901]", + "Bulgaria [GAZ:00002950]", + "Burkina Faso [GAZ:00000905]", + "Burundi [GAZ:00001090]", + "Cambodia [GAZ:00006888]", + "Cameroon [GAZ:00001093]", + "Canada [GAZ:00002560]", + "Cape Verde [GAZ:00001227]", + "Cayman Islands [GAZ:00003986]", + "Central African Republic [GAZ:00001089]", + "Chad [GAZ:00000586]", + "Chile [GAZ:00002825]", + "China [GAZ:00002845]", + "Christmas Island [GAZ:00005915]", + "Clipperton Island [GAZ:00005838]", + "Cocos Islands [GAZ:00009721]", + "Colombia [GAZ:00002929]", + "Comoros [GAZ:00005820]", + "Cook Islands [GAZ:00053798]", + "Coral Sea Islands [GAZ:00005917]", + "Costa Rica [GAZ:00002901]", + "Cote d'Ivoire [GAZ:00000906]", + "Croatia [GAZ:00002719]", + "Cuba [GAZ:00003762]", + "Curacao [GAZ:00012582]", + "Cyprus [GAZ:00004006]", + "Czech Republic [GAZ:00002954]", + "Democratic Republic of the Congo [GAZ:00001086]", + "Denmark [GAZ:00005852]", + "Djibouti [GAZ:00000582]", + "Dominica [GAZ:00006890]", + "Dominican Republic [GAZ:00003952]", + "Ecuador [GAZ:00002912]", + "Egypt [GAZ:00003934]", + "El Salvador [GAZ:00002935]", + "Equatorial Guinea [GAZ:00001091]", + "Eritrea [GAZ:00000581]", + "Estonia [GAZ:00002959]", + "Eswatini [GAZ:00001099]", + "Ethiopia [GAZ:00000567]", + "Europa Island [GAZ:00005811]", + "Falkland Islands (Islas Malvinas) [GAZ:00001412]", + "Faroe Islands [GAZ:00059206]", + "Fiji [GAZ:00006891]", + "Finland [GAZ:00002937]", + 
"France [GAZ:00003940]", + "French Guiana [GAZ:00002516]", + "French Polynesia [GAZ:00002918]", + "French Southern and Antarctic Lands [GAZ:00003753]", + "Gabon [GAZ:00001092]", + "Gambia [GAZ:00000907]", + "Gaza Strip [GAZ:00009571]", + "Georgia [GAZ:00004942]", + "Germany [GAZ:00002646]", + "Ghana [GAZ:00000908]", + "Gibraltar [GAZ:00003987]", + "Glorioso Islands [GAZ:00005808]", + "Greece [GAZ:00002945]", + "Greenland [GAZ:00001507]", + "Grenada [GAZ:02000573]", + "Guadeloupe [GAZ:00067142]", + "Guam [GAZ:00003706]", + "Guatemala [GAZ:00002936]", + "Guernsey [GAZ:00001550]", + "Guinea [GAZ:00000909]", + "Guinea-Bissau [GAZ:00000910]", + "Guyana [GAZ:00002522]", + "Haiti [GAZ:00003953]", + "Heard Island and McDonald Islands [GAZ:00009718]", + "Honduras [GAZ:00002894]", + "Hong Kong [GAZ:00003203]", + "Howland Island [GAZ:00007120]", + "Hungary [GAZ:00002952]", + "Iceland [GAZ:00000843]", + "India [GAZ:00002839]", + "Indonesia [GAZ:00003727]", + "Iran [GAZ:00004474]", + "Iraq [GAZ:00004483]", + "Ireland [GAZ:00002943]", + "Isle of Man [GAZ:00052477]", + "Israel [GAZ:00002476]", + "Italy [GAZ:00002650]", + "Jamaica [GAZ:00003781]", + "Jan Mayen [GAZ:00005853]", + "Japan [GAZ:00002747]", + "Jarvis Island [GAZ:00007118]", + "Jersey [GAZ:00001551]", + "Johnston Atoll [GAZ:00007114]", + "Jordan [GAZ:00002473]", + "Juan de Nova Island [GAZ:00005809]", + "Kazakhstan [GAZ:00004999]", + "Kenya [GAZ:00001101]", + "Kerguelen Archipelago [GAZ:00005682]", + "Kingman Reef [GAZ:00007116]", + "Kiribati [GAZ:00006894]", + "Kosovo [GAZ:00011337]", + "Kuwait [GAZ:00005285]", + "Kyrgyzstan [GAZ:00006893]", + "Laos [GAZ:00006889]", + "Latvia [GAZ:00002958]", + "Lebanon [GAZ:00002478]", + "Lesotho [GAZ:00001098]", + "Liberia [GAZ:00000911]", + "Libya [GAZ:00000566]", + "Liechtenstein [GAZ:00003858]", + "Line Islands [GAZ:00007144]", + "Lithuania [GAZ:00002960]", + "Luxembourg [GAZ:00002947]", + "Macau [GAZ:00003202]", + "Madagascar [GAZ:00001108]", + "Malawi [GAZ:00001105]", + 
"Malaysia [GAZ:00003902]", + "Maldives [GAZ:00006924]", + "Mali [GAZ:00000584]", + "Malta [GAZ:00004017]", + "Marshall Islands [GAZ:00007161]", + "Martinique [GAZ:00067143]", + "Mauritania [GAZ:00000583]", + "Mauritius [GAZ:00003745]", + "Mayotte [GAZ:00003943]", + "Mexico [GAZ:00002852]", + "Micronesia [GAZ:00005862]", + "Midway Islands [GAZ:00007112]", + "Moldova [GAZ:00003897]", + "Monaco [GAZ:00003857]", + "Mongolia [GAZ:00008744]", + "Montenegro [GAZ:00006898]", + "Montserrat [GAZ:00003988]", + "Morocco [GAZ:00000565]", + "Mozambique [GAZ:00001100]", + "Myanmar [GAZ:00006899]", + "Namibia [GAZ:00001096]", + "Nauru [GAZ:00006900]", + "Navassa Island [GAZ:00007119]", + "Nepal [GAZ:00004399]", + "Netherlands [GAZ:00002946]", + "New Caledonia [GAZ:00005206]", + "New Zealand [GAZ:00000469]", + "Nicaragua [GAZ:00002978]", + "Niger [GAZ:00000585]", + "Nigeria [GAZ:00000912]", + "Niue [GAZ:00006902]", + "Norfolk Island [GAZ:00005908]", + "North Korea [GAZ:00002801]", + "North Macedonia [GAZ:00006895]", + "North Sea [GAZ:00002284]", + "Northern Mariana Islands [GAZ:00003958]", + "Norway [GAZ:00002699]", + "Oman [GAZ:00005283]", + "Pakistan [GAZ:00005246]", + "Palau [GAZ:00006905]", + "Panama [GAZ:00002892]", + "Papua New Guinea [GAZ:00003922]", + "Paracel Islands [GAZ:00010832]", + "Paraguay [GAZ:00002933]", + "Peru [GAZ:00002932]", + "Philippines [GAZ:00004525]", + "Pitcairn Islands [GAZ:00005867]", + "Poland [GAZ:00002939]", + "Portugal [GAZ:00004126]", + "Puerto Rico [GAZ:00006935]", + "Qatar [GAZ:00005286]", + "Republic of the Congo [GAZ:00001088]", + "Reunion [GAZ:00003945]", + "Romania [GAZ:00002951]", + "Ross Sea [GAZ:00023304]", + "Russia [GAZ:00002721]", + "Rwanda [GAZ:00001087]", + "Saint Helena [GAZ:00000849]", + "Saint Kitts and Nevis [GAZ:00006906]", + "Saint Lucia [GAZ:00006909]", + "Saint Pierre and Miquelon [GAZ:00003942]", + "Saint Martin [GAZ:00005841]", + "Saint Vincent and the Grenadines [GAZ:02000565]", + "Samoa [GAZ:00006910]", + "San Marino 
[GAZ:00003102]", + "Sao Tome and Principe [GAZ:00006927]", + "Saudi Arabia [GAZ:00005279]", + "Senegal [GAZ:00000913]", + "Serbia [GAZ:00002957]", + "Seychelles [GAZ:00006922]", + "Sierra Leone [GAZ:00000914]", + "Singapore [GAZ:00003923]", + "Sint Maarten [GAZ:00012579]", + "Slovakia [GAZ:00002956]", + "Slovenia [GAZ:00002955]", + "Solomon Islands [GAZ:00005275]", + "Somalia [GAZ:00001104]", + "South Africa [GAZ:00001094]", + "South Georgia and the South Sandwich Islands [GAZ:00003990]", + "South Korea [GAZ:00002802]", + "South Sudan [GAZ:00233439]", + "Spain [GAZ:00003936]", + "Spratly Islands [GAZ:00010831]", + "Sri Lanka [GAZ:00003924]", + "State of Palestine [GAZ:00002475]", + "Sudan [GAZ:00000560]", + "Suriname [GAZ:00002525]", + "Svalbard [GAZ:00005396]", + "Swaziland [GAZ:00001099]", + "Sweden [GAZ:00002729]", + "Switzerland [GAZ:00002941]", + "Syria [GAZ:00002474]", + "Taiwan [GAZ:00005341]", + "Tajikistan [GAZ:00006912]", + "Tanzania [GAZ:00001103]", + "Thailand [GAZ:00003744]", + "Timor-Leste [GAZ:00006913]", + "Togo [GAZ:00000915]", + "Tokelau [GAZ:00260188]", + "Tonga [GAZ:00006916]", + "Trinidad and Tobago [GAZ:00003767]", + "Tromelin Island [GAZ:00005812]", + "Tunisia [GAZ:00000562]", + "Turkey [GAZ:00000558]", + "Turkmenistan [GAZ:00005018]", + "Turks and Caicos Islands [GAZ:00003955]", + "Tuvalu [GAZ:00009715]", + "USA [GAZ:00002459]", + "Uganda [GAZ:00001102]", + "Ukraine [GAZ:00002724]", + "United Arab Emirates [GAZ:00005282]", + "United Kingdom [GAZ:00002637]", + "Uruguay [GAZ:00002930]", + "Uzbekistan [GAZ:00004979]", + "Vanuatu [GAZ:00006918]", + "Venezuela [GAZ:00002931]", + "Viet Nam [GAZ:00003756]", + "Virgin Islands [GAZ:00003959]", + "Wake Island [GAZ:00007111]", + "Wallis and Futuna [GAZ:00007191]", + "West Bank [GAZ:00009572]", + "Western Sahara [GAZ:00000564]", + "Yemen [GAZ:00005284]", + "Zambia [GAZ:00001107]", + "Zimbabwe [GAZ:00001106]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided 
[GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001181", + "type": "string", + "description": "The country of origin of the sample.", + "examples": [ + "South Africa [GAZ:00001094]" + ], + "label": "Geo Loc Autonomic Country" + }, + "consensus_sequence_filepath": { + "examples": [ + "/User/Documents/RespLab/Data/ncov123assembly.fasta" + ], + "ontology": "GENEPIO:0001462", + "type": "string", + "description": "The filepath of the consensus sequence file.", + "clasification": "Bioinformatics and QC metrics", + "label": "Consensus sequence filepath" + }, + "number_of_base_pairs_sequenced": { + "examples": [ + "387566" + ], + "ontology": "GENEPIO:0001482", + "type": "string", + "description": "The number of total base pairs generated by the sequencing process.", + "clasification": "Bioinformatics and QC metrics", + "label": "Number of base pairs sequenced " + }, + "destination_of_most_recent_travel_country": { + "ontology": "GENEPIO:0001413", + "type": "string", + "description": "The name of the country that was the destination of most recent travel.", + "examples": [ + "United Kingdom" + ] + }, + "fast5_filepath": { + "examples": [ + "/User/Documents/RespLab/Data/" + ], + "ontology": "GENEPIO:0001481", + "type": "string", + "description": "The filepath of the FAST5 file.", + "clasification": "Bioinformatics and QC metrics", + "label": "Filepath fast5" + }, + "culture_collection": { + "ontology": "GENEPIO:0100284", + "type": "string", + "description": "The name of the source collection and unique culture identifier. 
", + "examples": [ + "/culture_collection=\"ATCC:26370\"" + ] + }, + "was_phix_used?": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Sequencing", + "label": "Was Phix Used" + }, + "number_of_samples_in_run": { + "examples": [ + "" + ], + "ontology": "KISAO_0000326", + "type": "string", + "description": "", + "clasification": "Sequencing", + "label": "Number Of Samples In Run" + }, + "amplicon_size": { + "examples": [ + "300bp" + ], + "ontology": "GENEPIO:0001449", + "type": "string", + "description": "The length of the amplicon generated by PCR amplification.", + "clasification": "Sequencing", + "label": "Amplicon Size" + }, + "flowcell_kit": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Sequencing", + "label": "Flowcell Kit" + }, + "sample_collector_contact_address": { + "examples": [ + "655 Lab St, Vancouver, British Columbia, V5N 2A2, Canada" + ], + "ontology": "GENEPIO:0001158", + "type": "string", + "description": "The mailing address of the agency submitting the sample.", + "clasification": "Sample collection and processing", + "label": "Originating Laboratory Address" + }, + "collection_protocol": { + "ontology": "GENEPIO:0001243", + "type": "string", + "description": "The name and version of a particular protocol used for sampling.", + "examples": [ + "SC2SamplingProtocol 1.2" + ], + "clasification": "Sample collection and processing", + "label": "Collection Protocol" + }, + "variant_evidence": { + "examples": [ + "lineage-defining mutations: ORF1ab (K1655N), Spike (K417N, E484K, N501Y, D614G, A701V), N (T205I), E (P71L)" + ], + "ontology": "GENEPIO:0001504", + "type": "string", + "description": "The evidence used to make the variant determination.", + "clasification": "Lineage and Variant information", + "label": "Variant evidence" + }, + "most_recent_travel_return_date": { + "examples": [ + "26/04/2020" + ], + "ontology": 
"GENEPIO:0001415", + "type": "string", + "description": "The date of a person's most recent return to some residence from a journey originating at that residence.", + "format": "date" + }, + "exposure_contact_level": { + "enum": [ + "Direct (human-to-human contact) [TRANS:0000001]", + "Indirect contact [GENEPIO:0100246]", + "Close contact (face-to-face contact) [GENEPIO:0100247]", + "Casual contact [GENEPIO:0100248]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001418", + "type": "string", + "description": "The exposure transmission contact type.", + "examples": [ + "Direct (human-to-human contact) [TRANS:0000001]" + ] + }, + "runID": { + "examples": [ + "" + ], + "ontology": "NCIT_C117058", + "type": "string", + "description": "", + "clasification": "Sequencing", + "label": "RunID" + }, + "sequencing_instrument_platform": { + "examples": [ + "MinIon" + ], + "ontology": "GENEPIO_0000071", + "type": "string", + "description": "The model of the sequencing instrument used.", + "label": "Sequencing Platforms " + }, + "prior_sars_cov_2_infection": { + "enum": [ + "Prior infection [GENEPIO:0100234]", + "No prior infection [GENEPIO:0100236]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001435", + "type": "string", + "description": "Whether there was prior SARS-CoV-2 infection.", + "examples": [ + "Prior infection [GENEPIO:0100234]" + ] + }, + "gisaid_virus_name": { + "ontology": "GENEPIO:0100282", + "type": "string", + "description": "The user-defined GISAID virus name assigned to the sequence.", + "examples": [ + "hCoV-19/Canada/prov_rona_99/2020" + ], + "clasification": "Database Identifiers", + "label": "GISAID Virus Name" + }, + 
"specimen_collector_sample_id": { + "examples": [ + "prov_rona_99" + ], + "ontology": "GENEPIO:0001123", + "type": "string", + "description": "The user-defined name for the sample.", + "clasification": "Database Identifiers", + "label": "Sample ID given by originating laboratory" + }, + "host_scientific_name": { + "enum": [ + "Bos taurus [NCBITaxon:9913]", + "Canis lupus familiaris [NCBITaxon:9615]", + "Chiroptera [NCBITaxon:9397]", + "Columbidae [NCBITaxon:8930]", + "Felis catus [NCBITaxon:9685]", + "Gallus gallus [NCBITaxon:9031]", + "Homo sapiens [NCBITaxon:9606]", + "Manis [NCBITaxon:9973]", + "Manis javanica [NCBITaxon:9974]", + "Neovison vison [NCBITaxon:452646]", + "Panthera leo [NCBITaxon:9689]", + "Panthera tigris [NCBITaxon:9694]", + "Rhinolophidae [NCBITaxon:58055]", + "Rhinolophus affinis [NCBITaxon:59477]", + "Sus scrofa domesticus [NCBITaxon:9825]", + "Viverridae [NCBITaxon:9673]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001387", + "type": "string", + "description": "The taxonomic, or scientific name of the host.", + "examples": [ + "Homo sapiens [NCBITaxon:9606]" + ], + "clasification": "Host information", + "label": "Host Scientific Name" + }, + "host_disease": { + "enum": [ + "COVID-19 [MONDO:0100096]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001391", + "type": "string", + "description": "The name of the disease experienced by the host.", + "examples": [ + "COVID-19 [MONDO:0100096]" + ] + }, + "passage_method": { + "ontology": "GENEPIO:0001264", + "type": "string", + "description": "Description of how organism was passaged.", + "examples": [ + "AVL buffer+30%EtOH lysate received from Respiratory Lab. 
P3 passage in Vero-1 via bioreactor large-scale batch passage. P3 batch derived from the SP-2/reference lab strain." + ], + "clasification": "Sample collection and processing", + "label": "Passage Method" + }, + "%qc_filtered": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Pathogen diagnostic testing", + "label": "%qc filtered" + }, + "%reads_host": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Pathogen diagnostic testing", + "label": "%reads host" + }, + "%reads_virus": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Pathogen diagnostic testing", + "label": "%reads virus" + }, + "%unmapped": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Pathogen diagnostic testing", + "label": "%unmapped" + }, + "%genome _greater_10x": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Pathogen diagnostic testing", + "label": "% genome greater 10x" + }, + "mean_depth_of_coverage_value": { + "examples": [ + "" + ], + "ontology": "NCIT:C167285", + "type": "string", + "description": "", + "clasification": "Pathogen diagnostic testing", + "label": "mean depth of coverage value" + }, + "symptom_onset_date": { + "examples": [ + "16/03/2020" + ], + "ontology": "GENEPIO:0001399", + "type": "string", + "description": "The date on which the symptoms began or were first noted.", + "format": "date", + "label": "Symptom onset date" + }, + "r2 fastq filename": { + "examples": [ + "ABC123_S1_L001_R2_001.fastq.gz" + ], + "ontology": "GENEPIO:0001477", + "type": "string", + "description": "The user-specified filename of the r2 FASTQ file.", + "clasification": "Bioinformatics and QC metrics", + "label": "Sequence file R2 fastq" + }, + "prior_sars_cov_2_infection_date": { + "examples": [ + "23/01/2021" 
+ ], + "ontology": "GENEPIO:0001437", + "type": "string", + "description": "The date of diagnosis of the prior SARS-CoV-2 infection.", + "format": "date" + }, + "exposure_event": { + "enum": [ + "Mass Gathering [GENEPIO:0100237]", + "Convention [GENEPIO:0100238]", + "Convocation [GENEPIO:0100239]", + "Agricultural Event [GENEPIO:0100240]", + "Religious Gathering [GENEPIO:0100241]", + "Mass [GENEPIO:0100242]", + "Social Gathering [PCO:0000033]", + "Baby Shower [PCO:0000039]", + "Community Event [PCO:0000034]", + "Family Gathering [GENEPIO:0100243]", + "Family Reunion [GENEPIO:0100244]", + "Funeral [GENEPIO:0100245]", + "Party [PCO:0000035]", + "Potluck [PCO:0000037]", + "Wedding [PCO:0000038]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001417", + "type": "string", + "description": "Event leading to exposure.", + "examples": [ + "Mass Gathering [GENEPIO:0100237]" + ] + }, + "sequencing_kit_number": { + "examples": [ + "AB456XYZ789" + ], + "ontology": "GENEPIO:0001455", + "type": "string", + "description": "The manufacturer's kit number.", + "clasification": "Sequencing", + "label": "Sequencing Kit Number" + }, + "purpose_of_sampling_details": { + "ontology": "GENEPIO:0001200", + "type": "string", + "description": "Further details pertaining to the reason the sample was collected.", + "examples": [ + "Screening of bat specimens in museum collections." 
+ ] + }, + "host_gender": { + "enum": [ + "Female [NCIT:C46110]", + "Male [NCIT:C46109]", + "Non-binary Gender [GSSO:000132]", + "Transgender (assigned male at birth) [GSSO:004004]", + "Transgender (assigned female at birth) [GSSO:004005]", + "Undeclared [NCIT:C110959]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001395", + "type": "string", + "description": "The gender of the host at the time of sample collection.", + "examples": [ + "Male [NCIT:C46109]" + ], + "label": "Host Gender" + }, + "prior_sars_cov_2_antiviral_treatment_agent": { + "ontology": "GENEPIO:0001439", + "type": "string", + "description": "The name of the antiviral treatment agent administered during the prior SARS-CoV-2 infection.", + "examples": [ + "Remdesivir" + ] + }, + "diagnostic_pcr_protocol_1": { + "examples": [ + "PCREGene 2.0" + ], + "ontology": "GENEPIO:0001508", + "type": "string", + "description": "The name and version number of the protocol used for diagnostic marker amplification.", + "clasification": "Pathogen diagnostic testing", + "label": "Diagnostic Pcr Protocol 1" + }, + "diagnostic_pcr_protocol_2": { + "examples": [ + "PCRRdRpGene 3.0" + ], + "ontology": "GENEPIO:0001511", + "type": "string", + "description": "The name and version number of the protocol used for diagnostic marker amplification.", + "clasification": "Pathogen diagnostic testing", + "label": "Diagnostic Pcr Protocol 2" + }, + "host_specimen_voucher": { + "ontology": "GENEPIO:0100283", + "type": "string", + "description": "Identifier for the physical specimen.", + "examples": [ + "URI: http://portal.vertnet.org/o/fmnh/mammals?id=33e55cfe-330b-40d9-aaae-8d042cba7542 INSDC triplet: UAM:Mamm:52179" + ] + }, + "exposure_setting": { + "enum": [ + "Human Exposure [ECTO:3000005]", + "Contact with Known COVID-19 Case [GENEPIO:0100184]", + "Contact with 
Patient [GENEPIO:0100185]", + "Contact with Probable COVID-19 Case [GENEPIO:0100186]", + "Contact with Person with Acute Respiratory Illness [GENEPIO:0100187]", + "Contact with Person with Fever and/or Cough [GENEPIO:0100188]", + "Contact with Person who Recently Travelled [GENEPIO:0100189]", + "Occupational, Residency or Patronage Exposure [GENEPIO:0100190]", + "Abbatoir [ECTO:1000033]", + "Animal Rescue [GENEPIO:0100191]", + "Childcare [GENEPIO:0100192]", + "Daycare [GENEPIO:0100193]", + "Nursery [GENEPIO:0100194]", + "Community Service Centre [GENEPIO:0100195]", + "Correctional Facility [GENEPIO:0100196]", + "Dormitory [GENEPIO:0100197]", + "Farm [ECTO:1000034]", + "First Nations Reserve [GENEPIO:0100198]", + "Funeral Home [GENEPIO:0100199]", + "Group Home [GENEPIO:0100200]", + "Healthcare Setting [GENEPIO:0100201]", + "Ambulance [GENEPIO:0100202]", + "Acute Care Facility [GENEPIO:0100203]", + "Clinic [GENEPIO:0100204]", + "Community Health Centre [GENEPIO:0100205]", + "Hospital [ECTO:1000035]", + "Emergency Department [GENEPIO:0100206]", + "ICU [GENEPIO:0100207]", + "Ward [GENEPIO:0100208]", + "Laboratory [ECTO:1000036]", + "Long-Term Care Facility [GENEPIO:0100209]", + "Pharmacy [GENEPIO:0100210]", + "Physician's Office [GENEPIO:0100211]", + "Household [GENEPIO:0100212]", + "Insecure Housing (Homeless) [GENEPIO:0100213]", + "Occupational Exposure [GENEPIO:0100214]", + "Worksite [GENEPIO:0100215]", + "Office [ECTO:1000037]", + "Outdoors [GENEPIO:0100216]", + "Camp/camping [ECTO:5000009]", + "Hiking Trail [GENEPIO:0100217]", + "Hunting Ground [ECTO:6000030]", + "Ski Resort [GENEPIO:0100218]", + "Petting zoo [ECTO:5000008]", + "Place of Worship [GENEPIO:0100220]", + "Church [GENEPIO:0100221]", + "Mosque [GENEPIO:0100222]", + "Temple [GENEPIO:0100223]", + "Restaurant [ECTO:1000040]", + "Retail Store [ECTO:1000041]", + "School [GENEPIO:0100224]", + "Temporary Residence [GENEPIO:0100225]", + "Homeless Shelter [GENEPIO:0100226]", + "Hotel [GENEPIO:0100227]", + 
"Veterinary Care Clinic [GENEPIO:0100228]", + "Travel Exposure [GENEPIO:0100229]", + "Travelled on a Cruise Ship [GENEPIO:0100230]", + "Travelled on a Plane [GENEPIO:0100231]", + "Travelled on Ground Transport [GENEPIO:0100232]", + "Travelled outside Province/Territory [GENEPIO:0001118]", + "Travelled outside Canada [GENEPIO:0001119]", + "Other Exposure Setting [GENEPIO:0100235]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001428", + "type": "string", + "description": "The setting leading to exposure.", + "examples": [ + "Healthcare Setting [GENEPIO:0100201]" + ] + }, + "lineage_clade_name": { + "ontology": "GENEPIO:0001500", + "type": "string", + "description": "The name of the lineage or clade.", + "examples": [ + "B.1.1.7" + ] + }, + "gene_name_1": { + "enum": [ + "E gene (orf4) [GENEPIO:0100151]", + "M gene (orf5) [GENEPIO:0100152]", + "N gene (orf9) [GENEPIO:0100153]", + "Spike gene (orf2) [GENEPIO:0100154]", + "orf1ab (rep) [GENEPIO:0100155]", + "orf1a (pp1a) [GENEPIO:0100156]", + "nsp11 [GENEPIO:0100157]", + "nsp1 [GENEPIO:0100158]", + "nsp2 [GENEPIO:0100159]", + "nsp3 [GENEPIO:0100160]", + "nsp4 [GENEPIO:0100161]", + "nsp5 [GENEPIO:0100162]", + "nsp6 [GENEPIO:0100163]", + "nsp7 [GENEPIO:0100164]", + "nsp8 [GENEPIO:0100165]", + "nsp9 [GENEPIO:0100166]", + "nsp10 [GENEPIO:0100167]", + "RdRp gene (nsp12) [GENEPIO:0100168]", + "hel gene (nsp13) [GENEPIO:0100169]", + "exoN gene (nsp14) [GENEPIO:0100170]", + "nsp15 [GENEPIO:0100171]", + "nsp16 [GENEPIO:0100172]", + "orf3a [GENEPIO:0100173]", + "orf3b [GENEPIO:0100174]", + "orf6 (ns6) [GENEPIO:0100175]", + "orf7a [GENEPIO:0100176]", + "orf7b (ns7b) [GENEPIO:0100177]", + "orf8 (ns8) [GENEPIO:0100178]", + "orf9b [GENEPIO:0100179]", + "orf9c [GENEPIO:0100180]", + "orf10 [GENEPIO:0100181]", + "orf14 [GENEPIO:0100182]", + "SARS-COV-2 5' UTR 
[GENEPIO:0100183]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001507", + "type": "string", + "description": "The name of the gene used in the diagnostic RT-PCR test.", + "examples": [ + "E gene (orf4) [GENEPIO:0100151]" + ], + "classification": "Pathogen diagnostic testing", + "label": "Gene Name 1" + }, + "Protocol_SARS-CoV-2_detection": { + "examples": [ + "" + ], + "ontology": "CIDO_0020274", + "type": "string", + "description": "", + "clasification": "Pathogen diagnostic testing", + "label": "Protocol SARS-CoV-2 detection" + }, + "consensus_sequence_software_name": { + "examples": [ + "Ivar" + ], + "ontology": "GENEPIO:0001463", + "type": "string", + "description": "The name of software used to generate the consensus sequence.", + "clasification": "Bioinformatics and QC metrics", + "label": "Consensus sequence software name" + }, + "reference_genome_accession": { + "examples": [ + "" + ], + "ontology": "GENEPIO:0001485", + "type": "string", + "description": "", + "clasification": "Pathogen diagnostic testing", + "label": "reference genome accession" + }, + "host_residence_country": { + "enum": [ + "Afghanistan [GAZ:00006882]", + "Albania [GAZ:00002953]", + "Algeria [GAZ:00000563]", + "American Samoa [GAZ:00003957]", + "Andorra [GAZ:00002948]", + "Angola [GAZ:00001095]", + "Anguilla [GAZ:00009159]", + "Antarctica [GAZ:00000462]", + "Antigua and Barbuda [GAZ:00006883]", + "Argentina [GAZ:00002928]", + "Armenia [GAZ:00004094]", + "Aruba [GAZ:00004025]", + "Ashmore and Cartier Islands [GAZ:00005901]", + "Australia [GAZ:00000463]", + "Austria [GAZ:00002942]", + "Azerbaijan [GAZ:00004941]", + "Bahamas [GAZ:00002733]", + "Bahrain [GAZ:00005281]", + "Baker Island [GAZ:00007117]", + "Bangladesh [GAZ:00003750]", + "Barbados [GAZ:00001251]", + "Bassas da India [GAZ:00005810]", + "Belarus [GAZ:00006886]", + 
"Belgium [GAZ:00002938]", + "Belize [GAZ:00002934]", + "Benin [GAZ:00000904]", + "Bermuda [GAZ:00001264]", + "Bhutan [GAZ:00003920]", + "Bolivia [GAZ:00002511]", + "Borneo [GAZ:00025355]", + "Bosnia and Herzegovina [GAZ:00006887]", + "Botswana [GAZ:00001097]", + "Bouvet Island [GAZ:00001453]", + "Brazil [GAZ:00002828]", + "British Virgin Islands [GAZ:00003961]", + "Brunei [GAZ:00003901]", + "Bulgaria [GAZ:00002950]", + "Burkina Faso [GAZ:00000905]", + "Burundi [GAZ:00001090]", + "Cambodia [GAZ:00006888]", + "Cameroon [GAZ:00001093]", + "Canada [GAZ:00002560]", + "Cape Verde [GAZ:00001227]", + "Cayman Islands [GAZ:00003986]", + "Central African Republic [GAZ:00001089]", + "Chad [GAZ:00000586]", + "Chile [GAZ:00002825]", + "China [GAZ:00002845]", + "Christmas Island [GAZ:00005915]", + "Clipperton Island [GAZ:00005838]", + "Cocos Islands [GAZ:00009721]", + "Colombia [GAZ:00002929]", + "Comoros [GAZ:00005820]", + "Cook Islands [GAZ:00053798]", + "Coral Sea Islands [GAZ:00005917]", + "Costa Rica [GAZ:00002901]", + "Cote d'Ivoire [GAZ:00000906]", + "Croatia [GAZ:00002719]", + "Cuba [GAZ:00003762]", + "Curacao [GAZ:00012582]", + "Cyprus [GAZ:00004006]", + "Czech Republic [GAZ:00002954]", + "Democratic Republic of the Congo [GAZ:00001086]", + "Denmark [GAZ:00005852]", + "Djibouti [GAZ:00000582]", + "Dominica [GAZ:00006890]", + "Dominican Republic [GAZ:00003952]", + "Ecuador [GAZ:00002912]", + "Egypt [GAZ:00003934]", + "El Salvador [GAZ:00002935]", + "Equatorial Guinea [GAZ:00001091]", + "Eritrea [GAZ:00000581]", + "Estonia [GAZ:00002959]", + "Eswatini [GAZ:00001099]", + "Ethiopia [GAZ:00000567]", + "Europa Island [GAZ:00005811]", + "Falkland Islands (Islas Malvinas) [GAZ:00001412]", + "Faroe Islands [GAZ:00059206]", + "Fiji [GAZ:00006891]", + "Finland [GAZ:00002937]", + "France [GAZ:00003940]", + "French Guiana [GAZ:00002516]", + "French Polynesia [GAZ:00002918]", + "French Southern and Antarctic Lands [GAZ:00003753]", + "Gabon [GAZ:00001092]", + "Gambia [GAZ:00000907]", + 
"Gaza Strip [GAZ:00009571]", + "Georgia [GAZ:00004942]", + "Germany [GAZ:00002646]", + "Ghana [GAZ:00000908]", + "Gibraltar [GAZ:00003987]", + "Glorioso Islands [GAZ:00005808]", + "Greece [GAZ:00002945]", + "Greenland [GAZ:00001507]", + "Grenada [GAZ:02000573]", + "Guadeloupe [GAZ:00067142]", + "Guam [GAZ:00003706]", + "Guatemala [GAZ:00002936]", + "Guernsey [GAZ:00001550]", + "Guinea [GAZ:00000909]", + "Guinea-Bissau [GAZ:00000910]", + "Guyana [GAZ:00002522]", + "Haiti [GAZ:00003953]", + "Heard Island and McDonald Islands [GAZ:00009718]", + "Honduras [GAZ:00002894]", + "Hong Kong [GAZ:00003203]", + "Howland Island [GAZ:00007120]", + "Hungary [GAZ:00002952]", + "Iceland [GAZ:00000843]", + "India [GAZ:00002839]", + "Indonesia [GAZ:00003727]", + "Iran [GAZ:00004474]", + "Iraq [GAZ:00004483]", + "Ireland [GAZ:00002943]", + "Isle of Man [GAZ:00052477]", + "Israel [GAZ:00002476]", + "Italy [GAZ:00002650]", + "Jamaica [GAZ:00003781]", + "Jan Mayen [GAZ:00005853]", + "Japan [GAZ:00002747]", + "Jarvis Island [GAZ:00007118]", + "Jersey [GAZ:00001551]", + "Johnston Atoll [GAZ:00007114]", + "Jordan [GAZ:00002473]", + "Juan de Nova Island [GAZ:00005809]", + "Kazakhstan [GAZ:00004999]", + "Kenya [GAZ:00001101]", + "Kerguelen Archipelago [GAZ:00005682]", + "Kingman Reef [GAZ:00007116]", + "Kiribati [GAZ:00006894]", + "Kosovo [GAZ:00011337]", + "Kuwait [GAZ:00005285]", + "Kyrgyzstan [GAZ:00006893]", + "Laos [GAZ:00006889]", + "Latvia [GAZ:00002958]", + "Lebanon [GAZ:00002478]", + "Lesotho [GAZ:00001098]", + "Liberia [GAZ:00000911]", + "Libya [GAZ:00000566]", + "Liechtenstein [GAZ:00003858]", + "Line Islands [GAZ:00007144]", + "Lithuania [GAZ:00002960]", + "Luxembourg [GAZ:00002947]", + "Macau [GAZ:00003202]", + "Madagascar [GAZ:00001108]", + "Malawi [GAZ:00001105]", + "Malaysia [GAZ:00003902]", + "Maldives [GAZ:00006924]", + "Mali [GAZ:00000584]", + "Malta [GAZ:00004017]", + "Marshall Islands [GAZ:00007161]", + "Martinique [GAZ:00067143]", + "Mauritania [GAZ:00000583]", + 
"Mauritius [GAZ:00003745]", + "Mayotte [GAZ:00003943]", + "Mexico [GAZ:00002852]", + "Micronesia [GAZ:00005862]", + "Midway Islands [GAZ:00007112]", + "Moldova [GAZ:00003897]", + "Monaco [GAZ:00003857]", + "Mongolia [GAZ:00008744]", + "Montenegro [GAZ:00006898]", + "Montserrat [GAZ:00003988]", + "Morocco [GAZ:00000565]", + "Mozambique [GAZ:00001100]", + "Myanmar [GAZ:00006899]", + "Namibia [GAZ:00001096]", + "Nauru [GAZ:00006900]", + "Navassa Island [GAZ:00007119]", + "Nepal [GAZ:00004399]", + "Netherlands [GAZ:00002946]", + "New Caledonia [GAZ:00005206]", + "New Zealand [GAZ:00000469]", + "Nicaragua [GAZ:00002978]", + "Niger [GAZ:00000585]", + "Nigeria [GAZ:00000912]", + "Niue [GAZ:00006902]", + "Norfolk Island [GAZ:00005908]", + "North Korea [GAZ:00002801]", + "North Macedonia [GAZ:00006895]", + "North Sea [GAZ:00002284]", + "Northern Mariana Islands [GAZ:00003958]", + "Norway [GAZ:00002699]", + "Oman [GAZ:00005283]", + "Pakistan [GAZ:00005246]", + "Palau [GAZ:00006905]", + "Panama [GAZ:00002892]", + "Papua New Guinea [GAZ:00003922]", + "Paracel Islands [GAZ:00010832]", + "Paraguay [GAZ:00002933]", + "Peru [GAZ:00002932]", + "Philippines [GAZ:00004525]", + "Pitcairn Islands [GAZ:00005867]", + "Poland [GAZ:00002939]", + "Portugal [GAZ:00004126]", + "Puerto Rico [GAZ:00006935]", + "Qatar [GAZ:00005286]", + "Republic of the Congo [GAZ:00001088]", + "Reunion [GAZ:00003945]", + "Romania [GAZ:00002951]", + "Ross Sea [GAZ:00023304]", + "Russia [GAZ:00002721]", + "Rwanda [GAZ:00001087]", + "Saint Helena [GAZ:00000849]", + "Saint Kitts and Nevis [GAZ:00006906]", + "Saint Lucia [GAZ:00006909]", + "Saint Pierre and Miquelon [GAZ:00003942]", + "Saint Martin [GAZ:00005841]", + "Saint Vincent and the Grenadines [GAZ:02000565]", + "Samoa [GAZ:00006910]", + "San Marino [GAZ:00003102]", + "Sao Tome and Principe [GAZ:00006927]", + "Saudi Arabia [GAZ:00005279]", + "Senegal [GAZ:00000913]", + "Serbia [GAZ:00002957]", + "Seychelles [GAZ:00006922]", + "Sierra Leone [GAZ:00000914]", + 
"Singapore [GAZ:00003923]", + "Sint Maarten [GAZ:00012579]", + "Slovakia [GAZ:00002956]", + "Slovenia [GAZ:00002955]", + "Solomon Islands [GAZ:00005275]", + "Somalia [GAZ:00001104]", + "South Africa [GAZ:00001094]", + "South Georgia and the South Sandwich Islands [GAZ:00003990]", + "South Korea [GAZ:00002802]", + "South Sudan [GAZ:00233439]", + "Spain [GAZ:00003936]", + "Spratly Islands [GAZ:00010831]", + "Sri Lanka [GAZ:00003924]", + "State of Palestine [GAZ:00002475]", + "Sudan [GAZ:00000560]", + "Suriname [GAZ:00002525]", + "Svalbard [GAZ:00005396]", + "Swaziland [GAZ:00001099]", + "Sweden [GAZ:00002729]", + "Switzerland [GAZ:00002941]", + "Syria [GAZ:00002474]", + "Taiwan [GAZ:00005341]", + "Tajikistan [GAZ:00006912]", + "Tanzania [GAZ:00001103]", + "Thailand [GAZ:00003744]", + "Timor-Leste [GAZ:00006913]", + "Togo [GAZ:00000915]", + "Tokelau [GAZ:00260188]", + "Tonga [GAZ:00006916]", + "Trinidad and Tobago [GAZ:00003767]", + "Tromelin Island [GAZ:00005812]", + "Tunisia [GAZ:00000562]", + "Turkey [GAZ:00000558]", + "Turkmenistan [GAZ:00005018]", + "Turks and Caicos Islands [GAZ:00003955]", + "Tuvalu [GAZ:00009715]", + "USA [GAZ:00002459]", + "Uganda [GAZ:00001102]", + "Ukraine [GAZ:00002724]", + "United Arab Emirates [GAZ:00005282]", + "United Kingdom [GAZ:00002637]", + "Uruguay [GAZ:00002930]", + "Uzbekistan [GAZ:00004979]", + "Vanuatu [GAZ:00006918]", + "Venezuela [GAZ:00002931]", + "Viet Nam [GAZ:00003756]", + "Virgin Islands [GAZ:00003959]", + "Wake Island [GAZ:00007111]", + "Wallis and Futuna [GAZ:00007191]", + "West Bank [GAZ:00009572]", + "Western Sahara [GAZ:00000564]", + "Yemen [GAZ:00005284]", + "Zambia [GAZ:00001107]", + "Zimbabwe [GAZ:00001106]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001396", + "type": "string", + "description": "The country where the host resides.", + 
"examples": [ + "South Africa [GAZ:00001094]" + ], + "label": "Host residence country" + }, + "travel_history": { + "ontology": "GENEPIO:0001416", + "type": "string", + "description": "Travel history in last six months.", + "examples": [ + "Canada, Vancouver, USA, Seattle, Italy, Milan" + ] + }, + "lineage/clade_analysis_software_version": { + "examples": [ + "2.1.10" + ], + "ontology": "GENEPIO:0001502", + "type": "string", + "description": "The version of the software used to determine the lineage/clade.", + "clasification": "Lineage and Variant information", + "label": "Lineage/clade analysis software version" + }, + "complications": { + "enum": [ + "Abnormal blood oxygen level [HP:0500165]", + "Acute kidney injury [HP:0001919]", + "Acute lung injury [MONDO:0015796]", + "Ventilation induced lung injury (VILI) [GENEPIO:0100092]", + "Acute respiratory failure [MONDO:0001208]", + "Arrhythmia (complication) [HP:0011675]", + "Tachycardia [HP:0001649]", + "Polymorphic ventricular tachycardia (VT) [HP:0031677]", + "Tachyarrhythmia [GENEPIO:0100084]", + "Noncardiogenic pulmonary edema [GENEPIO:0100085]", + "Acute respiratory distress syndrome (ARDS) [HP:0033677]", + "COVID-19 associated ARDS (CARDS) [NCIT:C171551]", + "Neurogenic pulmonary edema (NPE) [GENEPIO:0100086]", + "Cardiac injury [GENEPIO:0100087]", + "Cardiac arrest [HP:0001695]", + "Cardiogenic shock [HP:0030149]", + "Blood clot [HP:0001977]", + "Arterial clot [HP:0004420]", + "Deep vein thrombosis (DVT) [HP:0002625]", + "Pulmonary embolism (PE) [HP:0002204]", + "Cardiomyopathy [HP:0001638]", + "Central nervous system invasion [MONDO:0024619]", + "Stroke (complication) [HP:0001297]", + "Central Nervous System Vasculitis [MONDO:0003346]", + "Acute ischemic stroke [HP:0002140]", + "Coma [HP:0001259]", + "Convulsions [HP:0011097]", + "COVID-19 associated coagulopathy (CAC) [NCIT:C171562]", + "Cystic fibrosis [MONDO:0009061]", + "Cytokine release syndrome [MONDO:0600008]", + "Disseminated intravascular 
coagulation (DIC) [MPATH:108]", + "Encephalopathy [HP:0001298]", + "Fulminant myocarditis [GENEPIO:0100088]", + "Guillain-Barr\u00e9 syndrome [MONDO:0016218]", + "Internal hemorrhage (complication; internal bleeding) [HP:0011029]", + "Intracerebral haemorrhage [MONDO:0013792]", + "Kawasaki disease [MONDO:0012727]", + "Complete Kawasaki disease [GENEPIO:0100089]", + "Incomplete Kawasaki disease [GENEPIO:0100090]", + "Liver dysfunction [HP:0001410]", + "Acute liver injury [GENEPIO:0100091]", + "Long COVID-19 [MONDO:0100233]", + "Meningitis [HP:0001287]", + "Migraine [HP:0002076]", + "Miscarriage [HP:0005268]", + "Multisystem inflammatory syndrome in children (MIS-C) [MONDO:0100163]", + "Muscle injury [GENEPIO:0100093]", + "Myalgic encephalomyelitis (chronic fatigue syndrome) [MONDO:0005404]", + "Myocardial infarction (heart attack) [MONDO:0005068]", + "Acute myocardial infarction [MONDO:0004781]", + "ST-segment elevation myocardial infarction [MONDO:0041656]", + "Myocardial injury [HP:0001700]", + "Neonatal complications [NCIT:C168498]", + "Organ failure [GENEPIO:0100094]", + "Heart failure [HP:0001635]", + "Liver failure [MONDO:0100192]", + "Paralysis [HP:0003470]", + "Pneumothorax (collapsed lung) [HP:0002107]", + "Spontaneous pneumothorax [HP:0002108]", + "Spontaneous tension pneymothorax [MONDO:0002075]", + "Pneumonia (complication) [HP:0002090]", + "COVID-19 pneumonia [NCIT:C171550]", + "Pregancy complications [HP:0001197]", + "Rhabdomyolysis [HP:0003201]", + "Secondary infection [IDO:0000567]", + "Secondary staph infection [GENEPIO:0100095]", + "Secondary strep infection [GENEPIO:0100096]", + "Seizure (complication) [HP:0001250]", + "Motor seizure [HP:0020219]", + "Sepsis/Septicemia [HP:0100806]", + "Sepsis (systemic inflammatory response to infection) [IDO:0000636]", + "Septicemia (bloodstream infection) [NCIT:C3364]", + "Shock [HP:0031273]", + "Hyperinflammatory shock [GENEPIO:0100097]", + "Refractory cardiogenic shock [GENEPIO:0100098]", + "Refractory 
cardiogenic plus vasoplegic shock [GENEPIO:0100099]", + "Septic shock [NCIT:C35018]", + "Vasculitis [HP:0002633]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001402", + "type": "string", + "description": "Patient medical complications that are believed to have occurred as a result of host disease.", + "examples": [ + "Acute respiratory failure [MONDO:0001208]" + ] + }, + "geo_loc_name_city": { + "ontology": "GENEPIO:0001189", + "type": "string", + "description": "The city of origin of the sample.", + "examples": [ + "Vancouver" + ], + "clasification": "Sample collection and processing", + "label": "Geo Loc City" + }, + "bioproject umbrella accession": { + "examples": [ + "PRJNA623807" + ], + "ontology": "GENEPIO:0001133", + "type": "string", + "description": "The INSDC umbrella accession number of the BioProject to which the BioSample belongs.", + "clasification": "Database Identifiers", + "label": "umbrella bioproject accession" + }, + "biomaterial_extracted": { + "enum": [ + "mRNA (cDNA) [OBI:0002754]", + "RNA (Total) [OBI:0000895]", + "RNA (Poly-A) [OBI:0000869]", + "RNA (Ribo-Depleted) [OBI:0002627]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001266", + "type": "string", + "description": "The biomaterial extracted from samples for the purpose of sequencing.", + "examples": [ + "RNA (Total) [OBI:0000895]" + ], + "classification": "Sample collection and processing", + "label": "Biomaterial Extracted" + }, + "host_age_unit": { + "enum": [ + "months [UO:0000035]", + "years [UO:0000036]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + 
"Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001393", + "type": "string", + "description": "The units used to measure the host's age.", + "examples": [ + "years [UO:0000036]" + ] + }, + "geo_loc_latitude": { + "ontology": "OBI:0001620", + "type": "string", + "description": "The latitude coordinates of the geographical location of sample collection.", + "examples": [ + "38.98 N" + ], + "clasification": "Sample collection and processing", + "label": "Geo Loc Latitude" + }, + "environmental_material": { + "enum": [ + "Air vent [ENVO:03501208]", + "Banknote [ENVO:00003896]", + "Bed rail [ENVO:03501209]", + "Building Floor [ENVO:01000486]", + "Cloth [ENVO:02000058]", + "Control Panel [ENVO:03501210]", + "Door [ENVO:03501220]", + "Door Handle [ENVO:03501211]", + "Face Mask [OBI:0002787]", + "Face Shield [OBI:0002791]", + "Food [FOODON:00002403]", + "Food Packaging [FOODON:03490100]", + "Glass [ENVO:01000481]", + "Handrail [ENVO:03501212]", + "Hospital Gown [OBI:0002796]", + "Light Switch [ENVO:03501213]", + "Locker [ENVO:03501214]", + "N95 Mask [OBI:0002790]", + "Nurse Call Button [ENVO:03501215]", + "Paper [ENVO:03501256]", + "Particulate Matter [ENVO:01000060]", + "Plastic [ENVO:01000404]", + "PPE Gown [GENEPIO:0100025]", + "Sewage [ENVO:00002018]", + "Sink [ENVO:01000990]", + "Soil [ENVO:00001998]", + "Stainless Steel [ENVO:03501216]", + "Tissue Paper [ENVO:03501217]", + "Toilet Bowl [ENVO:03501218]", + "Water [ENVO:00002006]", + "Wastewater [ENVO:00002001]", + "Window [ENVO:03501219]", + "Wood [ENVO:00002040]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001223", + "type": "string", + "description": "A substance obtained from the natural or man-made environment e.g. 
soil, water, sewage, door handle, bed handrail, face mask.", + "examples": [ + "Face Mask [OBI:0002787]" + ], + "clasification": "Sample collection and processing", + "label": "Environmental Material" + }, + "sequence_submitter_contact_email": { + "examples": [ + "RespLab@lab.ca" + ], + "ontology": "GENEPIO:0001165", + "type": "string", + "description": "The email address of the contact responsible for follow-up regarding the sequence.", + "clasification": "Sample collection and processing", + "label": "Submitting Institution Email" + }, + "environmental_site": { + "enum": [ + "Acute care facility [ENVO:03501135]", + "Animal house [ENVO:00003040]", + "Bathroom [ENVO:01000422]", + "Clinical assessment centre [ENVO:03501136]", + "Conference venue [ENVO:03501127]", + "Corridor [ENVO:03501121]", + "Daycare [ENVO:01000927]", + "Emergency room (ER) [ENVO:03501145]", + "Family practice clinic [ENVO:03501186]", + "Group home [ENVO:03501196]", + "Homeless shelter [ENVO:03501133]", + "Hospital [ENVO:00002173]", + "Intensive Care Unit (ICU) [ENVO:03501152]", + "Long Term Care Facility [ENVO:03501194]", + "Patient room [ENVO:03501180]", + "Prison [ENVO:03501204]", + "Production Facility [ENVO:01000536]", + "School [ENVO:03501130]", + "Sewage Plant [ENVO:00003043]", + "Subway train [ENVO:03501109]", + "Wet market [ENVO:03501198]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001232", + "type": "string", + "description": "An environmental location may describe a site in the natural or built environment e.g. 
hospital, wet market, bat cave.", + "examples": [ + "Hospital [ENVO:00002173]" + ], + "classification": "Sample collection and processing", + "label": "Anthropogenic Geographic Feature" + }, + "pre_existing_conditions_and_risk_factors": { + "enum": [ + "Age 60+ [VO:0004925]", + "Anemia [HP:0001903]", + "Anorexia [HP:0002039]", + "Birthing labor [NCIT:C92743]", + "Bone marrow failure [NCIT:C80693]", + "Cancer [MONDO:0004992]", + "Breast cancer [MONDO:0007254]", + "Colorectal cancer [MONDO:0005575]", + "Hematologic malignancy [DOID:2531]", + "Lung cancer [MONDO:0008903]", + "Metastatic disease [MONDO:0024880]", + "Cancer treatment [NCIT:C16212]", + "Cancer surgery [NCIT:C157740]", + "Chemotherapy [NCIT:C15632]", + "Adjuvant chemotherapy [NCIT:C15360]", + "Cardiac disorder [NCIT:C3079]", + "Arrhythmia [HP:0011675]", + "Cardiac disease [MONDO:0005267]", + "Cardiomyopathy [HP:0001638]", + "Cardiac injury [GENEPIO:0100074]", + "Hypertension (high blood pressure) [HP:0000822]", + "Hypotension (low blood pressure) [HP:0002615]", + "Cesarean section [HP:0011410]", + "Chronic cough [GENEPIO:0100075]", + "Chronic gastrointestinal disease [GENEPIO:0100076]", + "Chronic lung disease [HP:0006528]", + "Corticosteroids [NCIT:C211]", + "Diabetes mellitus (diabetes) [HP:0000819]", + "Type I diabetes mellitus (T1D) [HP:0100651]", + "Type II diabetes mellitus (T2D) [HP:0005978]", + "Eczema [HP:0000964]", + "Electrolyte disturbance [HP:0003111]", + "Hypocalcemia [HP:0002901]", + "Hypokalemia [HP:0002900]", + "Hypomagnesemia [HP:0002917]", + "Encephalitis (brain inflammation) [HP:0002383]", + "Epilepsy [MONDO:0005027]", + "Hemodialysis [NCIT:C15248]", + "Hemoglobinopathy [MONDO:0044348]", + "Human immunodeficiency virus (HIV) [MONDO:0005109]", + "Acquired immunodeficiency syndrome (AIDS) [MONDO:0012268]", + "HIV and antiretroviral therapy (ART) [NCIT:C16118]", + "Immunocompromised [NCIT:C14139]", + "Lupus [MONDO:0004670]", + "Inflammatory bowel disease (IBD) [MONDO:0005265]", + "Colitis 
[HP:0002583]", + "Ulcerative colitis [HP:0100279]", + "Crohn's disease [HP:0100280]", + "Renal disorder [NCIT:C3149]", + "Renal disease [MONDO:0005240]", + "Chronic renal disease [HP:0012622]", + "Renal failure [HP:0000083]", + "Liver disease [MONDO:0005154]", + "Chronic liver disease [NCIT:C113609]", + "Fatty liver disease (FLD) [HP:0001397]", + "Myalgia (muscle pain) [HP:0003326]", + "Myalgic encephalomyelitis (chronic fatigue syndrome) [MONDO:0005404]", + "Neurological disorder [MONDO:0005071]", + "Neuromuscular disorder [MONDO:0019056]", + "Obesity [HP:0001513]", + "Severe obesity [MONDO:0005139]", + "Respiratory disorder [MONDO:0005087]", + "Asthma [HP:0002099]", + "Chronic bronchitis [HP:0004469]", + "Chronic pulmonary disease [HP:0006528]", + "Chronic obstructive pulmonary disease [HP:0006510]", + "Emphysema [HP:0002097]", + "Lung disease [MONDO:0005275]", + "Chronic lung disease [HP:0006528]", + "Pulmonary fibrosis [HP:0002206]", + "Pneumonia [HP:0002090]", + "Respiratory failure [HP:0002878]", + "Adult respiratory distress syndrome [HP:0033677]", + "Newborn respiratory distress syndrome [MONDO:0009971]", + "Tuberculosis [MONDO:0018076]", + "Postpartum (\u22646 weeks) [GENEPIO:0100077]", + "Pregnancy [NCIT:C25742]", + "Rheumatic disease [MONDO:0005554]", + "Sickle cell disease [MONDO:0011382]", + "Substance use [NBO:0001845]", + "Alcohol abuse [MONDO:0002046]", + "Drug abuse [GENEPIO:0100078]", + "Injection drug abuse [GENEPIO:0100079]", + "Smoking [NBO:0015005]", + "Vaping [NCIT:C173621]", + "Tachypnea (accelerated respiratory rate) [HP:0002789]", + "Transplant [NCIT:C159659]", + "Cardiac transplant [GENEPIO:0100080]", + "Hematopoietic stem cell transplant (bone marrow transplant) [NCIT:C131759]", + "Kidney transplant [NCIT:C157332]", + "Liver transplant [GENEPIO:0100081]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + 
], + "ontology": "GENEPIO:0001401", + "type": "string", + "description": "Patient pre-existing conditions and risk factors.\nPre-existing condition: A medical condition that existed prior to the current infection.\nRisk Factor: A variable associated with an increased risk of disease or infection.", + "examples": [ + "Asthma [HP:0002099]" + ] + }, + "first_dose_vaccination_date": { + "examples": [ + "26/02/2021" + ], + "ontology": "GENEPIO:0001407", + "type": "string", + "description": "The date the host was first vaccinated.", + "format": "date" + }, + "variant_designation": { + "enum": [ + "Variant of Interest (VOI) [GENEPIO:0100082]", + "Variant of Concern (VOC) [GENEPIO:0100083]", + "Variant Under Monitoring (VUM) [GENEPIO:0100279]" + ], + "ontology": "GENEPIO:0001503", + "type": "string", + "description": "The variant classification of the lineage/clade i.e. variant, variant of concern.", + "examples": [ + "Variant of Concern (VOC) [GENEPIO:0100083]" + ], + "classification": "Lineage and Variant information", + "label": "Variant designation" + }, + "variant_calling": { + "examples": [ + "" + ], + "ontology": "operation_3227", + "type": "string", + "description": "", + "clasification": "Bioinformatics and QC metrics", + "label": "Variant Calling" + }, + "if_variant_calling_other": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Bioinformatics and QC metrics", + "label": "If variant calling Is Other, Specify" + }, + "variant_calling_params": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "clasification": "Bioinformatics and QC metrics", + "label": "Variant Calling params" + }, + "sample_collected_in_quarantine": { + "enum": [ + "Yes [NCIT:C49488]", + "No [NCIT:C49487]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + 
"ontology": "GENEPIO:0100277", + "type": "string", + "description": "Whether the sample was collected from an individual in quarantine.", + "examples": [ + "Yes [NCIT:C49488]" + ], + "label": "Sample collected in quarantine" + }, + "sample_collected_by": { + "examples": [ + "Public Health Agency of Canada" + ], + "ontology": "GENEPIO:0001153", + "type": "string", + "description": "The name of the agency that collected the original sample.", + "label": "Originating Laboratory" + }, + "sequencing_instrument": { + "enum": [ + "Illumina sequencing instrument [GENEPIO:0100105]", + "Illumina Genome Analyzer [GENEPIO:0100106]", + "Illumina Genome Analyzer II [GENEPIO:0100107]", + "Illumina Genome Analyzer IIx [GENEPIO:0100108]", + "Illumina HiScanSQ [GENEPIO:0100109]", + "Illumina HiSeq [GENEPIO:0100110]", + "Illumina HiSeq X [GENEPIO:0100111]", + "Illumina HiSeq X Five [GENEPIO:0100112]", + "Illumina HiSeq X Ten [GENEPIO:0100113]", + "Illumina HiSeq 1000 [GENEPIO:0100114]", + "Illumina HiSeq 1500 [GENEPIO:0100115]", + "Illumina HiSeq 2000 [GENEPIO:0100116]", + "Illumina HiSeq 2500 [GENEPIO:0100117]", + "Illumina HiSeq 3000 [GENEPIO:0100118]", + "Illumina HiSeq 4000 [GENEPIO:0100119]", + "Illumina iSeq [GENEPIO:0100120]", + "Illumina iSeq 100 [GENEPIO:0100121]", + "Illumina NovaSeq [GENEPIO:0100122]", + "Illumina NovaSeq 6000 [GENEPIO:0100123]", + "Illumina MiniSeq [GENEPIO:0100124]", + "Illumina MiSeq [GENEPIO:0100125]", + "Illumina NextSeq [GENEPIO:0100126]", + "Illumina NextSeq 500 [GENEPIO:0100127]", + "Illumina NextSeq 550 [GENEPIO:0100128]", + "Illumina NextSeq 2000 [GENEPIO:0100129]", + "Pacific Biosciences sequencing instrument [GENEPIO:0100130]", + "PacBio RS [GENEPIO:0100131]", + "PacBio RS II [GENEPIO:0100132]", + "PacBio Sequel [GENEPIO:0100133]", + "PacBio Sequel II [GENEPIO:0100134]", + "Ion Torrent sequencing instrument [GENEPIO:0100135]", + "Ion Torrent PGM [GENEPIO:0100136]", + "Ion Torrent Proton [GENEPIO:0100137]", + "Ion Torrent S5 XL 
[GENEPIO:0100138]", + "Ion Torrent S5 [GENEPIO:0100139]", + "Oxford Nanopore sequencing instrument [GENEPIO:0100140]", + "Oxford Nanopore GridION [GENEPIO:0100141]", + "Oxford Nanopore MinION [GENEPIO:0100142]", + "Oxford Nanopore PromethION [GENEPIO:0100143]", + "BGI Genomics sequencing instrument [GENEPIO:0100144]", + "BGI SEQ-500 [GENEPIO:0100145]", + "MGI sequencing instrument [GENEPIO:0100146]", + "MGI DNBSEQ-T7 [GENEPIO:0100147]", + "MGI DNBSEQ-G400 [GENEPIO:0100148]", + "MGI DNBSEQ-G400RS FAST [GENEPIO:0100149]", + "MGI DNBSEQ-G50 [GENEPIO:0100150]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001452", + "type": "string", + "description": "The model of the sequencing instrument used.", + "examples": [ + "Oxford Nanopore MinION [GENEPIO:0100142]" + ], + "classification": "Sequencing", + "label": "Sequencing Instrument Model" + }, + "host_health_status_details": { + "enum": [ + "Hospitalized [NCIT:C25179]", + "Hospitalized (Non-ICU) [GENEPIO:0100045]", + "Hospitalized (ICU) [GENEPIO:0100046]", + "Mechanical Ventilation [NCIT:C70909]", + "Medically Isolated [GENEPIO:0100047]", + "Medically Isolated (Negative Pressure) [GENEPIO:0100048]", + "Self-quarantining [NCIT:C173768]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001389", + "type": "string", + "description": "Further details pertaining to the health or disease status of the host at time of collection.", + "examples": [ + "Hospitalized (ICU) [GENEPIO:0100046]" + ], + "label": "Host health status details" + }, + "isolate": { + "ontology": "GENEPIO:0001644", + "type": "string", + "description": "Identifier of the specific isolate.", + "examples": [ + 
"SARS-CoV-2/human/USA/CA-CDPH-001/2020" + ], + "clasification": "Sample collection and processing", + "label": "Sample ID given by originating laboratory" + }, + "host_health_state": { + "enum": [ + "Asymptomatic [NCIT:C3833]", + "Deceased [NCIT:C28554]", + "Healthy [NCIT:C115935]", + "Recovered [NCIT:C49498]", + "Symptomatic [NCIT:C25269]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001388", + "type": "string", + "description": "Health status of the host at the time of sample collection.", + "examples": [ + "Asymptomatic [NCIT:C3833]" + ] + }, + "signs_and_symptoms": { + "enum": [ + "Abnormal lung auscultation [HP:0030829]", + "Ageusia (complete loss of taste) [HP:0041051]", + "Parageusia (distorted sense of taste) [HP:0031249]", + "Hypogeusia (reduced sense of taste) [HP:0000224]", + "Abnormality of the sense of smell [HP:0004408]", + "Anosmia (lost sense of smell) [HP:0000458]", + "Hyposmia (reduced sense of smell) [HP:0004409]", + "Acute Respiratory Distress Syndrome [HP:0033677]", + "Altered mental status [HP:0011446]", + "Arrhythmia [HP:0011675]", + "Cognitive impairment [HP:0100543]", + "Coma [HP:0001259]", + "Confusion [HP:0001289]", + "Delirium (sudden severe confusion) [HP:0031258]", + "Inability to arouse (inability to stay awake) [GENEPIO:0100061]", + "Irritability [HP:0000737]", + "Loss of speech [HP:0002371]", + "Asthenia (generalized weakness) [HP:0025406]", + "Chest tightness or pressure [HP:0031352]", + "Rigors (fever shakes) [HP:0025145]", + "Chills (sudden cold sensation) [HP:0025143]", + "Conjunctival injection [HP:0030953]", + "Conjunctivitis (pink eye) [HP:0000509]", + "Coryza [MP:0001867]", + "Cough [HP:0012735]", + "Nonproductive cough (dry cough) [HP:0031246]", + "Productive cough (wet cough) [HP:0031245]", + "Cyanosis (blueish skin discolouration) [HP:0000961]", + 
"Acrocyanosis [HP:0001063]", + "Circumoral cyanosis (bluish around mouth) [HP:0032556]", + "Cyanotic face (bluish face) [GENEPIO:0100062]", + "Central Cyanosis [GENEPIO:0100063]", + "Cyanotic lips (bluish lips) [GENEPIO:0100064]", + "Peripheral Cyanosis [GENEPIO:0100065]", + "Dyspnea (breathing difficulty) [HP:0002094]", + "Diarrhea (watery stool) [HP:0002014]", + "Dry gangrene [MP:0031127]", + "Encephalitis (brain inflammation) [HP:0002383]", + "Encephalopathy [HP:0001298]", + "Fatigue (tiredness) [HP:0012378]", + "Fever [HP:0001945]", + "Fever (>=38\u00baC) [GENEPIO:0100066]", + "Glossitis (inflammation of the tongue) [HP:0000206]", + "Ground Glass Opacities (GGO) [GENEPIO:0100067]", + "Headache [HP:0002315]", + "Hemoptysis (coughing up blood) [HP:0002105]", + "Hypocapnia [HP:0012417]", + "Hypotension (low blood pressure) [HP:0002615]", + "Hypoxemia (low blood oxygen) [HP:0012418]", + "Silent hypoxemia [GENEPIO:0100068]", + "Internal hemorrhage (internal bleeding) [HP:0011029]", + "Loss of Fine Movements [NCIT:C121416]", + "Low appetite [HP:0004396]", + "Malaise (general discomfort/unease) [HP:0033834]", + "Meningismus/nuchal rigidity [HP:0031179]", + "Muscle weakness [HP:0001324]", + "Nasal obstruction (stuffy nose) [HP:0001742]", + "Nausea [HP:0002018]", + "Nose bleed [HP:0000421]", + "Otitis [GENEPIO:0100069]", + "Pain [HP:0012531]", + "Abdominal pain [HP:0002027]", + "Arthralgia (painful joints) [HP:0002829]", + "Chest pain [HP:0100749]", + "Pleuritic chest pain [HP:0033771]", + "Myalgia (muscle pain) [HP:0003326]", + "Pharyngitis (sore throat) [HP:0025439]", + "Pharyngeal exudate [GENEPIO:0100070]", + "Pleural effusion [HP:0002202]", + "Pneumonia [HP:0002090]", + "Prostration [GENEPIO:0100071]", + "Pseudo-chilblains [HP:0033696]", + "Pseudo-chilblains on fingers (covid fingers) [GENEPIO:0100072]", + "Pseudo-chilblains on toes (covid toes) [GENEPIO:0100073]", + "Rash [HP:0000988]", + "Rhinorrhea (runny nose) [HP:0031417]", + "Seizure [HP:0001250]", + "Motor 
seizure [HP:0020219]", + "Shivering (involuntary muscle twitching) [HP:0025144]", + "Slurred speech [HP:0001350]", + "Sneezing [HP:0025095]", + "Sputum Production [HP:0033709]", + "Stroke [HP:0001297]", + "Swollen Lymph Nodes [HP:0002716]", + "Tachypnea (accelerated respiratory rate) [HP:0002789]", + "Vertigo (dizziness) [HP:0002321]", + "Vomiting (throwing up) [HP:0002013]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001400", + "type": "string", + "description": "A perceived change in function or sensation, (loss, disturbance or appearance) indicative of a disease, reported by a patient.", + "examples": [ + "Cough [HP:0012735], Fever [HP:0001945], Rigors (fever shakes) [HP:0025145]" + ] + }, + "dehosting_method": { + "examples": [ + "Nanostripper" + ], + "ontology": "GENEPIO:0001459", + "type": "string", + "description": "The method used to remove host reads from the pathogen sequence.", + "clasification": "Bioinformatics and QC metrics", + "label": "Dehosting Method" + }, + "amplicon_pcr_primer_scheme": { + "examples": [ + "https://github.com/joshquick/artic-ncov2019/blob/master/primer_schemes/nCoV-2019/V3/nCoV-2019.tsv" + ], + "ontology": "GENEPIO:0001456", + "type": "string", + "description": "The specifications of the primers (primer sequences, binding positions, fragment size generated etc) used to generate the amplicons to be sequenced.", + "clasification": "Sequencing", + "label": "Amplicon Pcr Primer Scheme" + }, + "vaccine_name": { + "ontology": "GENEPIO:0001405", + "type": "string", + "description": "The name of the vaccine(s) administered.", + "examples": [ + "BNT162b2, Pfizer-BioNTech" + ] + }, + "data_abstraction_details": { + "ontology": "GENEPIO:0100278", + "type": "string", + "description": "A description of how any data elements were altered to preserve patient privacy.", + 
"examples": [ + "Jitter added to publicly shared collection dates to prevent re-identifiability." + ], + "label": "Data abstraction details" + }, + "consensus_sequence_filename": { + "ontology": "GENEPIO:0001461", + "type": "string", + "description": "The name of the consensus sequence file.", + "label": "Consensus sequence filename", + "examples": [ + "ncov123assembly.fasta" + ] + }, + "purpose_of_sequencing_details": { + "enum": [ + "Screened for S gene target failure (S dropout)", + "Screened for mink variants", + "Screened for B.1.1.7 variant", + "Screened for B.1.135 variant", + "Screened for P.1 variant", + "Screened due to travel history", + "Screened due to close contact with infected individual", + "Assessing public health control measures", + "Determining early introductions and spread", + "Investigating airline-related exposures", + "Investigating temporary foreign worker", + "Investigating remote regions", + "Investigating health care workers", + "Investigating schools/universities", + "Investigating reinfection" + ], + "ontology": "GENEPIO:0001446", + "type": "string", + "description": "The description of why the sample was sequenced providing specific details.", + "examples": [ + "Screened for S gene target failure (S dropout)" + ], + "clasification": "Sequencing", + "label": "Purpose Of Sequencing Details" + }, + "sequence_submitted_by": { + "examples": [ + "Centers for Disease Control and Prevention" + ], + "ontology": "GENEPIO:0001159", + "type": "string", + "description": "The name of the agency that generated the sequence.", + "label": "Submitting Institution" + }, + "sequencing_protocol_name": { + "examples": [ + "1D_DNA_MinION, ARTIC Network Protocol V3" + ], + "ontology": "GENEPIO:0001453", + "type": "string", + "description": "The name and version number of the sequencing protocol used.", + "clasification": "Sequencing", + "label": "Sequencing Protocol Name" + }, + "prior_sars_cov_2_infection_isolate": { + "ontology": "GENEPIO:0001436", + 
"type": "string", + "description": "The identifier of the isolate found in the prior SARS-CoV-2 infection.", + "examples": [ + "SARS-CoV-2/human/USA/CA-CDPH-001/2020" + ] + }, + "organism": { + "enum": [ + "Coronaviridae [NCBITaxon:11118]", + "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "NCIT:C43459", + "type": "string", + "description": "Taxonomic name of the organism.", + "examples": [ + "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]" + ], + "classification": "Sample collection and processing", + "label": "Organism" + }, + "purpose_of_sampling": { + "enum": [ + "Cluster/Outbreak Investigation [GENEPIO:0100001]", + "Diagnostic Testing [GENEPIO:0100002]", + "Research [GENEPIO:0100003]", + "Protocol Testing [GENEPIO:0100024]", + "Surveillance [GENEPIO:0100004]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001198", + "type": "string", + "description": "The reason that the sample was collected.", + "examples": [ + "Diagnostic Testing [GENEPIO:0100002]" + ] + } + } +} \ No newline at end of file diff --git a/relecov_tools/schema/relecov_schema.json b/relecov_tools/schema/relecov_schema.json new file mode 100755 index 00000000..27b9bbe8 --- /dev/null +++ b/relecov_tools/schema/relecov_schema.json @@ -0,0 +1,2977 @@ +{ + "schema": "https://json-schema.org/draft/2020-12/schema", + "id": "https://github.com/BU-ISCIII/relecov-tools/blob/develop/relecov_tools/schema/relecov_schema.json", + "title": "RELECOV schema", + "description":"Json schema that specifies the structure, content, and validation rules for RELECOV metadata", + "version": "2.0.0", + "required": [ + 
"collecting_lab_sample_id", + "sequencing_sample_id", + "collecting_institution", + "submitting_institution", + "sample_collection_date", + "geo_loc_country", + "geo_loc_state", + "organism", + "isolate_sample_id", + "host_scientific_name", + "sequencing_instrument_model", + "sequencing_instrument_platform", + "library_source", + "library_layout", + "enrichment_panel", + "enrichment_panel_version" + ], + "type": "object", + "properties": { + "public_health_sample_id_sivies": { + "examples": [ + "2022CEU03926" + ], + "ontology": "0", + "type": "string", + "description": "", + "classification": "Sample collection and processing", + "label": "Public Health sample id (SIVIES)", + "fill_mode": "sample", + "minLenght": "1" + }, + "collecting_lab_sample_id": { + "examples": [ + "prov_rona_99" + ], + "ontology": "GENEPIO:0001123", + "type": "string", + "description": "The name given for the sample by the collecting institution.", + "classification": "Database Identifiers", + "label": "Sample ID given by originating laboratory", + "fill_mode": "sample", + "minLenght": "1" + }, + "collecting_institution": { + "examples": [ + "Public Health Agency of Canada" + ], + "ontology": "GENEPIO:0001153", + "type": "string", + "description": "The name of the agency/institution that collected the sample.", + "classification": "Sample collection and processing", + "label": "Originating Laboratory", + "fill_mode": "sample", + "minLenght": "1" + }, + "collecting_institution_email": { + "examples": [ + "johnnyblogs@lab.ca" + ], + "ontology": "OBI:0001890", + "type": "string", + "description": "The email address of the contact responsible for follow-up regarding the sample.", + "classification": "Sample collection and processing", + "label": "Originating Laboratory Email", + "fill_mode": "batch" + }, + "collecting_institution_address": { + "examples": [ + "655 Lab St, Vancouver, British Columbia, V5N 2A2, Canada" + ], + "ontology": "GENEPIO:0001158", + "type": "string", + "description": "The 
mailing address of the agency submitting the sample.", + "classification": "Sample collection and processing", + "label": "Originating Laboratory Address", + "fill_mode": "batch" + }, + "microbiology_lab_sample_id": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "classification": "Sample collection and processing", + "label": "Sample ID given in the microbiology lab", + "fill_mode": "sample", + "minLenght": "1" + }, + "isolate_sample_id": { + "examples": [ + "" + ], + "ontology": "GENEPIO:0001644", + "type": "string", + "description": "", + "classification": "Database Identifiers", + "label": "Sample ID given if multiple rna-extraction or passages", + "fill_mode": "sample", + "minLenght": "1" + }, + "sequencing_sample_id": { + "examples": [ + "" + ], + "ontology": "GENEPIO:0000079", + "type": "string", + "description": "If the information is unknown or can not be shared, leave blank.", + "classification": "Database Identifiers", + "label": "Sample ID given for sequencing", + "fill_mode": "sample", + "minLenght": "1" + }, + "sequencing_institution": { + "ontology": "GENEPIO:0100416", + "type": "string", + "description": "The name of the agency that generated the sequence", + "classification": "Sequencing", + "label": "Sequencing Institution", + "fill_mode": "sample", + "minLenght": "1" + }, + "submitting_lab_sample_id": { + "examples": [ + "" + ], + "ontology": "GENEPIO:0001148", + "type": "string", + "description": "Sample ID given by the submitting laboratory", + "classification": "Sequencing", + "label": "Sample ID given by the submitting laboratory", + "fill_mode": "sample", + "minLenght": "1" + }, + "submitting_institution": { + "examples": [ + "Centers for Disease Control and Prevention" + ], + "ontology": "GENEPIO:0001159", + "type": "string", + "description": "The name of the agency that submitted the sequence to public databases.", + "classification": "Sample collection and processing", + "label": "Submitting 
Institution", + "fill_mode": "sample", + "minLenght": "1" + }, + "submitting_institution_email": { + "examples": [ + "RespLab@lab.ca" + ], + "ontology": "GENEPIO:0001165", + "type": "string", + "description": "The email address of the contact responsible for follow-up regarding the sequence.", + "classification": "Sample collection and processing", + "label": "Submitting Institution Email", + "fill_mode": "batch" + }, + "submitting_institution_address": { + "examples": [ + "123 Sunnybrooke St, Toronto, Ontario, M4P 1L6, Canada" + ], + "ontology": "GENEPIO:0001167", + "type": "string", + "description": "The mailing address of the agency submitting the sequence.", + "classification": "Sample collection and processing", + "label": "Submitting Institution Address", + "fill_mode": "batch" + }, + "sample_collection_date": { + "examples": [ + "3/19/2020" + ], + "ontology": "GENEPIO:0001174", + "type": "string", + "description": "The date on which the sample was collected.", + "format": "date", + "classification": "Sample collection and processing", + "label": "Sample Collection Date", + "fill_mode": "sample", + "minLenght": "1" + }, + "sample_received_date": { + "examples": [ + "3/21/2020" + ], + "ontology": "NCIT:C93644", + "type": "string", + "description": "The date on which the sample was received.", + "format": "date", + "classification": "Sample collection and processing", + "label": "Sample Received Date", + "fill_mode": "sample" + }, + "sample_storage_conditions": { + "examples": [ + "24 degrees celsius" + ], + "ontology": "NCIT:C115535", + "type": "string", + "description": "The conditions under which the sample was stored (e.g. temperature).", + "classification": "Sample collection and processing", + "label": "Biological Sample Storage Condition", + "fill_mode": "batch" + }, + "geo_loc_country": { + "enum": [ + "Afghanistan [GAZ:00006882]", + "Albania [GAZ:00002953]", + "Algeria [GAZ:00000563]", + "American Samoa [GAZ:00003957]", + "Andorra [GAZ:00002948]", +
"Angola [GAZ:00001095]", + "Anguilla [GAZ:00009159]", + "Antarctica [GAZ:00000462]", + "Antigua and Barbuda [GAZ:00006883]", + "Argentina [GAZ:00002928]", + "Armenia [GAZ:00004094]", + "Aruba [GAZ:00004025]", + "Ashmore and Cartier Islands [GAZ:00005901]", + "Australia [GAZ:00000463]", + "Austria [GAZ:00002942]", + "Azerbaijan [GAZ:00004941]", + "Bahamas [GAZ:00002733]", + "Bahrain [GAZ:00005281]", + "Baker Island [GAZ:00007117]", + "Bangladesh [GAZ:00003750]", + "Barbados [GAZ:00001251]", + "Bassas da India [GAZ:00005810]", + "Belarus [GAZ:00006886]", + "Belgium [GAZ:00002938]", + "Belize [GAZ:00002934]", + "Benin [GAZ:00000904]", + "Bermuda [GAZ:00001264]", + "Bhutan [GAZ:00003920]", + "Bolivia [GAZ:00002511]", + "Borneo [GAZ:00025355]", + "Bosnia and Herzegovina [GAZ:00006887]", + "Botswana [GAZ:00001097]", + "Bouvet Island [GAZ:00001453]", + "Brazil [GAZ:00002828]", + "British Virgin Islands [GAZ:00003961]", + "Brunei [GAZ:00003901]", + "Bulgaria [GAZ:00002950]", + "Burkina Faso [GAZ:00000905]", + "Burundi [GAZ:00001090]", + "Cambodia [GAZ:00006888]", + "Cameroon [GAZ:00001093]", + "Canada [GAZ:00002560]", + "Cape Verde [GAZ:00001227]", + "Cayman Islands [GAZ:00003986]", + "Central African Republic [GAZ:00001089]", + "Chad [GAZ:00000586]", + "Chile [GAZ:00002825]", + "China [GAZ:00002845]", + "Christmas Island [GAZ:00005915]", + "Clipperton Island [GAZ:00005838]", + "Cocos Islands [GAZ:00009721]", + "Colombia [GAZ:00002929]", + "Comoros [GAZ:00005820]", + "Cook Islands [GAZ:00053798]", + "Coral Sea Islands [GAZ:00005917]", + "Costa Rica [GAZ:00002901]", + "Cote d'Ivoire [GAZ:00000906]", + "Croatia [GAZ:00002719]", + "Cuba [GAZ:00003762]", + "Curacao [GAZ:00012582]", + "Cyprus [GAZ:00004006]", + "Czech Republic [GAZ:00002954]", + "Democratic Republic of the Congo [GAZ:00001086]", + "Denmark [GAZ:00005852]", + "Djibouti [GAZ:00000582]", + "Dominica [GAZ:00006890]", + "Dominican Republic [GAZ:00003952]", + "Ecuador [GAZ:00002912]", + "Egypt [GAZ:00003934]", + "El 
Salvador [GAZ:00002935]", + "Equatorial Guinea [GAZ:00001091]", + "Eritrea [GAZ:00000581]", + "Estonia [GAZ:00002959]", + "Eswatini [GAZ:00001099]", + "Ethiopia [GAZ:00000567]", + "Europa Island [GAZ:00005811]", + "Falkland Islands [GAZ:00001412]", + "Faroe Islands [GAZ:00059206]", + "Fiji [GAZ:00006891]", + "Finland [GAZ:00002937]", + "France [GAZ:00003940]", + "French Guiana [GAZ:00002516]", + "French Polynesia [GAZ:00002918]", + "French Southern and Antarctic Lands [GAZ:00003753]", + "Gabon [GAZ:00001092]", + "Gambia [GAZ:00000907]", + "Gaza Strip [GAZ:00009571]", + "Georgia [GAZ:00004942]", + "Germany [GAZ:00002646]", + "Ghana [GAZ:00000908]", + "Gibraltar [GAZ:00003987]", + "Glorioso Islands [GAZ:00005808]", + "Greece [GAZ:00002945]", + "Greenland [GAZ:00001507]", + "Grenada [GAZ:02000573]", + "Guadeloupe [GAZ:00067142]", + "Guam [GAZ:00003706]", + "Guatemala [GAZ:00002936]", + "Guernsey [GAZ:00001550]", + "Guinea [GAZ:00000909]", + "Guinea-Bissau [GAZ:00000910]", + "Guyana [GAZ:00002522]", + "Haiti [GAZ:00003953]", + "Heard Island and McDonald Islands [GAZ:00009718]", + "Honduras [GAZ:00002894]", + "Hong Kong [GAZ:00003203]", + "Howland Island [GAZ:00007120]", + "Hungary [GAZ:00002952]", + "Iceland [GAZ:00000843]", + "India [GAZ:00002839]", + "Indonesia [GAZ:00003727]", + "Iran [GAZ:00004474]", + "Iraq [GAZ:00004483]", + "Ireland [GAZ:00002943]", + "Isle of Man [GAZ:00052477]", + "Israel [GAZ:00002476]", + "Italy [GAZ:00002650]", + "Jamaica [GAZ:00003781]", + "Jan Mayen [GAZ:00005853]", + "Japan [GAZ:00002747]", + "Jarvis Island [GAZ:00007118]", + "Jersey [GAZ:00001551]", + "Johnston Atoll [GAZ:00007114]", + "Jordan [GAZ:00002473]", + "Juan de Nova Island [GAZ:00005809]", + "Kazakhstan [GAZ:00004999]", + "Kenya [GAZ:00001101]", + "Kerguelen Archipelago [GAZ:00005682]", + "Kingman Reef [GAZ:00007116]", + "Kiribati [GAZ:00006894]", + "Kosovo [GAZ:00011337]", + "Kuwait [GAZ:00005285]", + "Kyrgyzstan [GAZ:00006893]", + "Laos [GAZ:00006889]", + "Latvia 
[GAZ:00002958]", + "Lebanon [GAZ:00002478]", + "Lesotho [GAZ:00001098]", + "Liberia [GAZ:00000911]", + "Libya [GAZ:00000566]", + "Liechtenstein [GAZ:00003858]", + "Line Islands [GAZ:00007144]", + "Lithuania [GAZ:00002960]", + "Luxembourg [GAZ:00002947]", + "Macau [GAZ:00003202]", + "Madagascar [GAZ:00001108]", + "Malawi [GAZ:00001105]", + "Malaysia [GAZ:00003902]", + "Maldives [GAZ:00006924]", + "Mali [GAZ:00000584]", + "Malta [GAZ:00004017]", + "Marshall Islands [GAZ:00007161]", + "Martinique [GAZ:00067143]", + "Mauritania [GAZ:00000583]", + "Mauritius [GAZ:00003745]", + "Mayotte [GAZ:00003943]", + "Mexico [GAZ:00002852]", + "Micronesia [GAZ:00005862]", + "Midway Islands [GAZ:00007112]", + "Moldova [GAZ:00003897]", + "Monaco [GAZ:00003857]", + "Mongolia [GAZ:00008744]", + "Montenegro [GAZ:00006898]", + "Montserrat [GAZ:00003988]", + "Morocco [GAZ:00000565]", + "Mozambique [GAZ:00001100]", + "Myanmar [GAZ:00006899]", + "Namibia [GAZ:00001096]", + "Nauru [GAZ:00006900]", + "Navassa Island [GAZ:00007119]", + "Nepal [GAZ:00004399]", + "Netherlands [GAZ:00002946]", + "New Caledonia [GAZ:00005206]", + "New Zealand [GAZ:00000469]", + "Nicaragua [GAZ:00002978]", + "Niger [GAZ:00000585]", + "Nigeria [GAZ:00000912]", + "Niue [GAZ:00006902]", + "Norfolk Island [GAZ:00005908]", + "North Korea [GAZ:00002801]", + "North Macedonia [GAZ:00006895]", + "North Sea [GAZ:00002284]", + "Northern Mariana Islands [GAZ:00003958]", + "Norway [GAZ:00002699]", + "Oman [GAZ:00005283]", + "Pakistan [GAZ:00005246]", + "Palau [GAZ:00006905]", + "Panama [GAZ:00002892]", + "Papua New Guinea [GAZ:00003922]", + "Paracel Islands [GAZ:00010832]", + "Paraguay [GAZ:00002933]", + "Peru [GAZ:00002932]", + "Philippines [GAZ:00004525]", + "Pitcairn Islands [GAZ:00005867]", + "Poland [GAZ:00002939]", + "Portugal [GAZ:00004126]", + "Puerto Rico [GAZ:00006935]", + "Qatar [GAZ:00005286]", + "Republic of the Congo [GAZ:00001088]", + "Reunion [GAZ:00003945]", + "Romania [GAZ:00002951]", + "Ross Sea 
[GAZ:00023304]", + "Russia [GAZ:00002721]", + "Rwanda [GAZ:00001087]", + "Saint Helena [GAZ:00000849]", + "Saint Kitts and Nevis [GAZ:00006906]", + "Saint Lucia [GAZ:00006909]", + "Saint Pierre and Miquelon [GAZ:00003942]", + "Saint Martin [GAZ:00005841]", + "Saint Vincent and the Grenadines [GAZ:02000565]", + "Samoa [GAZ:00006910]", + "San Marino [GAZ:00003102]", + "Sao Tome and Principe [GAZ:00006927]", + "Saudi Arabia [GAZ:00005279]", + "Senegal [GAZ:00000913]", + "Serbia [GAZ:00002957]", + "Seychelles [GAZ:00006922]", + "Sierra Leone [GAZ:00000914]", + "Singapore [GAZ:00003923]", + "Sint Maarten [GAZ:00012579]", + "Slovakia [GAZ:00002956]", + "Slovenia [GAZ:00002955]", + "Solomon Islands [GAZ:00005275]", + "Somalia [GAZ:00001104]", + "South Africa [GAZ:00001094]", + "South Georgia and the South Sandwich Islands [GAZ:00003990]", + "South Korea [GAZ:00002802]", + "South Sudan [GAZ:00233439]", + "Spain [GAZ:00003936]", + "Spratly Islands [GAZ:00010831]", + "Sri Lanka [GAZ:00003924]", + "State of Palestine [GAZ:00002475]", + "Sudan [GAZ:00000560]", + "Suriname [GAZ:00002525]", + "Svalbard [GAZ:00005396]", + "Swaziland [GAZ:00001099]", + "Sweden [GAZ:00002729]", + "Switzerland [GAZ:00002941]", + "Syria [GAZ:00002474]", + "Taiwan [GAZ:00005341]", + "Tajikistan [GAZ:00006912]", + "Tanzania [GAZ:00001103]", + "Thailand [GAZ:00003744]", + "Timor-Leste [GAZ:00006913]", + "Togo [GAZ:00000915]", + "Tokelau [GAZ:00260188]", + "Tonga [GAZ:00006916]", + "Trinidad and Tobago [GAZ:00003767]", + "Tromelin Island [GAZ:00005812]", + "Tunisia [GAZ:00000562]", + "Turkey [GAZ:00000558]", + "Turkmenistan [GAZ:00005018]", + "Turks and Caicos Islands [GAZ:00003955]", + "Tuvalu [GAZ:00009715]", + "USA [GAZ:00002459]", + "Uganda [GAZ:00001102]", + "Ukraine [GAZ:00002724]", + "United Arab Emirates [GAZ:00005282]", + "United Kingdom [GAZ:00002637]", + "Uruguay [GAZ:00002930]", + "Uzbekistan [GAZ:00004979]", + "Vanuatu [GAZ:00006918]", + "Venezuela [GAZ:00002931]", + "Viet Nam 
[GAZ:00003756]", + "Virgin Islands [GAZ:00003959]", + "Wake Island [GAZ:00007111]", + "Wallis and Futuna [GAZ:00007191]", + "West Bank [GAZ:00009572]", + "Western Sahara [GAZ:00000564]", + "Yemen [GAZ:00005284]", + "Zambia [GAZ:00001107]", + "Zimbabwe [GAZ:00001106]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001181", + "type": "string", + "description": "The country of origin of the sample.", + "examples": [ + "South Africa [GAZ:00001094]" + ], + "classification": "Sample collection and processing", + "label": "Country", + "fill_mode": "batch", + "minLenght": "1" + }, + "geo_loc_state": { + "examples": [ + "Western Cape" + ], + "ontology": "GENEPIO:0001185", + "type": "string", + "description": "The state/province/territory of origin of the sample.", + "classification": "Sample collection and processing", + "label": "Autonomic Community", + "fill_mode": "batch", + "minLenght": "1" + }, + "geo_loc_region": { + "examples": [ + "Derbyshire" + ], + "ontology": "NCIT:C87189", + "type": "string", + "description": "The county/region of origin of the sample.", + "classification": "Sample collection and processing", + "label": "Province", + "fill_mode": "batch" + }, + "geo_loc_city": { + "examples": [ + "Vancouver" + ], + "ontology": "GENEPIO:0001189", + "type": "string", + "description": "The city of origin of the sample.", + "classification": "Sample collection and processing", + "label": "City", + "fill_mode": "batch" + }, + "geo_loc_latitude": { + "examples": [ + "38.98 N" + ], + "ontology": "OBI:0001620", + "type": "string", + "description": "The latitude coordinates of the geographical location of sample collection.", + "classification": "Sample collection and processing", + "label": "Geo Loc Latitude", + "fill_mode": "batch" + }, + "study_type": { + "examples": [ + "" + ], + "enum": [ + "Whole Genome Sequencing [NCIT:C101294]", + 
"Metagenomics [NCIT:C153191]", + "Transcriptome Analysis [GENEPIO:0001111]", + "Resequencing [NCIT:C41254]", + "Epigenetics [OMIT:0027036] ", + "Synthetic Genomics [NCIT:C84343]", + "Forensic or Paleo-genomics [topic:3943]", + "Gene Regulation Study [topic:0204]", + "Cancer Genomics [NCIT:C18247]", + "Population Genomics [topic:3796]", + "RNASeq [OBI:0001177]", + "Exome Sequencing [OBI:0002118]", + "Pooled Clone Sequencing [OBI:2100402]", + "Transcriptome Sequencing [NCIT:C124261]", + "Other [NCIT:C17649]" + ], + "ontology": "GENEPIO:0000156", + "type": "string", + "description": "", + "classification": "Submission ENA", + "label": "Study type", + "fill_mode": "batch" + }, + "geo_loc_longitude": { + "examples": [ + "77.11 W" + ], + "ontology": "OBI:0001621", + "type": "string", + "description": "The longitude coordinates of the geographical location of sample collection.", + "classification": "Sample collection and processing", + "label": "Geo Loc Longitude", + "fill_mode": "batch" + }, + "purpose_sampling": { + "enum": [ + "Cluster/Outbreak Investigation [GENEPIO:0100001]", + "Diagnostic Testing [GENEPIO:0100002]", + "Research [GENEPIO:0100003]", + "Protocol Testing [GENEPIO:0100024]", + "Surveillance [GENEPIO:0100004]", + "Other [NCIT:C124261]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001198", + "type": "string", + "description": "The reason that the sample was collected.", + "examples": [ + "Diagnostic Testing [GENEPIO:0100002]" + ], + "classification": "Sample collection and processing", + "label": "Purpose of sampling", + "fill_mode": "batch" + }, + "anatomical_material": { + "enum": [ + "Blood [UBERON:0000178]", + "Fluid (Cerebrospinal (CSF)) [UBERON:0001359]", + "Fluid (Pericardial) [UBERON:0002409]", + "Fluid (Pleural) [UBERON:0001087]", + "Fluid (Vaginal) [UBERON:0036243]", + "Fluid 
(Amniotic) [UBERON:0000173]", + "Saliva [UBERON:0001836]", + "Tissue [UBERON:0000479]", + "Placenta [UBERON:0001987]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001211", + "type": "string", + "description": "A substance obtained from an anatomical part of an organism e.g. tissue, blood.", + "examples": [ + "Blood [UBERON:0000178]" + ], + "classification": "Sample collection and processing", + "label": "Anatomical Material", + "fill_mode": "batch" + }, + "specimen_source": { + "enum": [ + "Lower respiratory tract Aspiration", + "Bronchus Aspiration", + "Lung Aspiration", + "Bronchiole Aspiration", + "Alveolar sac Aspiration", + "Pleural sac Aspiration", + "Pleural cavity Aspiration", + "Trachea Aspiration", + "Upper respiratory tract Aspiration", + "Anterior Nares Aspiration", + "Esophagus Aspiration", + "Ethmoid sinus Aspiration", + "Nasal Cavity Aspiration", + "Middle Nasal Turbinate Aspiration", + "Pharynx Aspiration", + "Nasopharynx Aspiration", + "Oropharynx Aspiration", + "Lower respiratory tract Vacuum Aspiration", + "Bronchus Vacuum Aspiration", + "Lung Vacuum Aspiration", + "Bronchiole Vacuum Aspiration", + "Alveolar sac Vacuum Aspiration", + "Pleural sac Vacuum Aspiration", + "Pleural cavity Vacuum Aspiration", + "Trachea Vacuum Aspiration", + "Upper respiratory tract Vacuum Aspiration", + "Anterior Nares Vacuum Aspiration", + "Esophagus Vacuum Aspiration", + "Ethmoid sinus Vacuum Aspiration", + "Nasal Cavity Vacuum Aspiration", + "Middle Nasal Turbinate Vacuum Aspiration", + "Nasopharynx Vacuum Aspiration", + "Oropharynx Vacuum Aspiration", + "Lower respiratory tract Biopsy", + "Bronchus Biopsy", + "Lung biopsy", + "Bronchiole Biopsy", + "Alveolar sac Biopsy", + "Pleural sac Biopsy", + "Pleural cavity Biopsy", + "Trachea Biopsy", + "Rectum Biopsy", + "Skin Biopsy", + "Stomach Biopsy", + 
"Upper respiratory tract Biopsy", + "Anterior Nares Biopsy", + "Esophagus Biopsy", + "Ethmoid sinus Biopsy", + "Nasal Cavity Biopsy", + "Middle Nasal Turbinate Biopsy", + "Anus Biopsy", + "Duodenum Biopsy", + "Nasopharynx Biopsy", + "Oropharynx Biopsy", + "Lower respiratory tract Needle Biopsy", + "Bronchus Needle Biopsy", + "Lung Needle Biopsy", + "Bronchiole Needle Biopsy", + "Alveolar sac Needle Biopsy", + "Pleural sac Needle Biopsy", + "Pleural cavity Needle Biopsy", + "Trachea Needle Biopsy", + "Rectum Needle Biopsy", + "Skin Needle Biopsy", + "Stomach Needle Biopsy", + "Upper respiratory tract Needle Biopsy", + "Anterior Nares Needle Biopsy", + "Esophagus Needle Biopsy", + "Ethmoid sinus Needle Biopsy", + "Nasal Cavity Needle Biopsy", + "Middle Nasal Turbinate Needle Biopsy", + "Anus Needle Biopsy", + "Duodenum Needle Biopsy", + "Nasopharynx Needle Biopsy", + "Oropharynx Needle Biopsy", + "Lower respiratory tract Lavage ", + "Pleural sac Lavage ", + "Pleural cavity Lavage ", + "Trachea Lavage ", + "Upper respiratory tract Lavage ", + "Anterior Nares Lavage ", + "Esophagus Lavage ", + "Ethmoid sinus Lavage ", + "Nasal Cavity Lavage ", + "Middle Nasal Turbinate Lavage ", + "Nasopharynx Lavage ", + "Oropharynx Lavage ", + "Bronchoalveolar Lavage", + "Gastric Lavage", + "Lower respiratory tract Necropsy", + "Bronchus Necropsy", + "Lung Necropsy", + "Bronchiole Necropsy", + "Alveolar sac Necropsy", + "Pleural sac Necropsy", + "Pleural cavity Necropsy", + "Trachea Necropsy", + "Rectum Necropsy", + "Skin Necropsy", + "Stomach Necropsy", + "Upper respiratory tract Necropsy", + "Anterior Nares Necropsy", + "Esophagus Necropsy", + "Ethmoid sinus Necropsy", + "Nasal Cavity Necropsy", + "Middle Nasal Turbinate Necropsy", + "Anus Necropsy", + "Duodenum Necropsy", + "Nasopharynx (NP) Necropsy", + "Oropharynx (OP) Necropsy", + "Lower respiratory tract Rinsing", + "Bronchus Rinsing", + "Lung Rinsing", + "Bronchiole Rinsing", + "Alveolar sac Rinsing", + "Pleural sac Rinsing", 
+ "Pleural cavity Rinsing", + "Trachea Rinsing", + "Upper respiratory tract Rinsing", + "Anterior Nares Rinsing", + "Esophagus Rinsing", + "Ethmoid sinus Rinsing", + "Nasal Cavity Rinsing", + "Middle Nasal Turbinate Rinsing", + "Nasopharynx Rinsing", + "Oropharynx Rinsing", + "Bronchiole Scraping", + "Alveolar sac Scraping", + "Pleural sac Scraping", + "Pleural cavity Scraping", + "Trachea Scraping", + "Upper respiratory tract Scraping", + "Anterior Nares Scraping", + "Esophagus Scraping", + "Ethmoid sinus Scraping", + "Nasal Cavity Scraping", + "Middle Nasal Turbinate Scraping", + "Nasopharynx Scraping", + "Oropharynx Scraping", + "Nasopharynx swab", + "Oropharynx Swab", + "Pharynx Swab", + "Lower respiratory tract Wash", + "Bronchus Wash", + "Lung Wash", + "Bronchiole Wash", + "Alveolar sac Wash", + "Pleural sac Wash", + "Pleural cavity Wash", + "Trachea Wash", + "Upper respiratory tract Wash", + "Anterior Nares Wash", + "Esophagus Wash", + "Ethmoid sinus Wash", + "Nasal Cavity Wash", + "Middle Nasal Turbinate Wash", + "Nasopharynx Wash", + "Oropharynx Wash", + "Feces", + "Mucus", + "Sputum", + "Sweat", + "Tear", + "Urine", + "Blood", + "Fluid (Cerebrospinal (CSF))", + "Saliva", + "Tissue", + "Placenta", + "Scraping", + "Not Applicable", + "Not Collected", + "Not Provided", + "Missing", + "Restricted Access", + "Not Provided [GENEPIO:0001668]" + ], + "ontology": "GENEPIO:0001211", + "type": "string", + "description": "Source of the specimen, merge of anatomical_part, anatomical_material, body_product and collection method.", + "examples": [ + "" + ], + "classification": "Sample collection and processing", + "label": "Specimen source", + "fill_mode": "batch" + }, + "anatomical_part": { + "enum": [ + "Eye [UBERON:0000970]", + "Intestine [UBERON:0000160]", + "Lower respiratory tract [UBERON:0001558]", + "Bronchus [UBERON:0002185]", + "Lung [UBERON:0002048]", + "Bronchiole [UBERON:0002186]", + "Alveolar sac [UBERON:0002169]", + "Pleural sac [UBERON:0009778]", + 
"Pleural cavity [UBERON:0002402]", + "Trachea [UBERON:0003126]", + "Rectum [UBERON:0001052]", + "Skin [UBERON:0001003]", + "Stomach [UBERON:0000945]", + "Upper respiratory tract [UBERON:0001557]", + "Anterior Nares [UBERON:2001427]", + "Esophagus [UBERON:0001043]", + "Ethmoid sinus [UBERON:0002453]", + "Nasal Cavity [UBERON:0001707]", + "Middle Nasal Turbinate [UBERON:0001762]", + "Anus [UBERON:0001245]", + "Duodenum [UBERON:0002114]", + "Pharynx [UBERON:0006562]", + "Nasopharynx [UBERON:0001728]", + "Oropharynx [UBERON:0001729]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001214", + "type": "string", + "description": "", + "examples": [ + "" + ], + "classification": "Sample collection and processing", + "label": "Anatomical Part", + "fill_mode": "batch" + }, + "collection_device": { + "enum": [ + "Air filter [ENVO:00003968]", + "Blood Collection Tube [NCIT:C113122]", + "Bronchoscope [NCIT:C17611]", + "Collection Container [NCIT:C43446]", + "Collection Cup [GENEPIO:0100026]", + "Filter [NCIT:C45801]", + "Needle [NCIT:C69013]", + "Serum Collection Tube [NCIT:C113675]", + "Sputum Collection Tube [GENEPIO:0002115]", + "Suction Catheter [58253008]", + "Swab [GENEPIO:0100027]", + "Other [NCIT:C17649]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001234", + "type": "string", + "description": "", + "examples": [ + "" + ], + "classification": "Sample collection and processing", + "label": "Collection Device", + "fill_mode": "batch" + }, + "collection_method": { + "enum": [ + "Amniocentesis [NCIT:C52009]", + "Suprapublic Aspiration [GENEPIO:0100028]", + "Tracheal Aspiration [GENEPIO:0100029]", + "Vacuum Aspiration [GENEPIO:0100030]", + "Needle 
Biopsy [OBI:0002654]", + "Filtration [NCIT:C16583]", + "Lavage [NCIT:C38068]", + "Bronchoalveolar Lavage (BAL) [GENEPIO:0100032]", + "Gastric Lavage [GENEPIO:0100033]", + "Lumbar Puncture [NCIT:C15327]", + "Necropsy [NCIT:C166270]", + "Phlebotomy [NCIT:C28221]", + "Rinsing [GENEPIO:0002116]", + "Scraping [GENEPIO:0100035]", + "Swab [NCIT:C17627]", + "Finger Pick [GENEPIO:0100036]", + "Wash [NCIT:C65077]", + "Washout Tear Collection [GENEPIO:0100038]", + "Inferior Nasal Turbinate [NCIT:C32794]", + "Aspiration [NCIT:C15631]", + "Biopsy [NCIT:C15189]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001241", + "type": "string", + "description": "", + "examples": [ + "" + ], + "classification": "Sample collection and processing", + "label": "Collection Method", + "fill_mode": "batch" + }, + "body_product": { + "enum": [ + "Breast Milk [UBERON:0001913]", + "Feces [UBERON:0001988]", + "Mucus [UBERON:0000912]", + "Sputum [UBERON:0007311]", + "Sweat [UBERON:0001089]", + "Tear [UBERON:0001827]", + "Urine [UBERON:0001088]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001216", + "type": "string", + "description": "", + "examples": [ + "" + ], + "classification": "Sample collection and processing", + "label": "Body Product", + "fill_mode": "batch" + }, + "environmental_material": { + "enum": [ + "Air vent [ENVO:03501208]", + "Banknote [ENVO:00003896]", + "Bed rail [ENVO:03501209]", + "Building Floor [ENVO:01000486]", + "Cloth [ENVO:02000058]", + "Control Panel [ENVO:03501210]", + "Door [ENVO:03501220]", + "Door Handle [ENVO:03501211]", + "Face Mask [OBI:0002787]", + "Face Shield [OBI:0002791]", + "Food [FOODON:00002403]", + "Food Packaging 
[FOODON:03490100]", + "Glass [ENVO:01000481]", + "Handrail [ENVO:03501212]", + "Hospital Gown [OBI:0002796]", + "Light Switch [ENVO:03501213]", + "Locker [ENVO:03501214]", + "N95 Mask [OBI:0002790]", + "Nurse Call Button [ENVO:03501215]", + "Paper [ENVO:03501256]", + "Particulate matter [ENVO:01000060]", + "Plastic [ENVO:01000404]", + "PPE Gown [GENEPIO:0100025]", + "Sewage [ENVO:00002018]", + "Sink [ENVO:01000990]", + "Soil [ENVO:00001998]", + "Stainless Steel [ENVO:03501216]", + "Tissue Paper [ENVO:03501217]", + "Toilet Bowl [ENVO:03501218]", + "Water [ENVO:00002006]", + "Wastewater [ENVO:00002001]", + "Window [ENVO:03501219]", + "Wood [ENVO:00002040]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Other [NCIT:C17649]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001223", + "type": "string", + "description": "A substance obtained from the natural or man-made environment e.g. 
soil, water, sewage, door handle, bed handrail, face mask.", + "examples": [ + "Face Mask [OBI:0002787]" + ], + "classification": "Sample collection and processing", + "label": "Environmental Material", + "fill_mode": "batch" + }, + "environmental_system": { + "enum": [ + "Acute care facility [ENVO:03501135]", + "Animal house [ENVO:00003040]", + "Bathroom [ENVO:01000422]", + "Clinical assessment centre [ENVO:03501136]", + "Conference venue [ENVO:03501127]", + "Corridor [ENVO:03501121]", + "Daycare [ENVO:01000927]", + "Emergency room (ER) [ENVO:03501145]", + "Family practice clinic [ENVO:03501186]", + "Group home [ENVO:03501196]", + "Homeless shelter [ENVO:03501133]", + "Hospital [ENVO:00002173]", + "Intensive Care Unit (ICU) [ENVO:03501152]", + "Long Term Care Facility [ENVO:03501194]", + "Patient room [ENVO:03501180]", + "Prison [ENVO:03501204]", + "Production Facility [ENVO:01000536]", + "School [ENVO:03501130]", + "Sewage Plant [ENVO:00003043]", + "Subway train [ENVO:03501109]", + "Wet market [ENVO:03501198]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001232", + "type": "string", + "description": "An environmental location may describe a site in the natural or built environment e.g. 
hospital, wet market, bat cave.", + "examples": [ + "Hospital [ENVO:00002173]" + ], + "classification": "Sample collection and processing", + "label": "Environmental System", + "fill_mode": "batch" + }, + "organism": { + "enum": [ + "Coronaviridae [NCBITaxon:11118]", + "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]", + "Respiratory syncytial virus [NCBITaxon:12814]", + "Influenza virus (organism) [SNOMED:725894000]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "NCIT:C43459", + "type": "string", + "description": "Taxonomic name of the organism.", + "examples": [ + "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]" + ], + "classification": "Sample collection and processing", + "label": "Organism", + "fill_mode": "batch", + "minLenght": "1" + }, + "tax_id": { + "examples": [ + "2697049" + ], + "ontology": "NCIT:C164641", + "type": "string", + "description": "The NCBITaxon identifier for the organism being sequenced.", + "classification": "Sample collection and processing", + "label": "Tax ID", + "fill_mode": "batch", + "minLenght": "1" + }, + "host_common_name": { + "enum": [ + "Human [NCBITaxon:9606]", + "Bat [NCBITaxon:9397]", + "Cat [NCBITaxon:9685]", + "Chicken [NCBITaxon:9031]", + "Civet [NCBITaxon:9673]", + "Cow [NCBITaxon:9913]", + "Dog [NCBITaxon:9615]", + "Lion [NCBITaxon:9689]", + "Mink [NCBITaxon:452646]", + "Pangolin [NCBITaxon:9973]", + "Pig [NCBITaxon:9825]", + "Pigeon [NCBITaxon:8930]", + "Tiger [NCBITaxon:9694]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001386", + "type": "string", + "description": "The commonly used name of the host.", + "examples": [ + "Human [NCBITaxon:9606]" + ], + "classification": "Host information", + 
"label": "Host", + "fill_mode": "batch" + }, + "host_scientific_name": { + "enum": [ + "Bos taurus [NCBITaxon:9913]", + "Canis lupus familiaris [NCBITaxon:9615]", + "Chiroptera [NCBITaxon:9397]", + "Columbidae [NCBITaxon:8930]", + "Felis catus [NCBITaxon:9685]", + "Gallus gallus [NCBITaxon:9031]", + "Homo sapiens [NCBITaxon:9606]", + "Manis [NCBITaxon:9973]", + "Manis javanica [NCBITaxon:9974]", + "Neovison vison [NCBITaxon:452646]", + "Panthera leo [NCBITaxon:9689]", + "Panthera tigris [NCBITaxon:9694]", + "Rhinolophidae [NCBITaxon:58055]", + "Rhinolophus affinis [NCBITaxon:59477]", + "Sus scrofa domesticus [NCBITaxon:9825]", + "Viverridae [NCBITaxon:9673]", + "Not Applicable [GENEPIO:0001619]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001387", + "type": "string", + "description": "The taxonomic, or scientific name of the host.", + "examples": [ + "Homo sapiens [NCBITaxon:9606]" + ], + "classification": "Host information", + "label": "Host Scientific Name", + "fill_mode": "batch", + "minLenght": "1" + }, + "host_disease": { + "enum": [ + "COVID-19 [MONDO:0100096]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001391", + "type": "string", + "description": "The name of the disease experienced by the host.", + "examples": [ + "COVID-19 [MONDO:0100096]" + ], + "classification": "Host information", + "label": "Host disease", + "fill_mode": "batch" + }, + "host_age": { + "ontology": "GENEPIO:0001392", + "type": "string", + "description": "Age of host at the time of sampling.", + "examples": [ + "79" + ], + "classification": "Host information", + "label": "Host Age", + "fill_mode": "sample" + }, + "host_gender": { + "enum": [ + "Female [NCIT:C46110]", + "Male [NCIT:C46109]", + "Non-binary Gender [GSSO:000132]", + "Transgender (assigned male at birth) 
[GSSO:004004]", + "Transgender (assigned female at birth) [GSSO:004005]", + "Undeclared [NCIT:C110959]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001395", + "type": "string", + "description": "The gender of the host at the time of sample collection.", + "examples": [ + "Male [NCIT:C46109]" + ], + "classification": "Host information", + "label": "Host Gender", + "fill_mode": "sample" + }, + "sequencing_instrument_model": { + "enum": [ + "Illumina sequencing instrument [GENEPIO:0100105]", + "Illumina Genome Analyzer [GENEPIO:0100106]", + "Illumina Genome Analyzer II [GENEPIO:0100107]", + "Illumina Genome Analyzer IIx [GENEPIO:0100108]", + "Illumina HiScanSQ [GENEPIO:0100109]", + "Illumina HiSeq [GENEPIO:0100110]", + "Illumina HiSeq X [GENEPIO:0100111]", + "Illumina HiSeq X Five [GENEPIO:0100112]", + "Illumina HiSeq X Ten [GENEPIO:0100113]", + "Illumina HiSeq 1000 [GENEPIO:0100114]", + "Illumina HiSeq 1500 [GENEPIO:0100115]", + "Illumina HiSeq 2000 [GENEPIO:0100116]", + "Illumina HiSeq 2500 [GENEPIO:0100117]", + "Illumina HiSeq 3000 [GENEPIO:0100118]", + "Illumina HiSeq 4000 [GENEPIO:0100119]", + "Illumina iSeq [GENEPIO:0100120]", + "Illumina iSeq 100 [GENEPIO:0100121]", + "Illumina NovaSeq [GENEPIO:0100122]", + "Illumina NovaSeq 6000 [GENEPIO:0100123]", + "Illumina MiniSeq [GENEPIO:0100124]", + "Illumina MiSeq [GENEPIO:0100125]", + "Illumina NextSeq [GENEPIO:0100126]", + "Illumina NextSeq 500 [GENEPIO:0100127]", + "Illumina NextSeq 550 [GENEPIO:0100128]", + "Illumina NextSeq 2000 [GENEPIO:0100129]", + "Pacific Biosciences sequencing instrument [GENEPIO:0100130]", + "PacBio RS [GENEPIO:0100131]", + "PacBio RS II [GENEPIO:0100132]", + "PacBio Sequel [GENEPIO:0100133]", + "PacBio Sequel II [GENEPIO:0100134]", + "Ion Torrent sequencing instrument [GENEPIO:0100135]", + "Ion Torrent PGM [GENEPIO:0100136]", 
+ "Ion Torrent Proton [GENEPIO:0100137]", + "Ion Torrent S5 XL [GENEPIO:0100138]", + "Ion Torrent S5 [GENEPIO:0100139]", + "Oxford Nanopore sequencing instrument [GENEPIO:0100140]", + "Oxford Nanopore GridION [GENEPIO:0100141]", + "Oxford Nanopore MinION [GENEPIO:0100142]", + "Oxford Nanopore PromethION [GENEPIO:0100143]", + "BGI Genomics sequencing instrument [GENEPIO:0100144]", + "BGI SEQ-500 [GENEPIO:0100145]", + "MGI sequencing instrument [GENEPIO:0100146]", + "MGI DNBSEQ-T7 [GENEPIO:0100147]", + "MGI DNBSEQ-G400 [GENEPIO:0100148]", + "MGI DNBSEQ-G400RS FAST [GENEPIO:0100149]", + "MGI DNBSEQ-G50 [GENEPIO:0100150]", + "Not Applicable [GENEPIO:0001619]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001452", + "type": "string", + "description": "The model of the sequencing instrument used.", + "examples": [ + "Oxford Nanopore MinION [GENEPIO:0100142]" + ], + "classification": "Sequencing", + "label": "Sequencing Instrument Model", + "fill_mode": "batch", + "minLenght": "1" + }, + "purpose_of_sequencing": { + "enum": [ + "Baseline surveillance (random sampling) [GENEPIO:0100005]", + "Targeted surveillance (non-random sampling) [GENEPIO:0100006]", + "Priority surveillance projects [GENEPIO:0100007]", + "Screening for Variants of Concern (VOC) [GENEPIO:0100008]", + "Sample has epidemiological link to Variant of Concern (VoC) [GENEPIO:0100273]", + "Sample has epidemiological link to Omicron Variant [GENEPIO:0100274]", + "Longitudinal surveillance (repeat sampling of individuals) [GENEPIO:0100009]", + "Re-infection surveillance [GENEPIO:0100010]", + "Vaccine escape surveillance [GENEPIO:0100011]", + "Travel-associated surveillance [GENEPIO:0100012]", + "Domestic travel surveillance [GENEPIO:0100013]", + "Interstate/ interprovincial travel surveillance [GENEPIO:0100275]", + "Intra-state/ intra-provincial travel surveillance [GENEPIO:0100276]", + "International travel surveillance [GENEPIO:0100014]", + "Surveillance 
of international border crossing by air travel or ground transport [GENEPIO:0100015]", + "Surveillance of international border crossing by air travel [GENEPIO:0100016]", + "Surveillance of international border crossing by ground transport [GENEPIO:0100017]", + "Surveillance from international worker testing [GENEPIO:0100018]", + "Cluster/Outbreak investigation [GENEPIO:0100019]", + "Multi-jurisdictional outbreak investigation [GENEPIO:0100020]", + "Intra-jurisdictional outbreak investigation [GENEPIO:0100021]", + "Research [GENEPIO:0100003]", + "Viral passage experiment [GENEPIO:0100023]", + "Protocol testing [GENEPIO:0100024]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001445", + "type": "string", + "description": "The reason that the sample was sequenced.", + "examples": [ + "Baseline surveillance (random sampling) [GENEPIO:0100005]" + ], + "classification": "Sequencing", + "label": "Purpose of Sequencing", + "fill_mode": "batch" + }, + "sequencing_date": { + "examples": [ + "4/26/2021" + ], + "ontology": "GENEPIO:0001447", + "type": "string", + "description": "The date the sample was sequenced.", + "format": "date", + "classification": "Sequencing", + "label": "Sequencing Date", + "fill_mode": "batch" + }, + "nucleic_acid_extraction_protocol": { + "examples": [ + "Opentrons custom rna extraction protocol" + ], + "ontology": "OBI_0302884", + "type": "string", + "description": "DNA/RNA extraction protocol", + "classification": "Sequencing", + "label": "Nucleic acid extraction protocol", + "fill_mode": "batch" + }, + "all_in_one_library_kit": { + "enum": [ + "Ion Xpress", + "ABL_DeepChek NGS", + "Ion AmpliSeq Kit for Chef DL8", + "NEBNext Fast DNA Library Prep Set for Ion Torrent", + "NEBNext ARTIC SARS-CoV-2 FS", + "Illumina COVIDSeq Test [CIDO:0020172]", + "ABL DeepChek® Assay WG SC2 V1", + "Not 
Provided [GENEPIO:0001668]", + "Other [NCIT:C17649]" + ], + "examples": [ + "Illumina COVIDSeq Test" + ], + "ontology": "GENEPIO_0000085", + "type": "string", + "description": "", + "classification": "Sequencing", + "label": "Commercial All-in-one library kit", + "fill_mode": "batch" + }, + "library_preparation_kit": { + "enum": [ + "Illumina DNA PCR-Free Prep", + "Illumina DNA Prep", + "Illumina Stranded mRNA Prep", + "Nextera XT DNA Library Preparation Kit", + "TruSeq DNA Nano", + "TruSeq DNA Nano Library Prep Kit for NeoPrep", + "TruSeq DNA PCR-Free", + "TruSeq RNA Library Prep Kit v2", + "TruSeq Stranded Total RNA", + "TruSeq Stranded mRNA", + "Nextera XT", + "NEBNex Fast DNA Library Prep Set for Ion Torrent", + "Nextera DNA Flex", + "Ion AmpliSeq Kit Library Kit Plus", + "Ion AmpliSeq Library Kit 2.0", + "Ion Xpress Plus Fragment Library Kit", + "Oxford Nanopore Sequencing Kit", + "SQK-RBK110-96", + "Nanopore COVID Maxi: 9216 samples", + "Nanopore COVID Midi: 2304 samples", + "Nanopore COVID Mini: 576 samples", + "Vela Diagnostics:ViroKey SQ FLEX Library Prep Reagents", + "Not Provided [GENEPIO:0001668]", + "Other [NCIT:C17649]" + ], + "examples": [ + "Illumina DNA Prep Tagmentation" + ], + "ontology": "GENEPIO:0001450", + "type": "string", + "description": "The name of the DNA library preparation kit used to generate the library being sequenced.", + "classification": "Sequencing", + "label": "Library Preparation Kit", + "fill_mode": "batch" + }, + "enrichment_protocol": { + "enum": [ + "Amplicon [GENEPIO:0001974]", + "Probes [OMIT:0016121]", + "Custom probes [OMIT:0016112]", + "Custom amplicon [OMIT:0016112]", + "No enrichment [NCIT:C154307]", + "Other [NCIT:C17649]", + "Not Provided [GENEPIO:0001668]" + ], + "examples": [ + "AMPLICON" + ], + "ontology": "EFO_0009089", + "type": "string", + "description": "Type of enrichment protocol", + "classification": "Sequencing", + "label": "Enrichment Protocol", + "fill_mode": "batch" + }, + 
"if_enrichment_protocol_is_other_specify": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "classification": "Sequencing", + "label": "If Enrichment Protocol Is Other, Specify", + "fill_mode": "batch" + }, + "enrichment_panel": { + "enum": [ + "ARTIC", + "Illumina respiratory Virus Oligo Panel", + "Illumina AmpliSeq Community panel", + "Illumina AmpliSeq SARS-CoV-2 Research Panel for Illumina", + "Ion AmpliSeq SARS-CoV-2 Research Panel", + "xGen SC2 Midnight1200 Amplicon Panel", + "ViroKey SQ FLEX SARS-CoV-2 Primer Set", + "NEBNext VarSkip Short SARS-CoV-2 primers", + "Other [NCIT:C17649]" + ], + "examples": [ + "ARTIC" + ], + "ontology": "0", + "type": "string", + "description": "Commercial or custom panel/assay used for enrichment.", + "classification": "Sequencing", + "label": "Enrichment panel/assay", + "fill_mode": "batch" + }, + "enrichment_panel_version": { + "enum": [ + "ARTIC v1", + "ARTIC v2", + "ARTIC v3", + "ARTIC v4", + "ARTIC v4.1", + "ARTIC v5", + "ARTIC v5.1", + "ARTIC v5.2", + "ARTIC v5.3", + "ARTIC v5.3.2", + "Illumina AmpliSeq Community panel", + "Illumina AmpliSeq SARS-CoV-2 Research Panel for Illumina", + "Illumina Respiratory Virus Oligos Panel V1", + "Illumina Respiratory Virus Oligos Panel V2", + "Ion AmpliSeq SARS-CoV-2 Insight", + "xGen SC2 Midnight1200 Amplicon Panel", + "ViroKey SQ FLEX SARS-CoV-2 Primer Set", + "NEBNext VarSkip Short SARS-CoV-2 primers", + "Other [NCIT:C17649]" + ], + "examples": [ + "ARTIC v4" + ], + "ontology": "0", + "type": "string", + "description": "", + "classification": "Sequencing", + "label": "Enrichment panel/assay version", + "fill_mode": "batch" + }, + "if_enrichment_panel_assay_is_other_specify": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "classification": "Sequencing", + "label": "If Enrichment panel/assay Is Other, Specify", + "fill_mode": "batch" + }, + "if_enrichment_panel_assay_version_other": { + "examples": [ + 
"" + ], + "ontology": "NCIT:C111093", + "type": "string", + "description": "", + "classification": "Sequencing", + "label": "If Enrichment panel/assay version Is Other, Specify", + "fill_mode": "batch" + }, + "amplicon_pcr_primer_scheme": { + "examples": [ + "https://github.com/joshquick/artic-ncov2019/blob/master/primer_schemes/nCoV-2019/V3/nCoV-2019.tsv" + ], + "ontology": "GENEPIO:0001456", + "type": "string", + "description": "The specifications of the primers (primer sequences, binding positions, fragment size generated etc) used to generate the amplicons to be sequenced.", + "classification": "Sequencing", + "label": "Amplicon Pcr Primer Scheme", + "fill_mode": "batch" + }, + "number_of_samples_in_run": { + "examples": [ + "" + ], + "ontology": "KISAO_0000326", + "type": "string", + "description": "", + "classification": "Sequencing", + "label": "Number Of Samples In Run", + "fill_mode": "batch" + }, + "flowcell_kit": { + "enum": [ + "HiSeq 3000/4000 PE Cluster Kit", + "HiSeq 3000/4000 SBS Kit (50 cycles)", + "HiSeq 3000/4000 SBS Kit (150 cycles)", + "HiSeq 3000/4000 SBS Kit (300 cycles)", + "HiSeq 3000/4000 SBS Kit", + "HiSeq 3000/4000 SR Cluster Kit", + "TG HiSeq 3000/4000 SBS Kit (50 cycles)", + "TG HiSeq 3000/4000 SBS Kit (150 cycles)", + "TG HiSeq 3000/4000 SBS Kit (300 cycles)", + "TG HiSeq 3000/4000 PE ClusterKit", + "TG HiSeq 3000/4000 SR ClusterKit", + "HiSeq PE Cluster Kit v4 cBot", + "HiSeq SR Rapid Cluster Kit v2", + "HiSeq PE Rapid Cluster Kit v2", + "TG HiSeq Rapid PE Cluster Kit v2", + "HiSeq Rapid Duo cBot Sample Loading Kit", + "TG TruSeq Rapid Duo cBot Sample Loading Kit", + "HiSeq Rapid SBS Kit v2 (50 cycles)", + "HiSeq Rapid SBS Kit v2 (200 cycles)", + "HiSeq Rapid SBS Kit v2 (500 cycles)", + "TG HiSeq Rapid SBS Kit v2 (200 Cycle)", + "TG HiSeq Rapid SBS Kit v2 (50 Cycle)", + "HiSeq SR Cluster Kit v4 cBot", + "TG HiSeq SR Cluster Kit v4 - cBot", + "TG HiSeq PE Cluster Kit v4 - cBot", + "HiSeq X Ten Reagent Kit v2.5", + "HiSeq X Ten Reagent 
Kit v2.5 - 10 pack", + "HiSeq X Five Reagent Kit v2.5", + "MiSeq Reagent Kit v3 (150-cycle)", + "MiSeq Reagent Kit v3 (600-cycle)", + "TG MiSeq Reagent Kit v3 (600 cycle)", + "TG MiSeq Reagent Kit v3 (150 cycle)", + "MiSeq Reagent Kit v2 (50-cycles)", + "MiSeq Reagent Kit v2 (300-cycles)", + "MiSeq Reagent Kit v2 (500-cycles)", + "MiniSeq Rapid Reagent Kit (100 cycles)", + "MiniSeq High Output Reagent Kit (75-cycles)", + "MiniSeq High Output Reagent Kit (150-cycles)", + "NextSeq 1000/2000 P1 Reagents (300 Cycles)", + "NextSeq 1000/2000 P2 Reagents (100 Cycles) v3", + "NextSeq 1000/2000 P2 Reagents (200 Cycles) v3", + "NextSeq 1000/2000 P2 Reagents (300 Cycles) v3", + "NextSeq 2000 P3 Reagents (50 Cycles)", + "NextSeq 2000 P3 Reagents (100 Cycles)", + "NextSeq 2000 P3 Reagents (200 Cycles)", + "NextSeq 2000 P3 Reagents (300 Cycles)", + "NextSeq 1000/2000 Read and Index Primers", + "NextSeq 1000/2000 Index Primer Kit", + "NextSeq 1000/2000 Read Primer Kit", + "NextSeq 500/550 High Output Kit v2.5 (75 Cycles)", + "NextSeq 500/550 High Output Kit v2.5 (150 Cycles)", + "NextSeq 500/550 High Output Kit v2.5 (300 Cycles)", + "NextSeq 500/550 Mid Output Kit v2.5 (150 Cycles)", + "NextSeq 500/550 Mid Output Kit v2.5 (300 Cycles)", + "TG NextSeq 500/550 High Output Kit v2.5 (75 Cycles)", + "TG NextSeq 500/550 High Output Kit v2.5 (150 Cycles)", + "TG NextSeq 500/550 High Output Kit v2.5 (300 Cycles)", + "TG NextSeq 500/550 Mid Output Kit v2.5 (150 Cycles)", + "TG NextSeq 500/550 Mid Output Kit v2.5 (300 Cycles)", + "NovaSeq 6000 S4 Reagent Kit v1.5 (300 cycles)", + "NovaSeq 6000 S4 Reagent Kit v1.5 (200 cycles)", + "NovaSeq 6000 S4 Reagent Kit v1.5 (35 cycles)", + "NovaSeq 6000 S2 Reagent Kit v1.5 (300 cycles)", + "NovaSeq 6000 S2 Reagent Kit v1.5 (200 cycles)", + "NovaSeq 6000 S2 Reagent Kit v1.5 (100 cycles)", + "NovaSeq 6000 S1 Reagent Kit v1.5 (300 cycles)", + "NovaSeq 6000 S1 Reagent Kit v1.5 (200 cycles)", + "NovaSeq 6000 S1 Reagent Kit v1.5 (100 cycles)", + "NovaSeq 
6000 SP Reagent Kit v1.5 (500 cycles)", + "NovaSeq 6000 SP Reagent Kit v1.5 (300 cycles)", + "NovaSeq 6000 SP Reagent Kit v1.5 (200 cycles)", + "NovaSeq 6000 SP Reagent Kit v1.5 (100 cycles)", + "NovaSeq 6000 SP Reagent Kit (100 cycles)", + "NovaSeq 6000 SP Reagent Kit (200 cycles)", + "NovaSeq 6000 SP Reagent Kit (300 cycles)", + "NovaSeq 6000 SP Reagent Kit (500 cycles)", + "NovaSeq 6000 S1 Reagent Kit (100 cycles)", + "NovaSeq 6000 S1 Reagent Kit (200 cycles)", + "NovaSeq 6000 S1 Reagent Kit (300 cycles)", + "NovaSeq 6000 S2 Reagent Kit (100 cycles)", + "NovaSeq 6000 S2 Reagent Kit (200 cycles)", + "NovaSeq 6000 S2 Reagent Kit (300 cycles)", + "NovaSeq 6000 S4 Reagent Kit (200 cycles)", + "NovaSeq 6000 S4 Reagent Kit (300 cycles)", + "NovaSeq 6000 S4 Reagent Kit (300 Cycles) - 10 pack", + "NovaSeq 6000 S4 Reagent Kit (300 Cycles) - 20 pack", + "NovaSeq 6000 S4 Reagent Kit (300 Cycles) - 40 pack", + "NovaSeq Library Tubes Accessory Pack (24 tubes)", + "NovaSeq XP 4-Lane Kit v1.5", + "NovaSeq XP 2-Lane Kit v1.5", + "NovaSeq XP 2-Lane Kit", + "NovaSeq Xp 4-Lane Kit", + "NovaSeq Xp Flow Cell Dock", + "NovaSeq Xp 2-Lane Manifold Pack", + "NovaSeq Xp 4-Lane Manifold Pack", + "PhiX Control v3", + "TG PhiX Control Kit v3", + "NextSeq PhiX Control Kit", + "TruSeq Dual Index Sequencing Primer Box, Single-Read", + "TruSeq Dual Index Sequencing Primer Box, Paired-End", + "TruSeq PE Cluster Kit v3-cBot-HS", + "TruSeq PE Cluster Kit v5-CS-GA", + "TruSeq SBS Kit v3-HS (200 cycles)", + "TruSeq SBS Kit v3-HS (50 cycles)", + "TG TruSeq SBS Kit v3 - HS (200-cycles)", + "TruSeq SBS Kit v5-GA", + "TruSeq SR Cluster Kit v3-cBot-HS", + "TG TruSeq SR Cluster Kit v1-cBot - HS", + "iSeq 100 i1 Reagent v2 (300-cycle)", + "iSeq 100 i1 Reagent v2 (300-cycle) 4 pack", + "iSeq 100 i1 Reagent v2 (300-cycle) 8 pack", + "Not Provided [GENEPIO:0001668]", + "Other [NCIT:C17649]" + ], + "examples": [ + "iSeq 100 i1 Reagent v2 (300-cycle) 8 pack" + ], + "ontology": "0", + "type": "string", + 
"description": "Flowcell sequencer used for sequencing the sample", + "classification": "Sequencing", + "label": "Flowcell Kit", + "fill_mode": "batch" + }, + "runID": { + "examples": [ + "NextSeq_GEN_320" + ], + "ontology": "NCIT_C117058", + "type": "string", + "description": "Unique sequencing run identifier.", + "classification": "Sequencing", + "label": "Runid", + "fill_mode": "batch" + }, + "sequencing_instrument_platform": { + "enum": [ + "Oxford Nanopore [OBI:0002750]", + "Illumina [OBI:0000759]", + "Ion Torrent [GENEPIO:0002683]", + "PacBio [GENEPIO:0001927]", + "BGI", + "MGI", + "Other [NCIT:C17649]" + ], + "examples": [ + "Illumina" + ], + "ontology": "GENEPIO_0000071", + "type": "string", + "description": "", + "classification": "Sequencing", + "label": "Sequencing Instrument Platform", + "fill_mode": "batch", + "minLenght": "1" + }, + "library_source": { + "enum": [ + "genomic", + "genomic single cell", + "transcriptomic", + "transcriptomic single cell", + "metagenomic", + "metatranscriptomic", + "synthetic", + "viral rna", + "other [NCIT:C17649]", + "Not Provided [GENEPIO:0001668]" + ], + "examples": [ + "metagenomic" + ], + "ontology": "GENEPIO_0001965", + "type": "string", + "description": "Molecule type used to make the library.", + "classification": "Sequencing", + "label": "Source material", + "fill_mode": "batch" + }, + "library_selection": { + "enum": [ + "RANDOM [NCIT:C60702]", + "PCR [GENEPIO:0001955]", + "RANDOM PCR [GENEPIO:0001957]", + "RT-PCR [GENEPIO:0001959]", + "HMPR [GENEPIO:0001949]", + "MF [GENEPIO:0001952]", + "repeat fractionation", + "size fractionation [GENEPIO:0001963]", + "MSLL [GENEPIO:0001954]", + "cDNA [GENEPIO:0001962]", + "ChIP [GENEPIO:0001947]", + "MNase [GENEPIO:0001953]", + "DNase [GENEPIO:0001948]", + "Hybrid Selection [GENEPIO:0001950]", + "Reduced Representation [GENEPIO:0001960]", + "Restriction Digest [GENEPIO:0001961]", + "5-methylcytidine antibody [GENEPIO:0001941]", + "MBD2 protein methyl-CpG binding domain 
[GENEPIO:0001951]", + "CAGE [GENEPIO:0001942]", + "RACE [GENEPIO:0001956]", + "MDA", + "padlock probes capture method", + "Oligo-dT", + "Inverse rRNA selection", + "ChIP-Seq [GENEPIO:0001947]", + "Other [NCIT:C17649]", + "Not Provided [GENEPIO:0001668]" + ], + "examples": [ + "RANDOM PCR" + ], + "ontology": "GENEPIO_0001940", + "type": "string", + "description": "Library capture method.", + "classification": "Sequencing", + "label": "Capture method", + "fill_mode": "batch" + }, + "library_strategy": { + "enum": [ + "Bisultife-Seq strategy [GENEPIO:0001975]", + "CTS strategy [GENEPIO:0001978]", + "ChIP-Seq strategy [GENEPIO:0001979]", + "DNase-Hypersensitivity strategy [GENEPIO:0001980]", + "EST strategy [GENEPIO:0001981]", + "FL-cDNA strategy [GENEPIO:0001983]", + "MB-Seq strategy [GENEPIO:0001984]", + "MNase-Seq strategy [GENEPIO:0001985]", + "MRE-Seq strategy [GENEPIO:0001986]", + "MeDIP-Seq strategy [GENEPIO:0001987]", + "RNA-Seq strategy [GENEPIO:0001990]", + "WCS strategy [GENEPIO:0001991]", + "WGS strategy [GENEPIO:0001992]", + "WXS strategy [GENEPIO:0001993]", + "Amplicon [GENEPIO:0001974]", + "Clone end strategy [GENEPIO:0001976]", + "Clone strategy [GENEPIO:0001977]", + "Finishing strategy [GENPEIO:0001982]", + "Other library strategy [GENEPIO:0001988]", + "Pool clone strategy [GENEPIO:0001989]" + ], + "examples": [ + "WGS" + ], + "ontology": "GENEPIO_0001973", + "type": "string", + "description": "Overall sequencing strategy or approach.", + "classification": "Sequencing", + "label": "Sequencing technique", + "fill_mode": "batch" + }, + "library_layout": { + "enum": [ + "Single [OBI:0002481]", + "Paired [OBI:0001852]" + ], + "examples": [ + "Paired" + ], + "ontology": "NCIT:C175894", + "type": "string", + "description": "Single or paired sequencing configuration", + "classification": "Sequencing", + "label": "Library Layout", + "fill_mode": "batch" + }, + "read_length": { + "examples": [ + "75" + ], + "ontology": "GENEPIO:0000076", + "type": "string", + 
"description": "number of base pairs per read", + "classification": "Sequencing", + "label": "Read Length", + "fill_mode": "batch" + }, + "sequence_file_R1_fastq": { + "examples": [ + "ABC123_S1_L001_R1_001.fastq.gz" + ], + "ontology": "GENEPIO:0001476", + "type": "string", + "description": "The user-specified filename of the r1 FASTQ file.", + "classification": "Bioinformatics and QC metrics fields", + "label": "Sequence file R1 fastq", + "fill_mode": "sample" + }, + "sequence_file_R2_fastq": { + "examples": [ + "ABC123_S1_L001_R2_002.fastq.gz" + ], + "ontology": "GENEPIO:0001477", + "type": "string", + "description": "The user-specified filename of the r2 FASTQ file.", + "classification": "Bioinformatics and QC metrics fields", + "label": "Sequence file R2 fastq", + "fill_mode": "sample" + }, + "sequence_file_R1_md5": { + "examples": [ + "b5242d60471e5a5a97b35531dbbe8c30" + ], + "ontology": "MS_1000568", + "type": "string", + "description": "Checksum value to validate successful file transmission", + "classification": "Bioinformatics and QC metrics fields", + "label": "Fastq md5 r1", + "fill_mode": "sample", + "minLenght": "1" + }, + "sequence_file_R2_md5": { + "examples": [ + "b5242d60471e5a5a97b35531dbbe8c30" + ], + "ontology": "MS_1000569", + "type": "string", + "description": "Checksum value to validate successful file transmission", + "classification": "Bioinformatics and QC metrics fields", + "label": "Sequence fastq R2 md5", + "fill_mode": "sample" + }, + "r1_fastq_filepath": { + "examples": [ + "/User/Documents/RespLab/Data/" + ], + "ontology": "GENEPIO:0001478", + "type": "string", + "description": "The filepath of the r1 FASTQ file.", + "classification": "Files info", + "label": "Filepath R1 fastq", + "fill_mode": "batch" + }, + "r2_fastq_filepath": { + "examples": [ + "/User/Documents/RespLab/Data/" + ], + "ontology": "GENEPIO:0001479", + "type": "string", + "description": "The filepath of the r2 FASTQ file.", + "classification": "Files info", + 
"label": "Filepath R2 fastq", + "fill_mode": "batch" + }, + "fastq_r1_md5": { + "examples": [ + "b5242d60471e5a5a97b35531dbbe8c30" + ], + "ontology": "MS_1000568", + "type": "string", + "description": "Checksum value to validate successful file transmission", + "classification": "Bioinformatics and QC metrics fields", + "label": "Fastq md5 r1", + "fill_mode": "sample", + "minLenght": "1" + }, + "fastq_r2_md5": { + "examples": [ + "b5242d60471e5a5a97b35531dbbe8c30" + ], + "ontology": "MS_1000569", + "type": "string", + "description": "Checksum value to validate successful file transmission", + "classification": "Bioinformatics and QC metrics fields", + "label": "Sequence fastq R2 md5", + "fill_mode": "sample" + }, + "fast5_filename": { + "examples": [ + "batch1a_sequences.fast5" + ], + "ontology": "GENEPIO:0001480", + "type": "string", + "description": "The user-specified filename of the FAST5 file.", + "classification": "Files info", + "label": "Filename fast5", + "fill_mode": "batch" + }, + "fast5_filepath": { + "examples": [ + "/User/Documents/RespLab/Data/" + ], + "ontology": "GENEPIO:0001481", + "type": "string", + "description": "The filepath of the FAST5 file.", + "classification": "Files info", + "label": "Filepath fast5", + "fill_mode": "batch" + }, + "processing_date": { + "examples": [ + "e.g 20220705" + ], + "ontology": "0", + "type": "string", + "format": "date", + "description": "The time the processing of the data started", + "classification": "Bioinformatics and QC metrics fields", + "label": "Analysis date", + "fill_mode": "batch", + "minLenght": "1" + }, + "analysis_date": { + "examples": [ + "e.g 20220705" + ], + "ontology": "0", + "type": "string", + "format": "date", + "description": "The time of a sample analysis process", + "classification": "Bioinformatics and QC metrics fields", + "label": "Analysis date", + "fill_mode": "batch", + "minLenght": "1" + }, + "dehosting_method_software_name": { + "examples": [ + "KRAKEN2_KRAKEN2 " + ], + 
"ontology": "GENEPIO:0001459", + "type": "string", + "description": "The method used to remove host reads from the pathogen sequence.", + "classification": "Bioinformatic Analysis fields", + "label": "Dehosting Method", + "fill_mode": "batch" + }, + "dehosting_method_software_version": { + "examples": [ + "2.4.1" + ], + "ontology": "NCIT:C111093", + "type": "string", + "description": "The method version used to remove host reads from the pathogen sequence.", + "classification": "Bioinformatic Analysis fields", + "label": "Dehosting Method Version", + "fill_mode": "batch" + }, + "reference_genome_accession": { + "examples": [ + "NC_045512.2" + ], + "ontology": "GENEPIO:0001485", + "type": "string", + "description": "A persistent, unique identifier of a genome database entry.", + "classification": "Bioinformatics and QC metrics fields", + "label": "Reference genome accession", + "fill_mode": "batch" + }, + "bioinformatics_protocol_software_name": { + "examples": [ + "https://www.protocols.io/groups/cphln-sarscov2-sequencing-consortium/members" + ], + "ontology": "GENEPIO:0001489", + "type": "string", + "description": "The name of the bioinformatics protocol used.", + "classification": "Bioinformatic Analysis fields", + "label": "Bioinformatics protocol", + "fill_mode": "batch" + }, + "if_bioinformatic_protocol_is_other_specify": { + "examples": [ + "https://www.protocols.io/groups/cphln-sarscov2-sequencing-consortium/members" + ], + "ontology": "0", + "type": "string", + "description": "The name of the bioinformatics protocol used.", + "classification": "Bioinformatic Analysis fields", + "label": "If bioinformatics protocol Is Other, Specify", + "fill_mode": "batch", + "minLenght": "1" + }, + "bioinformatics_protocol_software_version": { + "examples": [ + "https://www.protocols.io/groups/cphln-sarscov2-sequencing-consortium/members" + ], + "ontology": "NCIT:C93490", + "type": "string", + "description": "The version number of the bioinformatics protocol used.", + 
"classification": "Bioinformatic Analysis fields", + "label": "Bioinformatics protocol version", + "fill_mode": "batch", + "minLenght": "1" + }, + "commercial_open_source_both": { + "enum": [ + "Commercial", + "Open Source", + "Both" + ], + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "If bioinformatics protocol used was open-source or commercial", + "classification": "Bioinformatic Analysis fields", + "label": "Commercial/Open-source/both", + "fill_mode": "batch" + }, + "preprocessing_software_name": { + "examples": [ + "fastp" + ], + "ontology": "MS_1002386", + "type": "string", + "description": "Software used for preprocessing step.", + "classification": "Bioinformatic Analysis fields", + "label": "Preprocessing software", + "fill_mode": "batch" + }, + "preprocessing_software_version": { + "examples": [ + "v5.3.1" + ], + "ontology": "NCIT:C111093", + "type": "string", + "description": "Version of the preprocessing software used.", + "classification": "Bioinformatic Analysis fields", + "label": "Preprocessing software version", + "fill_mode": "batch" + }, + "if_preprocessing_other": { + "examples": [ + "" + ], + "ontology": "MS_1002386", + "type": "string", + "description": "Preprocessing software name other", + "classification": "Bioinformatic Analysis fields", + "label": "If preprocessing Is Other, Specify", + "fill_mode": "batch" + }, + "preprocessing_params": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "Preprocessing parameters used.", + "classification": "Bioinformatic Analysis fields", + "label": "Preprocessing params", + "fill_mode": "batch" + }, + "mapping_software_name": { + "examples": [ + "bowtie2" + ], + "ontology": "topic:0102", + "type": "string", + "description": "Software used for mapping step.", + "classification": "Bioinformatic Analysis fields", + "label": "Mapping software", + "fill_mode": "batch" + }, + "mapping_software_version": { + "examples": [ + "v7.0.1" + ], + 
"ontology": "NCIT:C111093", + "type": "string", + "description": "Version of the mapper used.", + "classification": "Bioinformatic Analysis fields", + "label": "Mapping software version", + "fill_mode": "batch" + }, + "if_mapping_other": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "Mapping software used other ", + "classification": "Bioinformatic Analysis fields", + "label": "If mapping Is Other, Specify", + "fill_mode": "batch" + }, + "mapping_params": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "Parameters used for mapping step.", + "classification": "Bioinformatic Analysis fields", + "label": "Mapping params", + "fill_mode": "batch" + }, + "assembly": { + "examples": [ + "Spades" + ], + "ontology": "NCIT_C63548", + "type": "string", + "description": "Software used for assembly of the pathogen genome.", + "classification": "Bioinformatic Analysis fields", + "label": "Assembly software", + "fill_mode": "batch" + }, + "assembly_version": { + "examples": [ + "Spades" + ], + "ontology": "0", + "type": "string", + "description": "Software used for assembly of the pathogen genome.", + "classification": "Bioinformatic Analysis fields", + "label": "Assembly software version", + "fill_mode": "batch" + }, + "if_assembly_other": { + "examples": [ + "v3.1" + ], + "ontology": "0", + "type": "string", + "description": "Assembly software version", + "classification": "Bioinformatic Analysis fields", + "label": "If assembly Is Other, Specify", + "fill_mode": "batch" + }, + "assembly_params": { + "examples": [ + "-k 127,56,27" + ], + "ontology": "0", + "type": "string", + "description": "Params used for genome assembly.", + "classification": "Bioinformatic Analysis fields", + "label": "Assembly params", + "fill_mode": "batch" + }, + "vcf_filename": { + "examples": [ + "Ivar" + ], + "ontology": "0", + "type": "string", + "description": "Name of the vcf file.", + "classification": "Bioinformatic 
Variants", + "label": "VCF filename", + "fill_mode": "batch" + }, + "variant_calling_software_name": { + "examples": [ + "Ivar" + ], + "ontology": "operation_3227", + "type": "string", + "description": "Software used for variant calling.", + "classification": "Bioinformatic Variants", + "label": "Variant calling software", + "fill_mode": "batch" + }, + "variant_calling_software_version": { + "examples": [ + "v4.1" + ], + "ontology": "NCIT:C111093", + "type": "string", + "description": "Variant calling software version", + "classification": "Bioinformatic Variants", + "label": "Variant calling software version", + "fill_mode": "batch" + }, + "if_variant_calling_other": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "", + "classification": "Bioinformatic Variants", + "label": "If variant calling Is Other, Specify", + "fill_mode": "batch" + }, + "variant_calling_params": { + "examples": [ + "-t 0.5 -Q 20" + ], + "ontology": "0", + "type": "string", + "description": "Params used for variant calling", + "classification": "Bioinformatic Variants", + "label": "Variant calling params", + "fill_mode": "batch" + }, + "consensus_sequence_name": { + "ontology": "GENEPIO:0001460", + "type": "string", + "description": "The name of the consensus sequence.", + "examples": [ + "2018086 NC_045512.2" + ], + "classification": "Bioinformatic Analysis fields", + "label": "Consensus sequence name", + "fill_mode": "batch" + }, + "consensus_sequence_filename": { + "ontology": "GENEPIO:0001461", + "type": "string", + "description": "The name of the consensus sequence filename", + "examples": [ + "2018102.consensus.fa" + ], + "classification": "Bioinformatic Analysis fields", + "label": "Consensus sequence filename", + "fill_mode": "batch" + }, + "consensus_sequence_md5": { + "ontology": "0", + "type": "string", + "description": "The md5 of the consensus sequence.", + "examples": [ + "5gaskañlkdak3143242ñlkas" + ], + "classification": "Bioinformatic 
Analysis fields", + "label": "Consensus sequence name md5", + "fill_mode": "batch" + }, + "consensus_sequence_filepath": { + "examples": [ + "/User/Documents/RespLab/Data/ncov123assembly.fasta" + ], + "ontology": "GENEPIO:0001462", + "type": "string", + "description": "The filepath of the consensus sequence file.", + "classification": "Bioinformatic Analysis fields", + "label": "Consensus sequence filepath", + "fill_mode": "batch" + }, + "long_table_path": { + "examples": [ + "/User/Documents/RespLab/ncov123_longtable.tsv" + ], + "ontology": "0", + "type": "string", + "description": "The path where the long table including all variants and annotations is.", + "classification": "Bioinformatic Analysis fields", + "label": "Long table path", + "fill_mode": "batch" + }, + "consensus_sequence_software_name": { + "examples": [ + "Ivar" + ], + "ontology": "GENEPIO:0001463", + "type": "string", + "description": "The name of software used to generate the consensus sequence.", + "classification": "Bioinformatic Analysis fields", + "label": "Consensus software", + "fill_mode": "batch" + }, + "if_consensus_other": { + "examples": [ + "v1.3" + ], + "ontology": "0", + "type": "string", + "description": "", + "classification": "Bioinformatic Analysis fields", + "label": "If consensus Is Other, Specify", + "fill_mode": "batch" + }, + "consensus_sequence_software_version": { + "examples": [ + "1.3" + ], + "ontology": "NCIT:C111093", + "type": "string", + "description": "The version of the software used to generate the consensus sequence.", + "classification": "Bioinformatic Analysis fields", + "label": "Consensus software version", + "fill_mode": "batch" + }, + "consensus_params": { + "examples": [ + "AF > 0.75" + ], + "ontology": "0", + "type": "string", + "description": "Parameters used for consensus generation", + "classification": "Bioinformatic Analysis fields", + "label": "Consensus params", + "fill_mode": "batch" + }, + "consensus_genome_length": { + "examples": [ + "38677" 
+ ], + "ontology": "GENEPIO:0001483", + "type": "string", + "description": "Size of the assembled genome described as the number of base pairs.", + "classification": "Bioinformatics and QC metrics fields", + "label": "Consensus genome length", + "fill_mode": "batch" + }, + "depth_of_coverage_threshold": { + "examples": [ + "10x" + ], + "ontology": "GENEPIO:0001475", + "type": "string", + "description": "The threshold used as a cut-off for the depth of coverage.", + "classification": "Bioinformatics and QC metrics fields", + "label": "Depth of coverage threshold", + "fill_mode": "batch" + }, + "number_of_base_pairs_sequenced": { + "examples": [ + "387566" + ], + "ontology": "GENEPIO:0001482", + "type": "string", + "description": "The number of total base pairs generated by the sequencing process.", + "classification": "Bioinformatics and QC metrics fields", + "label": "Number of base pairs sequenced ", + "fill_mode": "batch" + }, + "qc_filtered": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "Percentage of read that pass quality control threshold ", + "classification": "Bioinformatics and QC metrics fields", + "label": "%qc filtered", + "fill_mode": "batch" + }, + "per_reads_host": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "Percentage of reads mapped to host", + "classification": "Bioinformatics and QC metrics fields", + "label": "%reads host", + "fill_mode": "batch" + }, + "per_reads_virus": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "Percentage of reads mapped to virus", + "classification": "Bioinformatics and QC metrics fields", + "label": "%reads virus", + "fill_mode": "batch" + }, + "per_unmapped": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "Percentage of reads unmapped to virus or to host", + "classification": "Bioinformatics and QC metrics fields", + "label": "%unmapped", + "fill_mode": "batch" + }, 
+ "depth_of_coverage_value": { + "examples": [ + "400x" + ], + "ontology": "GENEPIO:0001474", + "type": "string", + "description": "The average number of reads representing a given nucleotide in the reconstructed sequence.", + "classification": "Bioinformatics and QC metrics fields", + "label": "Depth of coverage value ", + "fill_mode": "batch" + }, + "per_genome_greater_10x": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "Percentage of genome with coverage greater than 10x", + "classification": "Bioinformatics and QC metrics fields", + "label": "% genome greater 10x", + "fill_mode": "batch" + }, + "per_Ns": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "Percentage of Ns", + "classification": "Bioinformatics and QC metrics fields", + "label": "%Ns", + "fill_mode": "batch" + }, + "ns_per_100_kbp": { + "examples": [ + "300" + ], + "ontology": "GENEPIO:0001484", + "type": "string", + "description": "The number of N symbols present in the consensus fasta sequence, per 100kbp of sequence.", + "classification": "Bioinformatics and QC metrics fields", + "label": "Ns per 100 kbp", + "fill_mode": "batch" + }, + "number_of_variants_in_consensus": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "The number of variants found in consensus sequence", + "classification": "Bioinformatic Variants", + "label": "Number of variants (AF greater 75%)", + "fill_mode": "batch" + }, + "number_of_variants_with_effect": { + "examples": [ + "" + ], + "ontology": "0", + "type": "string", + "description": "The number of missense variants", + "classification": "Bioinformatic Variants", + "label": "Number of variants with effect", + "fill_mode": "batch" + }, + "lineage_name": { + "examples": [ + "B.1.1.7" + ], + "ontology": "NCIT:C60792", + "type": "string", + "description": "The name of the lineage.", + "classification": "Lineage fields", + "label": "Lineage designation", + 
"fill_mode": "batch" + }, + "clade_name": { + "examples": [ + "B.1.1.7" + ], + "ontology": "NCIT:C179767", + "type": "string", + "description": "The name of the clade.", + "classification": "Lineage fields", + "label": "Clade designation", + "fill_mode": "batch" + }, + "clade_type_software_name": { + "examples": [ + "Pangolin" + ], + "ontology": "GENEPIO:0001501", + "type": "string", + "description": "The name of the software used to determine the clade/type.", + "classification": "Lineage fields", + "label": "Clade/Type identification software", + "fill_mode": "batch" + }, + "if_clade_type_other": { + "examples": [ + "Pangolin" + ], + "ontology": "GENEPIO:0001501", + "type": "string", + "description": "The name of the software used to determine the clade/type.", + "classification": "Lineage fields", + "label": "If Clade/Type Is Other, Specify", + "fill_mode": "batch" + }, + "lineage_analysis_software_name": { + "examples": [ + "Pangolin" + ], + "ontology": "GENEPIO:0001501", + "type": "string", + "description": "The name of the software used to determine the lineage/clade.", + "classification": "Lineage fields", + "label": "Lineage identification software", + "fill_mode": "batch" + }, + "if_lineage_identification_other": { + "examples": [ + "Nextclade" + ], + "ontology": "0", + "type": "string", + "description": "", + "classification": "Lineage fields", + "label": "If lineage identification Is Other, Specify", + "fill_mode": "batch" + }, + "lineage_algorithm_software_version": { + "examples": [ + "2.1.10" + ], + "ontology": "GENEPIO:0001502", + "type": "string", + "description": "The version of the software used to determine the lineage/clade.", + "classification": "Lineage fields", + "label": "Lineage/clade analysis software version", + "fill_mode": "batch" + }, + "lineage_analysis_software_version": { + "examples": [ + "2.1.10" + ], + "ontology": "NCIT:C111093", + "type": "string", + "description": "The version of the software used to determine the 
lineage/clade.", + "classification": "Lineage fields", + "label": "Lineage software version", + "fill_mode": "batch" + }, + "lineage_analysis_scorpio_version": { + "examples": [ + "2.1.10" + ], + "ontology": "NCIT:C111093", + "type": "string", + "description": "The version of the scorpio data used to determine the lineage/clade.", + "classification": "Lineage fields", + "label": "Lineage/clade analysis software version", + "fill_mode": "batch" + }, + "lineage_analysis_constellation_version": { + "examples": [ + "2.1.10" + ], + "ontology": "NCIT:C111093", + "type": "string", + "description": "The version of the constellations databases used to determine the lineage/clade.", + "classification": "Lineage fields", + "label": "Lineage/clade analysis software version", + "fill_mode": "batch" + }, + "lineage_analysis_date": { + "examples": [ + "2022/10/24" + ], + "ontology": "0", + "type": "string", + "format": "date", + "description": "Date when the lineage analysis was performed", + "classification": "Lineage fields", + "label": "lineage/clade analysis date", + "fill_mode": "batch" + }, + "variant_name": { + "ontology": "", + "type": "string", + "description": "The variant classification of the lineage/clade i.e. alpha, beta, etc.", + "examples": [ + "Variant of Concern (VOC) [GENEPIO:0100083]" + ], + "classification": "Lineage fields", + "label": "Variant Name", + "fill_mode": "batch" + }, + "variant_designation": { + "enum": [ + "Variant of Interest (VOI) [GENEPIO:0100082]", + "Variant of Concern (VOC) [GENEPIO:0100083]", + "Variant Under Monitoring (VUM) [GENEPIO:0100279]" + ], + "ontology": "GENEPIO:0001503", + "type": "string", + "description": "The variant classification of the lineage/clade i.e. 
variant, variant of concern.", + "examples": [ + "Variant of Concern (VOC) [GENEPIO:0100083]" + ], + "classification": "Lineage fields", + "label": "Variant designation", + "fill_mode": "batch" + }, + "gene_name_1": { + "enum": [ + "E gene [GENEPIO:0100151]", + "M gene [GENEPIO:0100152]", + "N gene [GENEPIO:0100153]", + "Spike gene [GENEPIO:0100154]", + "orf1ab (rep) [GENEPIO:0100155]", + "orf1a (pp1a) [GENEPIO:0100156]", + "nsp11 [GENEPIO:0100157]", + "nsp1 [GENEPIO:0100158]", + "nsp2 [GENEPIO:0100159]", + "nsp3 [GENEPIO:0100160]", + "nsp4 [GENEPIO:0100161]", + "nsp5 [GENEPIO:0100162]", + "nsp6 [GENEPIO:0100163]", + "nsp7 [GENEPIO:0100164]", + "nsp8 [GENEPIO:0100165]", + "nsp9 [GENEPIO:0100166]", + "nsp10 [GENEPIO:0100167]", + "RdRp gene (nsp12) [GENEPIO:0100168]", + "hel gene (nsp13) [GENEPIO:0100169]", + "exoN gene (nsp14) [GENEPIO:0100170]", + "nsp15 [GENEPIO:0100171]", + "nsp16 [GENEPIO:0100172]", + "orf3a [GENEPIO:0100173]", + "orf3b [GENEPIO:0100174]", + "orf6 (ns6) [GENEPIO:0100175]", + "orf7a [GENEPIO:0100176]", + "orf7b (ns7b) [GENEPIO:0100177]", + "orf8 (ns8) [GENEPIO:0100178]", + "orf9b [GENEPIO:0100179]", + "orf9c [GENEPIO:0100180]", + "orf10 [GENEPIO:0100181]", + "orf14 [GENEPIO:0100182]", + "SARS-COV-2 5' UTR [GENEPIO:0100183]", + "Other [NCIT:C124261]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001507", + "type": "string", + "description": "The name of the gene used in the diagnostic RT-PCR test.", + "examples": [ + "E gene [GENEPIO:0100151]" + ], + "classification": "Pathogen diagnostic testing", + "label": "Gene Name 1", + "fill_mode": "batch" + }, + "diagnostic_pcr_Ct_value_1": { + "examples": [ + "21" + ], + "ontology": "GENEPIO:0001509", + "type": "string", + "description": "The Ct value result from a diagnostic SARS-CoV-2 RT-PCR test.", + "classification": "Pathogen diagnostic 
testing", + "label": "Diagnostic Pcr Ct Value 1", + "fill_mode": "batch" + }, + "gene_name_2": { + "enum": [ + "E gene [GENEPIO:0100151]", + "M gene [GENEPIO:0100152]", + "N gene [GENEPIO:0100153]", + "Spike gene [GENEPIO:0100154]", + "orf1ab (rep) [GENEPIO:0100155]", + "orf1a (pp1a) [GENEPIO:0100156]", + "nsp11 [GENEPIO:0100157]", + "nsp1 [GENEPIO:0100158]", + "nsp2 [GENEPIO:0100159]", + "nsp3 [GENEPIO:0100160]", + "nsp4 [GENEPIO:0100161]", + "nsp5 [GENEPIO:0100162]", + "nsp6 [GENEPIO:0100163]", + "nsp7 [GENEPIO:0100164]", + "nsp8 [GENEPIO:0100165]", + "nsp9 [GENEPIO:0100166]", + "nsp10 [GENEPIO:0100167]", + "RdRp gene (nsp12) [GENEPIO:0100168]", + "hel gene (nsp13) [GENEPIO:0100169]", + "exoN gene (nsp14) [GENEPIO:0100170]", + "nsp15 [GENEPIO:0100171]", + "nsp16 [GENEPIO:0100172]", + "orf3a [GENEPIO:0100173]", + "orf3b [GENEPIO:0100174]", + "orf6 (ns6) [GENEPIO:0100175]", + "orf7a [GENEPIO:0100176]", + "orf7b (ns7b) [GENEPIO:0100177]", + "orf8 (ns8) [GENEPIO:0100178]", + "orf9b [GENEPIO:0100179]", + "orf9c [GENEPIO:0100180]", + "orf10 [GENEPIO:0100181]", + "orf14 [GENEPIO:0100182]", + "SARS-COV-2 5' UTR [GENEPIO:0100183]", + "Other [NCIT:C124261]", + "Not Applicable [GENEPIO:0001619]", + "Not Collected [GENEPIO:0001620]", + "Not Provided [GENEPIO:0001668]", + "Missing [GENEPIO:0001618]", + "Restricted Access [GENEPIO:0001810]" + ], + "ontology": "GENEPIO:0001510", + "type": "string", + "description": "The name of the gene used in the diagnostic RT-PCR test.", + "examples": [ + "RdRp gene (nsp12) [GENEPIO:0100168]" + ], + "classification": "Pathogen diagnostic testing", + "label": "Gene Name 2", + "fill_mode": "batch" + }, + "diagnostic_pcr_Ct_value_2": { + "examples": [ + "36" + ], + "ontology": "GENEPIO:0001512", + "type": "string", + "description": "The cycle threshold (CT) value result from a diagnostic SARS-CoV-2 RT-PCR test.", + "classification": "Pathogen diagnostic testing", + "label": "Diagnostic Pcr Ct Value-2", + "fill_mode": "batch" + }, + "authors": { 
+ "examples": [ + "" + ], + "ontology": "GENEPIO:0001517", + "type": "string", + "description": "", + "classification": "Public databases", + "label": "Authors", + "fill_mode": "batch" + }, + "gisaid_submitter_id": { + "examples": [ + "" + ], + "ontology": "NCIT:C54269", + "type": "string", + "description": "GISAID sequence ID.", + "classification": "Public databases", + "label": "GISAID submitter id", + "fill_mode": "sample" + }, + "gisaid_accession_id": { + "examples": [ + "NCIT:C180324" + ], + "ontology": "NCIT:C180324", + "type": "string", + "description": "GISAID sequence ID.", + "classification": "Public databases", + "label": "GISAID id", + "fill_mode": "sample" + }, + "gisaid_virus_name": { + "examples": [ + "hCoV-19/Canada/prov_rona_99/2020" + ], + "ontology": "GENEPIO:0100282", + "type": "string", + "description": "The user-defined GISAID virus name assigned to the sequence.", + "classification": "Public databases", + "label": "GISAID Virus Name", + "fill_mode": "sample" + }, + "gisaid_covv_type": { + "examples": ["betacoronavirus"], + "ontology": "NCIT:C25284", + "type": "string", + "description": "default must remain 'betacoronavirus'", + "clasification": "Database Identifiers", + "label": "GISAID covv type", + "fill_mode": "sample" + }, + "ena_analysis_accession": { + "examples": [ + "" + ], + "ontology": "GENEPIO_0001145", + "type": "string", + "description": "", + "classification": "Public databases", + "label": "Analysis Accession", + "fill_mode": "batch" + }, + "ena_study_accession": { + "examples": [ + "e.g PRJEB39632" + ], + "ontology": "GENEPIO_0001136", + "type": "string", + "description": "", + "classification": "Public databases", + "label": "Study accession", + "fill_mode": "batch" + }, + "ena_sample_accession": { + "examples": [ + "e.g SAMEA7098096" + ], + "ontology": "GENEPIO_0001139", + "type": "string", + "description": "", + "classification": "Public databases", + "label": "ENA Sample ID", + "fill_mode": "batch" + }, + 
"ena_experiment_accession": { + "examples": [ + "e.g ERX4331406" + ], + "ontology": "BU_ISCIII:015", + "type": "string", + "description": "", + "classification": "Public databases", + "label": "Experiment Accession", + "fill_mode": "batch" + }, + "ena_run_accession": { + "examples": [ + "e.g ERX4331406" + ], + "ontology": "BU_ISCIII:016", + "type": "string", + "description": "", + "classification": "Public databases", + "label": "Run Accession", + "fill_mode": "batch" + }, + "ena_submission_accession": { + "examples": [ + "e.g ERA2794974" + ], + "ontology": "BU_ISCIII:017", + "type": "string", + "description": "", + "classification": "Public databases", + "label": "Submission Accession", + "fill_mode": "batch" + }, + "ena_experiment_title": { + "examples": [ + "e.g Illumina MiSeq paired end sequencing" + ], + "ontology": "ORNASEQ_0000004", + "type": "string", + "description": "", + "classification": "Public databases", + "label": "Experiment title", + "fill_mode": "batch" + }, + "study_title": { + "examples": [ + "e.g SARS-CoV-2 genomes from late April in Stockholm" + ], + "ontology": "OPMI_0000380", + "type": "string", + "description": "", + "classification": "Public databases", + "label": "Study title", + "fill_mode": "batch" + }, + "ena_study_alias": { + "examples": [ + "e.g Sweden" + ], + "ontology": "SIO_001066", + "type": "string", + "description": "", + "clasification": "Submission ENA", + "label": "Study alias", + "table": [ + "study", + "experiments" + ] + }, + "broker_name": { + "examples": [ + "P17157_1007" + ], + "ontology": "BU_ISCIII:045", + "type": "string", + "description": "", + "classification": "Public databases", + "label": "Broker Name", + "fill_mode": "batch" + }, + "ena_first_created_date": { + "examples": [ + "e.g 2020-08-07" + ], + "ontology": "NCIT_C164483", + "type": "string", + "format": "date", + "description": "", + "classification": "Public databases", + "label": "First created date", + "fill_mode": "batch" + }, + "host health state": 
{ + "examples": [ + "" + ], + "ontology": "GENEPIO:0001388", + "type": "string", + "description": " Status of the host", + "clasification": "Host information", + "label": "Host health state", + "table": [ + "sample" + ] + }, + "ena_sample_description":{ + "examples": [ + "Sample from Belgian Covid-19 patient. Sample was obtained at the Hospital AZ Rivierenland, in Antwerp, Belgium." + ], + "ontology": "sep:00196", + "type": "string", + "description": "Free text description of the sample.", + "clasification": "Sample collection and processing", + "label": "Sample Description", + "table": "sample" + }, + "file_format": { + "examples": [ + "BAM,CRAM,FASTQ" + ], + "enum": [ + "BAM [format:2572]", + "CRAM [format:3462]", + "FASTQ [format:1930]" + ], + "ontology": "NMR:1001459", + "type": "string", + "description": "The run data file model.", + "clasification": "Submission ENA", + "label": "File format", + "table": [ + "experiments" + ] + }, + "collector_name": { + "examples": [ + "John Smith, unknown" + ], + "ontology": "GENEPIO:0001797", + "type": "string", + "description": "Name of the person who collected the specimen", + "clasification": "Sample collection and processing", + "label": "Sample collector name" + }, + "experiment_alias": { + "examples": [ + "experiment_alias_7a" + ], + "ontology": "NCIT_C42790", + "type": "string", + "description": "", + "clasification": "Submission ENA", + "label": "Experiment alias" + }, + "run_alias": { + "examples": [ + "e.g ena-EXPERIMENT-KAROLINSKA INSITUTET-29-07-2020-14:50:07:151-1" + ], + "ontology": "NCIT_C47911", + "type": "string", + "description": "", + "clasification": "Submission ENA", + "label": "Run Alias" + } + } +} diff --git a/relecov_tools/schema/sample_relecov.xml b/relecov_tools/schema/sample_relecov.xml new file mode 100644 index 00000000..4715010b --- /dev/null +++ b/relecov_tools/schema/sample_relecov.xml @@ -0,0 +1 @@ +SARS Sample 12697049Severe acute respiratory syndrome coronavirus 2SARS-CoV-2 Sample 
class SftpRelecov:
    """Handle an SFTP session with the remote server through paramiko.

    The server address is taken either from the package configuration
    (``ConfigJson``, topic ``sftp_handle``) when no ``conf_file`` is given, or
    from a JSON file with the following keys:
        {
            "sftp_server": "server_name",
            "sftp_port": "port_number"
        }
    Credentials are supplied through ``username`` and ``password``.
    """

    def __init__(self, conf_file=None, username=None, password=None):
        """Read the server settings and prepare (but do not open) the SSH client.

        Args:
            conf_file (str, optional): path to a JSON file holding
                ``sftp_server`` and ``sftp_port``. Defaults to None, in which
                case the values come from ``ConfigJson``.
            username (str, optional): user for the remote server.
            password (str, optional): password for the remote server.

        Exits the process with status 1 when ``conf_file`` does not exist or
        lacks one of the required keys.
        """
        if conf_file is None:
            config_json = ConfigJson()
            self.sftp_server = config_json.get_topic_data("sftp_handle", "sftp_server")
            self.sftp_port = config_json.get_topic_data("sftp_handle", "sftp_port")
        else:
            if not os.path.isfile(conf_file):
                log.error("Configuration file %s does not exists", conf_file)
                stderr.print(
                    "[red] Configuration file does not exist. " + conf_file + "!"
                )
                sys.exit(1)
            j_data = relecov_tools.utils.read_json_file(conf_file)
            try:
                self.sftp_server = j_data["sftp_server"]
                self.sftp_port = j_data["sftp_port"]
            except KeyError as e:
                log.error("Could not find the key %s in the config file", e)
                # str(e) is required: concatenating the KeyError object itself
                # to a str raises TypeError and hides the real problem.
                stderr.print(
                    "[red] Could not find the key "
                    + str(e)
                    + " in config file "
                    + conf_file
                )
                sys.exit(1)
        self.user_name = username
        self.password = password
        # No SFTP channel exists until open_connection() succeeds.
        self.sftp = None
        self.client = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts any unknown host key, which
        # leaves the session open to man-in-the-middle attacks. Kept as in the
        # original; consider loading known hosts instead.
        self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    def open_connection(self):
        """Connect to the server and open the SFTP channel.

        Errors raised by ``client.connect`` are not caught here and propagate
        to the caller; only a failure to open the SFTP channel is reported
        through the return value.

        Returns:
            bool: True if the SFTP channel was opened, False if it was not
        """
        log.info("Setting credentials for SFTP connection with remote server")
        self.client.connect(
            hostname=self.sftp_server,
            port=self.sftp_port,
            username=self.user_name,
            password=self.password,
            allow_agent=False,
            look_for_keys=False,
        )
        try:
            log.info("Trying to establish SFTP connection")
            self.sftp = self.client.open_sftp()
        except Exception as e:
            log.error("Could not establish SFTP connection: %s", e)
            stderr.print("[red]Could not establish SFTP connection")
            return False
        return True

    def list_remote_folders(self, folder_name, recursive=False):
        """Creates a directories list from the given client remote path

        Args:
            folder_name (str): folder name in remote path
            recursive (bool, optional): finds all subdirectories too. Defaults to False.

        Returns:
            directory_list(list(str)): ``[folder_name]`` when the folder has
            no subfolders; with ``recursive`` the joined paths of every nested
            subfolder (plus ``folder_name`` itself unless it is "."); otherwise
            the bare names of the direct subfolders.

        Raises:
            OSError: if a remote folder cannot be listed.
        """
        # Remote SFTP paths always use "/" whatever the local OS is.
        import posixpath

        log.info("Listing directories in %s", folder_name)
        # NOTE(review): the connection is opened here but only closed on the
        # non-recursive path below; the other return paths leave it open.
        # Kept as in the original because callers may rely on it - confirm.
        self.open_connection()
        try:
            content_list = self.sftp.listdir_attr(folder_name)
        except OSError as e:
            log.error("Invalid folder at remote sftp %s", e)
            raise
        if not any(stat.S_ISDIR(item.st_mode) for item in content_list):
            return [folder_name]

        directory_list = []

        def collect_subfolders(current, entries=None):
            # Depth-first, pre-order walk appending every subfolder found.
            if entries is None:
                try:
                    entries = self.sftp.listdir_attr(current)
                except OSError as e:
                    log.error("Invalid folder at remote sftp %s", e)
                    raise
            for entry in entries:
                if stat.S_ISDIR(entry.st_mode):
                    abspath = posixpath.join(current, entry.filename)
                    directory_list.append(abspath)
                    collect_subfolders(abspath)

        if recursive:
            # Reuse the listing already fetched for the top-level folder.
            collect_subfolders(folder_name, content_list)
            if folder_name != ".":
                directory_list.append(folder_name)
            return directory_list
        try:
            directory_list = [
                item.filename for item in content_list if stat.S_ISDIR(item.st_mode)
            ]
        except AttributeError:
            return False
        self.close_connection()
        return directory_list

    def get_file_list(self, folder_name):
        """Return the paths of the regular files found in a remote folder

        Args:
            folder_name (str): name of folder in remote repository

        Returns:
            file_list (list(str)): ``folder_name`` joined with each file name
        """
        # Remote SFTP paths always use "/" whatever the local OS is.
        import posixpath

        log.info("Listing files in %s", folder_name)
        return [
            posixpath.join(folder_name, content.filename)
            for content in self.sftp.listdir_attr(folder_name)
            if stat.S_ISREG(content.st_mode)
        ]

    def get_from_sftp(self, file, destination, exist_ok=False):
        """Download a file from remote sftp

        Args:
            file (str): path of the file in remote sftp
            destination (str): local path of the file after download
            exist_ok (bool): Skip download if file exists in local destination

        Returns:
            bool: True if download was successful, False if it was not
        """
        if exist_ok and os.path.exists(destination):
            return True
        try:
            self.sftp.get(file, destination)
        except FileNotFoundError as e:
            log.error("Unable to fetch file %s ", e)
            return False
        return True

    def _remote_dir_exists(self, path):
        """Return True when ``path`` exists on the remote side and is a directory."""
        try:
            return stat.S_ISDIR(self.sftp.stat(path).st_mode)
        except OSError:
            return False

    def make_dir(self, folder_name):
        """Create a new directory in remote sftp

        Args:
            folder_name (str): name of the directory to be created

        Returns:
            bool: True if directory was created, False if it already exists

        Raises:
            OSError: if creation failed for any reason other than the
                directory already being there.
        """
        try:
            self.sftp.mkdir(folder_name)
        except OSError as e:
            # SFTP servers usually answer a mkdir on an existing directory
            # with a generic failure, which paramiko raises as a plain OSError
            # rather than FileExistsError, so the directory is checked
            # explicitly. Any other failure is re-raised untouched.
            if not isinstance(e, FileExistsError) and not self._remote_dir_exists(
                folder_name
            ):
                raise
            log.error("Directory %s already exists", folder_name)
            stderr.print("[red]Directory already exists")
            return False
        return True

    def rename_file(self, old_name, new_name):
        """Rename a file in remote sftp

        Args:
            old_name (str): current name of the file
            new_name (str): new name of the file

        Returns:
            bool: True if file was renamed, False if it was not
        """
        try:
            self.sftp.rename(old_name, new_name)
        except FileNotFoundError:
            log.error("File %s not found", old_name)
            stderr.print("[red]File not found")
            return False
        return True

    def remove_file(self, file_name):
        """Remove a file from remote sftp

        Args:
            file_name (str): name of the file to be removed

        Returns:
            bool: True if file was removed, False if it was not
        """
        try:
            self.sftp.remove(file_name)
        except FileNotFoundError:
            log.error("File %s not found", file_name)
            stderr.print("[red]File not found")
            return False
        return True

    def remove_dir(self, folder_name):
        """Remove a directory from remote sftp

        Args:
            folder_name (str): name of the directory to be removed

        Returns:
            bool: True if directory was removed, False if it was not
        """
        try:
            self.sftp.rmdir(folder_name)
        except FileNotFoundError:
            log.error("Directory %s not found", folder_name)
            stderr.print("[red]Directory not found")
            return False
        return True

    def upload_file(self, local_path, remote_file):
        """Upload a file to remote sftp

        Args:
            local_path (str): path of the file in local machine
            remote_file (str): path of the file in remote sftp

        Returns:
            bool: True if file was uploaded, False if it was not
        """
        try:
            self.sftp.put(local_path, remote_file)
        except FileNotFoundError as e:
            log.error("File not found %s", e)
            stderr.print("[red]File not found")
            return False
        return True

    def close_connection(self):
        """Close the SFTP channel and the underlying SSH connection.

        Returns:
            bool: True if a session was closed, False if none was open
        """
        sftp = getattr(self, "sftp", None)
        if sftp is None:
            # open_connection() never succeeded (or the session was already
            # closed): there is nothing to release.
            return False
        log.info("Closing SFTP connection")
        try:
            sftp.close()
        finally:
            self.sftp = None
            # Also drop the SSH transport; closing only the SFTP channel
            # would leave the network connection open.
            client = getattr(self, "client", None)
            if client is not None:
                client.close()
        log.info("SFTP connection closed")
        return True
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/relecov_tools/templates/ENA_template_FILE.xml b/relecov_tools/templates/ENA_template_FILE.xml new file mode 100644 index 00000000..424d83e6 --- /dev/null +++ b/relecov_tools/templates/ENA_template_FILE.xml @@ -0,0 +1,122 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_LIBRARY_SELECTION.xml b/relecov_tools/templates/ENA_template_LIBRARY_SELECTION.xml new file mode 100644 index 00000000..ea6d6cda --- /dev/null +++ b/relecov_tools/templates/ENA_template_LIBRARY_SELECTION.xml @@ -0,0 +1,33 @@ + +RANDOM +PCR +RANDOM PCR +RT-PCR +HMPR +MF +repeat fractionation +size fractionation +MSLL +cDNA +cDNA_randomPriming +cDNA_oligo_dT +PolyA +Oligo-dT +Inverse rRNA +Inverse rRNA selection +ChIP +ChIP-Seq +MNase +DNase +Hybrid Selection +Reduced Representation +Restriction Digest +5-methylcytidine antibody +MBD2 protein methyl-CpG binding domain +CAGE +RACE +MDA +padlock probes capture method +other +unspecified + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_LIBRARY_SOURCE.xml b/relecov_tools/templates/ENA_template_LIBRARY_SOURCE.xml new file mode 100644 index 00000000..95689332 --- /dev/null +++ b/relecov_tools/templates/ENA_template_LIBRARY_SOURCE.xml @@ -0,0 +1,11 @@ + +GENOMIC +GENOMIC SINGLE CELL +TRANSCRIPTOMIC +TRANSCRIPTOMIC SINGLE CELL +METAGENOMIC +METATRANSCRIPTOMIC +SYNTHETIC +VIRAL RNA +OTHER + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_LIBRARY_STRATEGY.xml b/relecov_tools/templates/ENA_template_LIBRARY_STRATEGY.xml new file mode 100644 index 00000000..afb46af2 --- /dev/null +++ b/relecov_tools/templates/ENA_template_LIBRARY_STRATEGY.xml @@ -0,0 +1,41 @@ + +WGS +WGA +WXS +RNA-Seq +ssRNA-seq +miRNA-Seq +ncRNA-Seq +FL-cDNA +EST +Hi-C +ATAC-seq +WCS +RAD-Seq +CLONE +POOLCLONE +AMPLICON 
+CLONEEND +FINISHING +ChIP-Seq +MNase-Seq +DNase-Hypersensitivity +Bisulfite-Seq +CTS +MRE-Seq +MeDIP-Seq +MBD-Seq +Tn-Seq +VALIDATION +FAIRE-seq +SELEX +RIP-Seq +ChIA-PET +Synthetic-Long-Read +Targeted-Capture +Tethered Chromatin Conformation Capture +NOMe-Seq +ChM-Seq +GBS +OTHER + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_PLATFORM.xml b/relecov_tools/templates/ENA_template_PLATFORM.xml new file mode 100644 index 00000000..7cd494b1 --- /dev/null +++ b/relecov_tools/templates/ENA_template_PLATFORM.xml @@ -0,0 +1,102 @@ + + + 454 GS + 454 GS 20 + 454 GS FLX + 454 GS FLX+ + 454 GS FLX Titanium + 454 GS Junior + unspecified + + + HiSeq X Five + HiSeq X Ten + Illumina Genome Analyzer + Illumina Genome Analyzer II + Illumina Genome Analyzer IIx + Illumina HiScanSQ + Illumina HiSeq 1000 + Illumina HiSeq 1500 + Illumina HiSeq 2000 + Illumina HiSeq 2500 + Illumina HiSeq 3000 + Illumina HiSeq 4000 + Illumina HiSeq X + Illumina iSeq 100 + Illumina MiSeq + Illumina MiniSeq + Illumina NovaSeq 6000 + NextSeq 500 + NextSeq 550 + NextSeq 1000 + NextSeq 2000 + unspecified + + + Helicos HeliScope + unspecified + + + AB SOLiD System + AB SOLiD System 2.0 + AB SOLiD System 3.0 + AB SOLiD 3 Plus System + AB SOLiD 4 System + AB SOLiD 4hq System + AB SOLiD PI System + AB 5500 Genetic Analyzer + AB 5500xl Genetic Analyzer + AB 5500xl-W Genetic Analysis System + unspecified + + + Complete Genomics + unspecified + + + BGISEQ-50 + BGISEQ-500 + MGISEQ-2000RS + + + MinION + GridION + PromethION + unspecified + + + PacBio RS + PacBio RS II + Sequel + Sequel II + unspecified + + + Ion Torrent PGM + Ion Torrent Proton + Ion Torrent S5 + Ion Torrent S5 XL + Ion Torrent Genexus + Ion GeneStudio S5 + Ion GeneStudio S5 Prime + Ion GeneStudio S5 Plus + unspecified + + + AB 3730xL Genetic Analyzer + AB 3730 Genetic Analyzer + AB 3500xL Genetic Analyzer + AB 3500 Genetic Analyzer + AB 3130xL Genetic Analyzer + AB 3130 Genetic Analyzer + AB 310 Genetic Analyzer + 
unspecified + + + DNBSEQ-T7 + DNBSEQ-G400 + DNBSEQ-G50 + DNBSEQ-G400 FAST + unspecified + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_experiments.xml b/relecov_tools/templates/ENA_template_experiments.xml new file mode 100755 index 00000000..7d4cf7a8 --- /dev/null +++ b/relecov_tools/templates/ENA_template_experiments.xml @@ -0,0 +1,51 @@ + + + + + + ${row.title} + + + ${row.design_description} + + ${row.spot_descriptor} + + + + ${row.library_name} + + + + + + + + + ${row.library_construction_protocol} + + + + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + diff --git a/relecov_tools/templates/ENA_template_runs.xml b/relecov_tools/templates/ENA_template_runs.xml new file mode 100755 index 00000000..d788c8d1 --- /dev/null +++ b/relecov_tools/templates/ENA_template_runs.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + diff --git a/relecov_tools/templates/ENA_template_samples_ERC000011.xml b/relecov_tools/templates/ENA_template_samples_ERC000011.xml new file mode 100644 index 00000000..46cc304c --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000011.xml @@ -0,0 +1,216 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + + cell_type + ${row['cell_type']} + + + + + dev_stage + ${row['dev_stage']} + + + + + germline + ${row['germline']} + + + + + tissue_lib + ${row['tissue_lib']} + + + + + tissue_type + ${row['tissue_type']} + + + + + collection_date + ${row['collection_date']} + + + + + isolation_source + ${row['isolation_source']} + + + + + lat_lon + ${row['lat_lon']} + + + + + collected_by + ${row['collected_by']} + + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + + + geographic location (region and locality) + ${row['geographic location 
(region and locality)']} + + + + + identified_by + ${row['identified_by']} + + + + + environmental_sample + ${row['environmental_sample']} + + + + + mating_type + ${row['mating_type']} + + + + + sex + ${row['sex']} + + + + + lab_host + ${row['lab_host']} + + + + + host scientific name + ${row['host scientific name']} + + + + + bio_material + ${row['bio_material']} + + + + + culture_collection + ${row['culture_collection']} + + + + + specimen_voucher + ${row['specimen_voucher']} + + + + + cultivar + ${row['cultivar']} + + + + + ecotype + ${row['ecotype']} + + + + + isolate + ${row['isolate']} + + + + + sub_species + ${row['sub_species']} + + + + + variety + ${row['variety']} + + + + + sub_strain + ${row['sub_strain']} + + + + + cell_line + ${row['cell_line']} + + + + + serotype + ${row['serotype']} + + + + + serovar + ${row['serovar']} + + + + + strain + ${row['strain']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000012.xml b/relecov_tools/templates/ENA_template_samples_ERC000012.xml new file mode 100644 index 00000000..b0353355 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000012.xml @@ -0,0 +1,480 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample 
volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + geographic location (altitude) + ${row['geographic location (altitude)']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + air environmental package + ${row['air environmental package']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + 
geographic location (elevation) + ${row['geographic location (elevation)']} + + + + + ventilation rate + ${row['ventilation rate']} + + + + + ventilation type + ${row['ventilation type']} + + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + specific host + ${row['specific host']} + + + + + health or disease status of specific host + ${row['health or disease status of specific host']} + + + + + barometric pressure + ${row['barometric pressure']} + + + + + humidity + ${row['humidity']} + + + + + pollutants + ${row['pollutants']} + + + + + solar irradiance + ${row['solar irradiance']} + + + + + wind direction + ${row['wind direction']} + + + + + wind speed + ${row['wind speed']} + + + + + temperature + ${row['temperature']} + + + + + carbon dioxide + ${row['carbon dioxide']} + + + + + carbon monoxide + ${row['carbon monoxide']} + + + + + oxygen + ${row['oxygen']} + + + + + respirable particulate matter + ${row['respirable particulate matter']} + + + + + volatile organic compounds + ${row['volatile organic compounds']} + + + + + sample salinity + ${row['sample salinity']} + + + + + methane + ${row['methane']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + subspecific genetic 
lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000013.xml b/relecov_tools/templates/ENA_template_samples_ERC000013.xml new file mode 100644 index 00000000..89ab78d7 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000013.xml @@ -0,0 +1,566 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector 
+ ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + + geographic location (altitude) + ${row['geographic location (altitude)']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + host-associated environmental package + ${row['host-associated environmental package']} + + + + geographic location (depth) + ${row['geographic location (depth)']} + + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + geographic location (elevation) + ${row['geographic location (elevation)']} + + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material 
processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + host body product + ${row['host body product']} + + + + + host dry mass + ${row['host dry mass']} + + + + + oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + host disease status + ${row['host disease status']} + + + + + host common name + ${row['host common name']} + + + + + host subject id + ${row['host subject id']} + + + + + host age + ${row['host age']} + + + + + host taxid + ${row['host taxid']} + + + + + host body habitat + ${row['host body habitat']} + + + + + host body site + ${row['host body site']} + + + + + host life stage + ${row['host life stage']} + + + + + host height + ${row['host height']} + + + + + host length + ${row['host length']} + + + + + host growth conditions + ${row['host growth conditions']} + + + + + host substrate + ${row['host substrate']} + + + + + host total mass + ${row['host total mass']} + + + + + host infra-specific name + ${row['host infra-specific name']} + + + + + host infra-specific rank + ${row['host infra-specific rank']} + + + + + host phenotype + ${row['host phenotype']} + + + + + host body temperature + ${row['host body temperature']} + + + + + host color + ${row['host color']} + + + + + host shape + ${row['host shape']} + + + + + host sex + ${row['host sex']} + + + + + temperature + ${row['temperature']} + + + + + sample salinity + ${row['sample salinity']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + host blood pressure diastolic + ${row['host blood 
pressure diastolic']} + + + + + host blood pressure systolic + ${row['host blood pressure systolic']} + + + + + host diet + ${row['host diet']} + + + + + host last meal + ${row['host last meal']} + + + + + host family relationship + ${row['host family relationship']} + + + + + host genotype + ${row['host genotype']} + + + + + gravidity + ${row['gravidity']} + + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000014.xml b/relecov_tools/templates/ENA_template_samples_ERC000014.xml new file mode 100644 index 00000000..a8a9f475 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000014.xml @@ -0,0 +1,608 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight 
for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + + study completion status + ${row['study completion status']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + human-associated environmental package + ${row['human-associated environmental package']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + source 
material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + host body product + ${row['host body product']} + + + + + medical history performed + ${row['medical history performed']} + + + + + urine/collection method + ${row['urine/collection method']} + + + + + oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + host HIV status + ${row['host HIV status']} + + + + + host disease status + ${row['host disease status']} + + + + + lung/pulmonary disorder + ${row['lung/pulmonary disorder']} + + + + + lung/nose-throat disorder + ${row['lung/nose-throat disorder']} + + + + + blood/blood disorder + ${row['blood/blood disorder']} + + + + + urine/kidney disorder + ${row['urine/kidney disorder']} + + + + + urine/urogenital tract disorder + ${row['urine/urogenital tract disorder']} + + + + + host subject id + ${row['host subject id']} + + + + + IHMC medication code + ${row['IHMC medication code']} + + + + + host age + ${row['host age']} + + + + + host body site + ${row['host body site']} + + + + + drug usage + ${row['drug usage']} + + + + + host height + ${row['host height']} + + + + + host body-mass index + ${row['host body-mass index']} + + + + + IHMC ethnicity + ${row['IHMC ethnicity']} + + + + + host occupation + ${row['host occupation']} + + + + + host total mass + ${row['host total 
mass']} + + + + + host phenotype + ${row['host phenotype']} + + + + + host body temperature + ${row['host body temperature']} + + + + + host sex + ${row['host sex']} + + + + + presence of pets or farm animals + ${row['presence of pets or farm animals']} + + + + + temperature + ${row['temperature']} + + + + + sample salinity + ${row['sample salinity']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + smoker + ${row['smoker']} + + + + + major diet change in last six months + ${row['major diet change in last six months']} + + + + + weight loss in last three months + ${row['weight loss in last three months']} + + + + + travel outside the country in last six months + ${row['travel outside the country in last six months']} + + + + + host diet + ${row['host diet']} + + + + + twin sibling presence + ${row['twin sibling presence']} + + + + + host last meal + ${row['host last meal']} + + + + + amniotic fluid/gestation state + ${row['amniotic fluid/gestation state']} + + + + + host family relationship + ${row['host family relationship']} + + + + + amniotic fluid/maternal health status + ${row['amniotic fluid/maternal health status']} + + + + + host genotype + ${row['host genotype']} + + + + + amniotic fluid/foetal health status + ${row['amniotic fluid/foetal health status']} + + + + + host pulse + ${row['host pulse']} + + + + + amniotic fluid/color + ${row['amniotic fluid/color']} + + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + 
SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000015.xml b/relecov_tools/templates/ENA_template_samples_ERC000015.xml new file mode 100644 index 00000000..527a756b --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000015.xml @@ -0,0 +1,512 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence 
quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + human gut environmental package + ${row['human gut environmental package']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + host body product + ${row['host body product']} + + + + + medical history performed + ${row['medical history performed']} + + + + + oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + gastrointestinal tract disorder + 
${row['gastrointestinal tract disorder']} + + + + + liver disorder + ${row['liver disorder']} + + + + + host disease status + ${row['host disease status']} + + + + + host subject id + ${row['host subject id']} + + + + + IHMC medication code + ${row['IHMC medication code']} + + + + + host age + ${row['host age']} + + + + + host body site + ${row['host body site']} + + + + + host height + ${row['host height']} + + + + + host body-mass index + ${row['host body-mass index']} + + + + + IHMC ethnicity + ${row['IHMC ethnicity']} + + + + + host occupation + ${row['host occupation']} + + + + + host total mass + ${row['host total mass']} + + + + + host phenotype + ${row['host phenotype']} + + + + + host body temperature + ${row['host body temperature']} + + + + + host sex + ${row['host sex']} + + + + + temperature + ${row['temperature']} + + + + + sample salinity + ${row['sample salinity']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + special diet + ${row['special diet']} + + + + + host diet + ${row['host diet']} + + + + + host last meal + ${row['host last meal']} + + + + + host family relationship + ${row['host family relationship']} + + + + + host genotype + ${row['host genotype']} + + + + + host pulse + ${row['host pulse']} + + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000016.xml 
b/relecov_tools/templates/ENA_template_samples_ERC000016.xml new file mode 100644 index 00000000..9e3f1e74 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000016.xml @@ -0,0 +1,506 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + 
+ relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + human oral environmental package + ${row['human oral environmental package']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + host body product + ${row['host body product']} + + + + + medical history performed + ${row['medical history performed']} + + + + + oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + nose/mouth/teeth/throat disorder + ${row['nose/mouth/teeth/throat disorder']} + + + + + host disease status + ${row['host disease status']} + + + + + host subject id + ${row['host subject id']} + + + + + 
IHMC medication code + ${row['IHMC medication code']} + + + + + host age + ${row['host age']} + + + + + host body site + ${row['host body site']} + + + + + host height + ${row['host height']} + + + + + host body-mass index + ${row['host body-mass index']} + + + + + IHMC ethnicity + ${row['IHMC ethnicity']} + + + + + host occupation + ${row['host occupation']} + + + + + host total mass + ${row['host total mass']} + + + + + host phenotype + ${row['host phenotype']} + + + + + host body temperature + ${row['host body temperature']} + + + + + host sex + ${row['host sex']} + + + + + temperature + ${row['temperature']} + + + + + sample salinity + ${row['sample salinity']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + time since last toothbrushing + ${row['time since last toothbrushing']} + + + + + host diet + ${row['host diet']} + + + + + host last meal + ${row['host last meal']} + + + + + host family relationship + ${row['host family relationship']} + + + + + host genotype + ${row['host genotype']} + + + + + host pulse + ${row['host pulse']} + + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000017.xml b/relecov_tools/templates/ENA_template_samples_ERC000017.xml new file mode 100644 index 00000000..93098334 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000017.xml @@ -0,0 +1,512 @@ + + 
+ + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + 
geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + human skin environmental package + ${row['human skin environmental package']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + host body product + ${row['host body product']} + + + + + medical history performed + ${row['medical history performed']} + + + + + oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + dermatology disorder + ${row['dermatology disorder']} + + + + + host disease status + ${row['host disease status']} + + + + + host subject id + ${row['host subject id']} + + + + + IHMC medication code + ${row['IHMC medication code']} + + + + + host age + ${row['host age']} + + + + + host body site + ${row['host body site']} + + + + + host height + ${row['host height']} + + + + + host body-mass index + 
${row['host body-mass index']} + + + + + IHMC ethnicity + ${row['IHMC ethnicity']} + + + + + host occupation + ${row['host occupation']} + + + + + host total mass + ${row['host total mass']} + + + + + host phenotype + ${row['host phenotype']} + + + + + host body temperature + ${row['host body temperature']} + + + + + host sex + ${row['host sex']} + + + + + temperature + ${row['temperature']} + + + + + sample salinity + ${row['sample salinity']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + time since last wash + ${row['time since last wash']} + + + + + dominant hand + ${row['dominant hand']} + + + + + host diet + ${row['host diet']} + + + + + host last meal + ${row['host last meal']} + + + + + host family relationship + ${row['host family relationship']} + + + + + host genotype + ${row['host genotype']} + + + + + host pulse + ${row['host pulse']} + + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000018.xml b/relecov_tools/templates/ENA_template_samples_ERC000018.xml new file mode 100644 index 00000000..c30042b8 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000018.xml @@ -0,0 +1,554 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor 
+ ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic 
location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + human vaginal environmental package + ${row['human vaginal environmental package']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + host body product + ${row['host body product']} + + + + + medical history performed + ${row['medical history performed']} + + + + + oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + gynecological disorder + ${row['gynecological disorder']} + + + + + urogenital disorder + ${row['urogenital disorder']} + + + + + host disease status + ${row['host disease status']} + + + + + host subject id + ${row['host subject id']} + + + + + IHMC medication code + ${row['IHMC medication code']} + + + + + host age + ${row['host age']} + + + + + host body site + ${row['host body site']} + + + + + host height + ${row['host height']} + + + + + host body-mass index + ${row['host body-mass index']} + + + + + IHMC ethnicity + ${row['IHMC ethnicity']} + + + + + host occupation + ${row['host 
occupation']} + + + + + host total mass + ${row['host total mass']} + + + + + host phenotype + ${row['host phenotype']} + + + + + host body temperature + ${row['host body temperature']} + + + + + host sex + ${row['host sex']} + + + + + temperature + ${row['temperature']} + + + + + sample salinity + ${row['sample salinity']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + menarche + ${row['menarche']} + + + + + sexual activity + ${row['sexual activity']} + + + + + pregnancy + ${row['pregnancy']} + + + + + douche + ${row['douche']} + + + + + birth control + ${row['birth control']} + + + + + menopause + ${row['menopause']} + + + + + HRT + ${row['HRT']} + + + + + hysterectomy + ${row['hysterectomy']} + + + + + host diet + ${row['host diet']} + + + + + host last meal + ${row['host last meal']} + + + + + host family relationship + ${row['host family relationship']} + + + + + host genotype + ${row['host genotype']} + + + + + host pulse + ${row['host pulse']} + + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000019.xml b/relecov_tools/templates/ENA_template_samples_ERC000019.xml new file mode 100644 index 00000000..8af01c6e --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000019.xml @@ -0,0 +1,694 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} 
+ + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + + geographic location (altitude) + ${row['geographic location (altitude)']} + + + + 
geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + microbial mat/biofilm environmental package + ${row['microbial mat/biofilm environmental package']} + + + geographic location (depth) + ${row['geographic location (depth)']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + geographic location (elevation) + ${row['geographic location (elevation)']} + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + biomass + ${row['biomass']} + + + + + oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + specific host + ${row['specific host']} + + + + + health or disease status of specific host + ${row['health or disease status of specific host']} + + + + + alkalinity + ${row['alkalinity']} + + + + + mean friction velocity + ${row['mean friction velocity']} + + + + + mean peak friction velocity + ${row['mean peak friction 
velocity']} + + + + + pressure + ${row['pressure']} + + + + + temperature + ${row['temperature']} + + + + + turbidity + ${row['turbidity']} + + + + + pH + ${row['pH']} + + + + + alkyl diethers + ${row['alkyl diethers']} + + + + + aminopeptidase activity + ${row['aminopeptidase activity']} + + + + + ammonium + ${row['ammonium']} + + + + + bacterial carbon production + ${row['bacterial carbon production']} + + + + + bishomohopanol + ${row['bishomohopanol']} + + + + + bromide + ${row['bromide']} + + + + + calcium + ${row['calcium']} + + + + + carbon/nitrogen ratio + ${row['carbon/nitrogen ratio']} + + + + + chloride + ${row['chloride']} + + + + + chlorophyll + ${row['chlorophyll']} + + + + + diether lipids + ${row['diether lipids']} + + + + + dissolved carbon dioxide + ${row['dissolved carbon dioxide']} + + + + + dissolved hydrogen + ${row['dissolved hydrogen']} + + + + + dissolved inorganic carbon + ${row['dissolved inorganic carbon']} + + + + + dissolved organic carbon + ${row['dissolved organic carbon']} + + + + + dissolved organic nitrogen + ${row['dissolved organic nitrogen']} + + + + + methane + ${row['methane']} + + + + + dissolved oxygen + ${row['dissolved oxygen']} + + + + + glucosidase activity + ${row['glucosidase activity']} + + + + + magnesium + ${row['magnesium']} + + + + + n-alkanes + ${row['n-alkanes']} + + + + + nitrate + ${row['nitrate']} + + + + + nitrite + ${row['nitrite']} + + + + + nitrogen + ${row['nitrogen']} + + + + + organic carbon + ${row['organic carbon']} + + + + + organic matter + ${row['organic matter']} + + + + + organic nitrogen + ${row['organic nitrogen']} + + + + + particulate organic carbon + ${row['particulate organic carbon']} + + + + + petroleum hydrocarbon + ${row['petroleum hydrocarbon']} + + + + + phaeopigments + ${row['phaeopigments']} + + + + + phosphate + ${row['phosphate']} + + + + + phospholipid fatty acid + ${row['phospholipid fatty acid']} + + + + + potassium + ${row['potassium']} + + + + + redox potential + 
${row['redox potential']} + + + + + salinity + ${row['salinity']} + + + + + total carbon + ${row['total carbon']} + + + + + silicate + ${row['silicate']} + + + + + sodium + ${row['sodium']} + + + + + total organic carbon + ${row['total organic carbon']} + + + + + water content + ${row['water content']} + + + + + sulfate + ${row['sulfate']} + + + + + sulfide + ${row['sulfide']} + + + + + total nitrogen + ${row['total nitrogen']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000020.xml b/relecov_tools/templates/ENA_template_samples_ERC000020.xml new file mode 100644 index 00000000..d26699db --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000020.xml @@ -0,0 +1,638 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + 
+ + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + + geographic location (altitude) + ${row['geographic location (altitude)']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + plant-associated environmental package + ${row['plant-associated environmental package']} + + + + geographic location (depth) + 
${row['geographic location (depth)']} + + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + geographic location (elevation) + ${row['geographic location (elevation)']} + + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + host dry mass + ${row['host dry mass']} + + + + + oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + plant product + ${row['plant product']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + host wet mass + ${row['host wet mass']} + + + + + sample storage location + ${row['sample storage location']} + + + + + host disease status + ${row['host disease status']} + + + + + host common name + ${row['host common name']} + + + + + host age + ${row['host age']} + + + + + host taxid + ${row['host taxid']} + + + + + host life stage + ${row['host life stage']} + + + + + host height + ${row['host height']} + + + + + host length + ${row['host length']} + + + + + plant body site + ${row['plant body site']} + + + + + host total mass + ${row['host total mass']} + + + + + host infra-specific name + ${row['host infra-specific name']} + + + + + host infra-specific rank + ${row['host infra-specific rank']} + + + + + host phenotype + ${row['host phenotype']} + + + + + climate environment + ${row['climate environment']} + + + + + 
gaseous environment + ${row['gaseous environment']} + + + + + seasonal environment + ${row['seasonal environment']} + + + + + temperature + ${row['temperature']} + + + + + sample salinity + ${row['sample salinity']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + host genotype + ${row['host genotype']} + + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + air temperature regimen + ${row['air temperature regimen']} + + + + + antibiotic regimen + ${row['antibiotic regimen']} + + + + + chemical mutagen + ${row['chemical mutagen']} + + + + + fertilizer regimen + ${row['fertilizer regimen']} + + + + + fungicide regimen + ${row['fungicide regimen']} + + + + + gravity + ${row['gravity']} + + + + + growth hormone regimen + ${row['growth hormone regimen']} + + + + + growth media + ${row['growth media']} + + + + + herbicide regimen + ${row['herbicide regimen']} + + + + + humidity regimen + ${row['humidity regimen']} + + + + + mineral nutrient regimen + ${row['mineral nutrient regimen']} + + + + + non-mineral nutrient regimen + ${row['non-mineral nutrient regimen']} + + + + + pesticide regimen + ${row['pesticide regimen']} + + + + + pH regimen + ${row['pH regimen']} + + + + + radiation regimen + ${row['radiation regimen']} + + + + + rainfall regimen + ${row['rainfall regimen']} + + + + + salt regimen + ${row['salt regimen']} + + + + + standing water regimen + ${row['standing water regimen']} + + + + + tissue culture growth media + ${row['tissue culture growth media']} + + + + + watering regimen + ${row['watering regimen']} + + + + + water temperature regimen + ${row['water temperature regimen']} + + + + + 
mechanical damage + ${row['mechanical damage']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000021.xml b/relecov_tools/templates/ENA_template_samples_ERC000021.xml new file mode 100644 index 00000000..50ecca6f --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000021.xml @@ -0,0 +1,658 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex 
identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + + geographic location (altitude) + ${row['geographic location (altitude)']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + sediment environmental package + ${row['sediment environmental package']} + + + geographic location (depth) + ${row['geographic location (depth)']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + geographic location (elevation) + ${row['geographic location (elevation)']} + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + biomass + ${row['biomass']} + + + + + density + ${row['density']} + + + + + 
oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + specific host + ${row['specific host']} + + + + + health or disease status of specific host + ${row['health or disease status of specific host']} + + + + + alkyl diethers + ${row['alkyl diethers']} + + + + + aminopeptidase activity + ${row['aminopeptidase activity']} + + + + + ammonium + ${row['ammonium']} + + + + + bacterial carbon production + ${row['bacterial carbon production']} + + + + + bishomohopanol + ${row['bishomohopanol']} + + + + + bromide + ${row['bromide']} + + + + + calcium + ${row['calcium']} + + + + + carbon/nitrogen ratio + ${row['carbon/nitrogen ratio']} + + + + + chloride + ${row['chloride']} + + + + + chlorophyll + ${row['chlorophyll']} + + + + + diether lipids + ${row['diether lipids']} + + + + + dissolved carbon dioxide + ${row['dissolved carbon dioxide']} + + + + + dissolved hydrogen + ${row['dissolved hydrogen']} + + + + + dissolved inorganic carbon + ${row['dissolved inorganic carbon']} + + + + + dissolved organic carbon + ${row['dissolved organic carbon']} + + + + + dissolved organic nitrogen + ${row['dissolved organic nitrogen']} + + + + + methane + ${row['methane']} + + + + + dissolved oxygen + ${row['dissolved oxygen']} + + + + + glucosidase activity + ${row['glucosidase activity']} + + + + + magnesium + ${row['magnesium']} + + + + + n-alkanes + ${row['n-alkanes']} + + + + + nitrate + ${row['nitrate']} + + + + + nitrite + ${row['nitrite']} + + + + + nitrogen + ${row['nitrogen']} + + + + + organic carbon + ${row['organic carbon']} + + + + + organic matter + ${row['organic matter']} + + + + + organic nitrogen + ${row['organic nitrogen']} + + + + + particulate organic carbon + ${row['particulate 
organic carbon']} + + + + + petroleum hydrocarbon + ${row['petroleum hydrocarbon']} + + + + + phaeopigments + ${row['phaeopigments']} + + + + + phosphate + ${row['phosphate']} + + + + + phospholipid fatty acid + ${row['phospholipid fatty acid']} + + + + + potassium + ${row['potassium']} + + + + + redox potential + ${row['redox potential']} + + + + + salinity + ${row['salinity']} + + + + + total carbon + ${row['total carbon']} + + + + + silicate + ${row['silicate']} + + + + + sodium + ${row['sodium']} + + + + + total organic carbon + ${row['total organic carbon']} + + + + + water content + ${row['water content']} + + + + + sulfate + ${row['sulfate']} + + + + + sulfide + ${row['sulfide']} + + + + + total nitrogen + ${row['total nitrogen']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000022.xml b/relecov_tools/templates/ENA_template_samples_ERC000022.xml new file mode 100644 index 00000000..b129c400 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000022.xml @@ -0,0 +1,646 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + + slope gradient + ${row['slope gradient']} + + + + + slope aspect + ${row['slope aspect']} + + + + + profile position + ${row['profile 
position']} + + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + + pooling of DNA extracts (if done) + ${row['pooling of DNA extracts (if done)']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + + geographic 
location (altitude) + ${row['geographic location (altitude)']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + soil environmental package + ${row['soil environmental package']} + + + geographic location (depth) + ${row['geographic location (depth)']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + geographic location (elevation) + ${row['geographic location (elevation)']} + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + composite design/sieving (if any) + ${row['composite design/sieving (if any)']} + + + + + sample weight for DNA extraction + ${row['sample weight for DNA extraction']} + + + + + storage conditions (fresh/frozen/other) + ${row['storage conditions (fresh/frozen/other)']} + + + + + microbial biomass + ${row['microbial biomass']} + + + + + microbial biomass method + ${row['microbial biomass method']} + + + + + extreme_unusual_properties/salinity + ${row['extreme_unusual_properties/salinity']} + + + + + extreme_unusual_properties/salinity method + ${row['extreme_unusual_properties/salinity method']} + + + + + extreme_unusual_properties/heavy metals + 
${row['extreme_unusual_properties/heavy metals']} + + + + + extreme_unusual_properties/heavy metals method + ${row['extreme_unusual_properties/heavy metals method']} + + + + + extreme_unusual_properties/Al saturation + ${row['extreme_unusual_properties/Al saturation']} + + + + + extreme_unusual_properties/Al saturation method + ${row['extreme_unusual_properties/Al saturation method']} + + + + + specific host + ${row['specific host']} + + + + + health or disease status of specific host + ${row['health or disease status of specific host']} + + + + + link to climate information + ${row['link to climate information']} + + + + + link to classification information + ${row['link to classification information']} + + + + + links to additional analysis + ${row['links to additional analysis']} + + + + + current land use + ${row['current land use']} + + + + + current vegetation + ${row['current vegetation']} + + + + + current vegetation method + ${row['current vegetation method']} + + + + + horizon + ${row['horizon']} + + + + + horizon method + ${row['horizon method']} + + + + + mean annual and seasonal temperature + ${row['mean annual and seasonal temperature']} + + + + + mean annual and seasonal precipitation + ${row['mean annual and seasonal precipitation']} + + + + + soil_taxonomic/FAO classification + ${row['soil_taxonomic/FAO classification']} + + + + + soil_taxonomic/local classification + ${row['soil_taxonomic/local classification']} + + + + + soil_taxonomic/local classification method + ${row['soil_taxonomic/local classification method']} + + + + + soil type + ${row['soil type']} + + + + + soil type method + ${row['soil type method']} + + + + + drainage classification + ${row['drainage classification']} + + + + + texture + ${row['texture']} + + + + + texture method + ${row['texture method']} + + + + + pH + ${row['pH']} + + + + + pH method + ${row['pH method']} + + + + + water content method + ${row['water content method']} + + + + + total organic C method + 
${row['total organic C method']} + + + + + total nitrogen method + ${row['total nitrogen method']} + + + + + total organic carbon + ${row['total organic carbon']} + + + + + water content + ${row['water content']} + + + + + total nitrogen + ${row['total nitrogen']} + + + + + history/previous land use + ${row['history/previous land use']} + + + + + history/previous land use method + ${row['history/previous land use method']} + + + + + history/crop rotation + ${row['history/crop rotation']} + + + + + history/agrochemical additions + ${row['history/agrochemical additions']} + + + + + history/tillage + ${row['history/tillage']} + + + + + history/fire + ${row['history/fire']} + + + + + history/flooding + ${row['history/flooding']} + + + + + history/extreme events + ${row['history/extreme events']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000023.xml b/relecov_tools/templates/ENA_template_samples_ERC000023.xml new file mode 100644 index 00000000..0e16a667 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000023.xml @@ -0,0 +1,548 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of 
replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location 
(region and locality) + ${row['geographic location (region and locality)']} + + + + wastewater/sludge environmental package + ${row['wastewater/sludge environmental package']} + + + + geographic location (depth) + ${row['geographic location (depth)']} + + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + biochemical oxygen demand + ${row['biochemical oxygen demand']} + + + + + chemical oxygen demand + ${row['chemical oxygen demand']} + + + + + pre-treatment + ${row['pre-treatment']} + + + + + primary treatment + ${row['primary treatment']} + + + + + reactor type + ${row['reactor type']} + + + + + secondary treatment + ${row['secondary treatment']} + + + + + sludge retention time + ${row['sludge retention time']} + + + + + tertiary treatment + ${row['tertiary treatment']} + + + + + specific host + ${row['specific host']} + + + + + health or disease status of specific host + ${row['health or disease status of specific host']} + + + + + alkalinity + ${row['alkalinity']} + + + + + industrial effluent percent + ${row['industrial 
effluent percent']} + + + + + sewage type + ${row['sewage type']} + + + + + wastewater type + ${row['wastewater type']} + + + + + temperature + ${row['temperature']} + + + + + pH + ${row['pH']} + + + + + efficiency percent + ${row['efficiency percent']} + + + + + emulsions + ${row['emulsions']} + + + + + gaseous substances + ${row['gaseous substances']} + + + + + inorganic particles + ${row['inorganic particles']} + + + + + organic particles + ${row['organic particles']} + + + + + sample salinity + ${row['sample salinity']} + + + + + soluble inorganic material + ${row['soluble inorganic material']} + + + + + soluble organic material + ${row['soluble organic material']} + + + + + suspended solids + ${row['suspended solids']} + + + + + total phosphate + ${row['total phosphate']} + + + + + nitrate + ${row['nitrate']} + + + + + phosphate + ${row['phosphate']} + + + + + sodium + ${row['sodium']} + + + + + total nitrogen + ${row['total nitrogen']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000024.xml b/relecov_tools/templates/ENA_template_samples_ERC000024.xml new file mode 100644 index 00000000..43c57505 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000024.xml @@ -0,0 +1,798 @@ + + + + + + ${row.title} + + ${row.taxon_id} + 
${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + + geographic location (altitude) + 
${row['geographic location (altitude)']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + water environmental package + ${row['water environmental package']} + + + geographic location (depth) + ${row['geographic location (depth)']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + geographic location (elevation) + ${row['geographic location (elevation)']} + + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + biomass + ${row['biomass']} + + + + + density + ${row['density']} + + + + + oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + specific host + ${row['specific host']} + + + + + health or disease status of specific host + ${row['health or disease status of specific host']} + + + + + alkalinity + ${row['alkalinity']} + + + + + atmospheric data + ${row['atmospheric data']} + + + + + conductivity + 
${row['conductivity']} + + + + + water current + ${row['water current']} + + + + + fluorescence + ${row['fluorescence']} + + + + + light intensity + ${row['light intensity']} + + + + + mean friction velocity + ${row['mean friction velocity']} + + + + + mean peak friction velocity + ${row['mean peak friction velocity']} + + + + + downward PAR + ${row['downward PAR']} + + + + + photon flux + ${row['photon flux']} + + + + + pressure + ${row['pressure']} + + + + + temperature + ${row['temperature']} + + + + + tidal stage + ${row['tidal stage']} + + + + + pH + ${row['pH']} + + + + + total depth of water column + ${row['total depth of water column']} + + + + + alkyl diethers + ${row['alkyl diethers']} + + + + + aminopeptidase activity + ${row['aminopeptidase activity']} + + + + + ammonium + ${row['ammonium']} + + + + + bacterial carbon production + ${row['bacterial carbon production']} + + + + + bacterial production + ${row['bacterial production']} + + + + + bacterial respiration + ${row['bacterial respiration']} + + + + + bishomohopanol + ${row['bishomohopanol']} + + + + + bromide + ${row['bromide']} + + + + + calcium + ${row['calcium']} + + + + + carbon/nitrogen ratio + ${row['carbon/nitrogen ratio']} + + + + + chloride + ${row['chloride']} + + + + + chlorophyll + ${row['chlorophyll']} + + + + + diether lipids + ${row['diether lipids']} + + + + + dissolved carbon dioxide + ${row['dissolved carbon dioxide']} + + + + + dissolved hydrogen + ${row['dissolved hydrogen']} + + + + + dissolved inorganic carbon + ${row['dissolved inorganic carbon']} + + + + + dissolved inorganic nitrogen + ${row['dissolved inorganic nitrogen']} + + + + + dissolved inorganic phosphorus + ${row['dissolved inorganic phosphorus']} + + + + + dissolved organic carbon + ${row['dissolved organic carbon']} + + + + + dissolved organic nitrogen + ${row['dissolved organic nitrogen']} + + + + + dissolved oxygen + ${row['dissolved oxygen']} + + + + + glucosidase activity + ${row['glucosidase activity']} + + 
+ + + magnesium + ${row['magnesium']} + + + + + n-alkanes + ${row['n-alkanes']} + + + + + nitrate + ${row['nitrate']} + + + + + nitrite + ${row['nitrite']} + + + + + nitrogen + ${row['nitrogen']} + + + + + organic carbon + ${row['organic carbon']} + + + + + organic matter + ${row['organic matter']} + + + + + organic nitrogen + ${row['organic nitrogen']} + + + + + particulate organic carbon + ${row['particulate organic carbon']} + + + + + particulate organic nitrogen + ${row['particulate organic nitrogen']} + + + + + petroleum hydrocarbon + ${row['petroleum hydrocarbon']} + + + + + phaeopigments + ${row['phaeopigments']} + + + + + phosphate + ${row['phosphate']} + + + + + phospholipid fatty acid + ${row['phospholipid fatty acid']} + + + + + potassium + ${row['potassium']} + + + + + primary production + ${row['primary production']} + + + + + redox potential + ${row['redox potential']} + + + + + salinity + ${row['salinity']} + + + + + silicate + ${row['silicate']} + + + + + sodium + ${row['sodium']} + + + + + soluble reactive phosphorus + ${row['soluble reactive phosphorus']} + + + + + sulfate + ${row['sulfate']} + + + + + sulfide + ${row['sulfide']} + + + + + suspended particulate matter + ${row['suspended particulate matter']} + + + + + total dissolved nitrogen + ${row['total dissolved nitrogen']} + + + + + total inorganic nitrogen + ${row['total inorganic nitrogen']} + + + + + total nitrogen + ${row['total nitrogen']} + + + + + total particulate carbon + ${row['total particulate carbon']} + + + + + total phosphorus + ${row['total phosphorus']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + 
${row['observed biotic relationship']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000025.xml b/relecov_tools/templates/ENA_template_samples_ERC000025.xml new file mode 100644 index 00000000..d2a78d80 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000025.xml @@ -0,0 +1,584 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} 
+ + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + + geographic location (altitude) + ${row['geographic location (altitude)']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + miscellaneous environmental package + ${row['miscellaneous environmental package']} + + + + geographic location (depth) + ${row['geographic location (depth)']} + + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + geographic location (elevation) + ${row['geographic location (elevation)']} + + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + biomass + ${row['biomass']} + + + + + density + ${row['density']} + + + + + 
oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + specific host + ${row['specific host']} + + + + + health or disease status of specific host + ${row['health or disease status of specific host']} + + + + + alkalinity + ${row['alkalinity']} + + + + + water current + ${row['water current']} + + + + + pressure + ${row['pressure']} + + + + + temperature + ${row['temperature']} + + + + + pH + ${row['pH']} + + + + + ammonium + ${row['ammonium']} + + + + + bromide + ${row['bromide']} + + + + + calcium + ${row['calcium']} + + + + + chloride + ${row['chloride']} + + + + + chlorophyll + ${row['chlorophyll']} + + + + + diether lipids + ${row['diether lipids']} + + + + + dissolved carbon dioxide + ${row['dissolved carbon dioxide']} + + + + + dissolved hydrogen + ${row['dissolved hydrogen']} + + + + + dissolved inorganic carbon + ${row['dissolved inorganic carbon']} + + + + + dissolved organic nitrogen + ${row['dissolved organic nitrogen']} + + + + + dissolved oxygen + ${row['dissolved oxygen']} + + + + + nitrate + ${row['nitrate']} + + + + + nitrite + ${row['nitrite']} + + + + + nitrogen + ${row['nitrogen']} + + + + + organic carbon + ${row['organic carbon']} + + + + + organic matter + ${row['organic matter']} + + + + + organic nitrogen + ${row['organic nitrogen']} + + + + + phosphate + ${row['phosphate']} + + + + + phospholipid fatty acid + ${row['phospholipid fatty acid']} + + + + + potassium + ${row['potassium']} + + + + + salinity + ${row['salinity']} + + + + + silicate + ${row['silicate']} + + + + + sodium + ${row['sodium']} + + + + + sulfate + ${row['sulfate']} + + + + + sulfide + ${row['sulfide']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + 
subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000027.xml b/relecov_tools/templates/ENA_template_samples_ERC000027.xml new file mode 100644 index 00000000..c24d787d --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000027.xml @@ -0,0 +1,802 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + Event Date/Time + ${row['Event Date/Time']} + + + Latitude Start + ${row['Latitude Start']} + + + Longitude Start + ${row['Longitude Start']} + + + + Latitude End + ${row['Latitude End']} + + + + + Longitude End + ${row['Longitude End']} + + + + Depth + ${row['Depth']} + + + Protocol Label + ${row['Protocol Label']} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid 
extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + + sequence quality check + ${row['sequence quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + environmental package + ${row['environmental package']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + Sampling Campaign + ${row['Sampling Campaign']} + + + Sampling Site + ${row['Sampling Site']} + + + Sampling Platform + ${row['Sampling Platform']} + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + biomass + ${row['biomass']} + + + + + density + 
${row['density']} + + + + + oxygenation status of sample + ${row['oxygenation status of sample']} + + + + + organism count + ${row['organism count']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + Marine Region + ${row['Marine Region']} + + + Temperature + ${row['Temperature']} + + + Salinity + ${row['Salinity']} + + + + specific host + ${row['specific host']} + + + + + health or disease status of specific host + ${row['health or disease status of specific host']} + + + + + alkalinity + ${row['alkalinity']} + + + + + atmospheric data + ${row['atmospheric data']} + + + + + conductivity + ${row['conductivity']} + + + + + water current + ${row['water current']} + + + + + fluorescence + ${row['fluorescence']} + + + + + light intensity + ${row['light intensity']} + + + + + mean friction velocity + ${row['mean friction velocity']} + + + + + mean peak friction velocity + ${row['mean peak friction velocity']} + + + + + downward PAR + ${row['downward PAR']} + + + + + photon flux + ${row['photon flux']} + + + + + pressure + ${row['pressure']} + + + + + tidal stage + ${row['tidal stage']} + + + + + turbidity + ${row['turbidity']} + + + + + pH + ${row['pH']} + + + + + total depth of water column + ${row['total depth of water column']} + + + + + alkyl diethers + ${row['alkyl diethers']} + + + + + aminopeptidase activity + ${row['aminopeptidase activity']} + + + + + ammonium + ${row['ammonium']} + + + + + bacterial carbon production + ${row['bacterial carbon production']} + + + + + bacterial production + ${row['bacterial production']} + + + + + bacterial respiration + ${row['bacterial respiration']} + + + + + bishomohopanol + ${row['bishomohopanol']} + + + + + bromide + ${row['bromide']} + + + + + calcium + ${row['calcium']} + + + + + carbon/nitrogen ratio + ${row['carbon/nitrogen ratio']} + + + + + 
chloride + ${row['chloride']} + + + + + chlorophyll + ${row['chlorophyll']} + + + + + diether lipids + ${row['diether lipids']} + + + + + dissolved carbon dioxide + ${row['dissolved carbon dioxide']} + + + + + dissolved hydrogen + ${row['dissolved hydrogen']} + + + + + dissolved inorganic carbon + ${row['dissolved inorganic carbon']} + + + + + dissolved inorganic nitrogen + ${row['dissolved inorganic nitrogen']} + + + + + dissolved inorganic phosphorus + ${row['dissolved inorganic phosphorus']} + + + + + dissolved organic carbon + ${row['dissolved organic carbon']} + + + + + dissolved organic nitrogen + ${row['dissolved organic nitrogen']} + + + + + dissolved oxygen + ${row['dissolved oxygen']} + + + + + glucosidase activity + ${row['glucosidase activity']} + + + + + magnesium + ${row['magnesium']} + + + + + n-alkanes + ${row['n-alkanes']} + + + + + nitrate + ${row['nitrate']} + + + + + nitrite + ${row['nitrite']} + + + + + nitrogen + ${row['nitrogen']} + + + + + organic carbon + ${row['organic carbon']} + + + + + organic matter + ${row['organic matter']} + + + + + organic nitrogen + ${row['organic nitrogen']} + + + + + particulate organic carbon + ${row['particulate organic carbon']} + + + + + particulate organic nitrogen + ${row['particulate organic nitrogen']} + + + + + petroleum hydrocarbon + ${row['petroleum hydrocarbon']} + + + + + phaeopigments + ${row['phaeopigments']} + + + + + phosphate + ${row['phosphate']} + + + + + phospholipid fatty acid + ${row['phospholipid fatty acid']} + + + + + potassium + ${row['potassium']} + + + + + primary production + ${row['primary production']} + + + + + redox potential + ${row['redox potential']} + + + + + silicate + ${row['silicate']} + + + + + sodium + ${row['sodium']} + + + + + soluble reactive phosphorus + ${row['soluble reactive phosphorus']} + + + + + sulfate + ${row['sulfate']} + + + + + sulfide + ${row['sulfide']} + + + + + suspended particulate matter + ${row['suspended particulate matter']} + + + + + total 
dissolved nitrogen + ${row['total dissolved nitrogen']} + + + + + total inorganic nitrogen + ${row['total inorganic nitrogen']} + + + + + total nitrogen + ${row['total nitrogen']} + + + + + total particulate carbon + ${row['total particulate carbon']} + + + + + total phosphorus + ${row['total phosphorus']} + + + + + miscellaneous parameter + ${row['miscellaneous parameter']} + + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000028.xml b/relecov_tools/templates/ENA_template_samples_ERC000028.xml new file mode 100644 index 00000000..3b8e2832 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000028.xml @@ -0,0 +1,144 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + isolation_source + ${row['isolation_source']} + + + + lat_lon + ${row['lat_lon']} + + + + + collected_by + ${row['collected_by']} + + + + collection date + ${row['collection date']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + identified_by + ${row['identified_by']} + + + + + environmental_sample + ${row['environmental_sample']} + + + + + mating_type + ${row['mating_type']} + + + + host health state + 
${row['host health state']} + + + + lab_host + ${row['lab_host']} + + + + host scientific name + ${row['host scientific name']} + + + + bio_material + ${row['bio_material']} + + + + + culture_collection + ${row['culture_collection']} + + + + + specimen_voucher + ${row['specimen_voucher']} + + + + isolate + ${row['isolate']} + + + + sub_species + ${row['sub_species']} + + + + + sub_strain + ${row['sub_strain']} + + + + + serovar + ${row['serovar']} + + + + + strain + ${row['strain']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000029.xml b/relecov_tools/templates/ENA_template_samples_ERC000029.xml new file mode 100644 index 00000000..df249a6a --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000029.xml @@ -0,0 +1,292 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + + subject exposure + ${row['subject exposure']} + + + + + subject exposure duration + ${row['subject exposure duration']} + + + + + travel-relation + ${row['travel-relation']} + + + + + clinical setting + ${row['clinical setting']} + + + + + country of travel + ${row['country of travel']} + + + + collected_by + ${row['collected_by']} + + + collection date + ${row['collection date']} + + + + geographic location (altitude) + ${row['geographic location (altitude)']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + identified_by + ${row['identified_by']} + + + + + geographic location (depth) + ${row['geographic location (depth)']} + + + + + 
amount or size of sample collected + ${row['amount or size of sample collected']} + + + + environmental_sample + ${row['environmental_sample']} + + + + mating_type + ${row['mating_type']} + + + + + genotype + ${row['genotype']} + + + + + pathotype + ${row['pathotype']} + + + + + host disease status + ${row['host disease status']} + + + + + host disease outcome + ${row['host disease outcome']} + + + + + host subject id + ${row['host subject id']} + + + + + host age + ${row['host age']} + + + + + host taxid + ${row['host taxid']} + + + + + host life stage + ${row['host life stage']} + + + + host health state + ${row['host health state']} + + + + host sex + ${row['host sex']} + + + + + lab_host + ${row['lab_host']} + + + + host scientific name + ${row['host scientific name']} + + + + passage_history + ${row['passage_history']} + + + + + sample storage conditions + ${row['sample storage conditions']} + + + + Is the sequenced pathogen host associated? + ${row['Is the sequenced pathogen host associated?']} + + + + bio_material + ${row['bio_material']} + + + + + culture_collection + ${row['culture_collection']} + + + + + specimen_voucher + ${row['specimen_voucher']} + + + + isolate + ${row['isolate']} + + + + sub_species + ${row['sub_species']} + + + + + sub_strain + ${row['sub_strain']} + + + + + sub_group + ${row['sub_group']} + + + + + sub_type + ${row['sub_type']} + + + + + serovar + ${row['serovar']} + + + + + strain + ${row['strain']} + + + + + host disease stage + ${row['host disease stage']} + + + + + isolation source host-associated + ${row['isolation source host-associated']} + + + + + host description + ${row['host description']} + + + + + isolation source non-host-associated + ${row['isolation source non-host-associated']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000030.xml 
b/relecov_tools/templates/ENA_template_samples_ERC000030.xml new file mode 100644 index 00000000..1c5bfad2 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000030.xml @@ -0,0 +1,184 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + + Event Label + ${row['Event Label']} + + + + Event Date/Time Start + ${row['Event Date/Time Start']} + + + + Event Date/Time End + ${row['Event Date/Time End']} + + + + Latitude Start + ${row['Latitude Start']} + + + Longitude Start + ${row['Longitude Start']} + + + + Latitude End + ${row['Latitude End']} + + + + + Longitude End + ${row['Longitude End']} + + + + Depth + ${row['Depth']} + + + + Sample Collection Device + ${row['Sample Collection Device']} + + + + Protocol Label + ${row['Protocol Label']} + + + + Size Fraction Lower Threshold + ${row['Size Fraction Lower Threshold']} + + + + + Size Fraction Upper Threshold + ${row['Size Fraction Upper Threshold']} + + + + + Sample Status + ${row['Sample Status']} + + + + + Last Update Date + ${row['Last Update Date']} + + + + project name + ${row['project name']} + + + environmental package + ${row['environmental package']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + Sampling Campaign + ${row['Sampling Campaign']} + + + Sampling Station + ${row['Sampling Station']} + + + Sampling Platform + ${row['Sampling Platform']} + + + Marine Region + ${row['Marine Region']} + + + Salinity Sensor + ${row['Salinity Sensor']} + + + + Oxygen Sensor + ${row['Oxygen Sensor']} + + + + + Nitrate Sensor + ${row['Nitrate Sensor']} + + + + Temperature + ${row['Temperature']} + + + + Chlorophyll Sensor + ${row['Chlorophyll Sensor']} + + + + + Citation + ${row['Citation']} + + + + + Further Details + ${row['Further Details']} + + + + SUBMISSION_TOOL + ${tool_name} + + 
+ SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000031.xml b/relecov_tools/templates/ENA_template_samples_ERC000031.xml new file mode 100644 index 00000000..da0df22f --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000031.xml @@ -0,0 +1,456 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + finishing strategy + ${row['finishing strategy']} + + + + + annotation source + ${row['annotation source']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library construction method + ${row['library construction method']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + target gene + ${row['target gene']} + + + + + target subfragment + ${row['target subfragment']} + + + + + pcr primers + ${row['pcr primers']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + + pcr conditions + ${row['pcr conditions']} + + + + sequencing method + ${row['sequencing method']} + + + + sequence quality check + ${row['sequence 
quality check']} + + + + + chimera check + ${row['chimera check']} + + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + investigation type + ${row['investigation type']} + + + collection date + ${row['collection date']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + built environment environmental package + ${row['built environment environmental package']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + surface material + ${row['surface material']} + + + + + surface air contaminant + ${row['surface air contaminant']} + + + + + indoor surface + ${row['indoor surface']} + + + + indoor space + ${row['indoor space']} + + + filter type + ${row['filter type']} + + + heating and cooling system type + ${row['heating and cooling system type']} + + + + substructure type + ${row['substructure type']} + + + + light type + ${row['light type']} + + + building setting + ${row['building setting']} + + + building occupancy type + ${row['building occupancy type']} + + + space typical state + ${row['space typical state']} + + + typical occupant density + ${row['typical occupant density']} + + + occupancy at sampling + ${row['occupancy at sampling']} + + + occupant density at sampling + ${row['occupant density at sampling']} + + + ventilation type + ${row['ventilation type']} + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or 
method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + sample size sorting method + ${row['sample size sorting method']} + + + + organism count + ${row['organism count']} + + + + specific host + ${row['specific host']} + + + + + health or disease status of specific host + ${row['health or disease status of specific host']} + + + + relative air humidity + ${row['relative air humidity']} + + + absolute air humidity + ${row['absolute air humidity']} + + + + surface humidity + ${row['surface humidity']} + + + + air temperature + ${row['air temperature']} + + + + surface temperature + ${row['surface temperature']} + + + + + surface moisture + ${row['surface moisture']} + + + + + surface moisture pH + ${row['surface moisture pH']} + + + + + dew point + ${row['dew point']} + + + + carbon dioxide + ${row['carbon dioxide']} + + + + subspecific genetic lineage + ${row['subspecific genetic lineage']} + + + + + trophic level + ${row['trophic level']} + + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + encoded traits + ${row['encoded traits']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000032.xml b/relecov_tools/templates/ENA_template_samples_ERC000032.xml new file mode 100644 index 00000000..3b9d32a0 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000032.xml @@ -0,0 +1,366 @@ + + + + + + ${row.title} + + 
${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + + number of inoculated individuals + ${row['number of inoculated individuals']} + + + + + inoculation route + ${row['inoculation route']} + + + + + inoculation dose + ${row['inoculation dose']} + + + + + inoculation stock availability + ${row['inoculation stock availability']} + + + + + subject exposure + ${row['subject exposure']} + + + + + subject exposure duration + ${row['subject exposure duration']} + + + + + type exposure + ${row['type exposure']} + + + + + personal protective equipment + ${row['personal protective equipment']} + + + + + hospitalisation + ${row['hospitalisation']} + + + + + antiviral treatment + ${row['antiviral treatment']} + + + + + antiviral treatment initiation + ${row['antiviral treatment initiation']} + + + + + antiviral treatment dosage + ${row['antiviral treatment dosage']} + + + + + antiviral treatment duration + ${row['antiviral treatment duration']} + + + + + influenza vaccination type + ${row['influenza vaccination type']} + + + + + influenza vaccination date + ${row['influenza vaccination date']} + + + + + source of vaccination information + ${row['source of vaccination information']} + + + + + vaccine lot number + ${row['vaccine lot number']} + + + + + vaccine manufacturer + ${row['vaccine manufacturer']} + + + + + vaccine dosage + ${row['vaccine dosage']} + + + + + influenza-like illness at the time of sample collection + ${row['influenza-like illness at the time of sample collection']} + + + + + illness onset date + ${row['illness onset date']} + + + + + illness duration + ${row['illness duration']} + + + + + illness symptoms + ${row['illness symptoms']} + + + + + collection date + ${row['collection date']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + + + geographic location 
(longitude) + ${row['geographic location (longitude)']} + + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + sample capture status + ${row['sample capture status']} + + + + + host disease outcome + ${row['host disease outcome']} + + + + host common name + ${row['host common name']} + + + host subject id + ${row['host subject id']} + + + + host age + ${row['host age']} + + + + host health state + ${row['host health state']} + + + host sex + ${row['host sex']} + + + host scientific name + ${row['host scientific name']} + + + influenza test method + ${row['influenza test method']} + + + influenza test result + ${row['influenza test result']} + + + other pathogens tested + ${row['other pathogens tested']} + + + other pathogens test result + ${row['other pathogens test result']} + + + + influenza virus type + ${row['influenza virus type']} + + + + + virus identifier + ${row['virus identifier']} + + + + + influenza strain unique number + ${row['influenza strain unique number']} + + + + + WHO/OIE/FAO clade (required for HPAI H5N1 viruses) + ${row['WHO/OIE/FAO clade (required for HPAI H5N1 viruses)']} + + + + + lineage:swl (required for H1N1 viruses) + ${row['lineage:swl (required for H1N1 viruses)']} + + + + collector name + ${row['collector name']} + + + collecting institution + ${row['collecting institution']} + + + + receipt date + ${row['receipt date']} + + + + + sample storage conditions + ${row['sample storage conditions']} + + + + + definition for seropositive sample + ${row['definition for seropositive sample']} + + + + + meaning of cut off value + ${row['meaning of cut off value']} + + + + + serotype (required for a seropositive sample) + ${row['serotype (required for a seropositive sample)']} + + + + + strain + ${row['strain']} + + + + + host habitat + ${row['host habitat']} + + + + + isolation source host-associated + ${row['isolation source host-associated']} + + + + + host description + 
${row['host description']} + + + + + gravidity + ${row['gravidity']} + + + + + host behaviour + ${row['host behaviour']} + + + + + isolation source non-host-associated + ${row['isolation source non-host-associated']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000033.xml b/relecov_tools/templates/ENA_template_samples_ERC000033.xml new file mode 100644 index 00000000..efc27593 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000033.xml @@ -0,0 +1,240 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + + subject exposure + ${row['subject exposure']} + + + + + subject exposure duration + ${row['subject exposure duration']} + + + + + type exposure + ${row['type exposure']} + + + + + personal protective equipment + ${row['personal protective equipment']} + + + + + hospitalisation + ${row['hospitalisation']} + + + + + illness duration + ${row['illness duration']} + + + + + illness symptoms + ${row['illness symptoms']} + + + + + collection date + ${row['collection date']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + sample capture status + ${row['sample capture status']} + + + + + host disease outcome + ${row['host disease outcome']} + + + + host common name + ${row['host common name']} + + + host subject id + ${row['host subject id']} + + + + host age + ${row['host age']} + + + + host health state + ${row['host health state']} + + + authors + ${row['authors']} + + + address + ${row['address']} + + 
+ host sex + ${row['host sex']} + + + host scientific name + ${row['host scientific name']} + + + + virus identifier + ${row['virus identifier']} + + + + collector name + ${row['collector name']} + + + collecting institution + ${row['collecting institution']} + + + + receipt date + ${row['receipt date']} + + + + + sample storage conditions + ${row['sample storage conditions']} + + + + + definition for seropositive sample + ${row['definition for seropositive sample']} + + + + + serotype (required for a seropositive sample) + ${row['serotype (required for a seropositive sample)']} + + + + isolate + ${row['isolate']} + + + + strain + ${row['strain']} + + + + + host habitat + ${row['host habitat']} + + + + + isolation source host-associated + ${row['isolation source host-associated']} + + + + + host description + ${row['host description']} + + + + + gravidity + ${row['gravidity']} + + + + + host behaviour + ${row['host behaviour']} + + + + + isolation source non-host-associated + ${row['isolation source non-host-associated']} + + + + SUBMISSION_TOOL + ${tool_name} + + + ENA-CHECKLIST + ERC000033 + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + diff --git a/relecov_tools/templates/ENA_template_samples_ERC000034.xml b/relecov_tools/templates/ENA_template_samples_ERC000034.xml new file mode 100644 index 00000000..311846f4 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000034.xml @@ -0,0 +1,94 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + tissue_type + ${row['tissue_type']} + + + sex + ${row['sex']} + + + + date of birth + ${row['date of birth']} + + + + + date of death + ${row['date of death']} + + + + diagnosis + ${row['diagnosis']} + + + strain + ${row['strain']} + + + + tumor grading (OBI_0600002) + ${row['tumor grading (OBI_0600002)']} + + + + + treatment agent + ${row['treatment agent']} + + + + + treatment dose + ${row['treatment dose']} + + + + + 
treatment date + ${row['treatment date']} + + + + + Further Details + ${row['Further Details']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000035.xml b/relecov_tools/templates/ENA_template_samples_ERC000035.xml new file mode 100644 index 00000000..bca85bea --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000035.xml @@ -0,0 +1,240 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + + cell_type + ${row['cell_type']} + + + + + dev_stage + ${row['dev_stage']} + + + + + organism part + ${row['organism part']} + + + + + ploidy + ${row['ploidy']} + + + + + infect + ${row['infect']} + + + + + protocol + ${row['protocol']} + + + + + sampling time point + ${row['sampling time point']} + + + + + initial time point + ${row['initial time point']} + + + + + growth condition + ${row['growth condition']} + + + + + genotype + ${row['genotype']} + + + + + sex + ${row['sex']} + + + + + age + ${row['age']} + + + + + genetic modification + ${row['genetic modification']} + + + + + phenotype + ${row['phenotype']} + + + + + cellular component + ${row['cellular component']} + + + + + individual + ${row['individual']} + + + + + disease staging + ${row['disease staging']} + + + + + immunoprecipitate + ${row['immunoprecipitate']} + + + + + replicate + ${row['replicate']} + + + + + cultivar + ${row['cultivar']} + + + + + ecotype + ${row['ecotype']} + + + + + cell_line + ${row['cell_line']} + + + + + strain + ${row['strain']} + + + + + time + ${row['time']} + + + + + dose + ${row['dose']} + + + + + chemical compound + ${row['chemical compound']} + + + + + experimental factor 1 + ${row['experimental factor 1']} + + + + + experimental factor 2 + ${row['experimental factor 2']} + + + + + experimental factor 3 + ${row['experimental factor 3']} + + + + + 
experimental factor 4 + ${row['experimental factor 4']} + + + + + experimental factor 5 + ${row['experimental factor 5']} + + + + + block + ${row['block']} + + + + + environmental stress + ${row['environmental stress']} + + + + + environmental history + ${row['environmental history']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000036.xml b/relecov_tools/templates/ENA_template_samples_ERC000036.xml new file mode 100644 index 00000000..672de586 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000036.xml @@ -0,0 +1,172 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + + name of the sampling site + ${row['name of the sampling site']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + investigation type + ${row['investigation type']} + + + + surveillance target + ${row['surveillance target']} + + + + collection date + ${row['collection date']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + sampling time point + ${row['sampling time point']} + + + + + sample transportation temperature + 
${row['sample transportation temperature']} + + + + + sample transportation date + ${row['sample transportation date']} + + + + + sample transportation time + ${row['sample transportation time']} + + + + + receipt date + ${row['receipt date']} + + + + sewage type + ${row['sewage type']} + + + + temperature + ${row['temperature']} + + + + + area of sampling site + ${row['area of sampling site']} + + + + + size of the catchment area + ${row['size of the catchment area']} + + + + + population size of the catchment area + ${row['population size of the catchment area']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000037.xml b/relecov_tools/templates/ENA_template_samples_ERC000037.xml new file mode 100644 index 00000000..12e05484 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000037.xml @@ -0,0 +1,620 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + + ploidy + ${row['ploidy']} + + + + + number of replicons + ${row['number of replicons']} + + + + + extrachromosomal elements + ${row['extrachromosomal elements']} + + + + + estimated size + ${row['estimated size']} + + + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + + collected_by + ${row['collected_by']} + + + + collection date + ${row['collection date']} + + + + geographic location (altitude) + ${row['geographic location (altitude)']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + 
identified_by + ${row['identified_by']} + + + + + geographic location (depth) + ${row['geographic location (depth)']} + + + + + environment (biome) + ${row['environment (biome)']} + + + + + environment (feature) + ${row['environment (feature)']} + + + + + geographic location (elevation) + ${row['geographic location (elevation)']} + + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + propagation + ${row['propagation']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + sampling time point + ${row['sampling time point']} + + + + plant structure + ${row['plant structure']} + + + plant developmental stage + ${row['plant developmental stage']} + + + + sampled age + ${row['sampled age']} + + + + + sample phenotype + ${row['sample phenotype']} + + + + + sample health state + ${row['sample health state']} + + + + + sample disease status + ${row['sample disease status']} + + + + + sample disease stage + ${row['sample disease stage']} + + + + + sample wet mass + ${row['sample wet mass']} + + + + + sample dry mass + ${row['sample dry mass']} + + + + + sample height + ${row['sample height']} + + + + + sample length + ${row['sample length']} + + + + + growth facility + ${row['growth facility']} + + + + + sample capture status + ${row['sample capture status']} + + + + + genotype + ${row['genotype']} + + + + + genetic modification + ${row['genetic modification']} + + + + + organism common name + ${row['organism common name']} + + + + + 
subspecific genetic lineage rank + ${row['subspecific genetic lineage rank']} + + + + + subspecific genetic lineage name + ${row['subspecific genetic lineage name']} + + + + + biological status + ${row['biological status']} + + + + + organism phenotype + ${row['organism phenotype']} + + + + + ancestral data + ${row['ancestral data']} + + + + + source material description + ${row['source material description']} + + + + + biotic relationship + ${row['biotic relationship']} + + + + + growth habit + ${row['growth habit']} + + + + + plant sex + ${row['plant sex']} + + + + + climate environment + ${row['climate environment']} + + + + + gaseous environment + ${row['gaseous environment']} + + + + + seasonal environment + ${row['seasonal environment']} + + + + + soil_taxonomic/FAO classification + ${row['soil_taxonomic/FAO classification']} + + + + + soil_taxonomic/local classification + ${row['soil_taxonomic/local classification']} + + + + + soil_taxonomic/local classification method + ${row['soil_taxonomic/local classification method']} + + + + + soil type + ${row['soil type']} + + + + + soil type method + ${row['soil type method']} + + + + + drainage classification + ${row['drainage classification']} + + + + + texture + ${row['texture']} + + + + + texture method + ${row['texture method']} + + + + + soil water content + ${row['soil water content']} + + + + + soil pH + ${row['soil pH']} + + + + plant growth medium + ${row['plant growth medium']} + + + + rooting conditions + ${row['rooting conditions']} + + + + + culture rooting medium + ${row['culture rooting medium']} + + + + + rooting medium macronutrients + ${row['rooting medium macronutrients']} + + + + + rooting medium micronutrients + ${row['rooting medium micronutrients']} + + + + + rooting medium organic supplements + ${row['rooting medium organic supplements']} + + + + + rooting medium carbon + ${row['rooting medium carbon']} + + + + + rooting medium regulators + ${row['rooting medium regulators']} + + + + + 
rooting medium solidifier + ${row['rooting medium solidifier']} + + + + + rooting medium pH + ${row['rooting medium pH']} + + + + + air temperature regimen + ${row['air temperature regimen']} + + + + + antibiotic regimen + ${row['antibiotic regimen']} + + + + + chemical mutagen + ${row['chemical mutagen']} + + + + + fertilizer regimen + ${row['fertilizer regimen']} + + + + + fungicide regimen + ${row['fungicide regimen']} + + + + + gravity + ${row['gravity']} + + + + + growth hormone regimen + ${row['growth hormone regimen']} + + + + + herbicide regimen + ${row['herbicide regimen']} + + + + + humidity regimen + ${row['humidity regimen']} + + + + + mineral nutrient regimen + ${row['mineral nutrient regimen']} + + + + + non-mineral nutrient regimen + ${row['non-mineral nutrient regimen']} + + + + + pesticide regimen + ${row['pesticide regimen']} + + + + + pH regimen + ${row['pH regimen']} + + + + + radiation regimen + ${row['radiation regimen']} + + + + + rainfall regimen + ${row['rainfall regimen']} + + + + + salt regimen + ${row['salt regimen']} + + + + + standing water regimen + ${row['standing water regimen']} + + + + + watering regimen + ${row['watering regimen']} + + + + + water temperature regimen + ${row['water temperature regimen']} + + + + + plant treatment + ${row['plant treatment']} + + + + + light regimen + ${row['light regimen']} + + + + + biotic regimen + ${row['biotic regimen']} + + + + + mechanical damage + ${row['mechanical damage']} + + + + + chemical administration + ${row['chemical administration']} + + + + + perturbation + ${row['perturbation']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000038.xml b/relecov_tools/templates/ENA_template_samples_ERC000038.xml new file mode 100644 index 00000000..48525157 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000038.xml @@ -0,0 +1,174 @@ + + + 
+ + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + Event Date/Time + ${row['Event Date/Time']} + + + Latitude Start + ${row['Latitude Start']} + + + Longitude Start + ${row['Longitude Start']} + + + Depth + ${row['Depth']} + + + + Sample Collection Device + ${row['Sample Collection Device']} + + + + Protocol Label + ${row['Protocol Label']} + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + Sampling Campaign + ${row['Sampling Campaign']} + + + Sampling Station + ${row['Sampling Station']} + + + Sampling Platform + ${row['Sampling Platform']} + + + + storage conditions (fresh/frozen/other) + ${row['storage conditions (fresh/frozen/other)']} + + + + + sample health state + ${row['sample health state']} + + + + + sample disease status + ${row['sample disease status']} + + + + + Marine Region + ${row['Marine Region']} + + + + seabed habitat + ${row['seabed habitat']} + + + age + ${row['age']} + + + aquaculture origin + ${row['aquaculture origin']} + + + shellfish total weight + ${row['shellfish total weight']} + + + shellfish soft tissue weight + ${row['shellfish soft tissue weight']} + + + shell length + ${row['shell length']} + + + shell width + ${row['shell width']} + + + + adductor weight + ${row['adductor weight']} + + + + + gonad weight + ${row['gonad weight']} + + + + + shell markings + ${row['shell markings']} + + + + + toxin burden + ${row['toxin burden']} + + + + + treatment agent + ${row['treatment agent']} + + + + + chemical compound + ${row['chemical compound']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000039.xml b/relecov_tools/templates/ENA_template_samples_ERC000039.xml new file mode 100644 index 
00000000..93c057e0 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000039.xml @@ -0,0 +1,194 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + dev_stage + ${row['dev_stage']} + + + + subject exposure + ${row['subject exposure']} + + + + + subject exposure duration + ${row['subject exposure duration']} + + + + + travel-relation + ${row['travel-relation']} + + + + + clinical setting + ${row['clinical setting']} + + + + + country of travel + ${row['country of travel']} + + + + + collection date + ${row['collection date']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + genotype + ${row['genotype']} + + + + + host disease outcome + ${row['host disease outcome']} + + + + + host common name + ${row['host common name']} + + + + + host subject id + ${row['host subject id']} + + + + + host age + ${row['host age']} + + + + + host health state + ${row['host health state']} + + + + + host sex + ${row['host sex']} + + + + + host scientific name + ${row['host scientific name']} + + + + collector name + ${row['collector name']} + + + collecting institution + ${row['collecting institution']} + + + + sample storage conditions + ${row['sample storage conditions']} + + + + isolate + ${row['isolate']} + + + + strain + ${row['strain']} + + + + + isolation source host-associated + ${row['isolation source host-associated']} + + + + + diagnostic method + ${row['diagnostic method']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + isolation source non-host-associated + ${row['isolation source non-host-associated']} + + + + 
SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000040.xml b/relecov_tools/templates/ENA_template_samples_ERC000040.xml new file mode 100644 index 00000000..42f63ef5 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000040.xml @@ -0,0 +1,140 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + + Size Fraction Lower Threshold + ${row['Size Fraction Lower Threshold']} + + + + + Size Fraction Upper Threshold + ${row['Size Fraction Upper Threshold']} + + + + target gene + ${row['target gene']} + + + target subfragment + ${row['target subfragment']} + + + pcr primers + ${row['pcr primers']} + + + + isolation_source + ${row['isolation_source']} + + + + + collected_by + ${row['collected_by']} + + + + collection date + ${row['collection date']} + + + + geographic location (altitude) + ${row['geographic location (altitude)']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + geographic location (depth) + ${row['geographic location (depth)']} + + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + sample collection device or method + ${row['sample collection device or method']} + + + + environmental_sample + ${row['environmental_sample']} + + + + Salinity + ${row['Salinity']} + + + + + Further Details + ${row['Further Details']} + + + + SUBMISSION_TOOL + ${tool_name} + + + 
SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000041.xml b/relecov_tools/templates/ENA_template_samples_ERC000041.xml new file mode 100644 index 00000000..b77db32f --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000041.xml @@ -0,0 +1,172 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + library construction method + ${row['library construction method']} + + + + + protocol + ${row['protocol']} + + + + + instrument for DNA concentration measurement + ${row['instrument for DNA concentration measurement']} + + + + + read quality filter + ${row['read quality filter']} + + + + + DNA concentration + ${row['DNA concentration']} + + + + + collection_date + ${row['collection_date']} + + + + + isolation_source + ${row['isolation_source']} + + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + sampling time point + ${row['sampling time point']} + + + + + sample transportation temperature + ${row['sample transportation temperature']} + + + + + sample transportation date + ${row['sample transportation date']} + + + + + sample transportation time + ${row['sample transportation time']} + + + + + receipt date + ${row['receipt date']} + + + + + links to additional analysis + ${row['links to additional analysis']} + + + + isolate + ${row['isolate']} + + 
+ + sub_species + ${row['sub_species']} + + + + + Further Details + ${row['Further Details']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000043.xml b/relecov_tools/templates/ENA_template_samples_ERC000043.xml new file mode 100644 index 00000000..003e3c33 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000043.xml @@ -0,0 +1,154 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + + Depth + ${row['Depth']} + + + + + collected_by + ${row['collected_by']} + + + + + collection date + ${row['collection date']} + + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + isolation and growth condition + ${row['isolation and growth condition']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + sample storage duration + ${row['sample storage duration']} + + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + growth condition + ${row['growth condition']} + + + + + Temperature + ${row['Temperature']} + + + + + Salinity + ${row['Salinity']} + + + + + sample storage conditions + ${row['sample storage conditions']} + + + + + light intensity + ${row['light intensity']} + + + + + pH + ${row['pH']} + + + + + culture_collection + ${row['culture_collection']} + + + + strain + ${row['strain']} + + + + Further Details + ${row['Further Details']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at 
end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000044.xml b/relecov_tools/templates/ENA_template_samples_ERC000044.xml new file mode 100644 index 00000000..1b9dc04f --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000044.xml @@ -0,0 +1,128 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + + subject exposure + ${row['subject exposure']} + + + + + subject exposure duration + ${row['subject exposure duration']} + + + + + travel-relation + ${row['travel-relation']} + + + + + clinical setting + ${row['clinical setting']} + + + + + country of travel + ${row['country of travel']} + + + + collection_date + ${row['collection_date']} + + + collected_by + ${row['collected_by']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + host disease status + ${row['host disease status']} + + + + + host disease outcome + ${row['host disease outcome']} + + + + host scientific name + ${row['host scientific name']} + + + isolate + ${row['isolate']} + + + + sub_type + ${row['sub_type']} + + + + + serovar + ${row['serovar']} + + + + + serovar_in-silico + ${row['serovar_in-silico']} + + + + + isolation source host-associated + ${row['isolation source host-associated']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000045.xml b/relecov_tools/templates/ENA_template_samples_ERC000045.xml new file mode 100644 index 00000000..69bbc610 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000045.xml @@ -0,0 +1,78 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + collection_date 
+ ${row['collection_date']} + + + isolation_source + ${row['isolation_source']} + + + + collected_by + ${row['collected_by']} + + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + receipt date + ${row['receipt date']} + + + + isolate + ${row['isolate']} + + + + serotype + ${row['serotype']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000047.xml b/relecov_tools/templates/ENA_template_samples_ERC000047.xml new file mode 100644 index 00000000..a974f1dc --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000047.xml @@ -0,0 +1,342 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + annotation source + ${row['annotation source']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + sequencing method + ${row['sequencing method']} + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + + number of standard tRNAs extracted 
+ ${row['number of standard tRNAs extracted']} + + + + assembly software + ${row['assembly software']} + + + + feature prediction + ${row['feature prediction']} + + + + + reference database(s) + ${row['reference database(s)']} + + + + + similarity search method + ${row['similarity search method']} + + + + + 16S recovered + ${row['16S recovered']} + + + + + 16S recovery software + ${row['16S recovery software']} + + + + + tRNA extraction software + ${row['tRNA extraction software']} + + + + completeness score + ${row['completeness score']} + + + completeness software + ${row['completeness software']} + + + + completeness approach + ${row['completeness approach']} + + + + contamination score + ${row['contamination score']} + + + + contamination screening input + ${row['contamination screening input']} + + + + + contamination screening parameters + ${row['contamination screening parameters']} + + + + + decontamination software + ${row['decontamination software']} + + + + binning software + ${row['binning software']} + + + + reassembly post binning + ${row['reassembly post binning']} + + + + + MAG coverage software + ${row['MAG coverage software']} + + + + assembly quality + ${row['assembly quality']} + + + investigation type + ${row['investigation type']} + + + binning parameters + ${row['binning parameters']} + + + taxonomic identity marker + ${row['taxonomic identity marker']} + + + + taxonomic classification + ${row['taxonomic classification']} + + + + isolation_source + ${row['isolation_source']} + + + collection date + ${row['collection date']} + + + + geographic location (altitude) + ${row['geographic location (altitude)']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location 
(region and locality)']} + + + + + geographic location (depth) + ${row['geographic location (depth)']} + + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + geographic location (elevation) + ${row['geographic location (elevation)']} + + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + size fraction selected + ${row['size fraction selected']} + + + + sample derived from + ${row['sample derived from']} + + + metagenomic source + ${row['metagenomic source']} + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000048.xml b/relecov_tools/templates/ENA_template_samples_ERC000048.xml new file mode 100644 index 00000000..1e425abe --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000048.xml @@ -0,0 +1,346 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + annotation source + ${row['annotation source']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + 
library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + sequencing method + ${row['sequencing method']} + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + + number of standard tRNAs extracted + ${row['number of standard tRNAs extracted']} + + + + assembly software + ${row['assembly software']} + + + + feature prediction + ${row['feature prediction']} + + + + + reference database(s) + ${row['reference database(s)']} + + + + + similarity search method + ${row['similarity search method']} + + + + + 16S recovered + ${row['16S recovered']} + + + + + 16S recovery software + ${row['16S recovery software']} + + + + + tRNA extraction software + ${row['tRNA extraction software']} + + + + completeness score + ${row['completeness score']} + + + completeness software + ${row['completeness software']} + + + + completeness approach + ${row['completeness approach']} + + + + contamination score + ${row['contamination score']} + + + + contamination screening input + ${row['contamination screening input']} + + + + + contamination screening parameters + ${row['contamination screening parameters']} + + + + + decontamination software + ${row['decontamination software']} + + + + assembly quality + ${row['assembly quality']} + + + investigation type + ${row['investigation type']} + + + taxonomic identity marker + ${row['taxonomic identity marker']} + + + + taxonomic classification + ${row['taxonomic classification']} + + + + sorting technology + ${row['sorting technology']} + + + single cell or viral particle lysis approach + ${row['single cell or viral particle lysis approach']} + + + + single cell or viral particle lysis kit protocol + ${row['single cell or viral particle lysis 
kit protocol']} + + + + WGA amplification approach + ${row['WGA amplification approach']} + + + + WGA amplification kit + ${row['WGA amplification kit']} + + + + collection_date + ${row['collection_date']} + + + isolation_source + ${row['isolation_source']} + + + + geographic location (altitude) + ${row['geographic location (altitude)']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + geographic location (depth) + ${row['geographic location (depth)']} + + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + geographic location (elevation) + ${row['geographic location (elevation)']} + + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + size fraction selected + ${row['size fraction selected']} + + + + sample derived from + ${row['sample derived from']} + + + metagenomic source + ${row['metagenomic source']} + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000049.xml b/relecov_tools/templates/ENA_template_samples_ERC000049.xml new file mode 100644 index 00000000..a63ff04f --- /dev/null +++ 
b/relecov_tools/templates/ENA_template_samples_ERC000049.xml @@ -0,0 +1,420 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + estimated size + ${row['estimated size']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + annotation source + ${row['annotation source']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + sequencing method + ${row['sequencing method']} + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + + number of standard tRNAs extracted + ${row['number of standard tRNAs extracted']} + + + + assembly software + ${row['assembly software']} + + + + feature prediction + ${row['feature prediction']} + + + + + reference database(s) + ${row['reference database(s)']} + + + + + similarity search method + ${row['similarity search method']} + + + + + tRNA extraction software + ${row['tRNA extraction software']} + + + + + completeness score + ${row['completeness score']} + + + + + completeness software + ${row['completeness software']} + + + + + completeness approach + ${row['completeness approach']} + + + + + binning software + ${row['binning software']} + + + + + reassembly post binning + ${row['reassembly post binning']} + + + + + MAG coverage software + 
${row['MAG coverage software']} + + + + assembly quality + ${row['assembly quality']} + + + investigation type + ${row['investigation type']} + + + + binning parameters + ${row['binning parameters']} + + + + + taxonomic identity marker + ${row['taxonomic identity marker']} + + + + + taxonomic classification + ${row['taxonomic classification']} + + + + + sorting technology + ${row['sorting technology']} + + + + + single cell or viral particle lysis approach + ${row['single cell or viral particle lysis approach']} + + + + + single cell or viral particle lysis kit protocol + ${row['single cell or viral particle lysis kit protocol']} + + + + + WGA amplification approach + ${row['WGA amplification approach']} + + + + + WGA amplification kit + ${row['WGA amplification kit']} + + + + source of UViGs + ${row['source of UViGs']} + + + virus enrichment approach + ${row['virus enrichment approach']} + + + predicted genome type + ${row['predicted genome type']} + + + predicted genome structure + ${row['predicted genome structure']} + + + detection type + ${row['detection type']} + + + viral identification software + ${row['viral identification software']} + + + + vOTU classification approach + ${row['vOTU classification approach']} + + + + + vOTU sequence comparison approach + ${row['vOTU sequence comparison approach']} + + + + + vOTU database + ${row['vOTU database']} + + + + + host prediction approach + ${row['host prediction approach']} + + + + + host prediction estimated accuracy + ${row['host prediction estimated accuracy']} + + + + isolation_source + ${row['isolation_source']} + + + collection date + ${row['collection date']} + + + + geographic location (altitude) + ${row['geographic location (altitude)']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + 
+ geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + geographic location (depth) + ${row['geographic location (depth)']} + + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + geographic location (elevation) + ${row['geographic location (elevation)']} + + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + size fraction selected + ${row['size fraction selected']} + + + + sample derived from + ${row['sample derived from']} + + + metagenomic source + ${row['metagenomic source']} + + + + specific host + ${row['specific host']} + + + + + known pathogenicity + ${row['known pathogenicity']} + + + + + observed biotic relationship + ${row['observed biotic relationship']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000050.xml b/relecov_tools/templates/ENA_template_samples_ERC000050.xml new file mode 100644 index 00000000..6bf6a1d7 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000050.xml @@ -0,0 +1,322 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + + experimental factor + ${row['experimental factor']} + + + + + reference for biomaterial + ${row['reference for biomaterial']} + + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + nucleic acid amplification + ${row['nucleic acid 
amplification']} + + + + + library size + ${row['library size']} + + + + + library reads sequenced + ${row['library reads sequenced']} + + + + + library vector + ${row['library vector']} + + + + + library screening strategy + ${row['library screening strategy']} + + + + + multiplex identifiers + ${row['multiplex identifiers']} + + + + + adapters + ${row['adapters']} + + + + sequencing method + ${row['sequencing method']} + + + + relevant electronic resources + ${row['relevant electronic resources']} + + + + + relevant standard operating procedures + ${row['relevant standard operating procedures']} + + + + + number of standard tRNAs extracted + ${row['number of standard tRNAs extracted']} + + + + assembly software + ${row['assembly software']} + + + + 16S recovered + ${row['16S recovered']} + + + + + 16S recovery software + ${row['16S recovery software']} + + + + + tRNA extraction software + ${row['tRNA extraction software']} + + + + + completeness score + ${row['completeness score']} + + + + + completeness software + ${row['completeness software']} + + + + + completeness approach + ${row['completeness approach']} + + + + + contamination score + ${row['contamination score']} + + + + + contamination screening input + ${row['contamination screening input']} + + + + + contamination screening parameters + ${row['contamination screening parameters']} + + + + + decontamination software + ${row['decontamination software']} + + + + binning software + ${row['binning software']} + + + + reassembly post binning + ${row['reassembly post binning']} + + + + + MAG coverage software + ${row['MAG coverage software']} + + + + + assembly quality + ${row['assembly quality']} + + + + investigation type + ${row['investigation type']} + + + binning parameters + ${row['binning parameters']} + + + + taxonomic identity marker + ${row['taxonomic identity marker']} + + + + + taxonomic classification + ${row['taxonomic classification']} + + + + isolation_source + ${row['isolation_source']} + + 
+ collection date + ${row['collection date']} + + + + geographic location (altitude) + ${row['geographic location (altitude)']} + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + geographic location (depth) + ${row['geographic location (depth)']} + + + + environment (biome) + ${row['environment (biome)']} + + + environment (feature) + ${row['environment (feature)']} + + + environment (material) + ${row['environment (material)']} + + + + geographic location (elevation) + ${row['geographic location (elevation)']} + + + + + source material identifiers + ${row['source material identifiers']} + + + + + sample collection device or method + ${row['sample collection device or method']} + + + + + sample material processing + ${row['sample material processing']} + + + + + amount or size of sample collected + ${row['amount or size of sample collected']} + + + + + size fraction selected + ${row['size fraction selected']} + + + + sample derived from + ${row['sample derived from']} + + + metagenomic source + ${row['metagenomic source']} + + + + relationship to oxygen + ${row['relationship to oxygen']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000051.xml b/relecov_tools/templates/ENA_template_samples_ERC000051.xml new file mode 100644 index 00000000..98f32477 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000051.xml @@ -0,0 +1,98 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + sample origin + ${row['sample origin']} + + + sample taxon name + ${row['sample taxon name']} + + + sample material + ${row['sample 
material']} + + + engrafted tumor sample passage + ${row['engrafted tumor sample passage']} + + + + engrafted tumor collection site + ${row['engrafted tumor collection site']} + + + + patient tumor site of collection + ${row['patient tumor site of collection']} + + + patient tumor type + ${row['patient tumor type']} + + + sample unique ID + ${row['sample unique ID']} + + + + engraftment host strain name + ${row['engraftment host strain name']} + + + + patient age at collection of tumor + ${row['patient age at collection of tumor']} + + + patient tumor diagnosis at time of collection + ${row['patient tumor diagnosis at time of collection']} + + + patient tumor primary site + ${row['patient tumor primary site']} + + + + was the PDX model humanised? + ${row['was the PDX model humanised?']} + + + + patient sex + ${row['patient sex']} + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000052.xml b/relecov_tools/templates/ENA_template_samples_ERC000052.xml new file mode 100644 index 00000000..366e9c59 --- /dev/null +++ b/relecov_tools/templates/ENA_template_samples_ERC000052.xml @@ -0,0 +1,226 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + project name + ${row['project name']} + + + sample volume or weight for DNA extraction + ${row['sample volume or weight for DNA extraction']} + + + + nucleic acid extraction + ${row['nucleic acid extraction']} + + + + + pcr primers + ${row['pcr primers']} + + + + + adapters + ${row['adapters']} + + + + sequencing method + ${row['sequencing method']} + + + reference host genome for decontamination + ${row['reference host genome for decontamination']} + + + + collection date + ${row['collection date']} + + + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + + + 
geographic location (latitude) + ${row['geographic location (latitude)']} + + + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + + trial length + ${row['trial length']} + + + + trial timepoint + ${row['trial timepoint']} + + + + sample storage temperature + ${row['sample storage temperature']} + + + + + sample storage location + ${row['sample storage location']} + + + + + sample storage buffer + ${row['sample storage buffer']} + + + + + sample storage container + ${row['sample storage container']} + + + + + host disease status + ${row['host disease status']} + + + + host common name + ${row['host common name']} + + + host subject id + ${row['host subject id']} + + + host taxid + ${row['host taxid']} + + + host body site + ${row['host body site']} + + + + host length + ${row['host length']} + + + + + host total mass + ${row['host total mass']} + + + + + host sex + ${row['host sex']} + + + + + host scientific name + ${row['host scientific name']} + + + + + host breed + ${row['host breed']} + + + + + host gutted mass + ${row['host gutted mass']} + + + + + host diet + ${row['host diet']} + + + + host diet treatment + ${row['host diet treatment']} + + + + host diet treatment concentration + ${row['host diet treatment concentration']} + + + + + host storage container + ${row['host storage container']} + + + + + host storage container pH + ${row['host storage container pH']} + + + + + host storage container temperature + ${row['host storage container temperature']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_samples_ERC000053.xml b/relecov_tools/templates/ENA_template_samples_ERC000053.xml new file mode 100644 index 00000000..34ef511f --- /dev/null +++ 
b/relecov_tools/templates/ENA_template_samples_ERC000053.xml @@ -0,0 +1,204 @@ + + + + + + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} + + ${row.common_name} + + + ${row.sample_description} + + + organism part + ${row['organism part']} + + + lifestage + ${row['lifestage']} + + + project name + ${row['project name']} + + + + tolid + ${row['tolid']} + + + + + barcoding center + ${row['barcoding center']} + + + + collected_by + ${row['collected_by']} + + + collection date + ${row['collection date']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + + geographic location (latitude) + ${row['geographic location (latitude)']} + + + geographic location (longitude) + ${row['geographic location (longitude)']} + + + geographic location (region and locality) + ${row['geographic location (region and locality)']} + + + + identified_by + ${row['identified_by']} + + + + + geographic location (depth) + ${row['geographic location (depth)']} + + + + + geographic location (elevation) + ${row['geographic location (elevation)']} + + + + habitat + ${row['habitat']} + + + + identifier_affiliation + ${row['identifier_affiliation']} + + + + + original collection date + ${row['original collection date']} + + + + + original geographic location + ${row['original geographic location']} + + + + + sample derived from + ${row['sample derived from']} + + + + + sample same as + ${row['sample same as']} + + + + + sample symbiont of + ${row['sample symbiont of']} + + + + + sample coordinator + ${row['sample coordinator']} + + + + + sample coordinator affiliation + ${row['sample coordinator affiliation']} + + + + sex + ${row['sex']} + + + + relationship + ${row['relationship']} + + + + + symbiont + ${row['symbiont']} + + + + collecting institution + ${row['collecting institution']} + + + + GAL + ${row['GAL']} + + + + + specimen_voucher + ${row['specimen_voucher']} + + + + + specimen_id + ${row['specimen_id']} + + + + + GAL_sample_id + 
${row['GAL_sample_id']} + + + + + culture_or_strain_id + ${row['culture_or_strain_id']} + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + \ No newline at end of file diff --git a/relecov_tools/templates/ENA_template_studies.xml b/relecov_tools/templates/ENA_template_studies.xml new file mode 100755 index 00000000..072b450a --- /dev/null +++ b/relecov_tools/templates/ENA_template_studies.xml @@ -0,0 +1,49 @@ + + + + + + + ${row.title} + + ${row.study_abstract} + + ${row.center_project_name} + + + ${row.study_description} + + + + + + + PUBMED + ${row.pubmed_id} + + + + + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + + + diff --git a/relecov_tools/templates/ENA_template_submission.xml b/relecov_tools/templates/ENA_template_submission.xml new file mode 100644 index 00000000..86f1c567 --- /dev/null +++ b/relecov_tools/templates/ENA_template_submission.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/relecov_tools/templates/Relecov_metadata_template_v2.0.9.xlsx b/relecov_tools/templates/Relecov_metadata_template_v2.0.9.xlsx new file mode 100644 index 00000000..c7472e2d Binary files /dev/null and b/relecov_tools/templates/Relecov_metadata_template_v2.0.9.xlsx differ diff --git a/relecov_tools/templates/SRA.common.xsd b/relecov_tools/templates/SRA.common.xsd new file mode 100644 index 00000000..754f5fa5 --- /dev/null +++ b/relecov_tools/templates/SRA.common.xsd @@ -0,0 +1,1044 @@ + + + + + + + + + + + + + + Submitter designated name for the object. The name must be unique within the submission account. + + + + + + + The center name of the submitter. + + + + + + + The center name of the broker. + + + + + + + The object accession assigned by the archive. + + + + + + + + + + + + + Identifies an object by name within the namespace defined by attribute "refcenter". + + + + + + + The namespace of the attribute "refname". 
+ + + + + + + Identifies a record by its accession. The scope of resolution is the entire Archive. + + + + + + + + + + Submitter designated name of the SRA document of this type. At minimum alias should + be unique throughout the submission of this document type. If center_name is specified, the name should + be unique in all submissions from that center of this document type. + + + + + + + Owner authority of this document and namespace for submitter's name of this document. + If not provided, then the submitter is regarded as "Individual" and document resolution + can only happen within the submission. + + + + + + + Broker authority of this document. If not provided, then the broker is considered "direct". + + + + + + + The document's accession as assigned by the Home Archive. + + + + + + + + + + Identifies a record by name that is known within the namespace defined by attribute "refcenter" + Use this field when referencing an object for which an accession has not yet been issued. + + + + + + + The center namespace of the attribute "refname". When absent, the namespace is assumed to be the current submission. + + + + + + + Identifies a record by its accession. The scope of resolution is the entire Archive. + + + + + + + + + + + Alternative/explanatory description of the same object/identifier. + + + + + + + + + + + + A string value that constrains the domain of named + identifiers (namespace). + + + + + + + + + Set of record identifiers. + + + + + A primary identifier in the INSDC namespace. + + + + + A secondary identifier in the INSDC namespace. + + + + + An identifier from a public non-INSDC resource. + + + + + A submitter provided identifier. + + + + + A universally unique identifier that requires no namespace. + + + + + + + + + INSDC controlled vocabulary of permitted cross references. + Please see http://www.insdc.org/db_xref.html . For example, FLYBASE. + + + + + + Accession in the referenced database. For example, FBtr0080008 (in FLYBASE). 
+ + + + + + + Text label to display for the link. + + + + + + + + + + + + Text label to display for the link. + + + + + + + The internet service link (file:, http:, ftp:, etc). + + + + + + + + + Reusable attributes to encode tag-value pairs with optional units. + + + + + + + Name of the attribute. + + + + + + + Value of the attribute. + + + + + + + Optional scientific units. + + + + + + + + + + Reusable external links type to encode URL links, Entrez links, and db_xref links. + + + + + + + + + + Text label to display for the link. + + + + + + The internet service link (file:, http:, ftp: etc). + + + + + + + + + + + + + + NCBI controlled vocabulary of permitted cross references. Please see http://www.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi? . + + + + + + + + Numeric record id meaningful to the NCBI Entrez system. + + + + + + + Accession string meaningful to the NCBI Entrez system. + + + + + + + + How to label the link. + + + + + + + + + + + + + + + The SPOT_DESCRIPTOR specifies how to decode the individual reads of interest from the + monolithic spot sequence. The spot descriptor contains aspects of the experimental design, + platform, and processing information. There will be two methods of specification: one + will be an index into a table of typical decodings, the other being an exact specification. + + + + + + + + + Number of base/color calls, cycles, or flows per + spot (raw sequence length or flow length including all + application and technical tags and mate pairs, but not including + gap lengths). This value will be platform dependent, library + dependent, and possibly run dependent. Variable length platforms + will still have a constant flow/cycle length. + + + + + + + + READ_INDEX starts at 0 and is incrementally increased for each sequential READ_SPEC within a SPOT_DECODE_SPEC + + + + + READ_LABEL is a name for this tag, and can be used to on output to determine read name, for example F or R. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + There are various methods to ordering the reads on the spot. + + + + + + The read is located beginning at the offset or cycle relative to another read. + This choice is appropriate for example when specifying a read + that follows a variable length expected sequence(s). + + + + + + + Specify the read index that precedes this read. + + + + + + + Specify the read index that follows this read. + + + + + + + + + The location of the read start in terms of base count (1 is beginning of spot). + + + + + + + A set of choices of expected basecalls for a current read. Read will be zero-length if none is found. + + + + + + + + Element's body contains a basecall, attribute provide description of this read meaning as well as matching rules. + + + + + + + + + When match occurs, the read will be tagged with this group membership + + + + + + + Minimum number of matches to trigger identification. + + + + + + + Maximum number of mismatches + + + + + + + Where the match should occur. Changes the rules on how min_match and max_mismatch are counted. + + + + + + + + Only @max_mismatch influences matching process + + + + + + + Both matches and mismatches are counted. + When @max_mismatch is exceeded - it is not a match. + When @min_match is reached - match is declared. + + + + + + + Both matches and mismatches are counted. + When @max_mismatch is exceeded - it is not a match. + When @min_match is reached - match is declared. + + + + + + + + + + + + + + + Specify whether the spot should have a default length for this tag if the expected base cannot be matched. + + + + + + + Specify an optional starting point for tag (base offset from 1). + + + + + + + + + + + + + + + + + + + The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. This will be + determined by the Center. 
+ + + + + 454 technology uses 1-color sequential flows + + + + + + + + + + Illumina is 4-channel flowgram with 1-to-1 mapping between basecalls and flows + + + + + + + + + + Helicos is similar to 454 technology - uses 1-color sequential flows + + + + + + + + + + ABI is 4-channel flowgram with 1-to-1 mapping between basecalls and flows + + + + + + + + + + CompleteGenomics platform type. At present there is no instrument model. + + + + + + + + + + + + + + + + + + + + Oxford Nanopore platform type. nanopore-based electronic single molecule analysis + + + + + + + + + + PacificBiosciences platform type for the single molecule real time (SMRT) technology. + + + + + + + + + + Ion Torrent Personal Genome Machine (PGM) from Life Technologies. + + + + + + + + + + Sequencers based on capillary electrophoresis technology manufactured by LifeTech (formerly Applied + BioSciences). + + + + + + + + + + Sequencers based on DNBSEQ by MGI Tech. + + + + + + + + + + + + + + + + + Tells the Archive who will execute the sample demultiplexing operation. + + + + + + + + There shall be no sample de-multiplexing at the level of assigning individual reads to sample pool members. + + + + + + + The submitter has assigned individual reads to sample pool members by providing individual files + containing reads with the same member assignment. + + + + + + + + + + + + + + The PipelineType identifies the sequence or tree of actions to + process the sequencing data. + + + + + + + + + Lexically ordered value that allows for the pipe section to be hierarchically ordered. The float primitive data type is + used to allow for pipe sections to be inserted later on. + + + + + + + STEP_INDEX of the previous step in the workflow. Set to NIL if the first pipe section. + + + + + + + Name of the program or process for primary analysis. This may include a test or condition + that leads to branching in the workflow. + + + + + + + Version of the program or process for primary analysis. 
+ + + + + + + Notes about the program or process for primary analysis. + + + + + + + + Name of the processing pipeline section. + + + + + + + + + + Reference assembly details. + + + + + + A standard genome assembly. + + + + + + A recognized name for the genome assembly. + + + + + Identifies the genome assembly + using an accession number and a sequence version. + + + + + + + + Other genome assembly. + + + + + + Description of the genome + assembly. + + + + + A link to the genome + assembly. + + + + + + Text label to display for the + link. + + + + + The internet service link + (file:, http:, ftp:, etc). + + + + + + + + + + + + + Reference assembly and sequence details. + + + + + Reference assembly details. + + + + + Reference sequence details. + + + + + A recognized name for the + reference sequence. + + + + + + Accession.version with version being mandatory + + + + + + + + + This is how Reference Sequence is labeled in submission file(s). + It is equivalent to SQ label in BAM. + Optional when submitted file uses INSDC accession.version + + + + + + + + + + + + + + Generic processing pipeline specification. + + + + + Processing directives tell the Sequence Read Archive how to + treat the input data, if any treatment is requested. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Undifferentiated early AB SOLiD system + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/relecov_tools/templates/SRA.experiment.xsd b/relecov_tools/templates/SRA.experiment.xsd new file mode 100644 index 00000000..e9ee8f5f --- /dev/null +++ b/relecov_tools/templates/SRA.experiment.xsd @@ -0,0 +1,791 @@ + + + + + + + + + + + Sequencing technique intended for this library. 
+ + + + + Whole Genome Sequencing - random sequencing of the whole genome (see pubmed 10731132 for details) + + + + + + Whole Genome Amplification followed by random sequencing. (see pubmed 1631067,8962113 for details) + + + + + + Random sequencing of exonic regions selected from the genome. (see pubmed 20111037 for details) + + + + + + Random sequencing of whole transcriptome, also known as Whole Transcriptome Shotgun Sequencing, or WTSS). (see + pubmed 18611170 for details) + + + + + Strand-specific RNA sequencing. + + + + + + Micro RNA sequencing strategy designed to capture post-transcriptional RNA elements and include non-coding + functional elements. (see pubmed 21787409 for details) + + + + + Capture of other non-coding RNA types, including post-translation modification types such as snRNA (small + nuclear RNA) or snoRNA (small nucleolar RNA), or expression regulation types such as siRNA (small interfering RNA) or + piRNA/piwi/RNA (piwi-interacting RNA). + + + + + Full-length sequencing of cDNA templates + + + + + Single pass sequencing of cDNA templates + + + + + Chromosome Conformation Capture technique where a biotin-labeled nucleotide is incorporated at the ligation junction, enabling selective purification of chimeric DNA ligation junctions followed by deep sequencing. + + + + + Assay for Transposase-Accessible Chromatin (ATAC) strategy is used to study genome-wide chromatin accessibility. alternative method to DNase-seq that uses an engineered Tn5 transposase to cleave DNA and to integrate primer DNA sequences into the cleaved genomic DNA. + + + + + Random sequencing of a whole chromosome or other replicon isolated from a genome. + + + + + + Genomic clone based (hierarchical) sequencing. + + + + + Shotgun of pooled clones (usually BACs and Fosmids). + + + + + Sequencing of overlapping or distinct PCR or RT-PCR products. For example, metagenomic community profiling + using SSU rRNA . + + + + + Clone end (5', 3', or both) sequencing. 
+ + + + + Sequencing intended to finish (close) gaps in existing coverage. + + + + + ChIP-seq, Chromatin ImmunoPrecipitation, reveals binding sites of specific proteins, typically transcription factors (TFs) using antibodies to extract DNA fragments bound to the target protein. + + + + + Identifies well-positioned nucleosomes. uses Micrococcal Nuclease (MNase) is an endo-exonuclease that processively digests DNA until an obstruction, such as a nucleosome, is reached. + + + + + Sequencing of hypersensitive sites, or segments of open chromatin that are more readily cleaved by DNaseI. + + + + + + MethylC-seq. Sequencing following treatment of DNA with bisulfite to convert cytosine residues to uracil + depending on methylation status. + + + + + Concatenated Tag Sequencing + + + + + Methylation-Sensitive Restriction Enzyme Sequencing. + + + + + Methylated DNA Immunoprecipitation Sequencing. + + + + + Methyl CpG Binding Domain Sequencing. + + + + + Quantitatively determine fitness of bacterial genes based on how many times a purposely seeded transposon gets + inserted into each gene of a colony after some time. + + + + + CGHub special request: Independent experiment to re-evaluate putative variants. + + + + + Formaldehyde Assisted Isolation of Regulatory Elements. Reveals regions of open chromatin. + + + + + Systematic Evolution of Ligands by Exponential enrichment + + + + + Direct sequencing of RNA immunoprecipitates (includes CLIP-Seq, HITS-CLIP and PAR-CLIP). + + + + + Direct sequencing of proximity-ligated chromatin immunoprecipitates. + + + + + binning and barcoding of large DNA fragments to facilitate assembly of the fragment + + + + + Enrichment of a targeted subset of loci. + + + + + + Nucleosome Occupancy and Methylome sequencing. 
+ + + + + ChIPmentation combines chromatin immunoprecipitation with sequencing library preparation by Tn5 transposase (see pubmed 26280331 for details) + + + + + Genotyping by sequencing is a method to discover single nucleotide polymorphisms for genotyping studies. + + + + + Library strategy not listed. + + + + + + + + The LIBRARY_SOURCE specifies the type of source material that is being sequenced. + + + + + Genomic DNA (includes PCR products from genomic DNA). + + + + + + Transcription products or non genomic DNA (EST, cDNA, RT-PCR, screened libraries). + + + + + + Mixed material from metagenome. + + + + + Transcription products from community targets + + + + + Synthetic DNA. + + + + + Viral RNA. + + + + + Other, unspecified, or unknown library source material. + + + + + + + + Method used to enrich the target in the sequence library preparation + + + + + No Selection or Random selection + + + + + target enrichment via PCR + + + + + Source material was selected by randomly generated primers. + + + + + target enrichment via + + + + + Hypo-methylated partial restriction digest + + + + + Methyl Filtrated + + + + + Selection for less repetitive (and more gene rich) sequence through Cot filtration (CF) or other fractionation + techniques based on DNA kinetics. + + + + + Physical selection of size appropriate targets. + + + + + Methylation Spanning Linking Library + + + + + PolyA selection or enrichment for messenger RNA (mRNA); synonymize with PolyA + + + + + + + PolyA selection or enrichment for messenger RNA (mRNA); should replace cDNA enumeration. + + + + + enrichment of messenger RNA (mRNA) by hybridization to Oligo-dT. + + + + + depletion of ribosomal RNA by oligo hybridization. + + + + + depletion of ribosomal RNA by inverse oligo hybridization. 
+ + + + + Chromatin immunoprecipitation + + + + + Chromatin immunoPrecipitation, reveals binding sites of specific proteins, typically transcription factors (TFs) using antibodies to extract DNA fragments bound to the target protein. + + + + + Identifies well-positioned nucleosomes. Uses Micrococcal Nuclease (MNase), an endo-exonuclease that processively digests DNA until an obstruction, such as a nucleosome, is reached. + + + + + DNase I endonuclease digestion and size selection reveals regions of chromatin where the DNA is highly sensitive to DNase I. + + + + + Selection by hybridization in array or solution. + + + + + Reproducible genomic subsets, often generated by restriction fragment size selection, containing a manageable + number of loci to facilitate re-sampling. + + + + + DNA fractionation using restriction enzymes. + + + + + Selection of methylated DNA fragments using an antibody raised against 5-methylcytosine or 5-methylcytidine + (m5C). + + + + + Enrichment by methyl-CpG binding domain. + + + + + Cap-analysis gene expression. + + + + + Rapid Amplification of cDNA Ends. + + + + + Multiple Displacement Amplification, a non-PCR based DNA amplification technique that amplifies a minute + quantity of DNA to levels suitable for genomic analysis. + + + + + Targeted sequence capture protocol covering an arbitrary set of nonrepetitive genomics targets. An example is + capture bisulfite sequencing using padlock probes (BSPP). + + + + + Other library enrichment, screening, or selection process. + + + + + Library enrichment, screening, or selection is not specified. + + + + + + + + + + + + + + + + + + Assignment of read_group_tag to decoded read + + + + + + + + + + Label a sample within a scope of the pool + + + + + Proportion of this sample (in percent) that was included in sample pool. + + + + + + + + + + + + + Identifies a list of group/pool/multiplex sample members. 
This implies that + this sample record is a group, pool, or multiplex, but it continues to receive + its own accession and can be referenced by an experiment. By default if + no match to any of the listed members can be determined, then the default + sample reference is used. + + + + + + + Reference to the sample that is used when read membership cannot be determined. A default member should + be provided if there exists a possibility that some reads will be left over from barcode/MID resolution. A default member + is not needed when defining a true pool (where individual samples are not distinguished in the reads), or the reads have + been partitioned among the pool members (no leftovers). + + + + + Reference to the sample as determined from barcode/MID resolution or read partition. + + + + + + + + + + + + The LIBRARY_DESCRIPTOR specifies the origin of the material being + sequenced and any treatments that the material might have undergone that affect the + sequencing result. This specification is needed even if the platform does not + require a library construction step per se. + + + + + + The submitter's name for this library. + + + + + + + + + + LIBRARY_LAYOUT specifies whether to expect single, paired, or other configuration of reads. + In the case of paired reads, information about the relative distance and orientation is specified. + + + + + + + + + Reads are unpaired (usual case). + + + + + + + + + + + + + + + + + + Names the gene(s) or locus(loci) or other genomic feature(s) targeted by the sequence. + + + + + + + + + Reference to an archived primer or + probe set. Example: dbProbe + + + + + + + + + Bacterial small subunit ribosomal RNA, a locus used for + phylogenetic studies of bacteria and as a target for random target PCR in + environmental biodiversity screening. + + + + + Eukaryotic small subunit ribosomal RNA, a locus used for + phylogenetic studies of eukaryotes and as a target for random target PCR in + environmental biodiversity screening. 
+ + + + + Structural ribosomal RNA for the large component, or large + subunit (LSU) of eukaryotic cytoplasmic ribosomes.. + + + + + RuBisCO large subunit : ribulose-1,5-bisphosphate + carboxylase/oxygenase large subunit, a locus used for phylogenetic studies + of plants. + + + + + Maturase K gene, a locus used for phylogenetic studies of + plants. + + + + + Mitochondrial cytochrome c oxidase 1 gene, a locus used for + phylogenetic studies of animals + + + + + Internal transcribed spacers 1 and 2 plus 5.8S rRNA region, + a locus used for phylogenetic studies of fungi. + + + + + All exonic regions of the genome. + + + + + Other locus, please describe. + + + + + + + + + Submitter supplied description of alternate locus and auxiliary + information. + + + + + + + + + + + + + + The optional pooling strategy indicates how the library or libraries are organized if multiple samples are involved. + + + + + + + + + + Free form text describing the protocol by which the sequencing library was constructed. + + + + + + + + + + Goal and setup of the individual library including library was constructed. + + + + + + Pick a sample to associate this experiment with. The sample may be an individual or a pool, + depending on how it is specified. + + + + + + The LIBRARY_DESCRIPTOR specifies the origin of the material being sequenced and any + treatments that the material might have undergone that affect the sequencing result. This specification is + needed even if the platform does not require a library construction step per se. + + + + + + The SPOT_DESCRIPTOR specifies how to decode the individual reads of interest from the + monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and + processing information. There will be two methods of specification: one will be an index into a table of + typical decodings, the other being an exact specification. This construct is needed for loading data and for + interpreting the loaded runs. 
It can be omitted if the loader can infer read layout (from multiple input + files or from one input files). + + + + + + + + + + + An Experiment specifies of what will be sequenced and how the sequencing will be performed. + It does not contain results. + An Experiment is composed of a design, a platform selection, and processing parameters. + + + + + + + + + + Short text that can be used to call out experiment records in searches or in displays. + This element is technically optional but should be used for all new records. + + + + + + + Identifies the parent study. + + + + + + + + + + + The library design including library properties, layout, protocol, targeting information, and spot and gap + descriptors. + + + + + + The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. + This will be determined by the Center. + + + + + + + + + + Links to resources related to this experiment or experiment set (publication, datasets, online databases). + + + + + + + + + + + + + Properties and attributes of the experiment. These can be entered as free-form + tag-value pairs. + + + + + + + + + + + + + + + + + + + + + + + An EXPERMENT_SET is a container for a set of experiments and a common namespace. + + + + + + + diff --git a/relecov_tools/templates/SRA.run.xsd b/relecov_tools/templates/SRA.run.xsd new file mode 100644 index 00000000..bc65c328 --- /dev/null +++ b/relecov_tools/templates/SRA.run.xsd @@ -0,0 +1,548 @@ + + + + + + + + + + + A run contains a group of reads generated for a particular experiment. + + + + + + + + + Short text that can be used to define submissions in searches or in displays. + + + + + + Identifies the parent experiment. + + + + + + + + + + + + + + + + + The type of the run. + + + + + + + + + + + + + + + + Data files associated with the run. + + + + + + + + + + The READ_LABEL can associate a certain file to a certain read_label defined in the SPOT_DESCRIPTOR. 
+ + + + + + + + + + + + + + + + + + + + The name or relative pathname of a run data file. + + + + + The run data file model. + + + + + + Sequence Read Archives native format in serialized (single file) form. + + + + + Standard Short Read Format file (.srf), all platforms + + + + + 454 Standard Flowgram Format file (.sff) + + + + + + Combined nucleotide/qualities sequence file in .fastq form. + Please see SRA File Formats Guide for definitions of the definition and restrictions on this form. + + + + + + + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + Tab delimited text file used to deliver certain auxiliary data along with sequencing submissions (only needed for certain + use cases). The first line is devoted to column headers. Each column is dedicated to an INDSC + data series type. + Please see SRA File Formats Guide for definitions of the definition and restrictions on this form. + + + + + + + A combination of 454 primary analysis output files, including + seq + qual + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + 454 base calls (for example .seq or .fna). + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + 454 quality scores (for example .qual). + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. 
+ + + + + + + A kind of fastq format specific to the Helicos platform. + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. 
+ + + + + + + A combination of SOLiD primary analysis output files, including: + csfasta + _QV.qual + _intensity.ScaledCY3.fasta + _intensity.ScaledCY5.fasta + _intensity.ScaledFTC.fasta + _intensity.ScaledTXR.fasta + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + Colorspace calls (for example .csfasta) + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + Colorspace quality scores (for example .qual) + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + Pacific Biosciences Hierarchical Data Format. Please see + SRA File Formats Guide for definitions of these file formats. + + + + + + + Binary SAM format that combines alignment and sequencing data. + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + Binary CRAM format that combines alignment and sequencing data. + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. + + + + + + + Please see SRA File Formats Guide for definitions of these file formats, + and the SRA Submission Guidelines document for data series that are appropriate for your study. + Sequence and qualities are minimally required. 
+ + + + + + + Oxford Nanopore data format. + + + + + + + + + + How the input data are scored for quality. + + + + + + + + The quality score is expressed as a probability of error in log form: + -10 log(1/p) where p is the probability of error, with value range 0..63, + 0 meaning no base call. + + + + + + + The quality score is expressed as the ratio of error to non-error in log form: + -10 log(p/(1-p)) where p is the probability of error, with value range -40..40. + The SRA will convert these into phred scale during loadtime. + + + + + + + + + + Character used in representing the minimum quality value. + Helps specify how to decode text rendering of quality data. + + + + + + + + ASCII character based encoding. + + + + + + + Single decimal value per quality score. + + + + + + + Single hexadecimal value per quality score. + + + + + + + + + + Character used in representing the minimum quality value. Helps specify how to decode text rendering of quality data. + + + + + + + + ASCII value 33. Typically used for range 0..63. + + + + + + + ASCII value 64. Typically used for range 0..60. + + + + + + + + + + + Checksum method used. + + + + + + + + Checksum generated by the MD5 method (md5sum in unix). + + + + + + + Checksum generated by the SHA-256 method. + + + + + + + + + + Checksum of uncompressed file. + + + + + + + Checksum of unencrypted file (used in conjunction with checksum of encrypted file). + + + + + + + + + + + + + Allow for an individual DATA_BLOCK to be associated with a member of a sample pool. + + + + + + + + + + Links to resources related to this RUN or RUN set (publication, datasets, online databases). + + + + + + + + + + + + + Properties and attributes of a RUN. These can be entered as free-form + tag-value pairs. For certain studies, submitters may be asked to follow a + community established ontology when describing the work. + + + + + + + + + + + + + ISO date when the run took place. 
+ + + + + + + If applicable, the name of the contract sequencing center that executed the run. + Example: 454MSC. + + + + + + + + + + + + + + + + + RUN_SET serves as a container for a set of runs and a name space + for establishing referential integrity between them. + + + + + + + diff --git a/relecov_tools/templates/SRA.sample.xsd b/relecov_tools/templates/SRA.sample.xsd new file mode 100644 index 00000000..37d94e14 --- /dev/null +++ b/relecov_tools/templates/SRA.sample.xsd @@ -0,0 +1,127 @@ + + + + + + + + + + + A Sample defines an isolate of sequenceable material upon which + sequencing experiments can be based. The Sample object may be a surrogate for taxonomy + accession or an anonymized individual identifier. Or, it may fully specify + provenance and isolation method of the starting material. + + + + + + + + + Short text that can be used to call out sample records in search results or in displays. + + + + + + + + + + NCBI Taxonomy Identifier. This is appropriate for individual organisms and + some environmental samples. + + + + + + + Scientific name of sample that distinguishes its taxonomy. Please use a + name or synonym that is tracked in the INSDC Taxonomy database. + Also, this field can be used to confirm the TAXON_ID setting. + + + + + + + GenBank common name of the organism. Examples: human, mouse. + + + + + + + + + + + Free-form text describing the sample, its origin, and its method of isolation. + + + + + + + + + Links to resources related to this sample or sample set (publication, datasets, online databases). + + + + + + + + + + + + + Properties and attributes of a sample. These can be entered as free-form + tag-value pairs. For certain studies, submitters may be asked to follow a + community established ontology when describing the work. + + + + + + + + + + + + + + + + + + + + + + + + SAMPLE_SET serves as a container for a set of samples and a name space + for establishing referential integrity between them. 
+ + + + + + + + diff --git a/relecov_tools/templates/SRA.study.xsd b/relecov_tools/templates/SRA.study.xsd new file mode 100644 index 00000000..15fdce88 --- /dev/null +++ b/relecov_tools/templates/SRA.study.xsd @@ -0,0 +1,290 @@ + + + + + + + + + + + A Study is a container for a sequencing investigation that may comprise multiple experiments. + The Study has an overall goal, but is otherwise minimally defined in the SRA. + A Study is composed of a descriptor, zero or more experiments, and zero or more analyses. + The submitter may decorate the Study with web links and properties. + + + + + + + + + + + + Title of the study as would be used in a publication. + + + + + + The STUDY_TYPE presents a controlled vocabulary for expressing the overall purpose of the study. + + + + + + + + + Sequencing of a single organism. + + + + + + + Sequencing of a community. + + + + + + + Sequencing and characterization of transcription elements. + + + + + + + Sequencing of a sample with respect to a reference. + + + + + + + Cellular differentiation study. + + + + + + + Sequencing of modified, synthetic, or transplanted genomes. + + + + + + + Sequencing of recovered genomic material. + + + + + + + Study of gene expression regulation. + + + + + + + Study of cancer genomics. + + + + + + + Study of populations and evolution through genomics. + + + + + + + RNA sequencing study. + + + + + + + The study investigates the exons of the genome. + + + + + + + The study is sequencing clone pools (BACs, fosmids, other constructs). + + + + + + Sequencing of transcription elements. + + + + + + Study type not listed. + + + + + + + + + + + To propose a new term, select Other and enter a new study type. + + + + + + + + + Briefly describes the goals, purpose, and scope of the Study. This need not be listed if it can be + inherited from a referenced publication. + + + + + + + DEPRECATED. Use STUDY@center_name instead. 
+ Controlled vocabulary identifying the sequencing center, core facility, consortium, or laboratory responsible for the study. + + + + + + + Submitter defined project name. This field is intended for backward tracking of the study record to the submitter's LIMS. + + + + + + + DEPRECATED (use RELATED_STUDIES.STUDY instead). + The required PROJECT_ID accession is generated by the Genome Project database at NCBI + and will be valid also at the other archival institutions. + + + + + + + + + + + + + Related study or project record from a list of supported databases. + The study's information is derived from this project record rather + than stored as first class information. + + + + + + + Whether this study object is designated as the primary source + of the study or project information. + + + + + + + + + + + + + + + More extensive free-form description of the study. + + + + + + + + + + + Links to resources related to this study (publication, datasets, online databases). + + + + + + + + + + + + + Properties and attributes of the study. These can be entered as free-form + tag-value pairs. For certain studies, submitters may be asked to follow a + community established ontology when describing the work. + + + + + + + + + + + + + + + + + + + + + + + An STUDY_SET is a container for a set of studies and a common namespace. + + + + + + + + diff --git a/relecov_tools/templates/SRA.submission.xsd b/relecov_tools/templates/SRA.submission.xsd new file mode 100644 index 00000000..9497871c --- /dev/null +++ b/relecov_tools/templates/SRA.submission.xsd @@ -0,0 +1,354 @@ + + + + + + + + + + + A Submission type is used to describe an object that contains submission actions to be performed by the archive. + + + + + + + + + Short text that can be used to define submissions in searches or in displays. + + + + + + + + + + + + Name of contact person for this submission. + + + + + + + Internet address of person or service to inform on any status changes for this submission. 
+ + + + + + + Internet address of person or service to inform on any errors for this submission. + + + + + + + + + + + + + + + Action to be executed by the archive. + + + + + + Add an object to the archive. + + + + + Filename or relative path to the XML file being submitted. + + + + + The type of the XML file being submitted. + + + + + + + + + + + + + + + + + + + + + + Modify an object in the archive. + + + + + Filename or relative path to the XML file being updated. + + + + + The type of the XML file being updated. + + + + + + + + + + + + + + + + + + + + + + Cancels a private object and its dependent objects. + + + + + Accession or refname of the object that is being cancelled. + + + + + + + Suppresses a public object and its dependent objects. + + + + + Accession or refname of the object that is being suppressed. + + + + + The date when a temporarily suppressed object will be made public. + + + + + + + Kills a public object and its dependent objects. + + + + + Accession or refname of the object that is being killed. + + + + + The date when a temporarily killed object will be made public. + + + + + + + Make the object public only when the hold date expires. + + + + + + Accession or refname of the object that is being made public + when the hold date expires. If not specified then + all objects in the submission will be assigned the hold date. + + + + + + The date when the submission will be made public. + + + + + + + The object will be released immediately to public. + + + + + + Accession or refname of the object that is made public. + If not specified then all objects in the submission will + made public. + + + + + + + + This action is required for data submitted to European Genome-Phenome Archive (EGA). + + + + + + This action will rollback the submission from the database + + + + + + Validates the submitted XMLs without actually submitting them. + + + + + Filename or relative path to the XML file being validated. 
+ + + + + The type of the XML file being validated. + + + + + + + + + + + + + + + + + + + + + + Returns the receipt for a given submission alias. + + + + + Submission alias. + + + + + + + + + + + + + + + Archive created links to associated submissions. + + + + + + + + + + + + + Archive assigned properties and attributes of a SUBMISSION. + + + + + + + + + + + + + + Submitter assigned preparation date of this submission object. + + + + + + + Submitter assigned comment. + + + + + + + Laboratory name within submitting institution. + + + + + + + + + + + + + + + + + + An SUBMISSION_SET is a container for a set of studies and a common namespace. + + + + + + + + diff --git a/relecov_tools/test/my_test_file.xlsx b/relecov_tools/test/my_test_file.xlsx new file mode 100644 index 00000000..e93ac26e Binary files /dev/null and b/relecov_tools/test/my_test_file.xlsx differ diff --git a/relecov_tools/test/sftp_config.yaml b/relecov_tools/test/sftp_config.yaml new file mode 100644 index 00000000..07d84433 --- /dev/null +++ b/relecov_tools/test/sftp_config.yaml @@ -0,0 +1,9 @@ +sftp_server: "sftprelecov.isciii.es" +sftp_port: "22" +sftp_user : "usuario_test" +sftp_passwd : "U[9[Gpyu3." +storage_local_folder: "/tmp/relecov" +tmp_folder_for_metadata: "/tmp/relecov/tmp" +allowed_sample_extensions: + - .fastq.gz + - .fasta diff --git a/relecov_tools/test/test_cases.txt b/relecov_tools/test/test_cases.txt new file mode 100644 index 00000000..430ae49a --- /dev/null +++ b/relecov_tools/test/test_cases.txt @@ -0,0 +1,9 @@ +relecov-tools -l /tmp/logs.log download -u usuario_test -p U[9[Gpyu3. 
+relecov-tools -l /tmp/logs.log download -f relecov_tools/test/sftp_config.yaml +relecov-tools read-lab-metadata -m relecov_tools/example_data/METADATA_LAB_TEST.xlsx -s relecov_tools/example_data/samples_data_COD_test_01_20220422.json -o /tmp +relecov-tools validate -j relecov_tools/example_data/processed_METADATA_LAB_TEST.json -m relecov_tools/example_data/METADATA_LAB_TEST.xlsx -o /tmp +relecov-tools map -j relecov_tools/example_data/processed_METADATA_LAB_TEST.json -d ENA -o /tmp +relecov-tools map -j relecov_tools/example_data/processed_METADATA_LAB_TEST.json -d GISAID -o /tmp +relecov-tools update-db -j relecov_tools/example_data/processed_METADATA_LAB_TEST.json +relecov-tools read-bioinfo-metadata -m relecov_tools/example_data/lab_metadata_20220208.xlsx -i /relecov_tools/example_data -o relecov_tools/example_data +relecov-tools upload-to-ena -e relecov_tools/example_data/to_ena_2.json -o relecov_tools/test -a add -c ISCIII diff --git a/relecov_tools/upload_database.py b/relecov_tools/upload_database.py new file mode 100644 index 00000000..eb5d5c4f --- /dev/null +++ b/relecov_tools/upload_database.py @@ -0,0 +1,388 @@ +#!/usr/bin/env python +import sys +import os +import re +import glob +import json +import logging +import rich.console +import time + +import relecov_tools.utils +from relecov_tools.config_json import ConfigJson +from relecov_tools.rest_api import RestApi +from relecov_tools.log_summary import LogSum + +log = logging.getLogger(__name__) +stderr = rich.console.Console( + stderr=True, + style="dim", + highlight=False, + force_terminal=relecov_tools.utils.rich_force_colors(), +) + + +class UpdateDatabase: + def __init__( + self, + user=None, + passwd=None, + json_file=None, + type_of_info=None, + platform=None, + server_url=None, + full_update=False, + ): + # Get the user and password for the database + if user is None: + user = relecov_tools.utils.prompt_text( + msg="Enter username for upload data to server" + ) + self.user = user + if passwd is 
None: + passwd = relecov_tools.utils.prompt_text(msg="Enter credential password") + self.passwd = passwd + # get the default coonfiguration used the instance + self.config_json = ConfigJson() + if json_file is None: + json_file = relecov_tools.utils.prompt_path( + msg="Select the json file which have the data to map" + ) + if not os.path.isfile(json_file): + log.error("json data file %s does not exist ", json_file) + stderr.print(f"[red] json data file {json_file} does not exist") + sys.exit(1) + self.json_data = relecov_tools.utils.read_json_file(json_file) + self.json_file = json_file + schema = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "schema", + self.config_json.get_topic_data("json_schemas", "relecov_schema"), + ) + self.schema = relecov_tools.utils.read_json_file(schema) + if full_update is True: + self.full_update = True + self.server_url = None + else: + self.full_update = False + if type_of_info is None: + type_of_info = relecov_tools.utils.prompt_selection( + "Select:", + ["sample", "bioinfodata", "variantdata"], + ) + self.type_of_info = type_of_info + # collect data for plarform to upload data + if platform is None: + platform = relecov_tools.utils.prompt_selection( + "Select:", + ["iskylims", "relecov"], + ) + self.platform = platform + if server_url is None: + self.server_url = server_url + # Get configuration settings for upload database + try: + self.platform_settings = self.config_json.get_topic_data( + "upload_database", "platform" + ) + except KeyError as e: + logtxt = f"Unable to fetch parameters for {platform} {e}" + stderr.print(f"[red]{logtxt}") + log.error(logtxt) + sys.exit(1) + # create the instance for logging the summary information + json_dir = os.path.dirname(os.path.realpath(self.json_file)) + lab_code = json_dir.split("/")[-2] + self.logsum = LogSum( + output_location=json_dir, unique_key=lab_code, path=json_dir + ) + + def get_schema_ontology_values(self): + """Read the schema and extract the values of ontology 
with the label""" + ontology_dict = {} + for prop, values in self.schema["properties"].items(): + if "ontology" in values and values["ontology"] != "": + ontology_dict[values["ontology"]] = prop + return ontology_dict + + def map_iskylims_sample_fields_values(self, sample_fields, s_project_fields): + """Map the values to the properties send to databasee + in json schema based on label + """ + sample_list = [] + s_fields = list(sample_fields.keys()) + for row in self.json_data: + s_dict = {} + for key, value in row.items(): + found_ontology = re.search(r"(.+) \[\w+:.*", value) + if found_ontology: + # remove the ontology data from item value + value = found_ontology.group(1) + if key in s_project_fields: + s_dict[key] = value + if key in s_fields: + s_dict[sample_fields[key]] = value + if key not in s_project_fields and key not in s_fields: + # just for debugging, write the fields that will not + # be included in iSkyLIMS request + log.debug("not key %s in iSkyLIMS", key) + # include the fixed value + fixed_value = self.config_json.get_topic_data( + "upload_database", "iskylims_fixed_values" + ) + for prop, val in fixed_value.items(): + s_dict[prop] = val + # Adding tha specimen_source field to set sample_type + try: + s_dict["sample_type"] = row["specimen_source"] + except KeyError as e: + logtxt = f"Unable to fetch specimen_source from json file {e}" + self.logsum.add_warning(entry=logtxt) + s_dict["sample_type"] = "Other" + sample_list.append(s_dict) + # if sample_entry_date is not set then, add the current date + if "sample_entry_date" not in row: + logtxt = "sample_entry_date is not in the sample fields" + self.logsum.add_warning(entry=logtxt) + stderr.print(f"[yellow]{logtxt}") + s_dict["sample_entry_date"] = time.strftime("%Y-%m-%d") + + return sample_list + + def get_iskylims_fields_sample(self): + """2 requests are sent to iSkyLIMS. One for getting the sample fields + These fields are mapped using the ontology. 
+ The second request is for getting the sample project fields. These are + mapped using the label value. + """ + + sample_fields = {} + s_project_fields = [] + # get the ontology values for mapping values in sample fields + ontology_dict = self.get_schema_ontology_values() + sample_url = self.platform_settings["iskylims"]["url_sample_fields"] + sample_fields_raw = self.platform_rest_api.get_request(sample_url, "", "") + + if "ERROR" in sample_fields_raw: + logtxt1 = f"Unable to fetch data from {self.platform}." + logtxt2 = f" Received error {sample_fields_raw['ERROR']}" + self.logsum.add_error(entry=str(logtxt1 + logtxt2)) + stderr.print(f"[red]{logtxt1 + logtxt2}") + sys.exit(1) + + for _, values in sample_fields_raw["DATA"].items(): + if "ontology" in values: + try: + property = ontology_dict[values["ontology"]] + # sample_fields has a key, the label in metadata, and as value + # the field name for the sample + sample_fields[property] = values["field_name"] + except KeyError as e: + self.logsum.add_warning(entry=f"Error mapping ontology {e}") + # stderr.print(f"[red]Error mapping ontology {e}") + else: + # for the ones that do not have ontology label in the sample field + # and have an empty value: sample_fields[key] = "" + logtxt = f"No ontology found for {values.get('field_name')}" + self.logsum.add_warning(entry=(logtxt)) + # fetch label for sample Project + s_project_url = self.platform_settings["iskylims"]["url_project_fields"] + param = self.platform_settings["iskylims"]["param_sample_project"] + p_name = self.platform_settings["iskylims"]["project_name"] + s_project_fields_raw = self.platform_rest_api.get_request( + s_project_url, param, p_name + ) + if "ERROR" in s_project_fields_raw: + logtxt1 = f"Unable to fetch data from {self.platform}." 
+ logtxt2 = f" Received error {s_project_fields_raw['ERROR']}" + self.logsum.add_error(entry=str(logtxt1 + logtxt2)) + return + else: + log.info("Fetched sample project fields from iSkyLIMS") + stderr.print("[blue] Fetched sample project fields from iSkyLIMS") + for field in s_project_fields_raw["DATA"]: + s_project_fields.append(field["sample_project_field_name"]) + return [sample_fields, s_project_fields] + + def map_relecov_sample_data(self): + """Select the values from self.json_data""" + field_values = [] + r_fields = self.config_json.get_topic_data( + "upload_database", "relecov_sample_metadata" + ) + + for row in self.json_data: + s_dict = {} + for r_field in r_fields: + if r_field in row: + s_dict[r_field] = row[r_field] + else: + s_dict[r_field] = None + field_values.append(s_dict) + return field_values + + def update_database(self, field_values, post_url): + """Send the request to update database""" + post_url = self.platform_settings[self.platform][post_url] + suces_count = 0 + request_count = 0 + for chunk in field_values: + req_sample = "" + request_count += 1 + if "sample_name" in chunk: + stderr.print( + f"[blue] sending request for sample {chunk['sample_name']}" + ) + req_sample = chunk["sample_name"] + elif "sequencing_sample_id" in chunk: + stderr.print( + f"[blue] sending request for sample {chunk['sequencing_sample_id']}" + ) + req_sample = chunk["sequencing_sample_id"] + self.logsum.feed_key(sample=req_sample) + result = self.platform_rest_api.post_request( + json.dumps(chunk), + {"user": self.user, "pass": self.passwd}, + post_url, + ) + if "ERROR" in result: + if result["ERROR"] == "Server not available": + # retry to connect to server + for i in range(10): + # wait 5 sec before resending the request + time.sleep(5) + result = self.platform_rest_api.post_request( + json.dumps(chunk), + {"user": self.user, "pass": self.passwd}, + self.platform_settings[post_url], + ) + if "ERROR" not in result: + break + if i == 9 and "ERROR" in result: + 
def store_data(self, type_of_info, server_name):
    """Collect the data to upload and send it to the selected platform.

    For "sample" data the json content is mapped to the fields expected by
    iSkyLIMS or by the Relecov platform, depending on ``server_name``. For
    "bioinfodata" and "variantdata" the loaded json data is sent unchanged.

    Args:
        type_of_info (str): "sample", "bioinfodata" or "variantdata".
        server_name (str): "iskylims" selects the iSkyLIMS sample mapping;
            any other value selects the Relecov sample mapping.

    Raises:
        ValueError: if ``type_of_info`` is not one of the supported types.
    """
    # API endpoint used for each supported data type.
    post_urls = {
        "sample": "store_samples",
        "bioinfodata": "bioinfodata",
        "variantdata": "variantdata",
    }
    # Fail early with a clear message; previously an unknown type left
    # ``post_url`` unbound and crashed with UnboundLocalError further down.
    if type_of_info not in post_urls:
        raise ValueError(
            f"Unsupported type of info '{type_of_info}'. "
            f"Valid options are: {', '.join(post_urls)}"
        )
    post_url = post_urls[type_of_info]

    if type_of_info == "sample":
        if server_name == "iskylims":
            log.info("Getting sample fields from %s", server_name)
            stderr.print(f"[blue] Getting sample fields from {server_name}")
            sample_fields, s_project_fields = self.get_iskylims_fields_sample()
            log.info("Selecting sample fields")
            stderr.print("[blue] Selecting sample fields")
            map_fields = self.map_iskylims_sample_fields_values(
                sample_fields, s_project_fields
            )
        else:
            stderr.print("[blue] Selecting sample fields")
            map_fields = self.map_relecov_sample_data()
    else:
        # bioinfodata / variantdata are uploaded exactly as read from json
        map_fields = self.json_data

    self.update_database(map_fields, post_url)
    stderr.print(f"[green]Upload process to {self.platform} completed")
class EnaUpload:
    """Build ENA metadata tables from a json file and submit them to ENA.

    Every constructor argument left as ``None`` is requested interactively
    through the ``relecov_tools.utils`` prompt helpers (``upload_fastq`` and
    ``metadata_types`` are the exceptions: they are used as given).
    """

    def __init__(
        self,
        user=None,
        passwd=None,
        center=None,
        source_json=None,
        template_path=None,
        dev=None,
        action=None,
        metadata_types=None,
        upload_fastq=None,
        output_path=None,
    ):
        """Collect and validate the settings needed for the submission.

        Args:
            user (str): ENA user name.
            passwd (str): ENA password.
            center (str): center name sent with the submission.
            source_json (str): path to the json file with the ENA metadata.
            template_path (str): folder containing the ENA xml templates.
            dev (bool): when true the ENA test server (wwwdev) is used.
            action (str): ADD, MODIFY, CANCEL or RELEASE (case insensitive).
            metadata_types (str): comma separated subset of
                "study,run,experiment,sample"; all of them when None.
            upload_fastq (bool): whether fastq files are sent through FTP.
            output_path (str): folder where receipts and tables are stored.

        The process exits with status 1 when the json file or the template
        folder do not exist, or when the action / metadata types are not
        supported.
        """
        if user is None:
            self.user = relecov_tools.utils.prompt_text(
                msg="Enter your username defined in ENA"
            )
        else:
            self.user = user
        if passwd is None:
            self.passwd = relecov_tools.utils.prompt_password(
                msg="Enter your password to ENA"
            )
        else:
            self.passwd = passwd
        if center is None:
            self.center = relecov_tools.utils.prompt_text(msg="Enter your center name")
        else:
            self.center = center
        if source_json is None:
            self.source_json_file = relecov_tools.utils.prompt_path(
                msg="Select the ENA json file to upload"
            )
        else:
            self.source_json_file = source_json
        if not os.path.exists(self.source_json_file):
            log.error("json data file %s does not exist ", self.source_json_file)
            stderr.print(f"[red]json data file {self.source_json_file} does not exist")
            sys.exit(1)
        if template_path is None:
            self.template_path = relecov_tools.utils.prompt_path(
                msg="Select the folder containing ENA templates"
            )
            # e.g. template_folder = "/home/user/github_repositories/relecov-tools/relecov_tools/templates"
        else:
            self.template_path = template_path
        if not os.path.exists(self.template_path):
            stderr.print("[red]Error: ENA template folder does not exist")
            sys.exit(1)
        if dev is None:
            self.dev = relecov_tools.utils.prompt_yn_question(
                msg="Do you want to test upload data?"
            )
        else:
            self.dev = dev
        if action is None:
            self.action = relecov_tools.utils.prompt_selection(
                msg="Select the action to upload to ENA",
                choices=["ADD", "MODIFY", "CANCEL", "RELEASE"],
            )
        elif action.upper() not in ["ADD", "MODIFY", "CANCEL", "RELEASE"]:
            stderr.print(f"[red] Action '{action}' not supported")
            sys.exit(1)
        else:
            self.action = action.upper()

        if output_path is None:
            self.output_path = relecov_tools.utils.prompt_path(
                msg="Select the folder to store the xml files"
            )
        else:
            self.output_path = output_path

        self.upload_fastq_files = upload_fastq

        all_metadata_types = ["study", "run", "experiment", "sample"]
        if metadata_types is None:
            # If not specified, all metadata xmls are generated and submitted
            self.metadata_types = all_metadata_types
        else:
            self.metadata_types = metadata_types.split(",")
            wrong_types = [
                xml for xml in self.metadata_types if xml not in all_metadata_types
            ]
            if wrong_types:
                log.error("Unsupported metadata xml types: " + str(wrong_types))
                stderr.print(f"[red]Unsupported metadata xml types: {wrong_types}")
                sys.exit(1)

        config_json = ConfigJson()
        self.config_json = config_json
        self.checklist = self.config_json.get_configuration("ENA_fields")["checklist"]
        with open(self.source_json_file, "r") as fh:
            self.json_data = json.load(fh)
        # The wwwdev server is ENA's test service
        if self.dev:
            self.url = "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA"
        else:
            self.url = "https://www.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA"

    def table_formatting(self, schemas_dataframe_raw, source):
        """Apply the formatting each metadata table needs before building xmls.

        Note that ``schemas_dataframe_raw[source]`` is modified in place
        (status column inserted, columns renamed).

        Args:
            schemas_dataframe_raw (dict): raw dataframes indexed by source.
            source (str): table to format: study, run, experiment or sample.

        Returns:
            pandas.DataFrame: the formatted table for ``source``.
        """
        formated_df = schemas_dataframe_raw[source]
        formated_df.insert(3, "status", self.action)
        formated_df.rename(
            columns={
                (str(source) + "_alias"): "alias",
                (str(source) + "_title"): "title",
            },
            inplace=True,
        )
        if self.action in ["CANCEL", "MODIFY", "RELEASE"]:
            formated_df.rename(
                columns={"ena_" + str(source) + "_accession": "accession"}, inplace=True
            )
        if source == "study":
            formated_df = formated_df.drop_duplicates(subset=["alias"])
        if source == "sample":
            formated_df.insert(4, "ENA_CHECKLIST", self.checklist)
        # file_name and file_checksum are fields with a structure like this:
        # file_nameR1--file_nameR2 / file_checksumR1--file_checksumR2
        # The run table needs a row for each strand, so these fields are split
        if source == "run":
            formated_df["file_name"] = formated_df["file_name"].str.split("--")
            formated_df = formated_df.explode("file_name").reset_index(drop=True)
            # After explode the rows alternate R1, R2: even rows take the
            # first checksum and odd rows the second one.
            formated_df["file_checksum"] = [
                x.split("--")[0] if index % 2 == 0 else x.split("--")[1]
                for index, x in enumerate(formated_df["file_checksum"])
            ]

        return formated_df

    def dataframes_from_json(self, json_data):
        """Build the dictionary of dataframes used as base to create the xmls.

        For CANCEL, MODIFY and RELEASE every sample needs its accession ids;
        samples lacking any of them are reported and left out of the tables.

        Args:
            json_data (list(dict)): samples metadata.

        Returns:
            dict: formatted dataframe for each requested metadata type.
        """
        source_options = self.metadata_types
        schemas_dataframe = {}
        schemas_dataframe_raw = {}
        needs_accession = self.action in ["CANCEL", "MODIFY", "RELEASE"]
        acces_fields = self.config_json.get_topic_data("ENA_fields", "accession_fields")
        filtered_access_fields = [
            fd for fd in acces_fields if any(source in fd for source in source_options)
        ]
        all_missing_accessions = []
        if needs_accession:
            for source in source_options:
                missing_accessions = [
                    samp["sample_name"]
                    for samp in json_data
                    for fd in filtered_access_fields
                    if (source in fd and fd not in samp.keys())
                ]
                if missing_accessions:
                    log.error("Found samples in json without proper ena accessions")
                    stderr.print(f"[red]Found samples missing {source} accession ids:")
                    all_missing_accessions.extend(missing_accessions)
            if all_missing_accessions:
                stderr.print("Not committed samples:\n", all_missing_accessions)

        for source in source_options:
            source_topic = "_".join(["df", source, "fields"])
            # Work on a copy: appending to the list handed out by the config
            # object could otherwise grow it on every call (one per batch).
            source_fields = list(
                self.config_json.get_topic_data("ENA_fields", source_topic)
            )
            if needs_accession:
                source_fields.append(str("ena_" + source + "_accession"))
            source_dict = {
                field: [
                    sample[field]
                    for sample in json_data
                    if sample["sample_name"] not in all_missing_accessions
                ]
                for field in source_fields
            }
            schemas_dataframe_raw[source] = pd.DataFrame.from_dict(source_dict)
            schemas_dataframe[source] = self.table_formatting(
                schemas_dataframe_raw, source
            )

        return schemas_dataframe

    def save_tables(self, schemas_dataframe, date):
        """Save each dataframe as ``<source><date>_table.csv`` in output_path.

        Args:
            schemas_dataframe (dict): dataframes indexed by source.
            date (str): timestamp included in the file names.
        """
        stderr.print(f"Saving dataframes in {self.output_path}")
        for source, table in schemas_dataframe.items():
            # os.path.join keeps the files inside output_path even when the
            # folder was given without a trailing separator.
            table_name = os.path.join(self.output_path, f"{source}{date}_table.csv")
            table.to_csv(table_name, sep=",")

    def update_json(self, updated_schemas_df, json_data):
        """Return a copy of json_data including the accessions given by ENA.

        Accessions are assigned to samples by position. The input samples are
        not modified.

        Args:
            updated_schemas_df (dict): tables containing an "accession" column.
            json_data (list(dict)): samples metadata.

        Returns:
            list(dict): samples with the ``ena_<source>_accession`` fields set.
        """
        # Copy every sample so the accessions are not written into the
        # caller's dictionaries (a plain list.copy() shares them).
        updated_json_data = [dict(sample) for sample in json_data]
        for source, table in updated_schemas_df.items():
            accessions = list(table["accession"])
            # run accessions are duplicated for R1/R2 so one of each pair is removed
            if source == "run":
                accessions = accessions[::2]
            accession_field_name = str("ena_" + source + "_accession")
            for sample, accession in zip(updated_json_data, accessions):
                sample[accession_field_name] = accession
        return updated_json_data

    def xml_submission(self, json_data, schemas_dataframe, batch_index=None):
        """Create the xml files, submit them to ENA and store the results.

        The receipt and the updated tables are written to output_path. The
        updated json is written using only its file name, i.e. relative to
        the current working directory.

        Args:
            json_data (list(dict)): samples metadata being submitted.
            schemas_dataframe (dict): tables from ``dataframes_from_json``.
            batch_index (str): batch number added to the json name, if any.

        The process exits printing the receipt when it cannot be processed.
        """
        schema_targets = extract_targets(self.action, schemas_dataframe)

        tool = self.config_json.get_configuration("ENA_fields")["tool"]

        if self.action in ["ADD", "MODIFY"]:
            schema_xmls = run_construct(
                self.template_path, schema_targets, self.center, self.checklist, tool
            )
            submission_xml = construct_submission(
                self.template_path,
                self.action,
                schema_xmls,
                self.center,
                self.checklist,
                tool,
            )
        elif self.action in ["CANCEL", "RELEASE"]:
            # when CANCEL/RELEASE, only the accessions are needed
            # schema_xmls is only used to record the following 'submission'
            schema_xmls = {}
            submission_xml = construct_submission(
                self.template_path,
                self.action,
                schema_targets,
                self.center,
                self.checklist,
                tool,
            )
        schema_xmls["submission"] = submission_xml

        stderr.print(f"\nProcessing submission to ENA server: {self.url}")

        receipt = send_schemas(schema_xmls, self.url, self.user, self.passwd).text
        # makedirs also creates missing parent folders and tolerates the
        # folder being created in the meantime
        os.makedirs(self.output_path, exist_ok=True)
        date = str(datetime.now().strftime("%Y%m%d-%H%M%S"))
        receipt_name = "receipt_" + date + ".xml"
        receipt_path = os.path.join(self.output_path, receipt_name)
        stderr.print(f"Printing receipt to {receipt_path}")

        with open(receipt_path, "w") as fw:
            fw.write(receipt)
        try:
            schema_update = process_receipt(receipt.encode("utf-8"), self.action)
        except ValueError:
            log.error("There was an ERROR during submission:")
            sys.exit(receipt)
        if str(self.action) in ["ADD", "MODIFY"]:
            updated_schemas_df = update_table(
                schemas_dataframe, schema_targets, schema_update
            )
        else:
            updated_schemas_df = update_table_simple(
                schemas_dataframe, schema_targets, self.action
            )

        updated_json = self.update_json(updated_schemas_df, json_data)
        if batch_index is None:
            suffix = str("_" + date + ".json")
        else:
            suffix = str("_" + date + "_batch" + str(batch_index) + ".json")
        # NOTE(review): unlike the receipt and the tables, this file is not
        # placed in output_path -- confirm whether that is intended.
        updated_json_name = (
            os.path.splitext(os.path.basename(self.source_json_file))[0] + suffix
        )
        relecov_tools.utils.write_json_fo_file(updated_json, updated_json_name)

        self.save_tables(updated_schemas_df, date)
        return

    def fastq_submission(self, json_data):
        """Upload the R1/R2 fastq files of every sample to ENA through FTP.

        A failure uploading one file is reported and the remaining files are
        still tried.

        Args:
            json_data (list(dict)): samples metadata; must contain the
                "r1_fastq_filepath" and "r2_fastq_filepath" fields.
        """
        stderr.print("Submitting fastq files")
        json_dataframe = pd.DataFrame(json_data)
        # file name -> absolute path, R1 files first and then R2 files
        file_paths = {}
        for column in ("r1_fastq_filepath", "r2_fastq_filepath"):
            for path in json_dataframe[column]:
                file_paths[os.path.basename(path)] = os.path.abspath(path)

        session = ftplib.FTP("webin2.ebi.ac.uk", self.user, self.passwd)
        for filename, path in file_paths.items():
            stderr.print("Uploading path: " + path + " with filename: " + filename)
            try:
                # the context manager closes the file even if the upload fails
                with open(path, "rb") as fastq:
                    response = session.storbinary(f"STOR {filename}", fastq)
                stderr.print(response)
            except ftplib.all_errors as err:
                # all_errors covers FTP, OS and EOF errors but, unlike
                # BaseException, lets Ctrl-C and SystemExit go through.
                # A time out at this stage is probably caused by a firewall:
                # FTP is used in passive mode and the connection is opened to
                # one of the ports 40000 and 50000.
                stderr.print(f"ERROR: {err}")
        stderr.print(session.quit())
        return

    @staticmethod
    def _iter_batches(json_data, batch_size):
        """Yield (batch number as str starting at "1", slice of json_data)."""
        for index, start in enumerate(range(0, len(json_data), batch_size)):
            yield str(index + 1), json_data[start : start + batch_size]

    def large_json_upload(self, json_data):
        """
        Split large json into smaller jsons of maximum size 20
        due to limitations in submissions to ENA's API
        """
        ena_api_limit = 20
        batches = list(self._iter_batches(json_data, ena_api_limit))
        stderr.print(f"Splitting the json data in {len(batches)} batchs...")
        for batch_index, batch in batches:
            stderr.print(f"[blue]Processing batch {batch_index}...")
            self.standard_upload(batch, batch_index)
        return

    def standard_upload(self, json_data, batch_index=None):
        """Create the required files and upload to ENA.

        Args:
            json_data (list(dict)): samples metadata to submit.
            batch_index (str): batch number when the json was split.
        """
        schemas_dataframe = self.dataframes_from_json(json_data)
        stderr.print("[blue]Successfull creation of dataframes")
        if self.upload_fastq_files:
            self.fastq_submission(json_data)
        stderr.print("Preparing xml files for submission...")
        self.xml_submission(json_data, schemas_dataframe, batch_index)
        return

    def upload(self):
        """Handle the data and upload it to ENA, in batches when it is large."""
        # NOTE(review): the threshold used here (50) differs from the batch
        # size of 20 used by large_json_upload -- confirm which one is right.
        if len(self.json_data) <= 50:
            self.standard_upload(self.json_data)
        else:
            stderr.print("[yellow]Json is too large to be submitted. Splitting it...")
            self.large_json_upload(self.json_data)
        stderr.print("[green] Finished execution")
        return
def read_json_file(j_file):
    """Read a json file and return its decoded content.

    The file is read as UTF-8, the encoding used by write_json_fo_file.

    Args:
        j_file (str): path to the json file.

    Returns:
        The decoded json data (usually a dict or a list).

    Raises:
        FileNotFoundError: if the file does not exist.
        ValueError: if the content is not valid json (json.JSONDecodeError
            and UnicodeDecodeError are both subclasses of ValueError).
    """
    with open(j_file, "r", encoding="utf-8") as fh:
        return json.load(fh)
def read_csv_file_return_dict(file_name, sep, key_position=None):
    """Read a csv or tsv file and return its rows as a dictionary.

    Args:
        file_name (str): path to the file; the first line is the heading.
        sep (str): field separator.
        key_position (int, optional): index of the column used as key of
            each row. The first column is used when None.

    Returns:
        dict: {row key: {column name: value}} where the key column itself is
        left out of the row values, or {"ERROR": "not valid format"} when the
        file is empty or the heading has a single column.

    Raises:
        FileNotFoundError: if the file does not exist.
    """
    with open(file_name, "r") as fh:
        lines = fh.readlines()
    # An empty file has no heading to work with
    if not lines:
        return {"ERROR": "not valid format"}
    heading = lines[0].strip().split(sep)
    if len(heading) == 1:
        return {"ERROR": "not valid format"}
    key_idx = 0 if key_position is None else key_position
    file_data = {}
    for line in lines[1:]:
        stripped = line.strip()
        # Blank lines (typically at the end of the file) carry no data
        if not stripped:
            continue
        line_s = stripped.split(sep)
        file_data[line_s[key_idx]] = {
            heading[idx]: line_s[idx]
            for idx in range(len(heading))
            if idx != key_idx
        }

    return file_data
def calculate_md5(file_name):
    """Calculate the md5 value for the file name.

    The file is read in chunks, so large files are not loaded in memory at
    once, and it is always closed afterwards.

    Args:
        file_name (str): path to the file.

    Returns:
        str: hexadecimal md5 digest of the file content.
    """
    md5_hash = hashlib.md5()
    with open(file_name, "rb") as fh:
        # iter() with a sentinel keeps reading until read() returns b""
        for chunk in iter(lambda: fh.read(1024 * 1024), b""):
            md5_hash.update(chunk)
    return md5_hash.hexdigest()
def compress_file(file):
    """Compress a given file with gzip, adding .gz extension afterwards.

    The original file is kept. Data is copied in fixed-size blocks instead
    of "line by line", which for binary content without newline bytes would
    load the whole file at once.

    Args:
        file (str): path to the given file

    Returns:
        bool: True if the file was compressed, False if it does not exist.
    """
    try:
        with open(file, "rb") as raw, gzip.open(f"{file}.gz", "wb") as comp:
            shutil.copyfileobj(raw, comp)
        return True
    except FileNotFoundError:
        return False
def get_file_date(file_path):
    """Return the last modification date of a file as "YYYY/MM/DD".

    Args:
        file_path (str): path to the file.

    Returns:
        str: formatted modification date, or None (after printing a message)
        when the file does not exist.
    """
    try:
        modified_on = os.path.getmtime(file_path)
    except FileNotFoundError:
        # Report the missing file and let the caller deal with the None
        print(f"File not found: {file_path}")
        return None
    # Timestamp -> local datetime -> formatted text
    return datetime.fromtimestamp(modified_on).strftime("%Y/%m/%d")
def print_log_report(
    log_report, categories=None, sections=("warning", "valid", "error")
):
    """Print the selected sections of a log report as a coloured table.

    Args:
        log_report (dict): {section name: {category: [messages]}}.
        categories (optional): container of categories to include; all of
            them are printed when None.
        sections (optional): container of section names to include. The
            default is an immutable tuple so it cannot be altered between
            calls.
    """
    color_codes = {
        "error": "\033[91m",  # Bright red
        "warning": "\033[93m",  # Bright yellow
        "valid": "\033[92m",  # Bright green
        "reset": "\033[0m",  # Reset color
    }
    reset = color_codes["reset"]
    table_data = []
    for section_name, section_data in log_report.items():
        if section_name not in sections:
            continue
        # A requested section without a colour is printed uncoloured instead
        # of failing with KeyError
        color = color_codes.get(section_name, "")
        for category, items in section_data.items():
            if categories is not None and category not in categories:
                continue
            colored_category = f"{color}{category}{reset}"
            for item in items:
                table_data.append(
                    [section_name, colored_category, f"{color}{item}{reset}"]
                )
    print(
        tabulate(
            table_data,
            headers=["Log type", "Category", "Message"],
            tablefmt="fancy_grid",
        )
    )
folder_name: + default_folder = prompt_yn_question( + "Do you want to use the default directory ('results') to store the results? (yes/no):" + ) + if not default_folder: + folder_name = prompt_text("Write your output directory: ") + else: + folder_name = "results" + + # Prevent duplicate folder names in the path + if os.path.basename(path) == folder_name: + global_path = path + else: + global_path = os.path.join(path, folder_name) + + if not os.path.exists(global_path): + create_folder = prompt_yn_question( + f"The directory does not exist. Do you want to create '{folder_name}' folder in this path? (yes/no):" + ) + if create_folder: + os.makedirs(global_path) + stderr.print(f"[green]Folder '{folder_name}' created at {path}") + else: + stderr.print("[red]Directory creation aborted.") + sys.exit(1) + elif os.path.isdir(global_path): + os.makedirs(global_path, exist_ok=True) + stderr.print(f"[green]Defining '{folder_name}' as output folder") + else: + stderr.print("[red]The provided path is not a directory.") + sys.exit(1) + + return global_path diff --git a/relecov_tools/xml_files/samples/submission_add.xml b/relecov_tools/xml_files/samples/submission_add.xml new file mode 100644 index 00000000..8edfc2ad --- /dev/null +++ b/relecov_tools/xml_files/samples/submission_add.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/relecov_tools/xml_files/samples/submission_modify.xml b/relecov_tools/xml_files/samples/submission_modify.xml new file mode 100644 index 00000000..d9bc1a3a --- /dev/null +++ b/relecov_tools/xml_files/samples/submission_modify.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/relecov_tools/xml_files/study/project.xml b/relecov_tools/xml_files/study/project.xml new file mode 100644 index 00000000..5e244c5a --- /dev/null +++ b/relecov_tools/xml_files/study/project.xml @@ -0,0 +1,9 @@ + + + Example project for ENA submission workshop + This study was created as part of an ENA submissions workshop + + + + + diff --git 
a/relecov_tools/xml_files/study/receipt.xml b/relecov_tools/xml_files/study/receipt.xml new file mode 100644 index 00000000..06d1e5ba --- /dev/null +++ b/relecov_tools/xml_files/study/receipt.xml @@ -0,0 +1,12 @@ +curl -u Webin-60330:W2b3nENA%2021 -F "SUBMISSION=@submission.xml" -F "PROJECT=@project.xml" "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/" + + + + + + + In project, alias:"workshop_project", accession:"". The object being added already exists in the submission account with accession: "PRJEB51674". + This submission is a TEST submission and will be discarded within 24 hours + + ADD + diff --git a/relecov_tools/xml_files/study/submission.xml b/relecov_tools/xml_files/study/submission.xml new file mode 100644 index 00000000..8edfc2ad --- /dev/null +++ b/relecov_tools/xml_files/study/submission.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..adef8f84 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,16 @@ +click +questionary +jsonschema +packaging +prompt_toolkit>=3.0.3 +rich>=10.0.0 +requests==2.27.1 +paramiko>=2.10.1 +pyyaml==6.0.1 +openpyxl>=3.1.2 +ena-upload-cli +bio==1.4.0 +xlsxwriter==3.2.0 +bs4==0.0.2 +tabulate +pandas diff --git a/schema/gisaid_V0.json b/schema/gisaid_V0.json deleted file mode 100644 index 7c01894b..00000000 --- a/schema/gisaid_V0.json +++ /dev/null @@ -1,166 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft/2022-01/gisaid_schema#", - "required": [ - "type", - "virus_name", - "submitter", - "originating_lab", - "address", - "collection_date", - "location", - "host", - "patient_age", - "gender", - "sequencing_technology", - "fasta_filename", - ], - "type": "object", - "properties": { - "type": { - "examples": ["betacoronavirus"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "default must remain betacoronavirus", - "clasification":"Database Identifiers", - }, - "virus_name": { - "examples": ["e.g. 
hCoV-19/Netherlands/Gelderland-01/2020 (Must be FASTA-Header from the FASTA file all_sequences.fasta)"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The user-defined GISAID virus name assigned to the sequence.", - "clasification":"Database Identifiers", - }, - "submitter": { - "examples": ["Public Health Agency of Canada"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "enter your GISAID-Username", - "clasification":"Database Identifiers", - }, - "originating_lab": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "Where the clinical specimen or virus isolate was first obtained", - "clasification":"Sample collection and processing", - }, - "address": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The mailing address of the agency submitting the sample.", - "clasification":"Sample collection and processing", - }, - "collection_date": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "Date in the format YYYY or YYYY-MM or YYYY-MM-DD. Caution: collection date may be considered public health identifiable information. If this date is considered identifiable, it is acceptable to add 'jitter' to the collection date by adding or subtracting a calendar day (acceptable by GISAID). Do not change the collection date in your original records. Alternatively, ”received date” may be used as a substitute in the data you share. The date should be provided in ISO 8601 standard format YYYY-MM-DD.", - "format":"date", - "clasification":"Sample collection and processing", - }, - "location": { - "examples": ["e.g. Europe / Germany / Bavaria / Munich"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The country of origin of the sample.", - "clasification":"Sample collection and processing", - }, - "host": { - "examples": ["e.g. 
Human, Environment, Canine, Manis javanica, Rhinolophus affinis, etc"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The taxonomic, or scientific name of the host.", - "clasification":"Host information", - }, - "patient_age": { - "Enums": [ - "0 - 9 [GENEPIO:0100049]", - "10 - 19 [GENEPIO:0100050]", - "20 - 29 [GENEPIO:0100051]", - "30 - 39 [GENEPIO:0100052]", - "40 - 49 [GENEPIO:0100053]", - "50 - 59 [GENEPIO:0100054]", - "60 - 69 [GENEPIO:0100055]", - "70 - 79 [GENEPIO:0100056]", - "80 - 89 [GENEPIO:0100057]", - "90 - 99 [GENEPIO:0100058]", - "100+ [GENEPIO:0100059]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "examples": ["50 - 59 [GENEPIO:0100054]"], - "ontology": "GENEPIO:0001394", - "type": "string", - "description": "The age category of the host at the time of sampling.", - "clasification":"Host information", - }, - "gender": { - "Enums": [ - "Female [NCIT:C46110]", - "Male [NCIT:C46109]", - "Non-binary Gender [GSSO:000132]", - "Transgender (assigned male at birth) [GSSO:004004]", - "Transgender (assigned female at birth) [GSSO:004005]", - "Undeclared [NCIT:C110959]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "examples": ["Male [NCIT:C46109]"], - "ontology": "GENEPIO:0001395", - "type": "string", - "description": "The gender of the host at the time of sample collection.", - "clasification":"Host information", - }, - "sequencing_technology": { - "examples": ["e.g. Illumina Miseq, Sanger, Nanopore MinION, Ion Torrent, etc."], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The model of the sequencing instrument used.", - "clasification":"Sequencing", - }, - "fasta_filename": { - "examples": ["e.g. 
all_sequences.fasta not c:\users\meier\docs\all_sequences.fasta"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "Filename that contains the sequence without path", - "clasification":"Bioinformatics and QC metrics", - }, - "additional_host_information": { - "examples": ["e.g. Patient infected while traveling in …."], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "If the information is unknown or can not be shared, leave blank.", - "clasification":"Host information", - }, - "Last vaccinated": { - "examples": ["e.g. 04/09/2021"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The date the host received their last dose of vaccine.", - "format":"date", - "clasification":"Host information", - }, - "outbreak": { - "examples": ["Date, Location e.g. type of gathering, Family cluster, etc."], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "If the information is unknown or can not be shared, leave blank.", - "clasification":"Host information", - }, - "Sampling Strategy": { - "examples": ["e.g. 
Sentinel surveillance (ILI), Sentinel surveillance (ARI), Sentinel surveillance (SARI), Non-sentinel-surveillance (hospital), Non-sentinel-surveillance (GP network), Longitudinal sampling on same patient(s), S gene dropout"], - "ontology": "GENEPIO:", - "type": "string", - "description": "The reason that the sample was sequenced.", - "clasification":"Sequencing", - }, - } - -} diff --git a/schema/phage_plus_V0.json b/schema/phage_plus_V0.json deleted file mode 100644 index f81543d2..00000000 --- a/schema/phage_plus_V0.json +++ /dev/null @@ -1,2123 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft/2019-09/schema#", - "required": [ - "sample_name", - "collecting_institution", - "submitting_institution", - "sample_collection_date", - "geo_loc_country", - "geo_loc_state", - "organism", - "isolate", - "host_scientific_name", - "host_disease", - "sequencing_instrument_model", - "sequencing_instrument_platform", - "consensus_sequence_software_name", - "consensus_sequence_software_version" - ], - "type": "object", - "properties": { - "sample_name": { - "examples": ["prov_rona_99"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The user-defined name for the sample.", - "clasification":"Database Identifiers", - }, - "collecting_institution": { - "examples": ["Public Health Agency of Canada"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The name of the agency that collected the original sample.", - }, - "submitting_institution": { - "examples": ["Centers for Disease Control and Prevention"], - "ontology": "GENEPIO:0001159", - "type": "string", - "description": "The name of the agency that generated the sequence.", - }, - "sample_collection_date": { - "examples": ["3/19/2020"], - "ontology": "GENEPIO:0001174", - "type": "string", - "description": "The date on which the sample was collected.", - "format":"date" - }, - "geo_loc_name_country": { - "Enums": [ - "Afghanistan [GAZ:00006882]", - "Albania [GAZ:00002953]", - 
"Algeria [GAZ:00000563]", - "American Samoa [GAZ:00003957]", - "Andorra [GAZ:00002948]", - "Angola [GAZ:00001095]", - "Anguilla [GAZ:00009159]", - "Antarctica [GAZ:00000462]", - "Antigua and Barbuda [GAZ:00006883]", - "Argentina [GAZ:00002928]", - "Armenia [GAZ:00004094]", - "Aruba [GAZ:00004025]", - "Ashmore and Cartier Islands [GAZ:00005901]", - "Australia [GAZ:00000463]", - "Austria [GAZ:00002942]", - "Azerbaijan [GAZ:00004941]", - "Bahamas [GAZ:00002733]", - "Bahrain [GAZ:00005281]", - "Baker Island [GAZ:00007117]", - "Bangladesh [GAZ:00003750]", - "Barbados [GAZ:00001251]", - "Bassas da India [GAZ:00005810]", - "Belarus [GAZ:00006886]", - "Belgium [GAZ:00002938]", - "Belize [GAZ:00002934]", - "Benin [GAZ:00000904]", - "Bermuda [GAZ:00001264]", - "Bhutan [GAZ:00003920]", - "Bolivia [GAZ:00002511]", - "Borneo [GAZ:00025355]", - "Bosnia and Herzegovina [GAZ:00006887]", - "Botswana [GAZ:00001097]", - "Bouvet Island [GAZ:00001453]", - "Brazil [GAZ:00002828]", - "British Virgin Islands [GAZ:00003961]", - "Brunei [GAZ:00003901]", - "Bulgaria [GAZ:00002950]", - "Burkina Faso [GAZ:00000905]", - "Burundi [GAZ:00001090]", - "Cambodia [GAZ:00006888]", - "Cameroon [GAZ:00001093]", - "Canada [GAZ:00002560]", - "Cape Verde [GAZ:00001227]", - "Cayman Islands [GAZ:00003986]", - "Central African Republic [GAZ:00001089]", - "Chad [GAZ:00000586]", - "Chile [GAZ:00002825]", - "China [GAZ:00002845]", - "Christmas Island [GAZ:00005915]", - "Clipperton Island [GAZ:00005838]", - "Cocos Islands [GAZ:00009721]", - "Colombia [GAZ:00002929]", - "Comoros [GAZ:00005820]", - "Cook Islands [GAZ:00053798]", - "Coral Sea Islands [GAZ:00005917]", - "Costa Rica [GAZ:00002901]", - "Cote d'Ivoire [GAZ:00000906]", - "Croatia [GAZ:00002719]", - "Cuba [GAZ:00003762]", - "Curacao [GAZ:00012582]", - "Cyprus [GAZ:00004006]", - "Czech Republic [GAZ:00002954]", - "Democratic Republic of the Congo [GAZ:00001086]", - "Denmark [GAZ:00005852]", - "Djibouti [GAZ:00000582]", - "Dominica [GAZ:00006890]", - 
"Dominican Republic [GAZ:00003952]", - "Ecuador [GAZ:00002912]", - "Egypt [GAZ:00003934]", - "El Salvador [GAZ:00002935]", - "Equatorial Guinea [GAZ:00001091]", - "Eritrea [GAZ:00000581]", - "Estonia [GAZ:00002959]", - "Eswatini [GAZ:00001099]", - "Ethiopia [GAZ:00000567]", - "Europa Island [GAZ:00005811]", - "Falkland Islands (Islas Malvinas) [GAZ:00001412]", - "Faroe Islands [GAZ:00059206]", - "Fiji [GAZ:00006891]", - "Finland [GAZ:00002937]", - "France [GAZ:00003940]", - "French Guiana [GAZ:00002516]", - "French Polynesia [GAZ:00002918]", - "French Southern and Antarctic Lands [GAZ:00003753]", - "Gabon [GAZ:00001092]", - "Gambia [GAZ:00000907]", - "Gaza Strip [GAZ:00009571]", - "Georgia [GAZ:00004942]", - "Germany [GAZ:00002646]", - "Ghana [GAZ:00000908]", - "Gibraltar [GAZ:00003987]", - "Glorioso Islands [GAZ:00005808]", - "Greece [GAZ:00002945]", - "Greenland [GAZ:00001507]", - "Grenada [GAZ:02000573]", - "Guadeloupe [GAZ:00067142]", - "Guam [GAZ:00003706]", - "Guatemala [GAZ:00002936]", - "Guernsey [GAZ:00001550]", - "Guinea [GAZ:00000909]", - "Guinea-Bissau [GAZ:00000910]", - "Guyana [GAZ:00002522]", - "Haiti [GAZ:00003953]", - "Heard Island and McDonald Islands [GAZ:00009718]", - "Honduras [GAZ:00002894]", - "Hong Kong [GAZ:00003203]", - "Howland Island [GAZ:00007120]", - "Hungary [GAZ:00002952]", - "Iceland [GAZ:00000843]", - "India [GAZ:00002839]", - "Indonesia [GAZ:00003727]", - "Iran [GAZ:00004474]", - "Iraq [GAZ:00004483]", - "Ireland [GAZ:00002943]", - "Isle of Man [GAZ:00052477]", - "Israel [GAZ:00002476]", - "Italy [GAZ:00002650]", - "Jamaica [GAZ:00003781]", - "Jan Mayen [GAZ:00005853]", - "Japan [GAZ:00002747]", - "Jarvis Island [GAZ:00007118]", - "Jersey [GAZ:00001551]", - "Johnston Atoll [GAZ:00007114]", - "Jordan [GAZ:00002473]", - "Juan de Nova Island [GAZ:00005809]", - "Kazakhstan [GAZ:00004999]", - "Kenya [GAZ:00001101]", - "Kerguelen Archipelago [GAZ:00005682]", - "Kingman Reef [GAZ:00007116]", - "Kiribati [GAZ:00006894]", - "Kosovo 
[GAZ:00011337]", - "Kuwait [GAZ:00005285]", - "Kyrgyzstan [GAZ:00006893]", - "Laos [GAZ:00006889]", - "Latvia [GAZ:00002958]", - "Lebanon [GAZ:00002478]", - "Lesotho [GAZ:00001098]", - "Liberia [GAZ:00000911]", - "Libya [GAZ:00000566]", - "Liechtenstein [GAZ:00003858]", - "Line Islands [GAZ:00007144]", - "Lithuania [GAZ:00002960]", - "Luxembourg [GAZ:00002947]", - "Macau [GAZ:00003202]", - "Madagascar [GAZ:00001108]", - "Malawi [GAZ:00001105]", - "Malaysia [GAZ:00003902]", - "Maldives [GAZ:00006924]", - "Mali [GAZ:00000584]", - "Malta [GAZ:00004017]", - "Marshall Islands [GAZ:00007161]", - "Martinique [GAZ:00067143]", - "Mauritania [GAZ:00000583]", - "Mauritius [GAZ:00003745]", - "Mayotte [GAZ:00003943]", - "Mexico [GAZ:00002852]", - "Micronesia [GAZ:00005862]", - "Midway Islands [GAZ:00007112]", - "Moldova [GAZ:00003897]", - "Monaco [GAZ:00003857]", - "Mongolia [GAZ:00008744]", - "Montenegro [GAZ:00006898]", - "Montserrat [GAZ:00003988]", - "Morocco [GAZ:00000565]", - "Mozambique [GAZ:00001100]", - "Myanmar [GAZ:00006899]", - "Namibia [GAZ:00001096]", - "Nauru [GAZ:00006900]", - "Navassa Island [GAZ:00007119]", - "Nepal [GAZ:00004399]", - "Netherlands [GAZ:00002946]", - "New Caledonia [GAZ:00005206]", - "New Zealand [GAZ:00000469]", - "Nicaragua [GAZ:00002978]", - "Niger [GAZ:00000585]", - "Nigeria [GAZ:00000912]", - "Niue [GAZ:00006902]", - "Norfolk Island [GAZ:00005908]", - "North Korea [GAZ:00002801]", - "North Macedonia [GAZ:00006895]", - "North Sea [GAZ:00002284]", - "Northern Mariana Islands [GAZ:00003958]", - "Norway [GAZ:00002699]", - "Oman [GAZ:00005283]", - "Pakistan [GAZ:00005246]", - "Palau [GAZ:00006905]", - "Panama [GAZ:00002892]", - "Papua New Guinea [GAZ:00003922]", - "Paracel Islands [GAZ:00010832]", - "Paraguay [GAZ:00002933]", - "Peru [GAZ:00002932]", - "Philippines [GAZ:00004525]", - "Pitcairn Islands [GAZ:00005867]", - "Poland [GAZ:00002939]", - "Portugal [GAZ:00004126]", - "Puerto Rico [GAZ:00006935]", - "Qatar [GAZ:00005286]", - "Republic of 
the Congo [GAZ:00001088]", - "Reunion [GAZ:00003945]", - "Romania [GAZ:00002951]", - "Ross Sea [GAZ:00023304]", - "Russia [GAZ:00002721]", - "Rwanda [GAZ:00001087]", - "Saint Helena [GAZ:00000849]", - "Saint Kitts and Nevis [GAZ:00006906]", - "Saint Lucia [GAZ:00006909]", - "Saint Pierre and Miquelon [GAZ:00003942]", - "Saint Martin [GAZ:00005841]", - "Saint Vincent and the Grenadines [GAZ:02000565]", - "Samoa [GAZ:00006910]", - "San Marino [GAZ:00003102]", - "Sao Tome and Principe [GAZ:00006927]", - "Saudi Arabia [GAZ:00005279]", - "Senegal [GAZ:00000913]", - "Serbia [GAZ:00002957]", - "Seychelles [GAZ:00006922]", - "Sierra Leone [GAZ:00000914]", - "Singapore [GAZ:00003923]", - "Sint Maarten [GAZ:00012579]", - "Slovakia [GAZ:00002956]", - "Slovenia [GAZ:00002955]", - "Solomon Islands [GAZ:00005275]", - "Somalia [GAZ:00001104]", - "South Africa [GAZ:00001094]", - "South Georgia and the South Sandwich Islands [GAZ:00003990]", - "South Korea [GAZ:00002802]", - "South Sudan [GAZ:00233439]", - "Spain [GAZ:00003936]", - "Spratly Islands [GAZ:00010831]", - "Sri Lanka [GAZ:00003924]", - "State of Palestine [GAZ:00002475]", - "Sudan [GAZ:00000560]", - "Suriname [GAZ:00002525]", - "Svalbard [GAZ:00005396]", - "Swaziland [GAZ:00001099]", - "Sweden [GAZ:00002729]", - "Switzerland [GAZ:00002941]", - "Syria [GAZ:00002474]", - "Taiwan [GAZ:00005341]", - "Tajikistan [GAZ:00006912]", - "Tanzania [GAZ:00001103]", - "Thailand [GAZ:00003744]", - "Timor-Leste [GAZ:00006913]", - "Togo [GAZ:00000915]", - "Tokelau [GAZ:00260188]", - "Tonga [GAZ:00006916]", - "Trinidad and Tobago [GAZ:00003767]", - "Tromelin Island [GAZ:00005812]", - "Tunisia [GAZ:00000562]", - "Turkey [GAZ:00000558]", - "Turkmenistan [GAZ:00005018]", - "Turks and Caicos Islands [GAZ:00003955]", - "Tuvalu [GAZ:00009715]", - "USA [GAZ:00002459]", - "Uganda [GAZ:00001102]", - "Ukraine [GAZ:00002724]", - "United Arab Emirates [GAZ:00005282]", - "United Kingdom [GAZ:00002637]", - "Uruguay [GAZ:00002930]", - "Uzbekistan 
[GAZ:00004979]", - "Vanuatu [GAZ:00006918]", - "Venezuela [GAZ:00002931]", - "Viet Nam [GAZ:00003756]", - "Virgin Islands [GAZ:00003959]", - "Wake Island [GAZ:00007111]", - "Wallis and Futuna [GAZ:00007191]", - "West Bank [GAZ:00009572]", - "Western Sahara [GAZ:00000564]", - "Yemen [GAZ:00005284]", - "Zambia [GAZ:00001107]", - "Zimbabwe [GAZ:00001106]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001181", - "type": "string", - "description": "The country of origin of the sample.", - "examples": ["South Africa [GAZ:00001094]"] - }, - "geo_loc_state": { - "examples": ["Western Cape"], - "ontology": "GENEPIO:0001185", - "type": "string", - "description": "The state/province/territory of origin of the sample.", - }, - "organism": { - "Enums": [ - "Coronaviridae [NCBITaxon:11118]", - "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001191", - "type": "string", - "description": "Taxonomic name of the organism.", - "examples": [ - "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]" - ] - }, - "isolate": { - "examples": ["SARS-CoV-2/human/USA/CA-CDPH-001/2020"], - "ontology": "GENEPIO:0001644", - "type": "string", - "description": "Identifier of the specific isolate.", - "clasification":"Sample collection and processing", - }, - "host_scientific_name": { - "Enums": [ - "Bos taurus [NCBITaxon:9913]", - "Canis lupus familiaris [NCBITaxon:9615]", - "Chiroptera [NCBITaxon:9397]", - "Columbidae [NCBITaxon:8930]", - "Felis catus [NCBITaxon:9685]", - "Gallus gallus [NCBITaxon:9031]", - "Homo sapiens [NCBITaxon:9606]", - "Manis [NCBITaxon:9973]", - "Manis javanica 
[NCBITaxon:9974]", - "Neovison vison [NCBITaxon:452646]", - "Panthera leo [NCBITaxon:9689]", - "Panthera tigris [NCBITaxon:9694]", - "Rhinolophidae [NCBITaxon:58055]", - "Rhinolophus affinis [NCBITaxon:59477]", - "Sus scrofa domesticus [NCBITaxon:9825]", - "Viverridae [NCBITaxon:9673]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001387", - "type": "string", - "description": "The taxonomic, or scientific name of the host.", - "examples": ["Homo sapiens [NCBITaxon:9606]"] - }, - "host_disease": { - "Enums": [ - "COVID-19 [MONDO:0100096]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001391", - "type": "string", - "description": "The name of the disease experienced by the host.", - "examples": ["COVID-19 [MONDO:0100096]"] - }, - "sequencing_instrument_model": { - "Enums": [ - "Illumina sequencing instrument [GENEPIO:0100105]", - "Illumina Genome Analyzer [GENEPIO:0100106]", - "Illumina Genome Analyzer II [GENEPIO:0100107]", - "Illumina Genome Analyzer IIx [GENEPIO:0100108]", - "Illumina HiScanSQ [GENEPIO:0100109]", - "Illumina HiSeq [GENEPIO:0100110]", - "Illumina HiSeq X [GENEPIO:0100111]", - "Illumina HiSeq X Five [GENEPIO:0100112]", - "Illumina HiSeq X Ten [GENEPIO:0100113]", - "Illumina HiSeq 1000 [GENEPIO:0100114]", - "Illumina HiSeq 1500 [GENEPIO:0100115]", - "Illumina HiSeq 2000 [GENEPIO:0100116]", - "Illumina HiSeq 2500 [GENEPIO:0100117]", - "Illumina HiSeq 3000 [GENEPIO:0100118]", - "Illumina HiSeq 4000 [GENEPIO:0100119]", - "Illumina iSeq [GENEPIO:0100120]", - "Illumina iSeq 100 [GENEPIO:0100121]", - "Illumina NovaSeq [GENEPIO:0100122]", - "Illumina NovaSeq 6000 [GENEPIO:0100123]", - "Illumina MiniSeq [GENEPIO:0100124]", - 
"Illumina MiSeq [GENEPIO:0100125]", - "Illumina NextSeq [GENEPIO:0100126]", - "Illumina NextSeq 500 [GENEPIO:0100127]", - "Illumina NextSeq 550 [GENEPIO:0100128]", - "Illumina NextSeq 2000 [GENEPIO:0100129]", - "Pacific Biosciences sequencing instrument [GENEPIO:0100130]", - "PacBio RS [GENEPIO:0100131]", - "PacBio RS II [GENEPIO:0100132]", - "PacBio Sequel [GENEPIO:0100133]", - "PacBio Sequel II [GENEPIO:0100134]", - "Ion Torrent sequencing instrument [GENEPIO:0100135]", - "Ion Torrent PGM [GENEPIO:0100136]", - "Ion Torrent Proton [GENEPIO:0100137]", - "Ion Torrent S5 XL [GENEPIO:0100138]", - "Ion Torrent S5 [GENEPIO:0100139]", - "Oxford Nanopore sequencing instrument [GENEPIO:0100140]", - "Oxford Nanopore GridION [GENEPIO:0100141]", - "Oxford Nanopore MinION [GENEPIO:0100142]", - "Oxford Nanopore PromethION [GENEPIO:0100143]", - "BGI Genomics sequencing instrument [GENEPIO:0100144]", - "BGI SEQ-500 [GENEPIO:0100145]", - "MGI sequencing instrument [GENEPIO:0100146]", - "MGI DNBSEQ-T7 [GENEPIO:0100147]", - "MGI DNBSEQ-G400 [GENEPIO:0100148]", - "MGI DNBSEQ-G400RS FAST [GENEPIO:0100149]", - "MGI DNBSEQ-G50 [GENEPIO:0100150]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001452", - "type": "string", - "description": "The model of the sequencing instrument used.", - "examples": ["Oxford Nanopore MinION [GENEPIO:0100142]"] - }, - "sequencing_instrument_platform": { - "examples": ["MinIon"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The model of the sequencing instrument used.", - }, - "consensus_sequence_software_name": { - "examples": ["Ivar"], - "ontology": "GENEPIO:0001463", - "type": "string", - "description": "The name of software used to generate the consensus sequence.", - }, - "consensus_sequence_software_version": { - "examples": ["1.3"], - "ontology": 
"GENEPIO:0001469", - "type": "string", - "description": "The version of the software used to generate the consensus sequence.", - }, - "submitting_lab_sequence_id": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "Sample ID given by the submitting laboratory", - }, - "bioproject_umbrella_accession_ENA": { - "examples": ["PRJNA623807"], - "ontology": "GENEPIO:0001133", - "type": "string", - "description": "The INSDC umbrella accession number of the BioProject to which the BioSample belongs.", - "clasification":"Database Identifiers", - }, - "bioproject_accession_ENA": { - "examples": ["PRJNA12345"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The INSDC accession number of the BioProject(s) to which the BioSample belongs.", - "clasification":"Database Identifiers", - }, - "biosample_accession_ENA": { - "examples": ["SAMN14180202"], - "ontology": "GENEPIO:0001139", - "type": "string", - "description": "The identifier assigned to a BioSample in INSDC archives.", - "clasification":"Database Identifiers", - }, - "sra_accession": { - "examples": ["SRR11177792"], - "ontology": "GENEPIO:0001142", - "type": "string", - "description": "The Sequence Read Archive (SRA), European Nucleotide Archive (ENA) or DDBJ Sequence Read Archive (DRA) identifier linking raw read data, methodological metadata and quality control metrics submitted to the INSDC.", - "clasification":"Database Identifiers", - }, - "genBank/ENA/DDBJ_accession": { - "examples": ["MN908947.3"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The GenBank/ENA/DDBJ identifier assigned to the sequence in the INSDC archives.", - "clasification":"Database Identifiers", - }, - "gisaid_accession": { - "examples": ["EPI_ISL_123456"], - "ontology": "GENEPIO:0001147", - "type": "string", - "description": "The GISAID accession number assigned to the sequence.", - "clasification":"Database Identifiers", - }, - "virus_name": { - 
"examples": ["hCoV-19/Canada/prov_rona_99/2020"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The user-defined GISAID virus name assigned to the sequence.", - "clasification":"Database Identifiers", - }, - "collecting_institution_email": { - "examples": ["johnnyblogs@lab.ca"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The email address of the contact responsible for follow-up regarding the sample.", - "clasification":"Sample collection and processing", - }, - "collecting_institution_address": { - "examples": ["655 Lab St, Vancouver, British Columbia, V5N 2A2, Canada"], - "ontology": "GENEPIO:0001158", - "type": "string", - "description": "The mailing address of the agency submitting the sample.", - "clasification":"Sample collection and processing", - }, - "submitting_institution_email": { - "examples": ["RespLab@lab.ca"], - "ontology": "GENEPIO:0001165", - "type": "string", - "description": "The email address of the contact responsible for follow-up regarding the sequence.", - "clasification":"Sample collection and processing", - }, - "submitting_institution_address": { - "examples": ["123 Sunnybrooke St, Toronto, Ontario, M4P 1L6, Canada"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The mailing address of the agency submitting the sequence.", - "clasification":"Sample collection and processing", - }, - "sample_received_date": { - "examples": ["3/21/2020"], - "ontology": "GENEPIO:0001179", - "type": "string", - "description": "The date on which the sample was received.", - "format":"date", - "clasification":"Sample collection and processing", - }, - "shipping_date": { - "examples": ["3/20/2020"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The date on which the sample was sent.", - "format":"date", - "clasification":"Sample collection and processing", - }, - "results_emission_date": { - "examples": ["3/23/2020"], - "ontology": "GENEPIO:0001156", - "type": 
"string", - "description": "The date on which the results were emitted.", - "format":"date", - "clasification":"Sample collection and processing", - }, - "geo_loc_region": { - "examples": ["Derbyshire"], - "ontology": "GENEPIO:0100280", - "type": "string", - "description": "The county/region of origin of the sample.", - "clasification":"Sample collection and processing", - }, - "geo_loc_city": { - "examples": ["Vancouver"], - "ontology": "GENEPIO:0001189", - "type": "string", - "description": "The city of origin of the sample.", - "clasification":"Sample collection and processing", - }, - "geo_loc_latitude": { - "examples": ["38.98 N"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The latitude coordinates of the geographical location of sample collection.", - "clasification":"Sample collection and processing", - }, - "geo_loc_longitude": { - "examples": ["77.11 W"], - "ontology": "OBI:0001621", - "type": "string", - "description": "The longitude coordinates of the geographical location of sample collection.", - "clasification":"Sample collection and processing", - }, - "anatomical_material": { - "Enums": [ - "Blood [UBERON:0000178]", - "Fluid [UBERON:0006314]", - "Fluid (Cerebrospinal (CSF)) [UBERON:0001359]", - "Fluid (Pericardial) [UBERON:0002409]", - "Fluid (Pleural) [UBERON:0001087]", - "Fluid (Vaginal) [UBERON:0036243]", - "Fluid (Amniotic) [UBERON:0000173]", - "Saliva [UBERON:0001836]", - "Tissue [UBERON:0000479]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001211", - "type": "string", - "description": "A substance obtained from an anatomical part of an organism e.g. 
tissue, blood.", - "examples": ["Blood [UBERON:0000178]"] - }, - "anatomical_part": { - "Enums": [ - "Anus [UBERON:0001245]", - "Duodenum [UBERON:0002114]", - "Eye [UBERON:0000970]", - "Intestine [UBERON:0000160]", - "Lower respiratory tract [UBERON:0001558]", - "Bronchus [UBERON:0002185]", - "Lung [UBERON:0002048]", - "Bronchiole [UBERON:0002186]", - "Alveolar sac [UBERON:0002169]", - "Pleural sac [UBERON:0009778]", - "Pleural cavity [UBERON:0002402]", - "Trachea [UBERON:0003126]", - "Rectum [UBERON:0001052]", - "Skin [UBERON:0001003]", - "Stomach [UBERON:0000945]", - "Upper respiratory tract [UBERON:0001557]", - "Anterior Nares [UBERON:2001427]", - "Esophagus [UBERON:0001043]", - "Ethmoid sinus [UBERON:0002453]", - "Nasal Cavity [UBERON:0001707]", - "Middle Nasal Turbinate [UBERON:0005921]", - "Inferior Nasal Turbinate [UBERON:0005922]", - "Nasopharynx (NP) [UBERON:0001728]", - "Oropharynx (OP) [UBERON:0001729]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001214", - "type": "string", - "description": "An anatomical part of an organism e.g. oropharynx. ", - "examples": ["Nasopharynx (NP) [UBERON:0001728]"] - }, - "body_product": { - "Enums": [ - "Breast Milk [UBERON:0001913]", - "Feces [UBERON:0001988]", - "Mucus [UBERON:0000912]", - "Semen [UBERON:0006530]", - "Sputum [UBERON:0007311]", - "Sweat [UBERON:0001089]", - "Tear [UBERON:0001827]", - "Urine [UBERON:0001088]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001216", - "type": "string", - "description": "A substance excreted/secreted from an organism e.g. 
feces, urine, sweat.", - "examples": ["Feces [UBERON:0001988]"] - }, - "environmental_material": { - "Enums": [ - "Air vent [ENVO:03501208]", - "Banknote [ENVO:00003896]", - "Bed rail [ENVO:03501209]", - "Building Floor [ENVO:01000486]", - "Cloth [ENVO:02000058]", - "Control Panel [ENVO:03501210]", - "Door [ENVO:03501220]", - "Door Handle [ENVO:03501211]", - "Face Mask [OBI:0002787]", - "Face Shield [OBI:0002791]", - "Food [FOODON:00002403]", - "Food Packaging [FOODON:03490100]", - "Glass [ENVO:01000481]", - "Handrail [ENVO:03501212]", - "Hospital Gown [OBI:0002796]", - "Light Switch [ENVO:03501213]", - "Locker [ENVO:03501214]", - "N95 Mask [OBI:0002790]", - "Nurse Call Button [ENVO:03501215]", - "Paper [ENVO:03501256]", - "Particulate Matter [ENVO:01000060]", - "Plastic [ENVO:01000404]", - "PPE Gown [GENEPIO:0100025]", - "Sewage [ENVO:00002018]", - "Sink [ENVO:01000990]", - "Soil [ENVO:00001998]", - "Stainless Steel [ENVO:03501216]", - "Tissue Paper [ENVO:03501217]", - "Toilet Bowl [ENVO:03501218]", - "Water [ENVO:00002006]", - "Wastewater [ENVO:00002001]", - "Window [ENVO:03501219]", - "Wood [ENVO:00002040]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001223", - "type": "string", - "description": "A substance obtained from the natural or man-made environment e.g. 
soil, water, sewage, door handle, bed handrail, face mask.", - "examples": ["Face Mask [OBI:0002787]"] - }, - "environmental_site": { - "Enums": [ - "Acute care facility [ENVO:03501135]", - "Animal house [ENVO:00003040]", - "Bathroom [ENVO:01000422]", - "Clinical assessment centre [ENVO:03501136]", - "Conference venue [ENVO:03501127]", - "Corridor [ENVO:03501121]", - "Daycare [ENVO:01000927]", - "Emergency room (ER) [ENVO:03501145]", - "Family practice clinic [ENVO:03501186]", - "Group home [ENVO:03501196]", - "Homeless shelter [ENVO:03501133]", - "Hospital [ENVO:00002173]", - "Intensive Care Unit (ICU) [ENVO:03501152]", - "Long Term Care Facility [ENVO:03501194]", - "Patient room [ENVO:03501180]", - "Prison [ENVO:03501204]", - "Production Facility [ENVO:01000536]", - "School [ENVO:03501130]", - "Sewage Plant [ENVO:00003043]", - "Subway train [ENVO:03501109]", - "Wet market [ENVO:03501198]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001232", - "type": "string", - "description": "An environmental location may describe a site in the natural or built environment e.g. 
hospital, wet market, bat cave.", - "examples": ["Hospital [ENVO:00002173]"] - }, - "collection_device": { - "Enums": [ - "Air filter [ENVO:00003968]", - "Blood Collection Tube [OBI:0002859]", - "Bronchoscope [OBI:0002826]", - "Collection Container [OBI:0002088]", - "Collection Cup [GENEPIO:0100026]", - "Fibrobronchoscope Brush [OBI:0002825]", - "Filter [GENEPIO:0100103]", - "Fine Needle [OBI:0002827]", - "Microcapillary tube [OBI:0002858]", - "Micropipette [OBI:0001128]", - "Needle [OBI:0000436]", - "Serum Collection Tube [OBI:0002860]", - "Sputum Collection Tube [OBI:0002861]", - "Suction Catheter [OBI:0002831]", - "Swab [GENEPIO:0100027]", - "Urine Collection Tube [OBI:0002862]", - "Virus Transport Medium [OBI:0002866]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001234", - "type": "string", - "description": "The instrument or container used to collect the sample e.g. 
swab.", - "examples": ["Swab [GENEPIO:0100027]"] - }, - "collection_method": { - "Enums": [ - "Amniocentesis [NCIT:C52009]", - "Aspiration [NCIT:C15631]", - "Suprapubic Aspiration [GENEPIO:0100028]", - "Tracheal Aspiration [GENEPIO:0100029]", - "Vacuum Aspiration [GENEPIO:0100030]", - "Biopsy [OBI:0002650]", - "Needle Biopsy [OBI:0002651]", - "Filtration [OBI:0302885]", - "Air Filtration [GENEPIO:0100031]", - "Lavage [OBI:0600044]", - "Bronchoalveolar Lavage (BAL) [GENEPIO:0100032]", - "Gastric Lavage [GENEPIO:0100033]", - "Lumbar Puncture [NCIT:C15327]", - "Necropsy [MMO:0000344]", - "Phlebotomy [NCIT:C28221]", - "Rinsing [GENEPIO:0002116]", - "Saline gargle (mouth rinse and gargle) [GENEPIO:0100034]", - "Scraping [GENEPIO:0100035]", - "Swabbing [GENEPIO:0002117]", - "Finger Prick [GENEPIO:0100036]", - "Washout Tear Collection [GENEPIO:0100038]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001241", - "type": "string", - "description": "The process used to collect the sample e.g. 
phlebotomy, necropsy.", - "examples": ["Bronchoalveolar Lavage (BAL) [GENEPIO:0100032]"] - }, - "collection_protocol": { - "examples": ["SC2SamplingProtocol 1.2"], - "ontology": "GENEPIO:0001243", - "type": "string", - "description": "The name and version of a particular protocol used for sampling.", - "clasification":"Sample collection and processing", - }, - "specimen_processing": { - "Enums": [ - "Virus Passage [GENEPIO:0100039]", - "RNA Re-Extraction (Post RT-PCR) [GENEPIO:0100040]", - "Specimens Pooled [OBI:0600016]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001253", - "type": "string", - "description": "Any processing applied to the sample during or after receiving the sample. ", - "examples": ["Virus Passage [GENEPIO:0100039]"] - }, - "lab_host": { - "Enums": [ - "293/ACE2 Cell Line [GENEPIO:0100041]", - "Caco2 Cell Line [BTO:0000195]", - "Calu3 Cell Line [BTO:0002750]", - "EFK3B Cell Line [GENEPIO:0100042]", - "HEK293T Cell Line [BTO:0002181]", - "HRCE Cell Line [GENEPIO:0100043]", - "Huh7 Cell Line [BTO:0001950]", - "LLCMk2 Cell Line [CLO:0007330]", - "MDBK Cell Line [BTO:0000836]", - "NHBE Cell Line [BTO:0002924]", - "PK-15 Cell Line [BTO:0001865]", - "RK-13 Cell Line [BTO:0002909]", - "U251 Cell Line [BTO:0002035]", - "Vero Cell Line [BTO:0001444]", - "Vero E6 Cell Line [BTO:0004755]", - "Vero E6/TMPRSS2 Cell Line [GENEPIO:0100044]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001255", - "type": "string", - "description": "Name and description of the laboratory host used to propagate the source organism or material from which the sample was obtained.", - "examples": ["Vero E6 Cell Line [BTO:0004755]"] - }, - 
"passage_number": { - "examples": ["3"], - "ontology": "GENEPIO:0001261", - "type": "string", - "description": "Number of passages.", - "clasification":"Sample collection and processing", - }, - "passage_method": { - "examples": ["AVL buffer+30%EtOH lysate received from Respiratory Lab. P3 passage in Vero-1 via bioreactor large-scale batch passage. P3 batch derived from the SP-2/reference lab strain."], - "ontology": "GENEPIO:0001264", - "type": "string", - "description": "Description of how organism was passaged.", - "clasification":"Sample collection and processing", - }, - "biomaterial_extracted": { - "Enums": [ - "mRNA (cDNA) [OBI:0002754]", - "RNA (Total) [OBI:0000895]", - "RNA (Poly-A) [OBI:0000869]", - "RNA (Ribo-Depleted) [OBI:0002627]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001266", - "type": "string", - "description": "The biomaterial extracted from samples for the purpose of sequencing.", - "examples": ["RNA (Total) [OBI:0000895]"] - }, - "tax_id": { - "examples": ["probably 2697049 in all cases"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The NCBITaxon identifier for the organism being sequenced.", - "clasification":"Sample collection and processing", - }, - "scientific_name": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The taxonomic name of the organism.", - "clasification":"Sample collection and processing", - }, - "common_name": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The common name of the organism.", - "clasification":"Sample collection and processing", - }, - "center_name": { - "examples": [" KAROLINSKA INSITUTET"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The name of the institution", - "clasification":"Sample 
collection and processing", - }, - - "virus_id": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The user-defined name for the sample.", - "clasification":"Database Identifiers", - }, - "host_common_name": { - "Enums": [ - "Human [NCBITaxon:9606]", - "Bat [NCBITaxon:9397]", - "Cat [NCBITaxon:9685]", - "Chicken [NCBITaxon:9031]", - "Civet [NCBITaxon:9673]", - "Cow [NCBITaxon:9913]", - "Dog [NCBITaxon:9615]", - "Lion [NCBITaxon:9689]", - "Mink [NCBITaxon:452646]", - "Pangolin [NCBITaxon:9973]", - "Pig [NCBITaxon:9825]", - "Pigeon [NCBITaxon:8930]", - "Tiger [NCBITaxon:9694]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001386", - "type": "string", - "description": "The commonly used name of the host.", - "examples": ["Human [NCBITaxon:9606]"] - }, - "outbreak": { - "examples": ["Date, Location e.g. type of gathering, Family cluster, etc."], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "If the information is unknown or can not be shared, leave blank.", - "clasification":"Host information", - }, - "additional_host_information": { - "examples": ["e.g. 
Patient infected while traveling in …."], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "If the information is unknown or can not be shared, leave blank.", - "clasification":"Host information", - }, - "purpose_of_sequencing": { - "Enums": [ - "Baseline surveillance (random sampling) [GENEPIO:0100005]", - "Targeted surveillance (non-random sampling) [GENEPIO:0100006]", - "Priority surveillance projects [GENEPIO:0100007]", - "Screening for Variants of Concern (VOC) [GENEPIO:0100008]", - "Sample has epidemiological link to Variant of Concern (VoC) [GENEPIO:0100273]", - "Sample has epidemiological link to Omicron Variant [GENEPIO:0100274]", - "Longitudinal surveillance (repeat sampling of individuals) [GENEPIO:0100009]", - "Re-infection surveillance [GENEPIO:0100010]", - "Vaccine escape surveillance [GENEPIO:0100011]", - "Travel-associated surveillance [GENEPIO:0100012]", - "Domestic travel surveillance [GENEPIO:0100013]", - "Interstate/ interprovincial travel surveillance [GENEPIO:0100275]", - "Intra-state/ intra-provincial travel surveillance [GENEPIO:0100276]", - "International travel surveillance [GENEPIO:0100014]", - "Surveillance of international border crossing by air travel or ground transport [GENEPIO:0100015]", - "Surveillance of international border crossing by air travel [GENEPIO:0100016]", - "Surveillance of international border crossing by ground transport [GENEPIO:0100017]", - "Surveillance from international worker testing [GENEPIO:0100018]", - "Cluster/Outbreak investigation [GENEPIO:0100019]", - "Multi-jurisdictional outbreak investigation [GENEPIO:0100020]", - "Intra-jurisdictional outbreak investigation [GENEPIO:0100021]", - "Research [GENEPIO:0100022]", - "Viral passage experiment [GENEPIO:0100023]", - "Protocol testing [GENEPIO:0100024]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - 
"ontology": "GENEPIO:0001445", - "type": "string", - "description": "The reason that the sample was sequenced.", - "examples": ["Baseline surveillance (random sampling) [GENEPIO:0100005]"] - }, - "purpose_of_sequencing_details": { - "Enums": [ - "Screened for S gene target failure (S dropout)", - "Screened for mink variants", - "Screened for B.1.1.7 variant", - "Screened for B.1.135 variant", - "Screened for P.1 variant", - "Screened due to travel history", - "Screened due to close contact with infected individual", - "Assessing public health control measures", - "Determining early introductions and spread", - "Investigating airline-related exposures", - "Investigating temporary foreign worker", - "Investigating remote regions", - "Investigating health care workers", - "Investigating schools/universities", - "Investigating reinfection" - ], - "ontology": "GENEPIO:0001446", - "type": "string", - "description": "The description of why the sample was sequenced providing specific details.", - "examples": ["Screened for S gene target failure (S dropout)"] - }, - "sequencing_date": { - "examples": ["4/26/2021"], - "ontology": "GENEPIO:0001447", - "type": "string", - "description": "The date the sample was sequenced.", - "format":"date", - "clasification":"Sequencing", - }, - "rna_extraction_Protocol": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - "library_kit": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - "library_id": { - "examples": ["XYZ_123345"], - "ontology": "GENEPIO:0001448", - "type": "string", - "description": "The user-specified identifier for the library prepared for sequencing.", - "clasification":"Sequencing", - }, - "enrichment_protocol": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - 
"if_enrichment_protocol_is_other_specify": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - "amplicon protocol": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - "if_amplicon_protocol_if_other_especify": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - - "amplicon_version": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - "amplicon_size": { - "examples": ["300bp"], - "ontology": "GENEPIO:0001449", - "type": "string", - "description": "The length of the amplicon generated by PCR amplification.", - "clasification":"Sequencing", - }, - - "was_phix_used?": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - - "number_of_samples_in_run": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - - "flowcell_kit": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - - "runID": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - - "sequencing_platforms": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - - "library_preparation_kit": { - "examples": ["Nextera XT"], - "ontology": "GENEPIO:0001450", - "type": "string", - "description": "The name of the DNA library preparation kit used to generate the library being sequenced.", - "clasification":"Sequencing", - }, - - "flow_cell_barcode": { - "examples": ["FAB06069"], - "ontology": 
"GENEPIO:0001451", - "type": "string", - "description": "The barcode of the flow cell used for sequencing.", - "clasification":"Sequencing", - }, - "sequencing_protocol_name": { - "examples": ["1D_DNA_MinION, ARTIC Network Protocol V3"], - "ontology": "GENEPIO:0001453", - "type": "string", - "description": "The name and version number of the sequencing protocol used.", - "clasification":"Sequencing", - }, - "sequencing_protocol": { - "examples": ["Genomes were generated through amplicon sequencing of 1200 bp amplicons with Freed schema primers. Libraries were created using Illumina DNA Prep kits, and sequence data was produced using Miseq Micro v2 (500 cycles) sequencing kits."], - "ontology": "GENEPIO:0001454", - "type": "string", - "description": "The protocol used to generate the sequence.", - "clasification":"Sequencing", - }, - "sequencing_kit_number": { - "examples": ["AB456XYZ789"], - "ontology": "GENEPIO:0001455", - "type": "string", - "description": "The manufacturer's kit number.", - "clasification":"Sequencing", - }, - "amplicon_pcr_primer_scheme": { - "examples": ["https://github.com/joshquick/artic-ncov2019/blob/master/primer_schemes/nCoV-2019/V3/nCoV-2019.tsv"], - "ontology": "GENEPIO:0001456", - "type": "string", - "description": "The specifications of the primers (primer sequences, binding positions, fragment size generated etc) used to generate the amplicons to be sequenced.", - "clasification":"Sequencing", - }, - "library_source": { - "examples": ["METAGENOMIC"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "Molecule type used to make the library.", - "clasification":"Sequencing", - }, - "library_selection": { - "examples": ["RANDOM PCR"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "Library capture method.", - "clasification":"Sequencing", - }, - "library_strategy": { - "examples": ["WGS"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "Overall sequencing strategy or 
approach.", - "clasification":"Sequencing", - }, - "library_layout": { - "examples": ["PAIRED"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "Single or paired.", - "clasification":"Sequencing", - }, - "library_name": { - "examples": ["P17157_1007"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "Name of the used library", - "clasification":"Sequencing", - }, - "nominal_length ": { - "examples": ["350"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - - "raw_sequence_data_processing_method": { - "examples": ["Porechop 0.2.3"], - "ontology": "GENEPIO:0001458", - "type": "string", - "description": "The method used for raw data processing such as removing barcodes, adapter trimming, filtering etc.", - "clasification":"Bioinformatics and QC metrics", - }, - "dehosting_method": { - "examples": ["Nanostripper"], - "ontology": "GENEPIO:0001459", - "type": "string", - "description": "The method used to remove host reads from the pathogen sequence.", - "clasification":"Bioinformatics and QC metrics", - }, - "assembly": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Bioinformatics and QC metrics", - }, - "if_assembly_other": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Bioinformatics and QC metrics", - }, - "assembly_params": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Bioinformatics and QC metrics", - }, - "variant_Calling": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Bioinformatics and QC metrics", - }, - "if_variant_Calling_other": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Bioinformatics and QC metrics", - }, - 
"variant_Calling_params": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Bioinformatics and QC metrics", - }, - "consensus_sequence_filepath": { - "examples": ["/User/Documents/RespLab/Data/ncov123assembly.fasta"], - "ontology": "GENEPIO:0001462", - "type": "string", - "description": "The filepath of the consesnsus sequence file.", - "clasification":"Bioinformatics and QC metrics", - }, - "consensus_sequence_software_name": { - "examples": ["Ivar"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The name of software used to generate the consensus sequence.", - "clasification":"Bioinformatics and QC metrics", - }, - "if_consensus_other": { - "examples": ["1.3"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The version of the software used to generate the consensus sequence.", - "clasification":"Bioinformatics and QC metrics", - }, - "consensus_sequence_software_version": { - "examples": ["1.3"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The version of the software used to generate the consensus sequence.", - "clasification":"Bioinformatics and QC metrics", - }, - "consensus_criteria": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Bioinformatics and QC metrics", - }, - "quality_control_metrics": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Bioinformatics and QC metrics", - }, - "breadth_of_coverage_value": { - "examples": ["95%"], - "ontology": "GENEPIO:0001472", - "type": "string", - "description": "The percentage of the reference genome covered by the sequenced data, to a prescribed depth.", - "clasification":"Bioinformatics and QC metrics", - }, - "depth_of_coverage_value": { - "examples": ["400x"], - "ontology": "GENEPIO:0001474", - "type": "string", - "description": "The average 
number of reads representing a given nucleotide in the reconstructed sequence.", - "clasification":"Bioinformatics and QC metrics", - }, - "depth_of_coverage_threshold": { - "examples": ["100x"], - "ontology": "GENEPIO:0001475", - "type": "string", - "description": "The threshold used as a cut-off for the depth of coverage.", - "clasification":"Bioinformatics and QC metrics", - }, - "sequence_file_R1_fastq": { - "examples": ["ABC123_S1_L001_R1_001.fastq.gz"], - "ontology": "GENEPIO:0001476", - "type": "string", - "description": "The user-specified filename of the r1 FASTQ file.", - "clasification":"Bioinformatics and QC metrics", - }, - "sequence_file_R2_fastq": { - "examples": ["ABC123_S1_L001_R2_001.fastq.gz"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The user-specified filename of the r2 FASTQ file.", - "clasification":"Bioinformatics and QC metrics", - }, - "r1_fastq_filepath": { - "examples": ["/User/Documents/RespLab/Data/"], - "ontology": "GENEPIO:0001478", - "type": "string", - "description": "The filepath of the r1 FASTQ file.", - "clasification":"Bioinformatics and QC metrics", - }, - "r2_fastq_filepath": { - "examples": ["/User/Documents/RespLab/Data/"], - "ontology": "GENEPIO:0001479", - "type": "string", - "description": "The filepath of the r2 FASTQ file.", - "clasification":"Bioinformatics and QC metrics", - }, - "fast5_filename": { - "examples": ["batch1a_sequences.fast5"], - "ontology": "GENEPIO:0001480", - "type": "string", - "description": "The user-specified filename of the FAST5 file.", - "clasification":"Bioinformatics and QC metrics", - }, - "fast5_filepath": { - "examples": ["/User/Documents/RespLab/Data/"], - "ontology": "GENEPIO:0001481", - "type": "string", - "description": "The filepath of the FAST5 file.", - "clasification":"Bioinformatics and QC metrics", - }, - "number_of_base_pairs_sequenced": { - "examples": ["387566"], - "ontology": "GENEPIO:0001482", - "type": "string", - "description": "The number of 
total base pairs generated by the sequencing process.", - "clasification":"Bioinformatics and QC metrics", - }, - "consensus_genome_length": { - "examples": ["38677"], - "ontology": "GENEPIO:0001483", - "type": "string", - "description": "Size of the assembled genome described as the number of base pairs.", - "clasification":"Bioinformatics and QC metrics", - }, - "ns_per_100_kbp": { - "examples": ["300"], - "ontology": "GENEPIO:0001484", - "type": "string", - "description": "The number of N symbols present in the consensus fasta sequence, per 100kbp of sequence.", - "clasification":"Bioinformatics and QC metrics", - }, - "reference_genome_accession": { - "examples": ["NC_045512.2"], - "ontology": "GENEPIO:0001485", - "type": "string", - "description": "A persistent, unique identifier of a genome database entry.", - "clasification":"Bioinformatics and QC metrics", - }, - "bioinformatics_protocol": { - "examples": ["https://www.protocols.io/groups/cphln-sarscov2-sequencing-consortium/members"], - "ontology": "GENEPIO:0001489", - "type": "string", - "description": "The name of the bioinformatics protocol used.", - "clasification":"Bioinformatics and QC metrics", - }, - "if_bioinformatic_protocol_is_other_specify": { - "examples": ["https://www.protocols.io/groups/cphln-sarscov2-sequencing-consortium/members"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The name of the bioinformatics protocol used.", - "clasification":"Bioinformatics and QC metrics", - }, - "bioinformatic_protocol_version": { - "examples": ["https://www.protocols.io/groups/cphln-sarscov2-sequencing-consortium/members"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The version number of the bioinformatics protocol used.", - "clasification":"Bioinformatics and QC metrics", - }, - "commercial/open-source/both": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Bioinformatics and QC 
metrics", - }, - "preprocessing": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Bioinformatics and QC metrics", - }, - "if_preprocessing_other": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Bioinformatics and QC metrics", - }, - "preprocessing_params": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Bioinformatics and QC metrics", - }, - "mapping": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Lineage and Variant information", - }, - "if_mapping_other": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Lineage and Variant information", - }, - "Mapping_params": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Lineage and Variant information", - }, - "lineage/clade_name": { - "examples": ["B.1.1.7"], - "ontology": "GENEPIO:0001500", - "type": "string", - "description": "The name of the lineage or clade.", - "clasification":"Lineage and Variant information", - }, - "lineage/clade_analysis_software_name": { - "examples": ["Pangolin"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The name of the software used to determine the lineage/clade.", - "clasification":"Lineage and Variant information", - }, - "if_lineage_identification_other": { - "examples": ["Other than Pangolin"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "The name of the software used to determine the lineage/clade.", - "clasification":"Lineage and Variant information", - }, - "lineage/clade_analysis_software_version": { - "examples": ["2.1.10"], - "ontology":"GENEPIO:0001502", - "type": "string", - "description": "The version of the software used to 
determine the lineage/clade.", - "clasification":"Lineage and Variant information", - }, - "variant_designation": { - "Enums": [ - "Variant of Interest (VOI) [GENEPIO:0100082]", - "Variant of Concern (VOC) [GENEPIO:0100083]", - "Variant Under Monitoring (VUM) [GENEPIO:0100279]" - ], - "ontology": "GENEPIO:0001503", - "type": "string", - "description": "The variant classification of the lineage/clade i.e. variant, variant of concern.", - "examples": ["Variant of Concern (VOC) [GENEPIO:0100083]"] - }, - "variant_evidence": { - "examples": ["lineage-defining mutations: ORF1ab (K1655N), Spike (K417N, E484K, N501Y, D614G, A701V), N (T205I), E (P71L)"], - "ontology": "GENEPIO:0001504", - "type": "string", - "description": "The evidence used to make the variant determination.", - "clasification":"Lineage and Variant information", - }, - "gene_name_1": { - "Enums": [ - "E gene (orf4) [GENEPIO:0100151]", - "M gene (orf5) [GENEPIO:0100152]", - "N gene (orf9) [GENEPIO:0100153]", - "Spike gene (orf2) [GENEPIO:0100154]", - "orf1ab (rep) [GENEPIO:0100155]", - "orf1a (pp1a) [GENEPIO:0100156]", - "nsp11 [GENEPIO:0100157]", - "nsp1 [GENEPIO:0100158]", - "nsp2 [GENEPIO:0100159]", - "nsp3 [GENEPIO:0100160]", - "nsp4 [GENEPIO:0100161]", - "nsp5 [GENEPIO:0100162]", - "nsp6 [GENEPIO:0100163]", - "nsp7 [GENEPIO:0100164]", - "nsp8 [GENEPIO:0100165]", - "nsp9 [GENEPIO:0100166]", - "nsp10 [GENEPIO:0100167]", - "RdRp gene (nsp12) [GENEPIO:0100168]", - "hel gene (nsp13) [GENEPIO:0100169]", - "exoN gene (nsp14) [GENEPIO:0100170]", - "nsp15 [GENEPIO:0100171]", - "nsp16 [GENEPIO:0100172]", - "orf3a [GENEPIO:0100173]", - "orf3b [GENEPIO:0100174]", - "orf6 (ns6) [GENEPIO:0100175]", - "orf7a [GENEPIO:0100176]", - "orf7b (ns7b) [GENEPIO:0100177]", - "orf8 (ns8) [GENEPIO:0100178]", - "orf9b [GENEPIO:0100179]", - "orf9c [GENEPIO:0100180]", - "orf10 [GENEPIO:0100181]", - "orf14 [GENEPIO:0100182]", - "SARS-COV-2 5' UTR [GENEPIO:0100183]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected 
[GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing [GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001507", - "type": "string", - "description": "The name of the gene used in the diagnostic RT-PCR test.", - "examples": ["E gene (orf4) [GENEPIO:0100151]"] - }, - "Protocol_SARS-CoV-2_detection": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Pathogen diagnostic testing", - }, - "%qc_filtered": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Pathogen diagnostic testing", - }, - "%reads_host": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Pathogen diagnostic testing", - }, - "%reads_virus": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Pathogen diagnostic testing", - }, - "%unmapped": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Pathogen diagnostic testing", - }, - "% genome _greater_10x": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Pathogen diagnostic testing", - }, - "mean_depth_of_coverage_value": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Pathogen diagnostic testing", - }, - "%Ns": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Pathogen diagnostic testing", - }, - "Number_of_variants_(AF_greater_75%)": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Pathogen diagnostic testing", - }, - "Numer_of_variants_with_effect": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - 
"description": "", - "clasification":"Pathogen diagnostic testing", - }, - "reference_genome_accession": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Pathogen diagnostic testing", - }, - "diagnostic_pcr_protocol_1": { - "examples": ["PCREGene 2.0"], - "ontology": "GENEPIO:0001508", - "type": "string", - "description": "The name and version number of the protocol used for diagnostic marker amplification.", - "clasification":"Pathogen diagnostic testing", - }, - "diagnostic_pcr_Ct_value_1": { - "examples": ["21"], - "ontology": "GENEPIO:0001509", - "type": "string", - "description": "The Ct value result from a diagnostic SARS-CoV-2 RT-PCR test.", - "clasification":"Pathogen diagnostic testing", - }, - "gene_name_2": { - "Enums": [ - "E gene (orf4) [GENEPIO:0100151]", - "M gene (orf5) [GENEPIO:0100152]", - "N gene (orf9) [GENEPIO:0100153]", - "Spike gene (orf2) [GENEPIO:0100154]", - "orf1ab (rep) [GENEPIO:0100155]", - "orf1a (pp1a) [GENEPIO:0100156]", - "nsp11 [GENEPIO:0100157]", - "nsp1 [GENEPIO:0100158]", - "nsp2 [GENEPIO:0100159]", - "nsp3 [GENEPIO:0100160]", - "nsp4 [GENEPIO:0100161]", - "nsp5 [GENEPIO:0100162]", - "nsp6 [GENEPIO:0100163]", - "nsp7 [GENEPIO:0100164]", - "nsp8 [GENEPIO:0100165]", - "nsp9 [GENEPIO:0100166]", - "nsp10 [GENEPIO:0100167]", - "RdRp gene (nsp12) [GENEPIO:0100168]", - "hel gene (nsp13) [GENEPIO:0100169]", - "exoN gene (nsp14) [GENEPIO:0100170]", - "nsp15 [GENEPIO:0100171]", - "nsp16 [GENEPIO:0100172]", - "orf3a [GENEPIO:0100173]", - "orf3b [GENEPIO:0100174]", - "orf6 (ns6) [GENEPIO:0100175]", - "orf7a [GENEPIO:0100176]", - "orf7b (ns7b) [GENEPIO:0100177]", - "orf8 (ns8) [GENEPIO:0100178]", - "orf9b [GENEPIO:0100179]", - "orf9c [GENEPIO:0100180]", - "orf10 [GENEPIO:0100181]", - "orf14 [GENEPIO:0100182]", - "SARS-COV-2 5' UTR [GENEPIO:0100183]", - "Not Applicable [GENEPIO:0001619]", - "Not Collected [GENEPIO:0001620]", - "Not Provided [GENEPIO:0001668]", - "Missing 
[GENEPIO:0001618]", - "Restricted Access [GENEPIO:0001810]" - ], - "ontology": "GENEPIO:0001510", - "type": "string", - "description": "The name of the gene used in the diagnostic RT-PCR test.", - "examples": ["RdRp gene (nsp12) [GENEPIO:0100168]"] - }, - "diagnostic_pcr_protocol_2": { - "examples": ["PCRRdRpGene 3.0"], - "ontology": "GENEPIO:0001511", - "type": "string", - "description": "The name and version number of the protocol used for diagnostic marker amplification.", - "clasification":"Pathogen diagnostic testing", - }, - "diagnostic_pcr_Ct_value_2": { - "examples": ["36"], - "ontology": "GENEPIO:0001512", - "type": "string", - "description": "The cycle threshold (CT) value result from a diagnostic SARS-CoV-2 RT-PCR test.", - "clasification":"Pathogen diagnostic testing", - }, - "analysis_author": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Contributor Acknowledgement", - }, - "author_submitter": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Contributor Acknowledgement", - }, - "submitter": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"enter your GISAID-Username", - }, - "authors": { - "examples": [""], - "ontology": "GENEPIO:0001517", - "type": "string", - "description": "", - "clasification":"Contributor Acknowledgement", - }, - "tax_id": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sample collection and processing", - }, - "scientific_name": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sample collection and processing", - }, - "common_name": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sample collection and processing", - }, - 
"library_source": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - "library_selection": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - "library_strategy": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - "library_layout": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - "library_name": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - "nominal_length": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Sequencing", - }, - "analysis_accession": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA" - }, - "study_accession": { - "examples": ["e.g PRJEB39632"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA" - }, - "secondary_study_accession": { - "examples": ["e.g ERP123173"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA" - }, - "sample_accession": { - "examples": ["e.g SAMEA7098096"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "secondary_sample_accession": { - "examples": ["e.g ERS4858671"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "experiment_accession": { - "examples": ["e.g ERX4331406"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "run_accession": { - "examples": 
["e.g ERX4331406"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "submission_accession": { - "examples": ["e.g ERA2794974"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "read_count": { - "examples": ["e.g 837055"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "read_length": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "base_count": { - "examples": ["e.g 503907110"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "first_public": { - "examples": ["e.g 2020-08-07"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "last_updated": { - "examples": ["e.g 2020-07-29"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - "format":"date", - }, - "experiment_title": { - "examples": ["e.g Illumina MiSeq paired end sequencing"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "study_title": { - "examples": ["e.g SARS-CoV-2 genomes from late April in Stockholm"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "study_alias": { - "examples": ["e.g Sweden"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "experiment_alias": { - "examples": ["e.g ena-STUDY-KAROLINSKA INSITUTET-29-07-2020-14:18:07:925-2092"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "run_alias": { - "examples": ["e.g 
ena-EXPERIMENT-KAROLINSKA INSITUTET-29-07-2020-14:50:07:151-1"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "fastq_bytes": { - "examples": ["e.g ena-RUN-KAROLINSKA INSITUTET-29-07-2020-14:50:07:151-1"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "fastq_md5": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "fastq_ftp": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "fastq_aspera": { - "examples": ["e.g ftp.sra.ebi.ac.uk/vol1/fastq/ERR438/005/ERR4387385/ERR4387385_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR438/005/ERR4387385/ERR4387385_2.fastq.gz"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "fastq_galaxy": { - "examples": ["e.g fasp.sra.ebi.ac.uk:/vol1/fastq/ERR438/005/ERR4387385/ERR4387385_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/ERR438/005/ERR4387385/ERR4387385_2.fastq.gz"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "submitted_bytes": { - "examples": ["e.g ftp.sra.ebi.ac.uk/vol1/fastq/ERR438/005/ERR4387385/ERR4387385_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR438/005/ERR4387385/ERR4387385_2.fastq.gz"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "submitted_md5": { - "examples": ["e.g 139853010;166270048"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "submitted_ftp": { - "examples": ["e.g d726a9abc918e2b43bd68b24c7d01b3a;f01eba1b2bad974bdf61b81b1ae8ac2a"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - 
"clasification":"Submission ENA", - }, - "submitted_aspera": { - "examples": ["e.g ftp.sra.ebi.ac.uk/vol1/run/ERR438/ERR4387385/P17157_1007_S7_L001_R1_001.fastq.gz;ftp.sra.ebi.ac.uk/vol1/run/ERR438/ERR4387385/P17157_1007_S7_L001_R2_001.fastq.gz"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "submitted_galaxy": { - "examples": ["e.g fasp.sra.ebi.ac.uk:/vol1/run/ERR438/ERR4387385/P17157_1007_S7_L001_R1_001.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/run/ERR438/ERR4387385/P17157_1007_S7_L001_R2_001.fastq.gz"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "submitted_format": { - "examples": ["e.g ftp.sra.ebi.ac.uk/vol1/run/ERR438/ERR4387385/P17157_1007_S7_L001_R1_001.fastq.gz;ftp.sra.ebi.ac.uk/vol1/run/ERR438/ERR4387385/P17157_1007_S7_L001_R2_001.fastq.gz"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "sra_bytes": { - "examples": ["e.g FASTQ;FASTQ"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "sra_md5": { - "examples": ["e.g 260236789"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "sra_ftp": { - "examples": ["e.g 2cf0d467d6dc4ae0a5473774d54c059c"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "sra_aspera": { - "examples": ["e.g ftp.sra.ebi.ac.uk/vol1/err/ERR438/005/ERR4387385"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "sra_galaxy": { - "examples": ["e.g fasp.sra.ebi.ac.uk:/vol1/err/ERR438/005/ERR4387385"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "broker_name": { - "examples": ["P17157_1007"], - 
"ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "nominal_sdev": { - "examples": [""], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "first_created": { - "examples": ["e.g 2020-08-07"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "", - "clasification":"Submission ENA", - }, - "type": { - "examples": ["betacoronavirus"], - "ontology": "GENEPIO:0001156", - "type": "string", - "description": "default must remain 'betacoronavirus'", - "clasification":"Database Identifiers" - }, - - - - - } -} diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..eca2a714 --- /dev/null +++ b/setup.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +from setuptools import setup, find_packages + +version = "1.0.0" + +with open("README.md") as f: + readme = f.read() + +with open("requirements.txt") as f: + required = f.read().splitlines() + +setup( + name="relecov_tools", + version=version, + description="Tools for managing and resolution of buisciii services.", + long_description=readme, + long_description_content_type="text/markdown", + keywords=[ + "buisciii", + "bioinformatics", + "pipeline", + "sequencing", + "NGS", + "next generation sequencing", + ], + author="Sara Monzon", + author_email="smonzon@isciii.es", + url="https://github.com/BU-ISCIII/relecov-tools", + license="GNU GENERAL PUBLIC LICENSE v.3", + entry_points={ + "console_scripts": ["relecov-tools=relecov_tools.__main__:run_relecov_tools"] + }, + install_requires=required, + packages=find_packages(exclude=("docs",)), + include_package_data=True, + zip_safe=False, +) diff --git a/tests/data/map_validate/metadata_lab_test.xlsx b/tests/data/map_validate/metadata_lab_test.xlsx new file mode 100755 index 00000000..975bdde8 Binary files /dev/null and b/tests/data/map_validate/metadata_lab_test.xlsx differ diff --git
a/tests/data/map_validate/processed_metadata_lab_test.json b/tests/data/map_validate/processed_metadata_lab_test.json new file mode 100755 index 00000000..e74b78bd --- /dev/null +++ b/tests/data/map_validate/processed_metadata_lab_test.json @@ -0,0 +1,517 @@ +[ + { + "all_in_one_library_kit": "Illumina COVIDSeq Test [CIDO:0020172]", + "collecting_institution": "Instituto de Salud Carlos III ", + "collecting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "collecting_institution_email": "info@isciii.es", + "collecting_lab_sample_id": "1000", + "collector_name": "Not Provided", + "enrichment_panel": "ARTIC", + "enrichment_panel_version": "ARTIC v4.1", + "enrichment_protocol": "Amplicon [GENEPIO:0001974]", + "environmental_system": "Terrestrial biome", + "fastq_r1_md5": "c1551cdf3d5e9849b80a082051907a72", + "fastq_r2_md5": "449407f85d7ed4388b7d8a51e28d97a8", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain [GAZ:00003936]", + "geo_loc_latitude": "40.4167", + "geo_loc_longitude": "-3.7167", + "geo_loc_region": "Madrid", + "geo_loc_state": "Comunidad de Madrid", + "host_age": "23.0", + "host_common_name": "Human [NCBITaxon:9606]", + "host_disease": "COVID-19 [MONDO:0100096]", + "host_scientific_name": "Homo sapiens [NCBITaxon:9606]", + "isolate_sample_id": "id1000", + "library_layout": "Paired [OBI:0001852]", + "library_source": "viral rna", + "library_strategy": "WGS strategy [GENEPIO:0001992]", + "microbiology_lab_sample_id": "111111", + "organism": "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]", + "purpose_sampling": "Surveillance [GENEPIO:0100004]", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sample_collection_date": "2023-03-23", + "sample_received_date": "2023-01-23", + "schema_name": "RELECOV schema", + "schema_version": "1.0.0", + "sequence_file_R1_fastq": "SAMPLE1_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE1_R2.fastq.gz", + "sequencing_date": "2023-03-23", + 
"sequencing_institution": "Other", + "sequencing_instrument_model": "Illumina NextSeq 550 [GENEPIO:0100128]", + "sequencing_instrument_platform": "Illumina [OBI:0000759]", + "sequencing_sample_id": "id1000", + "study_type": "Whole Genome Sequencing [NCIT:C101294]", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "12GOOD", + "tax_id": "2697049" + }, + { + "all_in_one_library_kit": "Illumina COVIDSeq Test [CIDO:0020172]", + "collecting_institution": "Instituto de Salud Carlos III ", + "collecting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "collecting_institution_email": "info@isciii.es", + "collecting_lab_sample_id": "id4000", + "collector_name": "Not Provided", + "enrichment_panel": "ARTIC", + "enrichment_protocol": "Amplicon [GENEPIO:0001974]", + "fastq_r1_md5": "5d838ce4a93bcf12d3031a5f9ffd4acf", + "fastq_r2_md5": "0a9b0a590b4773e5e4c90c391979a685", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain [GAZ:00003936]", + "geo_loc_latitude": "40.4167", + "geo_loc_longitude": "-3.7167", + "geo_loc_region": "Madrid", + "geo_loc_state": "Comunidad de Madrid", + "host_disease": "COVID-19 [MONDO:0100096]", + "isolate_sample_id": "id4000_BAD", + "library_layout": "Paired [OBI:0001852]", + "library_source": "genomic", + "library_strategy": "WGS strategy [GENEPIO:0001992]", + "microbiology_lab_sample_id": "249", + "organism": "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]", + "purpose_sampling": "Surveillance [GENEPIO:0100004]", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sample_collection_date": "2022-10-02", + "sample_received_date": "2023-01-06", + "schema_name": "RELECOV schema", + "schema_version": "1.0.0", + "sequence_file_R1_fastq": "SAMPLE4_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE4_R2.fastq.gz", + "sequencing_date": 
"2022-10-09", + "sequencing_institution": "Instituto de Salud Carlos III ", + "sequencing_instrument_model": "Illumina NextSeq 550 [GENEPIO:0100128]", + "sequencing_instrument_platform": "Illumina [OBI:0000759]", + "sequencing_sample_id": "id4000_BAD", + "study_type": "Whole Genome Sequencing [NCIT:C101294]", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "52", + "tax_id": "2697049" + }, + { + "collecting_institution": "Instituto de Salud Carlos III ", + "collecting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "collecting_institution_email": "info@isciii.es", + "collecting_lab_sample_id": "singleid1", + "collector_name": "Not Provided", + "enrichment_panel": "Ion AmpliSeq SARS-CoV-2 Research Panel", + "enrichment_panel_version": "Ion AmpliSeq SARS-CoV-2 Insight", + "enrichment_protocol": "Amplicon [GENEPIO:0001974]", + "fastq_r1_md5": "112d5d2ff3845236576732f7c7dc8b06", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain [GAZ:00003936]", + "geo_loc_latitude": "40.4167", + "geo_loc_longitude": "-3.7167", + "geo_loc_region": "Madrid", + "geo_loc_state": "Comunidad de Madrid", + "host_common_name": "Human [NCBITaxon:9606]", + "host_disease": "COVID-19 [MONDO:0100096]", + "host_scientific_name": "Homo sapiens [NCBITaxon:9606]", + "isolate_sample_id": "SINGLEID", + "library_layout": "Single [OBI:0002481]", + "library_preparation_kit": "Ion Xpress Plus Fragment Library Kit", + "library_source": "viral rna", + "library_strategy": "WGS strategy [GENEPIO:0001992]", + "microbiology_lab_sample_id": "singleid3", + "organism": "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]", + "purpose_sampling": "Surveillance [GENEPIO:0100004]", + "r1_fastq_filepath": "tests/20240320", + "sample_collection_date": "2024-01-17", + "sample_received_date": "2022-03-27", + "schema_name": "RELECOV 
schema", + "schema_version": "1.0.0", + "sequence_file_R1_fastq": "Singlesamp.fastq.gz", + "sequencing_date": "2024-01-18", + "sequencing_institution": "Other", + "sequencing_instrument_model": "Ion Torrent S5 XL [GENEPIO:0100138]", + "sequencing_instrument_platform": "Ion Torrent [GENEPIO:0002683]", + "sequencing_sample_id": "SINGLEID", + "study_type": "Whole Genome Sequencing [NCIT:C101294]", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "singleid2", + "tax_id": "2697049" + }, + { + "all_in_one_library_kit": "Illumina COVIDSeq Test [CIDO:0020172]", + "collecting_institution": "Instituto de Salud Carlos III ", + "collecting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "collecting_institution_email": "info@isciii.es", + "collecting_lab_sample_id": "5555", + "collector_name": "Not Provided", + "enrichment_panel": "ARTIC", + "enrichment_panel_version": "ARTIC v3", + "enrichment_protocol": "Amplicon [GENEPIO:0001974]", + "environmental_material": "Particulate matter [ENVO:01000060]", + "fastq_r1_md5": "d04d32f14ca56f968d07f61d68598639", + "fastq_r2_md5": "66a057995ac925e3203ff1afdd5770ec", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain [GAZ:00003936]", + "geo_loc_latitude": "40.4167", + "geo_loc_longitude": "-3.7167", + "geo_loc_region": "Madrid", + "geo_loc_state": "Comunidad de Madrid", + "host_age": "6.0", + "host_common_name": "Human [NCBITaxon:9606]", + "host_disease": "COVID-19 [MONDO:0100096]", + "host_scientific_name": "Homo sapiens [NCBITaxon:9606]", + "isolate_sample_id": "id5000GOOD", + "library_layout": "Paired [OBI:0001852]", + "library_selection": "PCR [GENEPIO:0001955]", + "library_source": "viral rna", + "library_strategy": "WGS strategy [GENEPIO:0001992]", + "microbiology_lab_sample_id": "PAIRED4397", + "organism": "Severe acute respiratory syndrome coronavirus 2 
[NCBITaxon:2697049]", + "purpose_sampling": "Surveillance [GENEPIO:0100004]", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sample_collection_date": "2023-03-23", + "sample_received_date": "2023-03-25", + "schema_name": "RELECOV schema", + "schema_version": "1.0.0", + "sequence_file_R1_fastq": "SAMPLE5_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE5_R2.fastq.gz", + "sequencing_date": "2023-06-23", + "sequencing_institution": "Other", + "sequencing_instrument_model": "Illumina NextSeq 550 [GENEPIO:0100128]", + "sequencing_instrument_platform": "Illumina [OBI:0000759]", + "sequencing_sample_id": "id5000GOOD", + "study_type": "Whole Genome Sequencing [NCIT:C101294]", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "55GOOD", + "tax_id": "2697049" + }, + { + "all_in_one_library_kit": "Illumina COVIDSeq Test [CIDO:0020172]", + "anatomical_material": "Not Applicable [GENEPIO:0001619]", + "anatomical_part": "Not Applicable [GENEPIO:0001619]", + "body_product": "Mucus [UBERON:0000912]", + "collecting_institution": "Instituto de Salud Carlos III ", + "collecting_institution_address": "Crta. 
Pozuelo Majadahonda S/N", + "collecting_institution_email": "info@isciii.es", + "collecting_lab_sample_id": "6666", + "collection_method": "Not Applicable [GENEPIO:0001619]", + "collector_name": "Not Provided", + "enrichment_panel": "ARTIC", + "enrichment_panel_version": "ARTIC v3", + "enrichment_protocol": "Amplicon [GENEPIO:0001974]", + "fastq_r1_md5": "0d55b49614d1c38582b69ccded1b22c4", + "fastq_r2_md5": "50afab6751cce654b973bb336cf3b3eb", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain [GAZ:00003936]", + "geo_loc_latitude": "40.4167", + "geo_loc_longitude": "-3.7167", + "geo_loc_region": "Madrid", + "geo_loc_state": "Comunidad de Madrid", + "host_common_name": "Human [NCBITaxon:9606]", + "host_disease": "COVID-19 [MONDO:0100096]", + "host_scientific_name": "Homo sapiens [NCBITaxon:9606]", + "isolate_sample_id": "id6000GOOD", + "library_layout": "Paired [OBI:0001852]", + "library_selection": "PCR [GENEPIO:0001955]", + "library_source": "viral rna", + "library_strategy": "Amplicon [GENEPIO:0001974]", + "microbiology_lab_sample_id": "PAIRED6666", + "organism": "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]", + "purpose_sampling": "Surveillance [GENEPIO:0100004]", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sample_collection_date": "2023-03-26", + "sample_received_date": "2023-03-28", + "schema_name": "RELECOV schema", + "schema_version": "1.0.0", + "sequence_file_R1_fastq": "SAMPLE6_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE6_R2.fastq.gz", + "sequencing_date": "2023-03-27", + "sequencing_institution": "Other", + "sequencing_instrument_model": "Illumina NextSeq 550 [GENEPIO:0100128]", + "sequencing_instrument_platform": "Illumina [OBI:0000759]", + "sequencing_sample_id": "id6000GOOD", + "specimen_source": "Blood", + "study_type": "Whole Genome Sequencing [NCIT:C101294]", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. 
Pozuelo Majadahonda S/N", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "66GOOD", + "tax_id": "2697049" + }, + { + "collecting_institution": "Instituto de Salud Carlos III ", + "collecting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "collecting_institution_email": "info@isciii.es", + "collecting_lab_sample_id": "33500052", + "collection_device": "Swab [GENEPIO:0100027]", + "collector_name": "Not Provided", + "diagnostic_pcr_Ct_value_1": "26.39", + "enrichment_panel": "ARTIC", + "enrichment_protocol": "Amplicon [GENEPIO:0001974]", + "fastq_r1_md5": "69854e93a48df2e0bd77ab065518f284", + "fastq_r2_md5": "e723a5c4fefd200fb5cdef50fe93c02a", + "flowcell_kit": "iSeq 100 i1 Reagent v2 (300-cycle)", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain [GAZ:00003936]", + "geo_loc_latitude": "40.4167", + "geo_loc_longitude": "-3.7167", + "geo_loc_region": "Madrid", + "geo_loc_state": "Comunidad de Madrid", + "host_age": "87.0", + "host_common_name": "Human [NCBITaxon:9606]", + "host_disease": "COVID-19 [MONDO:0100096]", + "host_gender": "Female [NCIT:C46110]", + "host_scientific_name": "Homo sapiens [NCBITaxon:9606]", + "isolate_sample_id": "33500056", + "library_layout": "Paired [OBI:0001852]", + "library_preparation_kit": "Illumina DNA Prep", + "library_source": "viral rna", + "microbiology_lab_sample_id": "33500054", + "nucleic_acid_extraction_protocol": "eMAG", + "number_of_samples_in_run": "24.0", + "organism": "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]", + "purpose_sampling": "Surveillance [GENEPIO:0100004]", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sample_collection_date": "2024-01-17", + "sample_received_date": "2024-01-17", + "schema_name": "RELECOV schema", + "schema_version": "1.0.0", + "sequence_file_R1_fastq": "SAMPLE20_TEST2_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE20_TEST2_R2.fastq.gz", + "sequencing_date": "2024-01-30", + 
"sequencing_institution": "Instituto de Salud Carlos III ", + "sequencing_instrument_model": "Illumina iSeq 100 [GENEPIO:0100121]", + "sequencing_instrument_platform": "Illumina [OBI:0000759]", + "sequencing_sample_id": "33500056", + "specimen_source": "Nasopharynx Swabbing", + "study_type": "Whole Genome Sequencing [NCIT:C101294]", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "33500053", + "tax_id": "2697049" + }, + { + "collecting_institution": "Instituto de Salud Carlos III ", + "collecting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "collecting_institution_email": "info@isciii.es", + "collecting_lab_sample_id": "33597809", + "collection_device": "Swab [GENEPIO:0100027]", + "collector_name": "Not Provided", + "diagnostic_pcr_Ct_value_1": "21.63", + "enrichment_panel": "ARTIC", + "enrichment_protocol": "Amplicon [GENEPIO:0001974]", + "fastq_r1_md5": "18fbfc8a0da49d63cfb43c8ea1fb63a5", + "fastq_r2_md5": "1bac23a364f256ee89583088547cd2d0", + "flowcell_kit": "iSeq 100 i1 Reagent v2 (300-cycle)", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain [GAZ:00003936]", + "geo_loc_latitude": "40.4167", + "geo_loc_longitude": "-3.7167", + "geo_loc_region": "Madrid", + "geo_loc_state": "Comunidad de Madrid", + "host_age": "37.0", + "host_common_name": "Human [NCBITaxon:9606]", + "host_disease": "COVID-19 [MONDO:0100096]", + "host_gender": "Male [NCIT:C46109]", + "host_scientific_name": "Homo sapiens [NCBITaxon:9606]", + "isolate_sample_id": "33597813", + "library_layout": "Paired [OBI:0001852]", + "library_preparation_kit": "Illumina DNA Prep", + "library_source": "viral rna", + "microbiology_lab_sample_id": "33597811", + "nucleic_acid_extraction_protocol": "eMAG", + "number_of_samples_in_run": "24.0", + "organism": "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]", + 
"purpose_sampling": "Surveillance [GENEPIO:0100004]", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sample_collection_date": "2024-01-20", + "sample_received_date": "2024-01-20", + "schema_name": "RELECOV schema", + "schema_version": "1.0.0", + "sequence_file_R1_fastq": "SAMPLE21_TEST2_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE21_TEST2_R2.fastq.gz", + "sequencing_date": "2024-01-30", + "sequencing_institution": "Instituto de Salud Carlos III ", + "sequencing_instrument_model": "Illumina iSeq 100 [GENEPIO:0100121]", + "sequencing_instrument_platform": "Illumina [OBI:0000759]", + "sequencing_sample_id": "33597813", + "specimen_source": "Nasopharynx Swabbing", + "study_type": "Whole Genome Sequencing [NCIT:C101294]", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "33597810", + "tax_id": "2697049" + }, + { + "collecting_institution": "Instituto de Salud Carlos III ", + "collecting_institution_address": "Crta. 
Pozuelo Majadahonda S/N", + "collecting_institution_email": "info@isciii.es", + "collecting_lab_sample_id": "35184258", + "collection_device": "Swab [GENEPIO:0100027]", + "collector_name": "Not Provided", + "diagnostic_pcr_Ct_value_1": "26.15", + "enrichment_panel": "ARTIC", + "enrichment_protocol": "Amplicon [GENEPIO:0001974]", + "fastq_r1_md5": "99ab59db60c96551ecd9067000cdbb2a", + "fastq_r2_md5": "c58920e0893dd51c25ab4ea0a1aee5c5", + "flowcell_kit": "iSeq 100 i1 Reagent v2 (300-cycle)", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain [GAZ:00003936]", + "geo_loc_latitude": "40.4167", + "geo_loc_longitude": "-3.7167", + "geo_loc_region": "Madrid", + "geo_loc_state": "Comunidad de Madrid", + "host_age": "40.0", + "host_common_name": "Human [NCBITaxon:9606]", + "host_disease": "COVID-19 [MONDO:0100096]", + "host_gender": "Male [NCIT:C46109]", + "host_scientific_name": "Homo sapiens [NCBITaxon:9606]", + "isolate_sample_id": "35184262", + "library_layout": "Paired [OBI:0001852]", + "library_preparation_kit": "Illumina DNA Prep", + "library_source": "viral rna", + "microbiology_lab_sample_id": "35184260", + "nucleic_acid_extraction_protocol": "eMAG", + "number_of_samples_in_run": "24.0", + "organism": "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]", + "purpose_sampling": "Surveillance [GENEPIO:0100004]", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sample_collection_date": "2024-01-21", + "sample_received_date": "2024-01-21", + "schema_name": "RELECOV schema", + "schema_version": "1.0.0", + "sequence_file_R1_fastq": "SAMPLE22_TEST2_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE22_TEST2_R2.fastq.gz", + "sequencing_date": "2024-01-30", + "sequencing_institution": "Instituto de Salud Carlos III ", + "sequencing_instrument_model": "Illumina iSeq 100 [GENEPIO:0100121]", + "sequencing_instrument_platform": "Illumina [OBI:0000759]", + "sequencing_sample_id": "35184262", + "specimen_source": "Nasopharynx 
Swabbing", + "study_type": "Whole Genome Sequencing [NCIT:C101294]", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "35184259", + "tax_id": "2697049" + }, + { + "collecting_institution": "Instituto de Salud Carlos III ", + "collecting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "collecting_institution_email": "info@isciii.es", + "collecting_lab_sample_id": "37145955EXT", + "collection_device": "Swab [GENEPIO:0100027]", + "collector_name": "Not Provided", + "diagnostic_pcr_Ct_value_1": "27.51", + "enrichment_panel": "ARTIC", + "enrichment_protocol": "Amplicon [GENEPIO:0001974]", + "fastq_r1_md5": "dc340063a9f08257157ef4ead6c5b34c", + "fastq_r2_md5": "9a6eaf7e8f93963510cf756ba90ce524", + "flowcell_kit": "iSeq 100 i1 Reagent v2 (300-cycle)", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain [GAZ:00003936]", + "geo_loc_latitude": "40.4167", + "geo_loc_longitude": "-3.7167", + "geo_loc_region": "Madrid", + "geo_loc_state": "Comunidad de Madrid", + "host_age": "74.0", + "host_common_name": "Human [NCBITaxon:9606]", + "host_disease": "COVID-19 [MONDO:0100096]", + "host_gender": "Female [NCIT:C46110]", + "host_scientific_name": "Homo sapiens [NCBITaxon:9606]", + "isolate_sample_id": "37145959EXT", + "library_layout": "Paired [OBI:0001852]", + "library_preparation_kit": "Illumina DNA Prep", + "library_source": "viral rna", + "microbiology_lab_sample_id": "37145957EXT", + "nucleic_acid_extraction_protocol": "eMAG", + "number_of_samples_in_run": "24.0", + "organism": "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]", + "purpose_sampling": "Surveillance [GENEPIO:0100004]", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sample_collection_date": "2024-01-18", + "sample_received_date": "2024-01-18", + "schema_name": "RELECOV schema", + 
"schema_version": "1.0.0", + "sequence_file_R1_fastq": "SAMPLE23_TEST2_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE23_TEST2_R2.fastq.gz", + "sequencing_date": "2024-01-30", + "sequencing_institution": "Instituto de Salud Carlos III ", + "sequencing_instrument_model": "Illumina iSeq 100 [GENEPIO:0100121]", + "sequencing_instrument_platform": "Illumina [OBI:0000759]", + "sequencing_sample_id": "37145959EXT", + "specimen_source": "Nasopharynx Swabbing", + "study_type": "Whole Genome Sequencing [NCIT:C101294]", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "37145956EXT", + "tax_id": "2697049" + }, + { + "collecting_institution": "Instituto de Salud Carlos III ", + "collecting_institution_address": "Crta. Pozuelo Majadahonda S/N", + "collecting_institution_email": "info@isciii.es", + "collecting_lab_sample_id": "39156978", + "collection_device": "Swab [GENEPIO:0100027]", + "collector_name": "Not Provided", + "diagnostic_pcr_Ct_value_1": "19.24", + "enrichment_panel": "ARTIC", + "enrichment_protocol": "Amplicon [GENEPIO:0001974]", + "fastq_r1_md5": "81b05b2b94880749efc19af6ffeac7a6", + "fastq_r2_md5": "b2dcec994291f6b507ec06afe646069d", + "flowcell_kit": "iSeq 100 i1 Reagent v2 (300-cycle)", + "geo_loc_city": "Madrid", + "geo_loc_country": "Spain [GAZ:00003936]", + "geo_loc_latitude": "40.4167", + "geo_loc_longitude": "-3.7167", + "geo_loc_region": "Madrid", + "geo_loc_state": "Comunidad de Madrid", + "host_age": "74.0", + "host_common_name": "Human [NCBITaxon:9606]", + "host_disease": "COVID-19 [MONDO:0100096]", + "host_gender": "Female [NCIT:C46110]", + "host_scientific_name": "Homo sapiens [NCBITaxon:9606]", + "isolate_sample_id": "39156982", + "library_layout": "Paired [OBI:0001852]", + "library_preparation_kit": "Illumina DNA Prep", + "library_source": "viral rna", + "microbiology_lab_sample_id": 
"39156980", + "nucleic_acid_extraction_protocol": "eMAG", + "number_of_samples_in_run": "24.0", + "organism": "Severe acute respiratory syndrome coronavirus 2 [NCBITaxon:2697049]", + "purpose_sampling": "Surveillance [GENEPIO:0100004]", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sample_collection_date": "2024-01-17", + "sample_received_date": "2024-01-17", + "schema_name": "RELECOV schema", + "schema_version": "1.0.0", + "sequence_file_R1_fastq": "SAMPLE24_TEST2_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE24_TEST2_R2.fastq.gz", + "sequencing_date": "2024-01-30", + "sequencing_institution": "Instituto de Salud Carlos III ", + "sequencing_instrument_model": "Illumina iSeq 100 [GENEPIO:0100121]", + "sequencing_instrument_platform": "Illumina [OBI:0000759]", + "sequencing_sample_id": "39156982", + "specimen_source": "Nasopharynx Swabbing", + "study_type": "Whole Genome Sequencing [NCIT:C101294]", + "submitting_institution": "Instituto de Salud Carlos III", + "submitting_institution_address": "Crta. 
Pozuelo Majadahonda S/N", + "submitting_institution_email": "info@isciii.es", + "submitting_lab_sample_id": "39156979", + "tax_id": "2697049" + } +] \ No newline at end of file diff --git a/tests/data/read_lab_metadata/metadata_lab_test.xlsx b/tests/data/read_lab_metadata/metadata_lab_test.xlsx new file mode 100755 index 00000000..975bdde8 Binary files /dev/null and b/tests/data/read_lab_metadata/metadata_lab_test.xlsx differ diff --git a/tests/data/read_lab_metadata/samples_data_test.json b/tests/data/read_lab_metadata/samples_data_test.json new file mode 100755 index 00000000..ac7051df --- /dev/null +++ b/tests/data/read_lab_metadata/samples_data_test.json @@ -0,0 +1,79 @@ +{ + "33500056": { + "fastq_r1_md5": "69854e93a48df2e0bd77ab065518f284", + "fastq_r2_md5": "e723a5c4fefd200fb5cdef50fe93c02a", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sequence_file_R1_fastq": "SAMPLE20_TEST2_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE20_TEST2_R2.fastq.gz" + }, + "33597813": { + "fastq_r1_md5": "18fbfc8a0da49d63cfb43c8ea1fb63a5", + "fastq_r2_md5": "1bac23a364f256ee89583088547cd2d0", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sequence_file_R1_fastq": "SAMPLE21_TEST2_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE21_TEST2_R2.fastq.gz" + }, + "35184262": { + "fastq_r1_md5": "99ab59db60c96551ecd9067000cdbb2a", + "fastq_r2_md5": "c58920e0893dd51c25ab4ea0a1aee5c5", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sequence_file_R1_fastq": "SAMPLE22_TEST2_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE22_TEST2_R2.fastq.gz" + }, + "37145959EXT": { + "fastq_r1_md5": "dc340063a9f08257157ef4ead6c5b34c", + "fastq_r2_md5": "9a6eaf7e8f93963510cf756ba90ce524", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sequence_file_R1_fastq": "SAMPLE23_TEST2_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE23_TEST2_R2.fastq.gz" + }, + 
"39156982": { + "fastq_r1_md5": "81b05b2b94880749efc19af6ffeac7a6", + "fastq_r2_md5": "b2dcec994291f6b507ec06afe646069d", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sequence_file_R1_fastq": "SAMPLE24_TEST2_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE24_TEST2_R2.fastq.gz" + }, + "SINGLEID": { + "fastq_r1_md5": "112d5d2ff3845236576732f7c7dc8b06", + "r1_fastq_filepath": "tests/20240320", + "sequence_file_R1_fastq": "Singlesamp.fastq.gz" + }, + "id1000": { + "fastq_r1_md5": "c1551cdf3d5e9849b80a082051907a72", + "fastq_r2_md5": "449407f85d7ed4388b7d8a51e28d97a8", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sequence_file_R1_fastq": "SAMPLE1_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE1_R2.fastq.gz" + }, + "id4000_BAD": { + "fastq_r1_md5": "5d838ce4a93bcf12d3031a5f9ffd4acf", + "fastq_r2_md5": "0a9b0a590b4773e5e4c90c391979a685", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sequence_file_R1_fastq": "SAMPLE4_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE4_R2.fastq.gz" + }, + "id5000GOOD": { + "fastq_r1_md5": "d04d32f14ca56f968d07f61d68598639", + "fastq_r2_md5": "66a057995ac925e3203ff1afdd5770ec", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sequence_file_R1_fastq": "SAMPLE5_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE5_R2.fastq.gz" + }, + "id6000GOOD": { + "fastq_r1_md5": "0d55b49614d1c38582b69ccded1b22c4", + "fastq_r2_md5": "50afab6751cce654b973bb336cf3b3eb", + "r1_fastq_filepath": "tests/20240320", + "r2_fastq_filepath": "tests/20240320", + "sequence_file_R1_fastq": "SAMPLE6_R1.fastq.gz", + "sequence_file_R2_fastq": "SAMPLE6_R2.fastq.gz" + } +} \ No newline at end of file diff --git a/tests/data/sftp_handle/configuration.json b/tests/data/sftp_handle/configuration.json new file mode 100755 index 00000000..33bd9cb6 --- /dev/null +++ b/tests/data/sftp_handle/configuration.json @@ -0,0 +1,496 @@ +{ 
+ "lab_metadata": { + "fixed_fields": { + "host_disease": "COVID-19", + "tax_id": "2697049", + "organism": "Severe acute respiratory syndrome coronavirus 2", + "study_type": "Whole Genome Sequencing", + "collector_name": "Not Provided" + }, + "metadata_lab_heading": [ + "Public Health sample id (SIVIES)", + "Sample ID given by originating laboratory", + "Sample ID given by the submitting laboratory", + "Sample ID given in the microbiology lab", + "Sample ID given if multiple rna-extraction or passages", + "Sample ID given for sequencing", + "ENA Sample ID", + "GISAID Virus Name", + "GISAID id", + "Originating Laboratory", + "Submitting Institution", + "Sequencing Institution", + "Sample Collection Date", + "Sample Received Date", + "Purpose of sampling", + "Biological Sample Storage Condition", + "Specimen source", + "Environmental Material", + "Environmental System", + "Collection Device", + "Host", + "Host Age", + "Host Gender", + "Sequencing Date", + "Nucleic acid extraction protocol", + "Commercial All-in-one library kit", + "Library Preparation Kit", + "Enrichment Protocol", + "If Enrichment Protocol Is Other, Specify", + "Enrichment panel/assay", + "If Enrichment panel/assay Is Other, Specify", + "Enrichment panel/assay version", + "Number Of Samples In Run", + "Runid", + "Sequencing Instrument Model", + "Flowcell Kit", + "Source material", + "Capture method", + "Sequencing technique", + "Library Layout", + "Gene Name 1", + "Diagnostic Pcr Ct Value 1", + "Gene Name 2", + "Diagnostic Pcr Ct Value-2", + "Authors", + "Sequence file R1 fastq", + "Sequence file R2 fastq" + ], + "lab_metadata_req_json": { + "laboratory_data": { + "file": "laboratory_address.json", + "map_field": "collecting_institution", + "adding_fields": [ + "collecting_institution_address", + "collecting_institution_email", + "geo_loc_state", + "geo_loc_region", + "geo_loc_city", + "geo_loc_country" + ] + }, + "geo_location_data": { + "file": "geo_loc_cities.json", + "map_field": "geo_loc_city", 
+ "adding_fields": [ + "geo_loc_latitude", + "geo_loc_longitude" + ] + }, + "submitting_data": { + "file": "laboratory_address.json", + "map_field": "collecting_institution", + "adding_fields": [ + "submitting_institution", + "submitting_institution_address", + "submitting_institution_email" + ] + }, + "specimen_source_splitting": { + "file": "anatomical_material_collection_method.json", + "map_field": "specimen_source", + "adding_fields": [ + "anatomical_material", + "anatomical_part", + "body_product", + "collection_method" + ] + } + }, + "required_post_processing": { + "host_common_name": { + "Human": "host_scientific_name::Homo sapiens" + }, + "sequencing_instrument_model": { + "Illumina": "sequencing_instrument_platform::Illumina", + "PacBio": "sequencing_instrument_platform::PacBio", + "Ion Torrent": "sequencing_instrument_platform::Ion Torrent", + "Oxford Nanopore": "sequencing_instrument_platform::Oxford Nanopore" + } + }, + "required_copy_from_other_field": { + "isolate_sample_id": "sequencing_sample_id" + }, + "samples_json_fields": [ + "fastq_r1_md5", + "fastq_r2_md5", + "sequence_file_R1_fastq", + "sequence_file_R2_fastq", + "r1_fastq_filepath", + "r2_fastq_filepath" + ] + }, + "long_table_heading": [ + "SAMPLE", + "CHROM", + "POS", + "REF", + "ALT", + "FILTER", + "DP", + "REF_DP", + "ALT_DP", + "AF", + "GENE", + "EFFECT", + "HGVS_C", + "HGVS_P", + "HGVS_P_1LETTER", + "CALLER", + "LINEAGE" + ], + "long_table_parse_aux": { + "Chromosome": "CHROM", + "Variant": { + "pos": "POS", + "alt": "ALT", + "ref": "REF" + }, + "Filter": "FILTER", + "VariantInSample": { + "dp": "DP", + "ref_dp": "REF_DP", + "alt_dp": "ALT_DP", + "af": "AF" + }, + "Effect": "EFFECT", + "VariantAnnotation": { + "hgvs_c": "HGVS_C", + "hgvs_p": "HGVS_P", + "hgvs_p_1_letter": "HGVS_P_1LETTER" + } + }, + "gisaid_csv_headers": [ + "submitter", + "covv_virus_name", + "covv_type", + "covv_passage", + "covv_collection_date", + "covv_location", + "covv_add_location", + "covv_host", + 
"covv_add_host_info", + "covv_sampling_strategy", + "covv_gender", + "covv_patient_age", + "covv_patient_status", + "covv_specimen", + "covv_outbreak", + "covv_last_vaccinated", + "covv_treatment", + "covv_seq_technology", + "covv_assembly_method", + "covv_coverage", + "covv_orig_lab", + "covv_orig_lab_addr", + "covv_provider_sample_id", + "covv_subm_lab", + "covv_subm_lab_addr", + "covv_subm_sample_id", + "covv_authors" + ], + "json_schemas": { + "relecov_schema": "relecov_schema.json", + "ena_schema": "ena_schema.json", + "gisaid_schema": "gisaid_schema.json" + }, + "institution_mapping_file": { + "ISCIII": "ISCIII.json", + "HUGTiP": "HUGTiP.json" + }, + "sftp_handle": { + "sftp_connection": { + "sftp_server": "sftprelecov.isciii.es", + "sftp_port": "22" + }, + "metadata_processing": { + "header_flag": "CAMPO", + "excel_sheet": "METADATA_LAB" + }, + "abort_if_md5_mismatch": "False", + "platform_storage_folder": "/tmp/relecov", + "allowed_file_extensions": [ + ".fastq.gz", + ".fastq", + ".fq", + ".fq.gz", + ".fasta", + ".fasta.gz" + ], + "allowed_download_options": [ + "download_only", + "download_clean", + "delete_only" + ], + "skip_when_found": [ + "#", + "Hash", + "Path" + ] + }, + "GISAID_configuration": { + "submitter": "GISAID_ID" + }, + "external_url": { + "iskylims": { + "server": "http://relecov-iskylims.isciiides.es", + "url": "/wetlab/api/", + "store_samples": "createSampleData", + "url_project_fields": "sampleProjectFields", + "url_sample_fields": "sampleFields", + "param_sample_project": "project", + "project_name": "relecov", + "token": "" + }, + "relecov": { + "server": "http://relecov-platform.isciiides.es", + "url": "/api/", + "store_samples": "createSampleData", + "bioinfodata": "createBioinfoData", + "variantdata": "createVariantData", + "sftp_info": "sftpInfo", + "token": "" + } + }, + "iskylims_fixed_values": { + "patientCore": "", + "sampleProject": "Relecov", + "onlyRecorded": "Yes", + "sampleLocation": "Not defined" + }, + 
"relecov_sample_metadata": [ + "authors", + "collecting_institution", + "collecting_lab_sample_id", + "ena_broker_name", + "ena_sample_accession", + "gisaid_accession_id", + "gisaid_virus_name", + "microbiology_lab_sample_id", + "r1_fastq_filepath", + "r2_fastq_filepath", + "schema_name", + "schema_version", + "sequencing_date", + "sequence_file_R1_md5", + "sequence_file_R2_md5", + "sequence_file_R1_fastq", + "sequence_file_R2_fastq", + "sequencing_sample_id", + "submitting_lab_sample_id" + ], + "bioinfo_analysis": { + "fixed_values": { + "assembly": "Not Provided [GENEPIO:0001668]", + "assembly_params": "Not Provided [GENEPIO:0001668]", + "bioinformatics_protocol_software_name": "nf-core/viralrecon", + "commercial_open_source_both": "Open Source", + "consensus_params": "-p vcf -f", + "consensus_sequence_software_name": "BCFTOOLS_CONSENSUS", + "dehosting_method_software_name": "KRAKEN2_KRAKEN2", + "depth_of_coverage_threshold": ">10x", + "if_assembly_other": "Not Provided [GENEPIO:0001668]", + "if_bioinformatic_protocol_is_other_specify": "Not Provided [GENEPIO:0001668]", + "if_consensus_other": "Not Provided [GENEPIO:0001668]", + "if_lineage_identification_other": "Not Provided [GENEPIO:0001668]", + "if_mapping_other": "Not Provided [GENEPIO:0001668]", + "if_preprocessing_other": "Not Provided [GENEPIO:0001668]", + "lineage_analysis_software_name": "pangolin", + "mapping_params": "--seed 1", + "mapping_software_name": "BOWTIE2_ALIGN", + "preprocessing_params": "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50", + "preprocessing_software_name": "FASTP", + "variant_calling_params": "--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0';-t 0.25 -q 20 -m 10", + "variant_calling_software_name": "IVAR_VARIANTS" + }, + "feed_empty_fields":{ + "lineage_analysis_date": "Not Provided [GENEPIO:0001668]", + "consensus_sequence_name": "Not Provided [GENEPIO:0001668]" + }, 
+ "required_file": { + "variants_metrics": "summary_variants_metrics_mqc.csv", + "versions": "software_versions.yml", + "mapping_stats": "mapping_illumina_stats.tab" + }, + "mapping_consensus": [ + "consensus_genome_length", + "consensus_sequence_filename", + "consensus_sequence_filepath", + "consensus_sequence_md5", + "number_of_base_pairs_sequenced" + ], + "mapping_pangolin": { + "variant_name": "scorpio_call", + "lineage_name": "lineage", + "lineage_algorithm_software_version": "version", + "lineage_analysis_software_version": "pangolin_version", + "lineage_analysis_scorpio_version": "scorpio_version", + "lineage_analysis_constellation_version": "constellation_version" + }, + "mapping_stats": { + "analysis_date": "analysis_date", + "depth_of_coverage_value": "medianDPcoveragevirus", + "number_of_variants_in_consensus": "Variantsinconsensusx10", + "number_of_variants_with_effect": "MissenseVariants", + "per_genome_greater_10x": "Coverage>10x(%)", + "per_Ns": "%Ns10x", + "per_reads_host": "%readshost", + "per_reads_virus": "%readsvirus", + "per_unmapped": "%unmapedreads", + "qc_filtered": "totalreads", + "reference_genome_accession": "Virussequence", + "read_length": "read_length" + }, + "mapping_variant_metrics": { + "ns_per_100_kbp": "# Ns per 100kb consensus" + }, + "mapping_version": { + "bioinformatics_protocol_software_version": { + "Workflow": "nf-core/viralrecon" + }, + "consensus_sequence_software_version": { + "BCFTOOLS_CONSENSUS": "bcftools" + }, + "dehosting_method_software_version": { + "KRAKEN2_KRAKEN2": "kraken2" + }, + "mapping_software_version": { + "BOWTIE2_ALIGN": "bowtie2" + }, + "preprocessing_software_version": { + "FASTP": "fastp" + }, + "variant_calling_software_version": { + "IVAR_VARIANTS": "ivar" + } + }, + "required_fields_from_lab_json": { + "read_length": "number_of_base_pairs_sequenced", + "schema_name": "schema_name", + "schema_version": "schema_version", + "sequencing_sample_id": "sequencing_sample_id" + } + }, + "ENA_fields": { + 
"ENA_configuration": { + "study_alias": "RELECOV", + "design_description": "Design Description", + "experiment_title": "Project for ENA submission RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_type": "Whole Genome Sequencing", + "study_id": "ERP137164", + "ena_broker_name": "Instituto de Salud Carlos III" + }, + "checklist": "ERC000033", + "templates_path": "", + "tool": { + "tool_name": "ena-upload-cli", + "tool_version": "0.5.3" + }, + "df_study_fields": [ + "study_alias", + "study_title", + "study_type", + "study_abstract" + ], + "df_sample_fields": [ + "sample_alias", + "sample_title", + "collection date", + "geographic location (country and/or sea)", + "sample_description", + "host common name", + "host scientific name", + "host sex", + "scientific_name", + "collector name", + "collecting institution", + "address", + "isolate", + "host subject id", + "host health state", + "authors", + "taxon_id" + ], + "df_run_fields": [ + "run_alias", + "experiment_alias", + "file_name", + "file_format", + "file_checksum", + "collecting institution" + ], + "df_experiment_fields": [ + "experiment_alias", + "experiment_title", + "sample_alias", + "study_alias", + "design_description", + "library_name", + "library_strategy", + "library_source", + "library_selection", + "library_layout", + "library_construction_protocol", + "insert_size", + "platform", + "instrument_model", + "collecting institution" + ], + "ena_fixed_fields": { + "broker_name": "Instituto de Salud Carlos III", + "file_format": "FASTQ", + "study_alias": "RELECOV", + "study_title": "RELECOV Spanish Network for genomics surveillance", + "study_abstract": "RELECOV is a Spanish Network for genomics surveillance", + "insert_size": "0" + }, + "accession_fields": [ + "ena_study_accession", + "ena_sample_accession", + "ena_experiment_accession", + "ena_run_accession" + ], + "additional_formating": { + "sample_description": [ + "host_common_name", + "anatomical_part", + 
"collection_method" + ], + "design_description": [ + "library_layout", + "library_preparation_kit", + "library_selection", + "library_strategy" + ], + "r1_fastq_filepath": [ + "r1_fastq_filepath", + "sequence_file_R1_fastq" + ], + "r2_fastq_filepath": [ + "r2_fastq_filepath", + "sequence_file_R2_fastq" + ], + "experiment_alias": [ + "isolate_sample_id", + "sample_collection_date" + ], + "run_alias": [ + "isolate_sample_id", + "sample_collection_date" + ], + "experiment_title": [ + "sequencing_instrument_model", + "isolate_sample_id" + ], + "file_name": [ + "sequence_file_R1_fastq", + "sequence_file_R2_fastq" + ], + "file_checksum": [ + "fastq_r1_md5", + "fastq_r2_md5" + ] + } + } +} diff --git a/tests/data/sftp_handle/datatest1/SAMPLE1_R1.fastq.gz b/tests/data/sftp_handle/datatest1/SAMPLE1_R1.fastq.gz new file mode 100644 index 00000000..05918c7a Binary files /dev/null and b/tests/data/sftp_handle/datatest1/SAMPLE1_R1.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest1/SAMPLE1_R2.fastq.gz b/tests/data/sftp_handle/datatest1/SAMPLE1_R2.fastq.gz new file mode 100644 index 00000000..7b057504 Binary files /dev/null and b/tests/data/sftp_handle/datatest1/SAMPLE1_R2.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest1/SAMPLE3_R1.fastq.gz b/tests/data/sftp_handle/datatest1/SAMPLE3_R1.fastq.gz new file mode 100644 index 00000000..c7086189 Binary files /dev/null and b/tests/data/sftp_handle/datatest1/SAMPLE3_R1.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest1/SAMPLE3_R2.fastq.gz b/tests/data/sftp_handle/datatest1/SAMPLE3_R2.fastq.gz new file mode 100644 index 00000000..c7086189 Binary files /dev/null and b/tests/data/sftp_handle/datatest1/SAMPLE3_R2.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest1/SAMPLE4_R1.fastq.gz b/tests/data/sftp_handle/datatest1/SAMPLE4_R1.fastq.gz new file mode 100644 index 00000000..afabf834 Binary files /dev/null and b/tests/data/sftp_handle/datatest1/SAMPLE4_R1.fastq.gz differ diff --git 
a/tests/data/sftp_handle/datatest1/SAMPLE4_R2.fastq.gz b/tests/data/sftp_handle/datatest1/SAMPLE4_R2.fastq.gz new file mode 100644 index 00000000..8bd9836b Binary files /dev/null and b/tests/data/sftp_handle/datatest1/SAMPLE4_R2.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest1/SAMPLE5_R1.fastq.gz b/tests/data/sftp_handle/datatest1/SAMPLE5_R1.fastq.gz new file mode 100644 index 00000000..a5ddb4f2 Binary files /dev/null and b/tests/data/sftp_handle/datatest1/SAMPLE5_R1.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest1/SAMPLE5_R2.fastq.gz b/tests/data/sftp_handle/datatest1/SAMPLE5_R2.fastq.gz new file mode 100644 index 00000000..862d57c6 Binary files /dev/null and b/tests/data/sftp_handle/datatest1/SAMPLE5_R2.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest1/SAMPLE6_R1.fastq.gz b/tests/data/sftp_handle/datatest1/SAMPLE6_R1.fastq.gz new file mode 100644 index 00000000..308fb899 Binary files /dev/null and b/tests/data/sftp_handle/datatest1/SAMPLE6_R1.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest1/SAMPLE6_R2.fastq.gz b/tests/data/sftp_handle/datatest1/SAMPLE6_R2.fastq.gz new file mode 100644 index 00000000..58826178 Binary files /dev/null and b/tests/data/sftp_handle/datatest1/SAMPLE6_R2.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest1/Singlesamp.fastq.gz b/tests/data/sftp_handle/datatest1/Singlesamp.fastq.gz new file mode 100644 index 00000000..58086058 Binary files /dev/null and b/tests/data/sftp_handle/datatest1/Singlesamp.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest1/md5sum.txt b/tests/data/sftp_handle/datatest1/md5sum.txt new file mode 100755 index 00000000..af658a0b --- /dev/null +++ b/tests/data/sftp_handle/datatest1/md5sum.txt @@ -0,0 +1,12 @@ +c1551cdf3d5e9849b80a082051907a72 SAMPLE1_R1.fastq.gz +449407f85d7ed4388b7d8a51e28d97a8 SAMPLE1_R2.fastq.gz +206ccbe05fff0701bdcf9f899ca3175d SAMPLE2_R1.fastq.gz +BADBADe23a1caf88c11678f10666b931 SAMPLE3_R1.fastq.gz 
+1be991ce0f9ed442d57c5a359d4c5848 SAMPLE3_R2.fastq.gz +5d838ce4a93bcf12d3031a5f9ffd4acf SAMPLE4_R1.fastq.gz +0a9b0a590b4773e5e4c90c391979a685 SAMPLE4_R2.fastq.gz +d04d32f14ca56f968d07f61d68598639 SAMPLE5_R1.fastq.gz +66a057995ac925e3203ff1afdd5770ec SAMPLE5_R2.fastq.gz +0d55b49614d1c38582b69ccded1b22c4 SAMPLE6_R1.fastq.gz +50afab6751cce654b973bb336cf3b3eb SAMPLE6_R2.fastq.gz +112d5d2ff3845236576732f7c7dc8b06 Singlesamp.fastq.gz diff --git a/tests/data/sftp_handle/datatest1/metadata_validation_test.xlsx b/tests/data/sftp_handle/datatest1/metadata_validation_test.xlsx new file mode 100755 index 00000000..46833153 Binary files /dev/null and b/tests/data/sftp_handle/datatest1/metadata_validation_test.xlsx differ diff --git a/tests/data/sftp_handle/datatest2/SAMPLE20_TEST2_R1.fastq.gz b/tests/data/sftp_handle/datatest2/SAMPLE20_TEST2_R1.fastq.gz new file mode 100644 index 00000000..19a3ee86 Binary files /dev/null and b/tests/data/sftp_handle/datatest2/SAMPLE20_TEST2_R1.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest2/SAMPLE20_TEST2_R2.fastq.gz b/tests/data/sftp_handle/datatest2/SAMPLE20_TEST2_R2.fastq.gz new file mode 100644 index 00000000..d65ea248 Binary files /dev/null and b/tests/data/sftp_handle/datatest2/SAMPLE20_TEST2_R2.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest2/SAMPLE21_TEST2_R1.fastq.gz b/tests/data/sftp_handle/datatest2/SAMPLE21_TEST2_R1.fastq.gz new file mode 100644 index 00000000..ccc1b16d Binary files /dev/null and b/tests/data/sftp_handle/datatest2/SAMPLE21_TEST2_R1.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest2/SAMPLE21_TEST2_R2.fastq.gz b/tests/data/sftp_handle/datatest2/SAMPLE21_TEST2_R2.fastq.gz new file mode 100644 index 00000000..77e689a0 Binary files /dev/null and b/tests/data/sftp_handle/datatest2/SAMPLE21_TEST2_R2.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest2/SAMPLE22_TEST2_R1.fastq.gz b/tests/data/sftp_handle/datatest2/SAMPLE22_TEST2_R1.fastq.gz new file mode 100644 index 
00000000..d5402ebb Binary files /dev/null and b/tests/data/sftp_handle/datatest2/SAMPLE22_TEST2_R1.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest2/SAMPLE22_TEST2_R2.fastq.gz b/tests/data/sftp_handle/datatest2/SAMPLE22_TEST2_R2.fastq.gz new file mode 100644 index 00000000..291bc4ec Binary files /dev/null and b/tests/data/sftp_handle/datatest2/SAMPLE22_TEST2_R2.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest2/SAMPLE23_TEST2_R1.fastq b/tests/data/sftp_handle/datatest2/SAMPLE23_TEST2_R1.fastq new file mode 100644 index 00000000..4e24c85b --- /dev/null +++ b/tests/data/sftp_handle/datatest2/SAMPLE23_TEST2_R1.fastq @@ -0,0 +1,8 @@ +@FS10002216:116:BTR99519-2113:1:1101:6610:1000 1:N:0:6 +GTGCATCAACAGCGGCATGAGAGCAAGCTGTATACACTATGCGAGCAGAAGGGTAGTAGAGAGC ++ +:F,FFF,FFFF:FFFFFFFF,FFFF:FFFF:FFFFFFF:F,F:FFFFFFFF:FF:FFFFFFFF: +@FS10002216:116:BTR99519-2113:1:1101:8110:1000 1:N:0:6 +CTCACATAGTGCATCAACAGCGGCATGAGAGCAAGCTGTATACACTATGCGAGCAGAAGGGTAGTAG ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/tests/data/sftp_handle/datatest2/SAMPLE23_TEST2_R2.fastq b/tests/data/sftp_handle/datatest2/SAMPLE23_TEST2_R2.fastq new file mode 100644 index 00000000..e58215c9 --- /dev/null +++ b/tests/data/sftp_handle/datatest2/SAMPLE23_TEST2_R2.fastq @@ -0,0 +1,8 @@ +@FS10002216:116:BTR99519-2113:1:1101:6610:1000 2:N:0:6 +GCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCTTGCTCTCATGCCGCTGTTGATGCAC ++ +FFFFFFFFFFFFF:FFFFFFF:FFFFFFFFFF::FFFFFFF:FFFFFFFFF,FFFFFF:FF:,F +@FS10002216:116:BTR99519-2113:1:1101:8110:1000 2:N:0:6 +CTACTACCCTTCTGCTCGCATAGTGTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAG ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/tests/data/sftp_handle/datatest2/SAMPLE24_TEST2_R1.fastq.gz b/tests/data/sftp_handle/datatest2/SAMPLE24_TEST2_R1.fastq.gz new file mode 100644 index 00000000..c8bb4a26 Binary files /dev/null and b/tests/data/sftp_handle/datatest2/SAMPLE24_TEST2_R1.fastq.gz differ diff --git 
a/tests/data/sftp_handle/datatest2/SAMPLE24_TEST2_R2.fastq.gz b/tests/data/sftp_handle/datatest2/SAMPLE24_TEST2_R2.fastq.gz new file mode 100644 index 00000000..a20c7852 Binary files /dev/null and b/tests/data/sftp_handle/datatest2/SAMPLE24_TEST2_R2.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest2/SAMPLE25_TEST2_R1.fastq.gz b/tests/data/sftp_handle/datatest2/SAMPLE25_TEST2_R1.fastq.gz new file mode 100644 index 00000000..ce87295c Binary files /dev/null and b/tests/data/sftp_handle/datatest2/SAMPLE25_TEST2_R1.fastq.gz differ diff --git a/tests/data/sftp_handle/datatest2/md5sum.csv b/tests/data/sftp_handle/datatest2/md5sum.csv new file mode 100755 index 00000000..19f1b44e --- /dev/null +++ b/tests/data/sftp_handle/datatest2/md5sum.csv @@ -0,0 +1,18 @@ +Hash,Path +d4570c6ef5bc98a56101e50d1780bbdd,C:\Users\Desktop\SAMPLE20_TEST2_R1.fastq.gz +b04254111704d823e760a339d93e763e,C:\Users\Desktop\SAMPLE20_TEST2_R2.fastq.gz +d35c84db415d4b8467ca5a39cbf10087,C:\Users\Desktop\SAMPLE21_TEST2_R1.fastq.gz +1ff55531a0d480074b1e7b998d64c8c4,C:\Users\Desktop\SAMPLE21_TEST2_R2.fastq.gz +c25da555ffa3d367455f6966fa1747a6,C:\Users\Desktop\SAMPLE22_TEST2_R1.fastq.gz +4411b9fac4a41ff782402a1892458d60,C:\Users\Desktop\SAMPLE22_TEST2_R2.fastq.gz +BADBAD7192f56aa8cdeb447e4adedb9c,C:\Users\Desktop\SAMPLE23_TEST2_R1 +BADBADd67930013d9a6224e97abbbb91,C:\Users\Desktop\SAMPLE23_TEST2_R2.fastq +9a2b776b1e17b6ced3b46cf2a83b8b44,C:\Users\Desktop\SAMPLE24_TEST2_R1.fastq.gz +df0393d5556e4427167eae6b3e5b22c5,C:\Users\Desktop\SAMPLE24_TEST2_R2.fastq.gz +db70941c0c3e72325128b02c4bbe5f28,C:\Users\Desktop\SAMPLE25_TEST2_R2.fastq.gz +BADBAD52513DA2D3F32896B68C4C0ED6,C:\Users\Desktop\SAMPLE26_TEST2_R1.fastq.gz +BADBAD2AEB37068701B070843B974D3A,C:\Users\Desktop\SAMPLE26_TEST2_R2.fastq.gz +18DF32D0C07EBDD01A9742095751CFD2,C:\Users\Desktop\391563_R2_001.fastq.gz +8C0AA0ADCA114BB3E740354C46696B17,C:\Users\Desktop\391867_R1_001.fastq.gz 
+3C9889A441E97A7412C07CB6F2F5CF44,C:\Users\Desktop\391598_R2_001.fastq.gz +1DFB6C6734C75D0DFB0AF00D4A8A96E,C:\Users\Desktop\397770_R1_001.fastq.gz diff --git a/tests/data/sftp_handle/datatest2/test2_metadata_template_v2.0.1.xlsx b/tests/data/sftp_handle/datatest2/test2_metadata_template_v2.0.1.xlsx new file mode 100644 index 00000000..ea564b83 Binary files /dev/null and b/tests/data/sftp_handle/datatest2/test2_metadata_template_v2.0.1.xlsx differ diff --git a/tests/data/sftp_handle/empty_test/SAMPLE20_TEST2_R1.fastq.gz b/tests/data/sftp_handle/empty_test/SAMPLE20_TEST2_R1.fastq.gz new file mode 100755 index 00000000..19a3ee86 Binary files /dev/null and b/tests/data/sftp_handle/empty_test/SAMPLE20_TEST2_R1.fastq.gz differ diff --git a/tests/data/sftp_handle/empty_test/SAMPLE20_TEST2_R2.fastq.gz b/tests/data/sftp_handle/empty_test/SAMPLE20_TEST2_R2.fastq.gz new file mode 100755 index 00000000..d65ea248 Binary files /dev/null and b/tests/data/sftp_handle/empty_test/SAMPLE20_TEST2_R2.fastq.gz differ diff --git a/tests/data/sftp_handle/empty_test/SAMPLE21_TEST2_R1.fastq.gz b/tests/data/sftp_handle/empty_test/SAMPLE21_TEST2_R1.fastq.gz new file mode 100644 index 00000000..acb7aa9e Binary files /dev/null and b/tests/data/sftp_handle/empty_test/SAMPLE21_TEST2_R1.fastq.gz differ diff --git a/tests/data/sftp_handle/empty_test/SAMPLE21_TEST2_R2.fastq.gz b/tests/data/sftp_handle/empty_test/SAMPLE21_TEST2_R2.fastq.gz new file mode 100644 index 00000000..07f05160 Binary files /dev/null and b/tests/data/sftp_handle/empty_test/SAMPLE21_TEST2_R2.fastq.gz differ diff --git a/tests/test_sftp_handle.py b/tests/test_sftp_handle.py new file mode 100755 index 00000000..ba738ddc --- /dev/null +++ b/tests/test_sftp_handle.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python +import os +import sys +import argparse +from relecov_tools.download_manager import DownloadManager + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-d", + "--download_option", + type=str, + 
help="Download option", + ) + parser.add_argument("-t", "--target_folders", type=str, help="Target folders") + args = parser.parse_args() + + val_dict = { + "user": os.environ["TEST_USER"], + "password": os.environ["TEST_PASSWORD"], + "download_option": args.download_option, + "output_location": os.environ["OUTPUT_LOCATION"], + "target_folders": args.target_folders, + } + prepare_remote_test(**val_dict) + + +def prepare_remote_test(**kwargs): + # First clean the repository. + print("Initating sftp module") + download_manager = DownloadManager( + user=kwargs["user"], + passwd=kwargs["password"], + conf_file=None, + download_option=kwargs["download_option"], + output_location=kwargs["output_location"], + target_folders=kwargs["target_folders"], + ) + print("Openning connection to sftp") + download_manager.relecov_sftp.sftp_port = os.environ["TEST_PORT"] + if not download_manager.relecov_sftp.open_connection(): + print("Could not open connection to remote sftp") + sys.exit(1) + remote_folders = download_manager.relecov_sftp.list_remote_folders( + ".", recursive=True + ) + clean_folders = [folder.replace("./", "") for folder in remote_folders] + print("Cleaning folders") + for folder in clean_folders: + if len(folder.split("/")) < 2: + continue + filelist = download_manager.relecov_sftp.get_file_list(folder) + for file in filelist: + download_manager.relecov_sftp.remove_file(file) + print(f"Removing remote folder {folder}") + download_manager.relecov_sftp.remove_dir(folder) + + # Upload the test dataset to the sftp. 
+ data_loc = "tests/data/sftp_handle" + folder_files_dict = {folder: files for folder, _, files in os.walk(data_loc)} + print("Uploading files to sftp...") + for folder, files in folder_files_dict.items(): + if "datatest" in folder: + remote_dir = "COD-test-1" + elif "empty_test" in folder: + remote_dir = "COD-test-2" + else: + continue + base_folder = folder.split("/")[-1] + download_manager.relecov_sftp.make_dir(os.path.join(remote_dir, base_folder)) + print(f"Uploading files from {base_folder}") + for file in files: + remote_path = os.path.join(remote_dir, base_folder, file) + local_path = os.path.join(os.path.abspath(folder), file) + download_manager.relecov_sftp.upload_file(local_path, remote_path) + + download_manager.relecov_sftp.close_connection() + + # Test download_module + def test_download(download_manager): + download_manager.execute_process() + + test_download(download_manager) + + +if __name__ == "__main__": + main() diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..51351738 --- /dev/null +++ b/tox.ini @@ -0,0 +1,6 @@ +## According to black coding style: https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html +[flake8] +max-line-length = 88 + +select = C,E,F,W,B,B950 +extend-ignore = E203, E501, W605