main.jst

# Phoenix Human GRCh38 pipeline
#
# The config variables include an array of dataFiles. Here we
# need to group the data files into read groups, and then group 
# read_groups into samples. This will make it easier to add tasks
# later in the pipeline. The resulting samples dict will look
# like this:
# 
# sampleMergeKey/sampleName:
#   glPrep: RNA
#   glType: RNA
#       ... 
#   read_groups: 
#     C140KACXX_7_TAGCTT:
#       glPrep: RNA
#       glType: RNA
#           ...
#       data_files: 
#         - 
#           fastqPath: .../MMRF_1157_1_BM_CD138pos_T2_TSMRU_K03096_C140KACXX_TAGCTT_L007_R1_001.fastq.gz
#           fastqCode: R1
#           glPrep: RNA
#           glType: RNA
#               ...
#         - 
#           fastqPath: .../MMRF_1157_1_BM_CD138pos_T2_TSMRU_K03096_C140KACXX_TAGCTT_L007_R2_001.fastq.gz
#           fastqCode: R2
#           glPrep: RNA
#           glType: RNA
#               ...

{# debug: setting this to True retains many of the intermediate files. Falls back to False when not supplied. #}
{% set debug = debug | default(False) %}

{# tasks: switches for toggling many of the features. Falls back to an empty dict when not supplied. #}
{% set tasks = tasks | default({}) %}

{# submissionSource: toggles specific tasks based on how they were submitted. Falls back to "unknown" when not supplied. #}
{% set submissionSource = submissionSource | default("unknown") %}

{% from 'utilities/copy_fastq.jst' import copy_fastq with context %}
{% from 'utilities/finalize.jst' import finalize with context %}
{% from 'modules/dna_alignment/main.jst' import dna_alignment with context %}
{% from 'modules/qc/bam_qc_snpsniffer.jst' import snpsniffer_summary with context %}
{% from 'modules/constitutional/main.jst' import constitutional_variant_calling with context %}
{% from 'modules/somatic/main.jst' import somatic_variant_calling with context %}
{% from 'modules/rna/main.jst' import rna_quant with context %}
{% from 'modules/rna/main.jst' import rna_fusion_detection with context %}
{% from 'modules/single_cell/main.jst' import single_cell_rna with context %}
{% from 'modules/CHIP/main.jst' import chip_workflow with context %}
{% from 'modules/tumor_only/main.jst' import tumor_only_variant_calling with context %}

{# samples: sample name -> properties copied from the first data file seen for that name #}
{% set samples = {} %}
{# fastq_validation: basename of every fastq, consumed later by the unique-name check #}
{% set fastq_validation = [] %}

{% for file in dataFiles %}
    {# The sample identifier is sampleMergeKey when the file carries one, otherwise sampleName #}
    {% set name = file.sampleMergeKey if 'sampleMergeKey' in file else file.sampleName %}
    {% set bn = file.fastqPath | basename %}

    {# Annotate the file record in place with its derived, normalised properties #}
    {% do file.update({
        'name': name,
        'basename': bn,
        'gltype': file.glType.lower(),
        'glprep': file.glPrep.lower()
    }) %}
    {% do fastq_validation.append(bn) %}

    {# Files without an explicit subGroup are treated as constitutional #}
    {% do file.setdefault('subGroup', 'constitutional') %}

    {# Only the first file of each sample seeds the sample-level properties (shallow copy) #}
    {% if name not in samples %}
        {% do samples.update({name: dict(file, name=name)}) %}
    {% endif %}
{% endfor %}

{# Attach read groups to their samples: one entry per distinct rgid found in dataFiles #}
{% for rgid, rg_files in dataFiles | groupby('rgid') %}
    {# The first file of the group supplies the read group's properties and its owning sample #}
    {% set lead = rg_files | first %}
    {% set owner = lead.sampleMergeKey if 'sampleMergeKey' in lead else lead.sampleName %}

    {# Create the sample's read_groups container on first use #}
    {% do samples[owner].setdefault('read_groups', {}) %}

    {# Read group = shallow copy of the first file's properties plus the full list of its files #}
    {% do samples[owner]['read_groups'].update({rgid: dict(lead, data_files=rg_files)}) %}
{% endfor %}

{# Calculated properties for exome samples: library code, capture kit code and capture kit file paths #}
{% for sample in samples.values() if sample.gltype == 'exome' %}
    {# assayCode characters 0-1 give the library code, characters 2-4 the capture kit code (both upper-cased) #}
    {% set library_code = sample.assayCode[0:2] | upper %}
    {% set kit_code = sample.assayCode[2:5] | upper %}

    {# Every capture kit file shares this path prefix and differs only in its suffix #}
    {% set kit_prefix %}{{ constants.phoenix.capture_kit_path }}/{{ kit_code }}/{{ kit_code }}_{{ constants.phoenix.genome_subversion_name }}_{{ constants.phoenix.gene_model_name }}{% endset %}

    {% do sample.update({
        'library_code': library_code,
        'capture_kit_code': kit_code,
        'capture_kit': {
            'targets_interval_list': kit_prefix ~ '.targets.interval_list',
            'no_header_targets_interval_list': kit_prefix ~ '.no.header.targets.interval_list',
            'baits_interval_list': kit_prefix ~ '.baits.interval_list',
            'extended_bed': kit_prefix ~ '.extended.bed'
        }
    }) %}
{% endfor %}

{# Calculated properties for RNA samples: strandedness is "<readOrientation>-<rnaStrandType>-<rnaStrandDirection>", lower-cased #}
{% for sample in samples.values() if sample.gltype == 'rna' %}
    {# Each component falls back to its default when the sample does not define it #}
    {% set strand_parts = [
        sample.readOrientation | default('inward') | lower,
        sample.rnaStrandType | default('unstranded') | lower,
        sample.rnaStrandDirection | default('notapplicable') | lower
    ] %}
    {% do sample.update({'strandedness': strand_parts | join('-')}) %}
{% endfor %}

{# When debug is enabled, dump the assembled samples dict as indented JSON through log() at CRITICAL level #}
{% if debug %}
    {{ log(samples|tojson(indent=4), level='CRITICAL') }}
{% endif %}

{# Unique fastq name validation: the number of distinct fastq basenames must equal the
   number of dataFiles entries. The variable referenced inside the check is never set in
   this file; referencing it is presumably meant to abort rendering with a self-describing
   "undefined" error. NOTE(review): this relies on strict undefined handling in the
   template environment - confirm, otherwise duplicates pass silently. #}
{% if dataFiles | length != fastq_validation | unique | list | length  %}
  {{ one_or_more_of_the_fastqs_have_the_same_name }}
{% endif %}

{# Copy fastqs to project dir: copy_fastq is rendered once for every entry in dataFiles #}
{% for fastq in dataFiles %}
    {{- copy_fastq(fastq) }}
{% endfor %}

{# Project level quality control: the snpsniffer summary is only rendered when there is more than one sample #}
{% if samples|length > 1 %}
{{- snpsniffer_summary(samples) }}
{% endif %}

{# Finalize after running all other tasks.
   NOTE(review): finalize() is rendered here, ahead of the module calls below; presumably
   execution order comes from task dependencies rather than position in this file -
   confirm in utilities/finalize.jst. #}
{{- finalize() }}

{# Start of module calls. Each module receives the samples dict, except single_cell_rna which is given the dataFiles list. #}
{{- dna_alignment(samples) }}

{{- constitutional_variant_calling(samples) }}

{{- somatic_variant_calling(samples) }}

{{- rna_quant(samples) }}

{{- rna_fusion_detection(samples) }}

{{- single_cell_rna(dataFiles) }}

{{- chip_workflow(samples) }}

{{- tumor_only_variant_calling(samples) }}