From 556e4c7fe5191d264d3ecbb44d0497d08528164d Mon Sep 17 00:00:00 2001
From: MattWellie
Date: Mon, 14 Oct 2024 22:43:15 +1000
Subject: [PATCH] remove/update more

---
 nextflow.config |  23 ++++++
 talos.nf        | 165 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 188 insertions(+)
 create mode 100644 nextflow.config
 create mode 100644 talos.nf

diff --git a/nextflow.config b/nextflow.config
new file mode 100644
index 00000000..edd8ed61
--- /dev/null
+++ b/nextflow.config
@@ -0,0 +1,23 @@
+/*
+ * Pipeline parameters
+ */
+
+// Execution environment setup
+// NOTE(review): projectDir is an implicit Nextflow variable (the main script's
+// directory) and cannot be reassigned; the paths below use the implicit value.
+params.projectDir = "/workspace/gitpod/hello-nextflow"
+
+// Set the cohort name for this whole run
+params.cohort = "COHORT"
+
+// Primary input (samplesheet in CSV format with ID and file path, one sample per line)
+params.reads_bam = "${projectDir}/data/samplesheet.csv"
+
+// Accessory files
+params.genome_reference = "${projectDir}/data/ref/ref.fasta"
+params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai"
+params.genome_reference_dict = "${projectDir}/data/ref/ref.dict"
+params.calling_intervals = "${projectDir}/data/ref/intervals.bed"
+
+// Base name for final output file
+params.cohort_name = "family_trio"
\ No newline at end of file
diff --git a/talos.nf b/talos.nf
new file mode 100644
index 00000000..ac11d616
--- /dev/null
+++ b/talos.nf
@@ -0,0 +1,165 @@
+
+/*
+ * Pipeline parameters - this sets the defaults
+ */
+
+// Optional checkpoint path; NO_FILE is a sentinel meaning "no checkpoint given"
+params.checkpoint = "$projectDir/assets/NO_FILE"
+
+// Pedigree and HPO ontology inputs (TODO(review): confirm real default locations)
+params.pedigree = "$projectDir/data/pedigree.ped"
+params.hpo = "$projectDir/data/hpo_terms.obo"
+
+params.greeting = ["Bonjour", "le", "monde!"]
+
+// Primary input for the demo greeting workflow (one greeting per line)
+params.input_file = "data/greetings.txt"
+
+
+process GeneratePanelData {
+    // takes the HPO-embellished pedigree and matches panels to participants
+    publishDir 'results', mode: 'copy'
+
+    input:
+        // the pedigree file
+        path pedigree
+        // the HPO obo/ontology file
+        path hpo
+
+    output:
+        path "${params.cohort}_hpo_panel_data.json"
+
+    // fixed: command was split across two lines, leaving --out_path without a value
+    """
+    GeneratePanelData -i ${pedigree} --hpo ${hpo} --out_path ${params.cohort}_hpo_panel_data.json
+    """
+}
+
+process QueryPanelapp {
+    // uses matched panels to query the PanelApp API
+    publishDir 'results', mode: 'copy'
+
+    input:
+        path hpo_panel_matches
+
+    output:
+        path "${params.cohort}_panelapp_results.json"
+
+    // the command (fixed: referenced undefined `panel_data`; use the declared input)
+    """
+    QueryPanelapp --panels ${hpo_panel_matches} --out_path ${params.cohort}_panelapp_results.json
+    """
+}
+
+process FindGeneSymbolMap {
+    // finds corresponding gene symbols for the panelapp results
+    publishDir 'results', mode: 'copy'
+
+    input:
+        path panelapp_data
+
+    output:
+        path "${params.cohort}_symbol_to_ensg.json"
+
+    // the command (fixed: referenced undefined `panel_data`; use the declared input)
+    """
+    FindGeneSymbolMap --panelapp ${panelapp_data} --out_path ${params.cohort}_symbol_to_ensg.json
+    """
+}
+
+process RunHailFiltering {
+    // runs the hail small-variant filtering
+    publishDir 'results', mode: 'copy'
+
+    input:
+        path matrix_table
+        path panelapp_data
+        path pedigree
+        path clinvar
+        path pm5
+        path checkpoint
+
+    output:
+        tuple path("${params.cohort}_small_variants.vcf.bgz"), path("${params.cohort}_small_variants.vcf.bgz.tbi")
+
+    // only pass --checkpoint if we were given a real path (NO_FILE is the sentinel)
+    script:
+    def checkpoint_arg = checkpoint.name != 'NO_FILE' ? "--checkpoint ${checkpoint}" : ''
+    """
+    RunHailFiltering \
+        --mt ${matrix_table} \
+        --panelapp ${panelapp_data} \
+        --pedigree ${pedigree} \
+        --vcf_out ${params.cohort}_small_variants.vcf.bgz \
+        --clinvar ${clinvar} \
+        --pm5 ${pm5} ${checkpoint_arg}
+    """
+}
+
+process RunHailFilteringSV {
+    // runs the hail structural-variant filtering
+    // outputs use a distinct _sv_variants name so they cannot clobber the small-variant results
+    publishDir 'results', mode: 'copy'
+
+    input:
+        path matrix_table
+        path panelapp_data
+        path pedigree
+
+    output:
+        tuple path("${params.cohort}_sv_variants.vcf.bgz"), path("${params.cohort}_sv_variants.vcf.bgz.tbi")
+
+    """
+    RunHailFilteringSV \
+        --mt ${matrix_table} \
+        --panelapp ${panelapp_data} \
+        --pedigree ${pedigree} \
+        --vcf_out ${params.cohort}_sv_variants.vcf.bgz
+    """
+}
+
+process sayHello {
+    // writes each greeting into its own file under "results"
+    publishDir 'results', mode: 'copy'
+
+    input:
+        val greeting
+
+    output:
+        path "${greeting}-output.txt"
+
+    """
+    echo '$greeting' > '$greeting-output.txt'
+    """
+}
+
+/*
+ * Use a text replace utility to convert the greeting to uppercase
+ */
+process convertToUpper {
+
+    publishDir 'results', mode: 'copy'
+
+    input:
+        path input_file
+
+    output:
+        path "UPPER-${input_file}"
+
+    """
+    cat '$input_file' | tr '[a-z]' '[A-Z]' > UPPER-${input_file}
+    """
+}
+
+workflow {
+
+    GeneratePanelData(params.pedigree, params.hpo)
+
+    // create a channel for inputs from a file
+    greeting_ch = Channel.fromPath(params.input_file).splitText() { it.trim() }
+
+    sayHello(greeting_ch)
+
+    // convert the greeting to uppercase
+    convertToUpper(sayHello.out)
+}
\ No newline at end of file