-
Notifications
You must be signed in to change notification settings - Fork 0
/
steamer-methyl.wdl
157 lines (152 loc) · 4.1 KB
/
steamer-methyl.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
version 1.0
# Entry-point workflow. Wires four tasks together:
#   sum                 -> converts the per-file sizes into one disk-size figure
#   mangle_bed          -> rewrites the TE bed file
#   generate_dataset    -> builds the .mcds dataset and tars it
#   calculate_fractions -> unpacks the tarball and emits the CH / CG matrices
workflow run_full {
    input {
        # Reference files
        File fullin_TEs
        File chrom_size

        # Per-cell table columns (parallel arrays)
        Array[String] file_id
        Array[File] allcs
        Array[Int] size_col
        Array[String] fullin_sample_name_column

        # Resources and QC
        Int memory_GB
        Int nCPUs
        Int threshold_QC
    }

    parameter_meta {
        fullin_TEs: "Path to bed file containing TEs"
        file_id: "terra table column containing file IDs"
        allcs: "terra table column containing location of 'allc_*.tsv.gz'"
        size_col: "terra column containing sizes of allc files"
        chrom_size: "Path to chrom.sizes obtained by UCSC fetchChromSizes.sh"
        fullin_sample_name_column: "name of sample"
        memory_GB: "memory, in gigabytes"
        nCPUs: "CPUs for parallel execution"
        threshold_QC: "Threshold for discarding methylation value"
    }

    # Only the first entry of the sample-name column is used; an empty column
    # makes this index expression fail.
    String sample_label = fullin_sample_name_column[0]

    # Collapse the per-file sizes into a single value used for disk provisioning.
    call sum {
        input:
            sizes = size_col
    }

    call mangle_bed {
        input:
            bed = fullin_TEs,
            mem = memory_GB
    }

    call generate_dataset {
        input:
            fileIDs = file_id,
            allc_list = allcs,
            SampleName = sample_label,
            nCPU = nCPUs,
            DiskSize = sum.size,
            mangledTEs = mangle_bed.bed_mangled,
            chromSize = chrom_size,
            mem = memory_GB
    }

    call calculate_fractions {
        input:
            tempzarr = generate_dataset.zarrTar,
            SampleName = sample_label,
            mem = memory_GB,
            threshold = threshold_QC
    }

    output {
        File mtx_ch = calculate_fractions.count_mat_ch
        File mtx_cg = calculate_fractions.count_mat_cg
    }
}
# Runs `run_steamer mangle-bed-file-ids` on the supplied bed file and exposes the
# file it writes (TEs_mangled.bed) as the task output.
#   bed: input bed file handed to run_steamer
#   mem: memory request in gigabytes
task mangle_bed {
    input {
        File bed
        Int mem
    }

    # Memory request in the "<n>GB" form, e.g. mem = 16 -> "16GB".
    String memory_request = mem + "GB"

    command <<<
        run_steamer mangle-bed-file-ids ~{bed} TEs_mangled.bed
    >>>

    output {
        File bed_mangled = "TEs_mangled.bed"
    }

    runtime {
        docker: "quay.io/welch-lab/steamer:latest"
        memory: memory_request
    }
}
# Builds an MCDS dataset with `allcools generate-dataset` and packs the resulting
# <SampleName>.mcds directory into tempzarr.tar.
#   fileIDs    : one ID per allc file; first column of the allc table
#   allc_list  : allc files; second column of the allc table
#   SampleName : prefix of the .mcds output directory
#   nCPU       : CPUs requested; allcools is given 75% of them (rounded up)
#   DiskSize   : size estimate for the inputs; 375 is added on top for the disk request
#   mangledTEs : bed file passed to --regions TEs
#   chromSize  : file passed to --chrom_size_path
#   mem        : memory request in gigabytes
task generate_dataset {
    input {
        Array[String] fileIDs
        Array[File] allc_list
        String SampleName
        Int nCPU
        Int DiskSize
        File mangledTEs
        File chromSize
        Int mem
    }

    parameter_meta {
        allc_list: {
            description: "Terra table column containing location of 'allc_*.tsv.gz'"
            #localization_optional: true
        }
    }

    # Leave a quarter of the requested cores free of allcools workers.
    Int nCPUscale = ceil(nCPU*0.75)
    # Input size estimate plus a fixed 375 of headroom.
    # NOTE(review): 375 presumably matches one local-SSD unit - confirm.
    Int disk = DiskSize + 375
    String disk_string = "local-disk " + disk + " LOCAL"

    # Two-column table (file ID, allc path), one row per cell, written as TSV.
    Array[Array[String]] initial_paired = [fileIDs, allc_list]
    Array[Array[String]] tsvPaired = transpose(initial_paired)
    File allc_table = write_tsv(tsvPaired)

    command <<<
        # Abort on the first failing step. Previously the steps were chained with
        # ';', so a failed sed/allcools run was masked by a successful tar.
        set -euo pipefail

        # Disabled requester-pays / remote-read environment, kept for reference:
        #CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt \
        #GCS_REQUESTER_PAYS_PROJECT=$(curl -s "http://metadata.google.internal/computeMetadata/v1/project/project-id" -H "Metadata-Flavor: Google") \
        #GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token) \

        # Rewrite gs:// URLs in the table to paths under /cromwell_root/.
        # NOTE(review): relies on the backend localizing inputs under /cromwell_root - confirm.
        sed -i 's;gs://;/cromwell_root/;g' ~{allc_table}

        allcools generate-dataset --allc_table ~{allc_table} --output_path="~{SampleName}.mcds" --obs_dim cell \
            --cpu ~{nCPUscale} --chunk 50 --regions TEs ~{mangledTEs} --chrom_size_path ~{chromSize} \
            --quantifiers TEs count CGN,CHN

        tar -cf tempzarr.tar "~{SampleName}.mcds"
    >>>

    output {
        #this should be a Directory but cromwell doesn't support WDL 1.2
        File zarrTar = "tempzarr.tar"
    }

    runtime {
        docker: "quay.io/welch-lab/steamer:latest"
        memory: mem + "GB"
        cpu: nCPU
        disks: disk_string
    }
}
# Unpacks the tarred .mcds dataset and runs `run_steamer mc-fractions` on it.
#   tempzarr   : tar archive expected to contain <SampleName>.mcds
#   SampleName : prefix of the .mcds directory and of the output matrices
#   mem        : memory request in gigabytes
#   threshold  : value passed to run_steamer as its second positional argument
# Outputs <SampleName>.mcds.ch.mtx and <SampleName>.mcds.cg.mtx.
# NOTE(review): these files are presumably written by run_steamer - confirm.
task calculate_fractions {
    input {
        File tempzarr
        String SampleName
        Int mem
        Int threshold
    }

    command <<<
        # Stop if extraction fails, so run_steamer never sees a missing or
        # partially extracted dataset (the steps used to be joined with ';').
        set -euo pipefail

        tar -xf ~{tempzarr}
        run_steamer mc-fractions "~{SampleName}.mcds" ~{threshold}
    >>>

    output {
        File count_mat_ch = SampleName + ".mcds.ch.mtx"
        File count_mat_cg = SampleName + ".mcds.cg.mtx"
    }

    runtime {
        docker: "quay.io/welch-lab/steamer:latest"
        memory: mem + "GB"
    }
}
# Adds up the given sizes and reports the total in units of 2^30, rounded UP.
#   sizes: per-file sizes
# NOTE(review): sizes are presumably bytes, making the result GiB - confirm.
# The previous expression used a plain right shift, which rounds down; the WDL
# ceil() on the result therefore had nothing to round and totals under one unit
# came back as 0.
task sum {
    input {
        Array[Int] sizes
    }

    command <<<
        set -euo pipefail
        python3 <<CODE
        print((sum([~{sep=',' sizes}]) + (1 << 30) - 1) >> 30)
        CODE
    >>>

    output {
        # Python already prints a whole number; ceil() converts the parsed Float to Int.
        Int size = ceil(read_float(stdout()))
    }

    runtime {
        docker: "python:3.12"
    }
}