-
Notifications
You must be signed in to change notification settings - Fork 1
/
pipeline.R
78 lines (66 loc) · 3.48 KB
/
pipeline.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
library(devtools)
install_git("git://github.com/friendsofstrandseq/MaRyam.git", branch = "master")
library("MaRyam")
#!/usr/bin/env Rscript
args = commandArgs(trailingOnly=TRUE)
#print(args)
#print(class(args))
args = as.data.frame(strsplit(args, split = "="), stringsAsFactors = F)
#print(args)
binRCfile = args[2,match("binRCfile", as.character(args[1,]))]
BRfile = args[2,match("BRfile", as.character(args[1,]))]
infoFile = args[2,match("infoFile", as.character(args[1,]))]
stateFile = args[2,match("stateFile", as.character(args[1,]))]
outputDir = args[2,match("outputDir", as.character(args[1,]))]
bin.size = as.numeric(args[2,match("bin.size", as.character(args[1,]))])
K = as.numeric(args[2,match("K", as.character(args[1,]))])
maximumCN = as.numeric(args[2,match("maximumCN", as.character(args[1,]))])
haplotypeMode=F
if (any(as.character(args[1,])=="haplotypeMode")){haplotypeMode = T}
print(paste("binRCfile =", binRCfile))
print(paste("BRfile =", BRfile))
print(paste("infoFile =", infoFile))
print(paste("stateFile =", stateFile))
print(paste("outputDir =", outputDir))
print(paste("bin.size =", bin.size))
print(paste("K =", K))
print(paste("maximumCN =", maximumCN))
p = data.table::fread(infoFile)$nb_p[1]
l <- changeRCformat(binRCfile, outputDir, p = p)
cellNames <- l$cellNames
initial.binRC <- l$binRC
r = matrix(rep(l$r, K), ncol = length(cellNames), byrow = T)
# report a warning if some cells are totally removed (because of having SCEs in all chrs)
if (length(r) != K*length(cellNames))
{
message("Warning! The dimension of dispersion parameters and the number
of cells and chromosomes don't match.
Some cells may have been removed completely!")
}
f <- factor(initial.binRC$chromosome, levels=unique(initial.binRC$chromosome))
binRC <- split(initial.binRC, f)
cellTypes = changeCellTypesFormat(stateFile, cellNames)
#NBparams = changeNBparamsFormat(infoFile, K, cellNames)
#p = NBparams[[1]]
#r = NBparams[[2]]
segmentsCounts = getSegReadCounts(binRC, BRfile, K, bin.size)
SVcalling_wrapperFunc(bin.size, K, maximumCN, segmentsCounts, r, p, cellTypes, outputDir, haplotypeMode = haplotypeMode)
dir = "/home/maryam/research/HDhackathon/data/test_small_data/"
#Rscript pipeline.R binRCfile="/home/maryam/research/HDhackathon/data/test_small_data/D2Rfb.100000_fixed.txt.gz" BRfile="/home/maryam/research/HDhackathon/data/test_small_data/few_brs.txt" infoFile="/home/maryam/research/HDhackathon/data/test_small_data/D2Rfb.100000_fixed.info" stateFile="/home/maryam/research/HDhackathon/data/test_small_data/D2Rfb.final.txt" outputDir="/home/maryam/research/HDhackathon/data/test_small_data/" bin.size=100000 K=24 maximumCN=4
binRCfile="/home/maryam/research/HDhackathon/data/test_small_data/D2Rfb.100000_fixed.txt.gz"
BRfile="/home/maryam/research/HDhackathon/data/test_small_data/few_brs.txt"
infoFile="/home/maryam/research/HDhackathon/data/test_small_data/D2Rfb.100000_fixed.info"
stateFile="/home/maryam/research/HDhackathon/data/test_small_data/D2Rfb.final.txt"
outputDir="/home/maryam/research/HDhackathon/data/test_small_data/"
bin.size=100000
K=24
maximumCN=4
haplotypeMode=F
binRCfile="/home/maryam/research/hackathons/data/HG00514/HG00514.500000_fixed.txt.gz"
BRfile="/home/maryam/research/hackathons/data/HG00514/HG00514.500000_fixed.many.txt"
infoFile="/home/maryam/research/hackathons/data/HG00514/HG00514.500000_fixed.info"
stateFile="/home/maryam/research/hackathons/data/HG00514/HG00514.final.txt"
outputDir="/home/maryam/research/hackathons/data/HG00514/"
bin.size=500000
K=22
maximumCN=4