-
Notifications
You must be signed in to change notification settings - Fork 0
/
leukemia.R
58 lines (47 loc) · 1.22 KB
/
leukemia.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Read the leukemia table from CSV; its first column is used as the row
# names further down.
data <- read.csv(
  "C:/Users/shiva/Desktop/shivani college/rgitbt/cancer genomics/leukemia.csv"
)
data
dim(data)

# Keep the value columns only: drop the first column, then drop the 20th of
# the columns that remain.
countdata <- data[, -1]
countdata[, 20] <- NULL
head(countdata)

# Label each row with the entry from the first column of the original table.
rownames(countdata) <- data[, 1]
head(countdata)
# Wrap the count table in an edgeR DGEList.
library(edgeR)
library(limma)
y <- DGEList(countdata)
print(head(y))
# Show which components the DGEList carries.
names(y)

# Counts per million on the original scale.
cpm_data <- cpm(y)
cpm_data

# Log-scale counts per million, computed from the count table and saved to
# disk for later reuse.
log_data <- cpm(countdata, log = TRUE)
log_data
saveRDS(log_data, file = "log.RDS")
View(log_data)

# Matrix copy of the log-scale values; later steps overwrite it in place.
mat <- as.matrix(log_data)
mat
boxplot(log_data) # one box per column, to check the data look normalised
# Heatmap() comes from ComplexHeatmap and colorRamp2() from circlize. Attach
# both here so the calls below work when the script is run top to bottom.
library(ComplexHeatmap)
library(circlize)

# Row-wise z-score: Z = (value - mean) / (standard deviation).
# A row whose values are all equal has sd == 0 and comes out as NaN.
for (i in seq_len(nrow(mat))) {
  vec <- as.numeric(mat[i, ])
  mat[i, ] <- (vec - mean(vec)) / sd(vec)
}
View(mat)

# Heatmap object for the full z-scored matrix, with colour breakpoints at
# -2, 0 and 2. The object is only stored here; it is rendered when printed
# or passed to draw().
hm <- Heatmap(
  mat,
  col = colorRamp2(c(-2, 0, 2), c("green", "white", "yellow"))
)
#z_value=apply(mat,1, function(x) (x - mean(x)) / sd(x))
#z_value
#summary(z_value)
# Select the rows with the highest variance in the log-scale data and build
# a heatmap of their z-scored values.
library(ComplexHeatmap)
library(circlize)

# Per-row variance, largest first.
var_data <- apply(log_data, 1, var)
sort_var <- sort(var_data, decreasing = TRUE)
head(sort_var)

# head() returns at most 50 entries, so a table with fewer than 50 rows does
# not yield NA names that would break the row lookup below.
top_gene <- head(sort_var, 50)
top_gene

# drop = FALSE keeps the result a matrix even if only one row is selected.
mat1 <- mat[names(top_gene), , drop = FALSE]
mat1
names(top_gene)

# Heatmap object for the selected rows, with colour breakpoints at -2, 0
# and 2.
h1 <- Heatmap(
  mat1,
  col = colorRamp2(c(-2, 0, 2), c("green", "white", "yellow"))
)