-
Notifications
You must be signed in to change notification settings - Fork 0
/
my_adegenet.r
132 lines (107 loc) · 4.11 KB
/
my_adegenet.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
library(adegenet)
library(ape)
library(hierfstat)
library(pegas)
library(poppr)
#Imports
names(d) <- gsub("\\.", "_", names(d)) #remove dots
x<-df2genind(d[4:38], ploidy=2, ind.names=d$short_name, sep = "", NA.char="NN")
#x<-df2genind(d[3:126], ploidy=2, ind.names=d$ind, pop=d$pop, sep=":", NA.char="NA")
#x <- read.genetix(file="C:/Users/alexa/Matis/data/David_17_03_20/AlexPOP2GTX-GENETIX.gtx")
x<-df2genind(d[3:11], ploidy=2, ind.names=d$ind, pop=d$pop, sep="", NA.char="N")
#Presence/absence @type not codom but PA
d <- data.frame(lapply(d, function(x)
{gsub("NN", "0", x)})) #NN into 0
d[4:21] <- data.frame(lapply(d[4:21], function(x)
{as.integer(x!="0")}))
x<-df2genind(d[4:5], ploidy=1, ind.names=d$short_name, type = "PA")
pca1 <- dudi.pca(x, scale = FALSE, scannf = FALSE)
plot(pca1$li, cex = 1)
abline(h=0,v=0,col="grey",lty=2)
s.arrow(pca1$c1*.2, add.plot=TRUE)
#Basics
informloci(x) -> x #uninformative loci, drop them
summary(x) -> toto
names(toto)
#x <- x[,loc=c('AX_87532311', 'AX_96317592', 'AX_87665320', 'AX_87220302', 'AX_87268437', 'AX_87570456')]
#select loci
plot(toto$n.by.pop, toto$pop.n.all, xlab="Colonies sample size",
ylab="Number of alleles",main="Alleles numbers and sample sizes",
type="n")
text(toto$n.by.pop,toto$pop.n.all,lab=names(toto$n.by.pop))
barplot(toto$loc.n.all, ylab="Number of alleles",
main="Number of alleles per locus")
barplot(toto$Hexp-toto$Hobs, main="Heterozygosity: expected-observed",
ylab="Hexp - Hobs")
barplot(toto$n.by.pop, main="Sample sizes per population",
ylab="Number of genotypes",las=3)
#Is mean observed H significantly lower than mean expected H?
bartlett.test(list(toto$Hexp,toto$Hobs))
#reject homogeneity if p < .05
t.test(toto$Hexp,toto$Hobs,pair=T,var.equal=TRUE,alter="greater")
#significative difference if p < .05
#HWT
x.hwt <- hw.test(x, B=0) #0 for parametric otherwise second test Monte-Carlos
x.hwt #each locus
#reject equilibrium if Pr < .05
#F statistics
fstat(x) #overall
#Fst (pop/total), Fit (Ind/total), Fis(ind/pop)
Fst(as.loci(x))
#Are these values significant?
Gtest <- gstat.randtest(x, nsim = 99)
Gtest
plot(Gtest)
matFst <- pairwise.fst(x)
matFst
is.euclid(matFst)
#PCA requires Euclideanity
#PCA
sum(is.na(x$tab)) #nb NA
X <- tab(x, freq = TRUE, NA.method = "mean")
#tab is allele counts/frequencies
#replace NAs with mean allele frequency
pca1 <- dudi.pca(X, scale = FALSE, scannf = FALSE, nf = 3)
#disable scaling bc all alleles vary on same scale
#remove two last args for interactive selection nb axes
barplot(pca1$eig[1:50], main = "PCA eigenvalues", col = heat.colors(50))
names (pca1)
#principal components of analysis ; synthetic variables summarizing genetic diversity
s.label(pca1$li)
title("PCA of X dataset\naxes 1-2")
add.scatter.eig(pca1$eig[1:20], 3,1,2)
s.class(pca1$li, pop(x))
title("PCA of X dataset\naxes 1-3")
add.scatter.eig(pca1$eig[1:20], 3,1,2)
#distance and color represent genetic diff
colorplot(pca1$li, pca1$li, transp=TRUE, cex=3, xlab="PC 1", ylab="PC 2")
title("PCA of x dataset\naxes 1-2")
abline(v=0,h=0,col="grey", lty=2)
colorplot(pca1$li[c(1,3)], pca1$li, transp=TRUE, cex=3, xlab="PC 1", ylab="PC 3")
title("PCA of x dataset\naxes 1-3")
abline(v=0,h=0,col="grey", lty=2)
#$c1 allele loadings
#DAPC
#find clusters
grp <- find.clusters(x)
#keep maximum PCs to keep all information
#elbow matches nb clusters
names(grp)
table(pop(x), grp$grp) #access actual groups with pop
table.value(table(pop(x), grp$grp), col.lab=paste("inf", 1:2),
row.lab=paste("ori", 1:2))
#browser version but first change data to genetix format
adegenetServer(what=c("DAPC"))
#manual
#uses pre-defined grps!
dapc1 <- dapc(x)
#retain less PCs is better
#ok to retain all discriminants if few
scatter (dapc1)
scatter (dapc1, posi.da="bottomright", scree.pca=TRUE,
posi.pca="bottomleft")
#contribution of alleles
contrib <- loadingplot(dapc1$var.contr, threshold = 0.015)
#grp membership #prior/posterior
assignplot(dapc1, subset =)
#blue cross is prior pre-defined attribution