-
Notifications
You must be signed in to change notification settings - Fork 0
/
PairwiseSequenceAlignment.R
74 lines (69 loc) · 3.38 KB
/
PairwiseSequenceAlignment.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#FROM BIOINFORMATICS R COOKBOOK PAGES 69-72
#NUCLEOTIDE SEQUENCE ALIGNMENT
if (!requireNamespace('BiocManager', quietly = TRUE))
install.packages('BiocManager')
BiocManager::install('Biostrings')#install Biostrings, a package in R
#source("http://bioconductor.org/biocLite.R") #source is a function that brings
#this url into your workspace makes you access
#biocLite("Biostrings") #biocLite, a function helps you install Biostrings
#old version
library(Biostrings) #library, a function is required to call a function
#align two sequence nucleotide sequences of different sizes first define
#to find similarities, identities, mismatches, match, gaps, one sequence is long
#the other is short; to make align i need to insert gaps
sequence1 <- "GAATTCGGCTA"
class(sequence1)
sequence2 <- "GATTACCTA"
#calculate a score will be made up of Number of matches, number mismatches,
#gaps; each a value,
#gaps, two kind: open a gap, extend the gap that i opened, 4nt is more
#dangerous than 3nt, if coding sequence extending in 3s might one aa,
#frameshift, nonsense, opening gaps cost more than extending, negative
#SCORE=sum(matches, mismatches, gapopen, gapclose) gap penalty
#there are multiple ways to align two sequence that they are homologous
#a score that i will calculate, match, mismatch, gap
# scoring matrix needs to created by the function below, provide scores
#score means a value made up of (total number matches)*match score
#-(total number of mismatches)*mismatch score-(total number of gapopen)
#-(total number gapextend)
myScoringMat <- nucleotideSubstitutionMatrix(match = 1,
mismatch = -1,
baseOnly = FALSE)
myScoringMat
?nucleotideSubstitutionMatrix
gapOpen <- 4 #gap penalty in my score function it will be substracted
gapExtend <- 1
myAlignment <- pairwiseAlignment(sequence1, sequence2,
substitutionMatrix = myScoringMat,
gapOpening = gapOpen,
gapExtension = gapExtend,
type="global",
scoreOnly = FALSE)
myAlignment
myAlignment <- pairwiseAlignment(sequence1, sequence2,
substitutionMatrix = myScoringMat,
gapOpening = gapOpen,
gapExtension = gapExtend,
type="local",
scoreOnly = FALSE)
myAlignment
#PROTEIN SEQUENCE ALIGNMENT
data(package="Biostrings")
data(BLOSUM62)
subMat <- "BLOSUM62"
subMat
sequence1 <- "PAWHEAE"
sequence2 <- "HEAGAWGHE"
myAlignProt <- pairwiseAlignment(sequence1, sequence2,
substitutionMatrix = subMat,
gapOpening = gapOpen,
gapExtension = gapExtend,
type="global",
scoreOnly = FALSE)
myAlignProt
myAlignProt <- pairwiseAlignment(sequence1, sequence2,
substitutionMatrix = subMat,
gapOpening = gapOpen,
gapExtension = gapExtend,
type="local", scoreOnly = FALSE)
myAlignProt