-
Notifications
You must be signed in to change notification settings - Fork 0
/
reverse_complement.pl
executable file
·87 lines (68 loc) · 1.57 KB
/
reverse_complement.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#! /usr/bin/perl -w
use strict;
# hash used to translate DNA symbols into complements
my %tr_hash = qw (
A T
a t
T A
t a
G C
g c
C G
c g
N N
n n
M K
m k
R Y
r r
W W
w w
S S
s s
K R
k r
Y M
y m
);
my($filename) = @ARGV;
my $usage = "USAGE: ./reverse_complement <fasta_file>
reverse complements sequences in fasta or multi
fasta file containing DNA symbols, 2 place
ambiguity codes and Ns (everything but 3 place
ambiguity codes)\n";
die $usage unless (@ARGV == 1);
my %hash;
read_fasta($filename, \%hash);
# note: this will not maintain the ordering of the original multi fasta file
foreach my $header (keys %hash) {
my $sequence = $hash{$header};
my $reverse_sequence = reverse($sequence);
my $reverse_complement = "";
my $length = length($sequence);
# yeah it's inefficent but it gets the job done
for(my $i=0;$i<$length;$i++) {
my $character = substr($reverse_sequence, $i, 1);
if(!defined($tr_hash{$character})) {
die("unexpected DNA symbol encountered $character... exiting");
}
$reverse_complement .= $tr_hash{$character};
}
print ">${header}_reverse_complement\n";
print $reverse_complement, "\n";
}
sub read_fasta {
my($filename, $hash_ref) = @_;
open(FASTA, $filename) || die "couldnt open fasta file $filename\n";
chomp(my @fasta = <FASTA>);
my $read_name;
foreach my $line (@fasta) {
if($line =~ /^>(.*)/) {
$read_name = $1;
}
else {
$hash_ref->{$read_name} .= $line;
}
}
close(FASTA);
}