-
Notifications
You must be signed in to change notification settings - Fork 1
/
teltale.cpp
129 lines (101 loc) · 3.55 KB
/
teltale.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
//------------------------------------------------------------------------------------
//
// teltale.cpp - program to compute the fraction of telomeric reads in a BAM file;
// a read is considered telomeric if it contains at least
// MIN_OCCURRENCES consecutive occurrences of the telomere motif;
// reads marked as duplicate or failed QC are excluded from the
// computation
//
// Author: Stephen V. Rice, Ph.D.
//
// Copyright 2017 St. Jude Children's Research Hospital
//
//------------------------------------------------------------------------------------
#include <cstdint>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>
#include "api/BamReader.h"
const std::string VERSION = "teltale 2.0";
const int MIN_OCCURRENCES = 7;
const int MOTIF_LENGTH = 6;
const int TARGET_LENGTH = MIN_OCCURRENCES * MOTIF_LENGTH;
const char *MOTIF_FORWARD = "TTAGGG"; // telomere motif
const char *MOTIF_REVERSE = "CCCTAA"; // reverse complement
char targetForward[TARGET_LENGTH + 1]; // sequence of telomere motifs
char targetReverse[TARGET_LENGTH + 1]; // sequence of reverse complements
uint64_t numReads = 0, numTelomericReads = 0; // counters
//------------------------------------------------------------------------------------
// setupTarget() initializes a target sequence
void setupTarget(char *target, const char *motif)
{
for (int i = 0; i < MIN_OCCURRENCES; i++)
{
std::strcpy(target, motif);
target += MOTIF_LENGTH;
}
}
//------------------------------------------------------------------------------------
// setupTargets() initializes both target sequences
void setupTargets()
{
setupTarget(targetForward, MOTIF_FORWARD);
setupTarget(targetReverse, MOTIF_REVERSE);
}
//------------------------------------------------------------------------------------
// isTelomeric() returns true if either the forward or reverse target can be found
// within the read sequence
bool isTelomeric(const char *readSequence)
{
return (std::strstr(readSequence, targetForward) ||
std::strstr(readSequence, targetReverse));
}
//------------------------------------------------------------------------------------
// readBamFile() reads a BAM file and counts telomeric reads
void readBamFile(const std::string& filename)
{
BamTools::BamReader bamReader;
if (!bamReader.Open(filename))
throw std::runtime_error("unable to open " + filename);
BamTools::BamAlignment alignment;
while (bamReader.GetNextAlignment(alignment))
if (!alignment.IsFailedQC() && !alignment.IsDuplicate())
{
numReads++;
if (isTelomeric(alignment.QueryBases.c_str()))
numTelomericReads++;
}
bamReader.Close();
}
//------------------------------------------------------------------------------------
int main(int argc, char *argv[])
{
if (argc != 2 || argv[1][0] == '-')
{
std::cout << VERSION << std::endl << std::endl;
std::cout << "Usage: " << argv[0]
<< " bamfilename"
<< std::endl;
return 1;
}
try
{
setupTargets();
std::string bamfilename = argv[1];
readBamFile(bamfilename);
double fraction = (numReads == 0 ? 0.0 :
numTelomericReads / static_cast<double>(numReads));
std::cout << numReads
<< "\t" << numTelomericReads
<< "\t" << fraction
<< "\t" << bamfilename
<< std::endl;
}
catch (const std::runtime_error& error)
{
std::cerr << argv[0] << ": " << error.what() << std::endl;
return 1;
}
return 0;
}