forked from deweylab/RSEM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SingleReadQ.h
97 lines (79 loc) · 2.61 KB
/
SingleReadQ.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#ifndef SINGLEREADQ
#define SINGLEREADQ
#include<cmath>
#include<cstdio>
#include<cstdlib>
#include<cassert>
#include<string>
#include<iostream>
#include "utils.h"
#include "Read.h"
// A single (unpaired) read that carries a quality string alongside its
// sequence. `name` and `low_quality` are not declared here; presumably they
// are members inherited from Read (confirm in Read.h).
class SingleReadQ : public Read {
public:
	// Builds an empty read: empty sequence, empty quality string, length 0.
	SingleReadQ() : len(0), readseq(), qscore() {}

	// Builds a read from its parts; the length is taken from the sequence.
	SingleReadQ(const std::string& name, const std::string& readseq, const std::string& qscore)
		: len(static_cast<int>(readseq.length())), readseq(readseq), qscore(qscore) {
		this->name = name;
	}

	// Parses one record from argv[0]; `flags` selects which fields are kept.
	bool read(int argc, std::istream* argv[], int flags = 7);

	// Emits this read as one record to argv[0].
	void write(int argc, std::ostream* argv[]);

	// Length of the read as recorded when it was constructed or parsed.
	int getReadLength() const { return len; }

	// Read sequence (may be empty if it was not kept when reading).
	const std::string& getReadSeq() const { return readseq; }

	// Quality string (may be empty if it was not kept when reading).
	const std::string& getQScore() const { return qscore; }

	// Calculate if this read is low quality. Without calling this function,
	// isLowQuality() will always be false.
	void calc_lq(bool, int);

private:
	int len;              // read length
	std::string readseq;  // read sequence
	std::string qscore;   // quality scores
};
// Reads one four-line FASTQ record from *argv[0] into this object.
//
// argc  : must be 1 (asserted); only argv[0] is used.
// flags : bit mask selecting what is stored:
//           1 -> keep the sequence line in readseq
//           2 -> keep the quality line in qscore
//           4 -> keep the read name (header line without the leading '@')
//         Fields whose bit is clear are set to the empty string. `len` is
//         always set from the sequence line, even when the sequence itself
//         is discarded.
//
// Returns true when all four lines were read. Returns false as soon as any
// getline fails; in that case members assigned before the failure keep their
// new values. If the first line does not start with '@' or the third line
// does not start with '+', an error is printed to stderr and the process
// exits with status -1.
//
// Declared inline because this definition lives in a header: without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline bool SingleReadQ::read(int argc, std::istream* argv[], int flags) {
	assert(argc == 1);
	std::istream& in = *argv[0];
	std::string line;

	// Line 1: "@<name>"
	if (!std::getline(in, line)) return false;
	if (line[0] != '@') { fprintf(stderr, "Read file does not look like a FASTQ file!\n"); exit(-1); }
	name = "";
	if (flags & 4) { name = line.substr(1); }

	// Line 2: sequence. Record the length before possibly dropping the text.
	if (!std::getline(in, readseq)) return false;
	len = readseq.length();
	if (!(flags & 1)) { readseq = ""; }

	// Line 3: separator, must begin with '+'.
	if (!std::getline(in, line)) return false;
	if (line[0] != '+') { fprintf(stderr, "Read file does not look like a FASTQ file!\n"); exit(-1); }

	// Line 4: quality string.
	if (!std::getline(in, qscore)) return false;
	if (!(flags & 2)) { qscore = ""; }

	return true;
}
// Writes this read to *argv[0] as a four-line FASTQ record:
//   @<name>
//   <readseq>
//   +
//   <qscore>
//
// argc must be 1 (asserted); only argv[0] is used.
//
// The stream is flushed once, after the complete record, instead of after
// every line: the bytes written are the same, but the per-line flushes that
// std::endl forces are avoided.
//
// Declared inline because this definition lives in a header: without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline void SingleReadQ::write(int argc, std::ostream* argv[]) {
	assert(argc == 1);
	std::ostream& out = *argv[0];
	out << "@" << name << '\n' << readseq << '\n' << "+\n" << qscore << std::endl;
}
// Calculate if this read is low quality and store the verdict in low_quality.
//
// hasPolyA : when false, only the length test below is applied.
// seedLen  : a read shorter than this is always marked low quality.
//
// Rules, in order:
//   1. len < seedLen            -> low quality.
//   2. !hasPolyA                -> not low quality.
//   3. Otherwise count 'A' and 'T' over the whole read, plus the 'A's within
//      the first OLEN positions (numAO) and the 'T's within the last OLEN
//      positions (numTO). If the read is dominated by 'A' (numA reaches
//      threshold_1) it is low quality when numAO reaches threshold_2;
//      else if it is dominated by 'T', the same test is applied to numTO.
//
// Requires a non-empty readseq when step 3 is reached (asserted).
// OLEN is not defined in this header; presumably it comes from utils.h.
//
// Declared inline because this definition lives in a header: without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline void SingleReadQ::calc_lq(bool hasPolyA, int seedLen) {
	low_quality = false;
	if (len < seedLen) { low_quality = true; return; }

	// if no polyA, no need to do the following calculation
	if (!hasPolyA) return;

	assert(readseq != "");

	int numA = 0, numT = 0;    // occurrences over the whole read
	int numAO = 0, numTO = 0;  // numAO : number of A in overlap seed region (first OLEN bases);
	                           // numTO : number of T in the last OLEN bases

	// Count a read as dominated by one base once it reaches this many occurrences.
	const int threshold_1 = int(0.9 * len - 1.5 * sqrt(len * 1.0) + 0.5);
	// Minimum count inside the OLEN-long window for the read to be rejected.
	const int threshold_2 = (OLEN - 1) / 2 + 1;

	for (int i = 0; i < len; i++) {
		if (readseq[i] == 'A') {
			++numA;
			if (i < OLEN) ++numAO;
		}
		else if (readseq[i] == 'T') {
			++numT;
			if (i >= len - OLEN) ++numTO;
		}
	}

	// low_quality is already false here, so only the two dominated cases
	// need to assign it. The 'A' case takes precedence over the 'T' case.
	if (numA >= threshold_1) {
		low_quality = (numAO >= threshold_2);
	}
	else if (numT >= threshold_1) {
		low_quality = (numTO >= threshold_2);
	}
}
#endif