Skip to content

Commit

Permalink
fixes #40
Browse files Browse the repository at this point in the history
  • Loading branch information
marekkokot committed May 10, 2018
1 parent 6a2e2dc commit d1dfea4
Show file tree
Hide file tree
Showing 6 changed files with 240 additions and 189 deletions.
128 changes: 73 additions & 55 deletions kmc_tools/fastq_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,23 +166,17 @@ bool CFastqReader::GetPart(uchar *&_part, uint64 &_size)

// Read data
if(mode == m_plain)
readed = fread(part+part_filled, 1, part_size, in);
readed = fread(part+part_filled, 1, part_size - part_filled, in);
else if(mode == m_gzip)
readed = gzread(in_gzip, part+part_filled, (int) part_size);
readed = gzread(in_gzip, part+part_filled, (int) (part_size - part_filled));
else if(mode == m_bzip2)
readed = BZ2_bzRead(&bzerror, in_bzip2, part+part_filled, (int) part_size);
readed = BZ2_bzRead(&bzerror, in_bzip2, part+part_filled, (int) (part_size - part_filled));
else
readed = 0; // Never should be here

int64 total_filled = part_filled + readed;
int64 i;

if(part_filled >= OVERHEAD_SIZE)
{
cerr << "Error: Wrong input file!\n";
exit(1);
}

if(IsEof())
{
_part = part;
Expand All @@ -196,70 +190,83 @@ bool CFastqReader::GetPart(uchar *&_part, uint64 &_size)
if(file_type == CFilteringParams::file_type::fasta) // FASTA files
{
// Looking for a FASTA record at the end of the area
int64 line_start[3];
int32 j;

i = total_filled - OVERHEAD_SIZE / 2;
for(j = 0; j < 3; ++j)
i = total_filled - 1;
int64 start, end;
int64 line_start[4], line_end[4];
int readed_lines = 0;
bool success = false;
int k;
while (i >= 0 && readed_lines < 4)
{
if(!SkipNextEOL(part, i, total_filled))
break;
line_start[j] = i;
}
GetFullLineFromEnd(start, end, part, i);

_part = part;
if(j < 3)
_size = 0;
else
{
int k;
for(k = 0; k < 2; ++k)
if(part[line_start[k]+0] == '>')
line_start[4 - readed_lines - 1] = start;
line_end[4 - readed_lines - 1] = end;
++readed_lines;

if (readed_lines >= 2)
{
k = 4 - readed_lines;
if (part[line_start[k]] == '>')
{
success = true;
break;

if(k == 2)
_size = 0;
else
_size = line_start[k];
}
}
else // FASTQ file
{
}
}
}
// Looking for a FASTQ record at the end of the area
int64 line_start[9];
int32 j;

i = total_filled - OVERHEAD_SIZE / 2;
for(j = 0; j < 9; ++j)
if (!success)
{
if(!SkipNextEOL(part, i, total_filled))
break;
line_start[j] = i;
cerr << "Error: Wrong input file!\n";
exit(1);
}

_part = part;
if(j < 9)
_size = 0;
else
_size = line_end[k + 1];
}
else
{
i = total_filled - 1;
int64 start, end;
int64 line_start[8], line_end[8];
int readed_lines = 0;
bool success = false;
int k;
while (i >= 0 && readed_lines < 8)
{
int k;
for(k = 0; k < 4; ++k)
GetFullLineFromEnd(start, end, part, i);
line_start[8 - readed_lines - 1] = start;
line_end[8 - readed_lines - 1] = end;
++readed_lines;

if (readed_lines >= 4)
{
if(part[line_start[k]+0] == '@' && part[line_start[k+2]+0] == '+')
k = 8 - readed_lines;
if (part[line_start[k]] == '@' && part[line_start[k + 2]] == '+')
{
if(part[line_start[k+2]+1] == '\n' || part[line_start[k+2]+1] == '\r')
if (part[line_start[k + 2] + 1] == '\n' || part[line_start[k + 2] + 1] == '\r')
{
success = true;
break;
if(line_start[k+1]-line_start[k] == line_start[k+3]-line_start[k+2] &&
memcmp(part+line_start[k]+1, part+line_start[k+2]+1, line_start[k+3]-line_start[k+2]-1) == 0)
}
if (line_start[k + 1] - line_start[k] == line_start[k + 3] - line_start[k + 2] &&
memcmp(part + line_start[k] + 1, part + line_start[k + 2] + 1, line_start[k + 3] - line_start[k + 2] - 1) == 0)
{
success = true;
break;
}
}
}
}

if(k == 4)
_size = 0;
else
_size = line_start[k];
if (!success)
{
cerr << "Error: Wrong input file!\n";
exit(1);
}
_part = part;
_size = line_end[k + 3];
}
// Allocate new memory for the buffer

Expand Down Expand Up @@ -287,6 +294,17 @@ bool CFastqReader::SkipNextEOL(uchar *part, int64 &pos, int64 max_pos)
return true;
}

//----------------------------------------------------------------------------------
// Scan the buffer backwards, starting at index pos, and report the bounds of the
// nearest line that is followed by an end-of-line character.
//   line_sart - (out) index of the first character of the located line
//   line_end  - (out) index just past the first EOL character ('\n' or '\r')
//               met while scanning backwards from pos
//   buff      - buffer being scanned
//   pos       - (in/out) index where the scan starts; on return it is the index of
//               the EOL character preceding the located line, or -1 when the
//               beginning of the buffer was reached
// Runs of consecutive EOL characters are skipped together, so "\r\n" is handled as
// one terminator (and empty lines are not reported as separate lines).
void CFastqReader::GetFullLineFromEnd(int64& line_sart, int64& line_end, uchar* buff, int64& pos)
{
	// Step back over any trailing partial line until an EOL character is found
	for (; pos >= 0; --pos)
	{
		const uchar c = buff[pos];
		if (c == '\n' || c == '\r')
			break;
	}
	line_end = pos + 1;

	// Step back over the whole run of EOL characters that terminates the line
	for (; pos >= 0; --pos)
	{
		const uchar c = buff[pos];
		if (!(c == '\n' || c == '\r'))
			break;
	}

	// Step back over the line content until the previous EOL (or buffer start)
	for (; pos >= 0; --pos)
	{
		const uchar c = buff[pos];
		if (c == '\n' || c == '\r')
			break;
	}
	line_sart = pos + 1;
}
//----------------------------------------------------------------------------------
// Check whether there is an EOF
bool CFastqReader::IsEof()
Expand Down
3 changes: 3 additions & 0 deletions kmc_tools/fastq_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ class CFastqReader {

bool SkipNextEOL(uchar *part, int64 &pos, int64 max_pos);

void GetFullLineFromEnd(int64& line_sart, int64& line_end, uchar* buff, int64& pos);


bool IsEof();

public:
Expand Down
10 changes: 10 additions & 0 deletions kmer_counter/bam_utils.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
/*
This file is a part of KMC software distributed under GNU GPL 3 licence.
The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
Version: 3.0.0
Date : 2017-01-28
*/

#ifndef _BAM_UTILS_H
#define _BAM_UTILS_H
#include "defs.h"
Expand Down
Loading

0 comments on commit d1dfea4

Please sign in to comment.