Skip to content

Commit

Permalink
Feature/faster decompression (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastiandeorowicz authored and agudys committed Oct 11, 2024
1 parent 45bab7f commit cdcaa0c
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 20 deletions.
4 changes: 2 additions & 2 deletions src/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
#include <string>
#include "params.h"

// Tool name and version number.
// NOTE(review): this listing is a diff view, so two VER/DATE pairs appear; the
// 1.2.0 / 2024-10-09 pair looks like the replaced one — confirm against the real file.
const std::string LZ_ANI_VER = "lz-ani 1.2.0";
// Date string that accompanies the version (YYYY-MM-DD).
const std::string LZ_ANI_DATE = "2024-10-09";
const std::string LZ_ANI_VER = "lz-ani 1.2.1";
const std::string LZ_ANI_DATE = "2024-10-11";
// Author list used in the banner below.
const std::string LZ_ANI_AUTHORS = "Sebastian Deorowicz, Adam Gudys";
// Banner string of the form "<version> (<date>) by <authors>".
const std::string LZ_ANI_INFO = LZ_ANI_VER + " (" + LZ_ANI_DATE + ") by " + LZ_ANI_AUTHORS;

Expand Down
18 changes: 15 additions & 3 deletions src/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ class CParser

CParams params;

// Scratch buffer: append_rc resizes it to the view's size, unpacks the view
// into it, and then reads it back to front.
seq_t seq_working;

seq_t seq_ref;
seq_t seq_data;
uint32_t n_ref_seqs = 0;
Expand Down Expand Up @@ -59,21 +61,31 @@ class CParser

// Appends all symbols of sv to the end of seq, storing allowed_N wherever the
// unpacked symbol equals forbidden_N.
// NOTE(review): this listing is a diff view, so the old per-symbol loop header and
// its commented-out copy both appear below — confirm against the real file.
void append(seq_t& seq, const seq_view& sv, const uint8_t allowed_N, const uint8_t forbidden_N)
{
for (uint32_t i = 0; i < sv.size(); ++i)
/* for (uint32_t i = 0; i < sv.size(); ++i)
{
auto c = sv[i];
if (c == forbidden_N)
seq.emplace_back(allowed_N);
else
seq.emplace_back(c);
}
}*/

// Grow seq once and let the view unpack itself straight into the new tail.
auto prev_size = seq.size();
seq.resize(prev_size + sv.size());
sv.unpack(seq.data() + prev_size);
// Substitute only within the freshly appended range; earlier content is not scanned.
replace(seq.begin() + prev_size, seq.end(), forbidden_N, allowed_N);
}

void append_rc(seq_t& seq, const seq_view& sv, const uint8_t allowed_N, const uint8_t forbidden_N)
{
seq.reserve(seq.size() + sv.size());

seq_working.resize(sv.size());
sv.unpack(seq_working.data());

for (uint32_t i = 0; i < sv.size(); ++i)
{
auto c = sv[sv.size() - 1 - i];
auto c = seq_working[seq_working.size() - 1 - i];
if (c == forbidden_N)
seq.emplace_back(allowed_N);
else if (c < code_N)
Expand Down
66 changes: 51 additions & 15 deletions src/seq_reservoir.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,25 @@ class seq_view
const uint32_t len;
const internal_packing_t internal_packing;

// Lookup table for bytes that hold three base-6 symbols: for a byte value
// v = 36*a + 6*b + c (a, b, c in 0..5), triples[v] = {a, b, c, 0}.
// Rows 216..255 are never written by the initializer below.
static inline uint8_t triples[256][4];

// Helper whose constructor fills `triples`; the single static instance `_init`
// exists only so that the constructor runs during static initialization.
struct _si
{
	_si()
	{
		// Walk every valid packed value once and split it into its base-6 digits.
		for (int code = 0; code < 6 * 6 * 6; ++code)
		{
			auto& entry = triples[code];

			entry[0] = static_cast<uint8_t>(code / 36);
			entry[1] = static_cast<uint8_t>(code / 6 % 6);
			entry[2] = static_cast<uint8_t>(code % 6);
			entry[3] = 0;
		}
	}
} static inline _init;

public:
seq_view(const uint8_t *data = 0, const uint32_t len = 0, internal_packing_t internal_packing = internal_packing_t::none) :
data(data),
Expand Down Expand Up @@ -90,21 +109,21 @@ class seq_view
if (len & 1)
dest[len / 2] = src[len - 1] << 4;
}
}*/

// Expands input packed two symbols per byte into one symbol per byte in dest:
// for each input byte the high nibble is written first, the low nibble second.
// Exactly len bytes of dest are written; dest is presumably sized by the caller — verify.
// NOTE(review): this listing is a diff view, so both the static (dest, src, len)
// form and the const-member form reading data/len appear interleaved below.
static void unpack2(uint8_t* dest, uint8_t* src, uint32_t len)
void unpack2(uint8_t* dest) const
{
for (uint32_t i = 0; i < len / 2; ++i)
{
dest[2 * i] = src[i] >> 4;
dest[2 * i + 1] = src[i] & 0xf;
dest[2 * i] = data[i] >> 4;
dest[2 * i + 1] = data[i] & 0xf;
}

// Odd length: the last symbol is taken from the high nibble of the final byte.
if (len & 1)
dest[len - 1] = src[len / 2] >> 4;
dest[len - 1] = data[len / 2] >> 4;
}

static void pack3(uint8_t* dest, uint8_t* src, uint32_t len)
/* static void pack3(uint8_t* dest, uint8_t* src, uint32_t len)
{
for (uint32_t i = 0; i < len / 3; ++i)
dest[i] = 36 * src[3 * i] + 6 * src[3 * i + 1] + src[3 * i + 2];
Expand All @@ -121,32 +140,49 @@ class seq_view
// Nothing
break;
}
}
}*/

// Expands input packed three base-6 symbols per byte (byte = 36*a + 6*b + c)
// into one symbol per byte in dest; exactly len bytes of dest are written.
// NOTE(review): this listing is a diff view, so the arithmetic decoding that reads
// `src` and the table-driven decoding that reads `data` via `triples` appear interleaved.
static void unpack3(uint8_t * dest, uint8_t * src, uint32_t len)
void unpack3(uint8_t *dest) const
{
// Number of complete three-symbol bytes.
uint32_t len_div_3 = len / 3;

for (uint32_t i = 0; i < len_div_3; ++i)
{
dest[3 * i] = src[i] / 36;
dest[3 * i + 1] = src[i] / 6 - 6 * dest[3 * i];
dest[3 * i + 2] = src[i] - 36 * dest[3 * i] - 6 * dest[3 * i + 1];
dest[3 * i] = triples[data[i]][0];
dest[3 * i + 1] = triples[data[i]][1];
dest[3 * i + 2] = triples[data[i]][2];
}

// Decode the one or two symbols left over in the final, partially filled byte.
// NOTE(review): the table-driven lines take the leading digits of the byte
// (tail byte read as 36*a + 6*b, or 36*a), while the arithmetic lines shown next to
// them read it as 6*a + b, or a. Confirm the packer matches the form actually in use.
switch (len % 3)
{
case 2:
dest[len - 2] = src[len_div_3] / 6;
dest[len - 1] = src[len_div_3] - 6 * dest[len - 2];
dest[len - 2] = triples[data[len_div_3]][0];
dest[len - 1] = triples[data[len_div_3]][1];
break;
case 1:
dest[len - 1] = src[len_div_3];
dest[len - 1] = triples[data[len_div_3]][0];
break;
case 0:
// Nothing
break;
}
}*/
}

// Writes the view's symbols to dest, one symbol per byte, choosing the decoder
// that matches the view's internal packing. Any packing value other than the
// three handled here leaves dest untouched.
void unpack(uint8_t* dest) const
{
	if (internal_packing == internal_packing_t::none)
	{
		// Already one symbol per byte: plain copy of len bytes.
		copy_n(data, len, dest);
	}
	else if (internal_packing == internal_packing_t::two_in_byte)
	{
		unpack2(dest);
	}
	else if (internal_packing == internal_packing_t::three_in_byte)
	{
		unpack3(dest);
	}
}
};

class CSeqReservoir
Expand Down

0 comments on commit cdcaa0c

Please sign in to comment.