Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Encode horizontal deltas using +1/-1 indicators for up to 20% speed gain #214

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 31 additions & 18 deletions edlib/src/edlib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,46 +401,45 @@ static inline unsigned char* createReverseCopy(const unsigned char* const seq, c
* @param [in] Pv Bitset, Pv[i] == 1 if vin is +1, otherwise Pv[i] == 0.
* @param [in] Mv Bitset, Mv[i] == 1 if vin is -1, otherwise Mv[i] == 0.
* @param [in] Eq Bitset, Eq[i] == 1 if match, 0 if mismatch.
* @param [in] hin Will be +1, 0 or -1.
* @param [in] Phin Bitset, 00..01 when hin == +1.
* @param [in] Mhin Bitset, 00..01 when hin == -1.
* @param [out] PvOut Bitset, PvOut[i] == 1 if vout is +1, otherwise PvOut[i] == 0.
* @param [out] MvOut Bitset, MvOut[i] == 1 if vout is -1, otherwise MvOut[i] == 0.
* @param [out] hout Will be +1, 0 or -1.
* @param [out] Phout Bitset, 00..01 when hout == +1.
* @param [out] Mhout Bitset, 00..01 when hout == -1.
*/
static inline int calculateBlock(Word Pv, Word Mv, Word Eq, const int hin,
static inline pair<Word, Word> calculateBlock(Word Pv, Word Mv, Word Eq, const Word Phin, const Word Mhin,
Word &PvOut, Word &MvOut) {
// hin can be 1, -1 or 0.
// 1 -> 00...01
// 0 -> 00...00
// -1 -> 11...11 (2-complement)

Word hinIsNeg = static_cast<Word>(hin >> 2) & WORD_1; // 00...001 if hin is -1, 00...000 if 0 or 1

Word Xv = Eq | Mv;
// This is instruction below written using 'if': if (hin < 0) Eq |= (Word)1;
Eq |= hinIsNeg;
Eq |= Mhin;
Word Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq;

Word Ph = Mv | ~(Xh | Pv);
Word Mh = Pv & Xh;

int hout = 0;
// This is instruction below written using 'if': if (Ph & HIGH_BIT_MASK) hout = 1;
hout = (Ph & HIGH_BIT_MASK) >> (WORD_SIZE - 1);
Word Phout = Ph >> (WORD_SIZE - 1);
// This is instruction below written using 'if': if (Mh & HIGH_BIT_MASK) hout = -1;
hout -= (Mh & HIGH_BIT_MASK) >> (WORD_SIZE - 1);
Word Mhout = Mh >> (WORD_SIZE - 1);

Ph <<= 1;
Mh <<= 1;

// This is instruction below written using 'if': if (hin < 0) Mh |= (Word)1;
Mh |= hinIsNeg;
Mh |= Mhin;
// This is instruction below written using 'if': if (hin > 0) Ph |= (Word)1;
Ph |= static_cast<Word>((hin + 1) >> 1);
Ph |= Phin;

PvOut = Mh | ~(Xv | Ph);
MvOut = Ph & Xv;

return hout;
return {Phout, Mhout};
}

/**
Expand Down Expand Up @@ -581,17 +580,22 @@ static int myersCalcEditDistanceSemiGlobal(

int bestScore = -1;
vector<int> positions; // TODO: Maybe put this on heap?
const int startHout = mode == EDLIB_MODE_HW ? 0 : 1; // If 0 then gap before query is not penalized;
const Word PstartHout = mode == EDLIB_MODE_HW ? 0 : 1; // If 0 then gap before query is not penalized;
const unsigned char* targetChar = target;
for (int c = 0; c < targetLength; c++) { // for each column
const Word* Peq_c = Peq + (*targetChar) * maxNumBlocks;

//----------------------- Calculate column -------------------------//
int hout = startHout;
Word Phout = PstartHout;
Word Mhout = 0;
int hout = 0;
bl = blocks + firstBlock;
Peq_c += firstBlock;
for (int b = firstBlock; b <= lastBlock; b++) {
hout = calculateBlock(bl->P, bl->M, *Peq_c, hout, bl->P, bl->M);
pair<Word,Word> PMhout = calculateBlock(bl->P, bl->M, *Peq_c, Phout, Mhout, bl->P, bl->M);
Phout = PMhout.first;
Mhout = PMhout.second;
hout = Phout - Mhout;
bl->score += hout;
bl++; Peq_c++;
}
Expand All @@ -605,7 +609,8 @@ static int myersCalcEditDistanceSemiGlobal(
lastBlock++; bl++; Peq_c++;
bl->P = static_cast<Word>(-1); // All 1s
bl->M = static_cast<Word>(0);
bl->score = (bl - 1)->score - hout + WORD_SIZE + calculateBlock(bl->P, bl->M, *Peq_c, hout, bl->P, bl->M);
pair<Word, Word> PMhout = calculateBlock(bl->P, bl->M, *Peq_c, Phout, Mhout, bl->P, bl->M);
bl->score = (bl - 1)->score - hout + WORD_SIZE + PMhout.first - PMhout.second;
} else {
while (lastBlock >= firstBlock && bl->score >= k + WORD_SIZE) {
lastBlock--; bl--; Peq_c--;
Expand Down Expand Up @@ -780,9 +785,14 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int

//----------------------- Calculate column -------------------------//
int hout = 1;
Word Phout = 1;
Word Mhout = 0;
bl = blocks + firstBlock;
for (int b = firstBlock; b <= lastBlock; b++) {
hout = calculateBlock(bl->P, bl->M, Peq_c[b], hout, bl->P, bl->M);
pair<Word, Word> PMhout = calculateBlock(bl->P, bl->M, Peq_c[b], Phout, Mhout, bl->P, bl->M);
Phout = PMhout.first;
Mhout = PMhout.second;
hout = Phout - Mhout;
bl->score += hout;
bl++;
}
Expand All @@ -806,7 +816,10 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int
lastBlock++; bl++;
bl->P = static_cast<Word>(-1); // All 1s
bl->M = static_cast<Word>(0);
int newHout = calculateBlock(bl->P, bl->M, Peq_c[lastBlock], hout, bl->P, bl->M);
pair<Word, Word> PMhout = calculateBlock(bl->P, bl->M, Peq_c[lastBlock], Phout, Mhout, bl->P, bl->M);
Phout = PMhout.first;
Mhout = PMhout.second;
int newHout = Phout - Mhout;
bl->score = (bl - 1)->score - hout + WORD_SIZE + newHout;
hout = newHout;
}
Expand Down