From f2a465332c1bd240b528eaf72b7e561b2e809e86 Mon Sep 17 00:00:00 2001 From: Ragnar Groot Koerkamp Date: Thu, 6 Apr 2023 19:40:17 +0200 Subject: [PATCH] Encode horizontal deltas using +1/-1 indicators --- edlib/src/edlib.cpp | 49 ++++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/edlib/src/edlib.cpp b/edlib/src/edlib.cpp index e19f119..76625e8 100644 --- a/edlib/src/edlib.cpp +++ b/edlib/src/edlib.cpp @@ -401,46 +401,45 @@ static inline unsigned char* createReverseCopy(const unsigned char* const seq, c * @param [in] Pv Bitset, Pv[i] == 1 if vin is +1, otherwise Pv[i] == 0. * @param [in] Mv Bitset, Mv[i] == 1 if vin is -1, otherwise Mv[i] == 0. * @param [in] Eq Bitset, Eq[i] == 1 if match, 0 if mismatch. - * @param [in] hin Will be +1, 0 or -1. + * @param [in] Phin Bitset, 00..01 when hin == +1. + * @param [in] Mhin Bitset, 00..01 when hin == -1. * @param [out] PvOut Bitset, PvOut[i] == 1 if vout is +1, otherwise PvOut[i] == 0. * @param [out] MvOut Bitset, MvOut[i] == 1 if vout is -1, otherwise MvOut[i] == 0. - * @param [out] hout Will be +1, 0 or -1. + * @param [out] Phout Bitset, 00..01 when hout == +1. + * @param [out] Mhout Bitset, 00..01 when hout == -1. */ -static inline int calculateBlock(Word Pv, Word Mv, Word Eq, const int hin, +static inline pair calculateBlock(Word Pv, Word Mv, Word Eq, const Word Phin, const Word Mhin, Word &PvOut, Word &MvOut) { // hin can be 1, -1 or 0. // 1 -> 00...01 // 0 -> 00...00 // -1 -> 11...11 (2-complement) - Word hinIsNeg = static_cast(hin >> 2) & WORD_1; // 00...001 if hin is -1, 00...000 if 0 or 1 - Word Xv = Eq | Mv; // This is instruction below written using 'if': if (hin < 0) Eq |= (Word)1; - Eq |= hinIsNeg; + Eq |= Mhin; Word Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq; Word Ph = Mv | ~(Xh | Pv); Word Mh = Pv & Xh; - int hout = 0; // This is instruction below written using 'if': if (Ph & HIGH_BIT_MASK) hout = 1; - hout = (Ph & HIGH_BIT_MASK) >> (WORD_SIZE - 1); + Word Phout = Ph >> (WORD_SIZE - 1); // This is instruction below written using 'if': if (Mh & HIGH_BIT_MASK) hout = -1; - hout -= (Mh & HIGH_BIT_MASK) >> (WORD_SIZE - 1); + Word Mhout = Mh >> (WORD_SIZE - 1); Ph <<= 1; Mh <<= 1; // This is instruction below written using 'if': if (hin < 0) Mh |= (Word)1; - Mh |= hinIsNeg; + Mh |= Mhin; // This is instruction below written using 'if': if (hin > 0) Ph |= (Word)1; - Ph |= static_cast((hin + 1) >> 1); + Ph |= Phin; PvOut = Mh | ~(Xv | Ph); MvOut = Ph & Xv; - return hout; + return {Phout, Mhout}; } /** @@ -581,17 +580,22 @@ static int myersCalcEditDistanceSemiGlobal( int bestScore = -1; vector positions; // TODO: Maybe put this on heap? - const int startHout = mode == EDLIB_MODE_HW ? 0 : 1; // If 0 then gap before query is not penalized; + const Word PstartHout = mode == EDLIB_MODE_HW ? 0 : 1; // If 0 then gap before query is not penalized; const unsigned char* targetChar = target; for (int c = 0; c < targetLength; c++) { // for each column const Word* Peq_c = Peq + (*targetChar) * maxNumBlocks; //----------------------- Calculate column -------------------------// - int hout = startHout; + Word Phout = PstartHout; + Word Mhout = 0; + int hout = 0; bl = blocks + firstBlock; Peq_c += firstBlock; for (int b = firstBlock; b <= lastBlock; b++) { - hout = calculateBlock(bl->P, bl->M, *Peq_c, hout, bl->P, bl->M); + pair PMhout = calculateBlock(bl->P, bl->M, *Peq_c, Phout, Mhout, bl->P, bl->M); + Phout = PMhout.first; + Mhout = PMhout.second; + hout = Phout - Mhout; bl->score += hout; bl++; Peq_c++; } @@ -605,7 +609,8 @@ static int myersCalcEditDistanceSemiGlobal( lastBlock++; bl++; Peq_c++; bl->P = static_cast(-1); // All 1s bl->M = static_cast(0); - bl->score = (bl - 1)->score - hout + WORD_SIZE + calculateBlock(bl->P, bl->M, *Peq_c, hout, bl->P, bl->M); + pair PMhout = calculateBlock(bl->P, bl->M, *Peq_c, Phout, Mhout, bl->P, bl->M); + bl->score = (bl - 1)->score - hout + WORD_SIZE + PMhout.first - PMhout.second; } else { while (lastBlock >= firstBlock && bl->score >= k + WORD_SIZE) { lastBlock--; bl--; Peq_c--; @@ -780,9 +785,14 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int //----------------------- Calculate column -------------------------// int hout = 1; + Word Phout = 1; + Word Mhout = 0; bl = blocks + firstBlock; for (int b = firstBlock; b <= lastBlock; b++) { - hout = calculateBlock(bl->P, bl->M, Peq_c[b], hout, bl->P, bl->M); + pair PMhout = calculateBlock(bl->P, bl->M, Peq_c[b], Phout, Mhout, bl->P, bl->M); + Phout = PMhout.first; + Mhout = PMhout.second; + hout = Phout - Mhout; bl->score += hout; bl++; } @@ -806,7 +816,10 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int lastBlock++; bl++; bl->P = static_cast(-1); // All 1s bl->M = static_cast(0); - int newHout = calculateBlock(bl->P, bl->M, Peq_c[lastBlock], hout, bl->P, bl->M); + pair PMhout = calculateBlock(bl->P, bl->M, Peq_c[lastBlock], Phout, Mhout, bl->P, bl->M); + Phout = PMhout.first; + Mhout = PMhout.second; + int newHout = Phout - Mhout; bl->score = (bl - 1)->score - hout + WORD_SIZE + newHout; hout = newHout; }