diff --git a/main.c b/main.c index eca3a998..acd573e0 100644 --- a/main.c +++ b/main.c @@ -7,7 +7,7 @@ #include "mmpriv.h" #include "ketopt.h" -#define MM_VERSION "2.19-r1059-dirty" +#define MM_VERSION "2.19-r1060-dirty" #ifdef __linux__ #include @@ -296,8 +296,6 @@ int main(int argc, char *argv[]) } if (!fnw && !(opt.flag&MM_F_CIGAR)) ipt.flag |= MM_I_NO_SEQ; - if (opt.bw < opt.bw_long && (opt.flag&MM_F_RMQ)) - opt.bw = opt.bw_long; if (mm_check_opt(&ipt, &opt) < 0) return 1; if (opt.best_n == 0) { @@ -319,7 +317,7 @@ int main(int argc, char *argv[]) fprintf(fp_help, " -g NUM stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap); fprintf(fp_help, " -G NUM max intron length (effective with -xsplice; changing -r) [200k]\n"); fprintf(fp_help, " -F NUM max fragment length (effective with -xsr or in the fragment mode) [800]\n"); - fprintf(fp_help, " -r NUM bandwidth used in chaining and DP-based alignment [%d]\n", opt.bw); + fprintf(fp_help, " -r NUM[,NUM] chaining/alignment bandwidth and long-join bandwidth [%d,%d]\n", opt.bw, opt.bw_long); fprintf(fp_help, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt); fprintf(fp_help, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score); // fprintf(fp_help, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy diff --git a/map.c b/map.c index 513fe1fe..a41b7a95 100644 --- a/map.c +++ b/map.c @@ -277,7 +277,17 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char ** opt->chain_gap_scale * 0.01 * mi->k, 0.0f, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km); } - if (opt->max_occ > opt->mid_occ && rep_len > 0 && !(opt->flag & MM_F_RMQ)) { + if (opt->bw_long > opt->bw && (opt->flag & MM_F_NO_LJOIN) == 0 && n_segs == 1 && n_regs0 > 1) { // re-chain/long-join for long sequences + int32_t st = (int32_t)a[0].y, en = (int32_t)a[(int32_t)u[0] - 1].y; + if (qlen_sum - (en - st) > opt->rmq_rescue_size || en - st > qlen_sum * opt->rmq_rescue_ratio) { + int32_t i; + for (i = 0, n_a = 0; i < n_regs0; ++i) n_a += (int32_t)u[i]; + kfree(b->km, u); + radix_sort_128x(a, a + n_a); + a = mg_lchain_rmq(opt->max_gap, opt->rmq_inner_dist, opt->bw_long, opt->max_chain_skip, opt->rmq_size_cap, opt->min_cnt, opt->min_chain_score, + opt->chain_gap_scale * 0.01 * mi->k, 0.0f, n_a, a, &n_regs0, &u, b->km); + } + } else if (opt->max_occ > opt->mid_occ && rep_len > 0 && !(opt->flag & MM_F_RMQ)) { // re-chain, mostly for short reads int rechain = 0; if (n_regs0 > 0) { // test if the best chain has all the segments int n_chained_segs = 1, max = 0, max_i = -1, max_off = -1, off = 0; @@ -300,16 +310,6 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char ** a = mg_lchain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score, opt->chain_gap_scale * 0.01 * mi->k, 0.0f, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km); } - } else if (opt->bw_long > opt->bw && (opt->flag & (MM_F_RMQ|MM_F_NO_LJOIN)) == 0 && n_segs == 1 && n_regs0 > 1) { - int32_t st = (int32_t)a[0].y, en = (int32_t)a[(int32_t)u[0] - 1].y; - if (qlen_sum - (en - st) > opt->rmq_rescue_size || en - st > qlen_sum * opt->rmq_rescue_ratio) { - int32_t i; - for (i = 0, n_a = 0; i < n_regs0; ++i) n_a += (int32_t)u[i]; - kfree(b->km, u); - radix_sort_128x(a, a + n_a); - a = mg_lchain_rmq(opt->max_gap, opt->rmq_inner_dist, opt->bw_long, opt->max_chain_skip, opt->rmq_size_cap, opt->min_cnt, opt->min_chain_score, - opt->chain_gap_scale * 0.01 * mi->k, 0.0f, n_a, a, &n_regs0, &u, b->km); - } } b->frag_gap = max_chain_gap_ref; b->rep_len = rep_len; diff --git a/minimap2.1 b/minimap2.1 index 94e3ce3f..251ed992 100644 --- a/minimap2.1 +++ b/minimap2.1 @@ -165,9 +165,12 @@ Stop chain enlongation if there are no minimizers within .IR NUM -bp [10k]. .TP -.BI -r \ NUM -Bandwidth used in chaining and DP-based alignment [500,20k]. This option -approximately controls the maximum gap size. +.BI -r \ NUM1 [, NUM2 ] +Bandwidth for chaining and base alignment [500,20k]. +.I NUM1 +is used for initial chaining and alignment extension; +.I NUM2 +for RMQ-based re-chaining and closing gaps in alignments. .TP .BI -n \ INT Discard chains consisting of diff --git a/options.c b/options.c index 8ff99d53..08a4d93b 100644 --- a/options.c +++ b/options.c @@ -156,6 +156,11 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo) { + if (mo->bw > mo->bw_long) { + if (mm_verbose >= 1) + fprintf(stderr, "[ERROR]\033[1;31m with '-rNUM1,NUM2', NUM1 (%d) can't be larger than NUM2 (%d)\033[0m\n", mo->bw, mo->bw_long); + return -8; + } if ((mo->flag & MM_F_RMQ) && (mo->flag & (MM_F_SR|MM_F_SPLICE))) { if (mm_verbose >= 1) fprintf(stderr, "[ERROR]\033[1;31m --rmq doesn't work with --sr or --splice\033[0m\n");