Skip to content

Commit

Permalink
r1060: safer ways to use -rNUM1,NUM2
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed May 27, 2021
1 parent 4f8d1bc commit ca19463
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 18 deletions.
6 changes: 2 additions & 4 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include "mmpriv.h"
#include "ketopt.h"

#define MM_VERSION "2.19-r1059-dirty"
#define MM_VERSION "2.19-r1060-dirty"

#ifdef __linux__
#include <sys/resource.h>
Expand Down Expand Up @@ -296,8 +296,6 @@ int main(int argc, char *argv[])
}
if (!fnw && !(opt.flag&MM_F_CIGAR))
ipt.flag |= MM_I_NO_SEQ;
if (opt.bw < opt.bw_long && (opt.flag&MM_F_RMQ))
opt.bw = opt.bw_long;
if (mm_check_opt(&ipt, &opt) < 0)
return 1;
if (opt.best_n == 0) {
Expand All @@ -319,7 +317,7 @@ int main(int argc, char *argv[])
fprintf(fp_help, " -g NUM stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap);
fprintf(fp_help, " -G NUM max intron length (effective with -xsplice; changing -r) [200k]\n");
fprintf(fp_help, " -F NUM max fragment length (effective with -xsr or in the fragment mode) [800]\n");
fprintf(fp_help, " -r NUM bandwidth used in chaining and DP-based alignment [%d]\n", opt.bw);
fprintf(fp_help, " -r NUM[,NUM] chaining/alignment bandwidth and long-join bandwidth [%d,%d]\n", opt.bw, opt.bw_long);
fprintf(fp_help, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt);
fprintf(fp_help, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score);
// fprintf(fp_help, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy
Expand Down
22 changes: 11 additions & 11 deletions map.c
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,17 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
opt->chain_gap_scale * 0.01 * mi->k, 0.0f, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
}

if (opt->max_occ > opt->mid_occ && rep_len > 0 && !(opt->flag & MM_F_RMQ)) {
if (opt->bw_long > opt->bw && (opt->flag & MM_F_NO_LJOIN) == 0 && n_segs == 1 && n_regs0 > 1) { // re-chain/long-join for long sequences
int32_t st = (int32_t)a[0].y, en = (int32_t)a[(int32_t)u[0] - 1].y;
if (qlen_sum - (en - st) > opt->rmq_rescue_size || en - st > qlen_sum * opt->rmq_rescue_ratio) {
int32_t i;
for (i = 0, n_a = 0; i < n_regs0; ++i) n_a += (int32_t)u[i];
kfree(b->km, u);
radix_sort_128x(a, a + n_a);
a = mg_lchain_rmq(opt->max_gap, opt->rmq_inner_dist, opt->bw_long, opt->max_chain_skip, opt->rmq_size_cap, opt->min_cnt, opt->min_chain_score,
opt->chain_gap_scale * 0.01 * mi->k, 0.0f, n_a, a, &n_regs0, &u, b->km);
}
} else if (opt->max_occ > opt->mid_occ && rep_len > 0 && !(opt->flag & MM_F_RMQ)) { // re-chain, mostly for short reads
int rechain = 0;
if (n_regs0 > 0) { // test if the best chain has all the segments
int n_chained_segs = 1, max = 0, max_i = -1, max_off = -1, off = 0;
Expand All @@ -300,16 +310,6 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
a = mg_lchain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score,
opt->chain_gap_scale * 0.01 * mi->k, 0.0f, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
}
} else if (opt->bw_long > opt->bw && (opt->flag & (MM_F_RMQ|MM_F_NO_LJOIN)) == 0 && n_segs == 1 && n_regs0 > 1) {
int32_t st = (int32_t)a[0].y, en = (int32_t)a[(int32_t)u[0] - 1].y;
if (qlen_sum - (en - st) > opt->rmq_rescue_size || en - st > qlen_sum * opt->rmq_rescue_ratio) {
int32_t i;
for (i = 0, n_a = 0; i < n_regs0; ++i) n_a += (int32_t)u[i];
kfree(b->km, u);
radix_sort_128x(a, a + n_a);
a = mg_lchain_rmq(opt->max_gap, opt->rmq_inner_dist, opt->bw_long, opt->max_chain_skip, opt->rmq_size_cap, opt->min_cnt, opt->min_chain_score,
opt->chain_gap_scale * 0.01 * mi->k, 0.0f, n_a, a, &n_regs0, &u, b->km);
}
}
b->frag_gap = max_chain_gap_ref;
b->rep_len = rep_len;
Expand Down
9 changes: 6 additions & 3 deletions minimap2.1
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,12 @@ Stop chain enlongation if there are no minimizers within
.IR NUM -bp
[10k].
.TP
.BI -r \ NUM
Bandwidth used in chaining and DP-based alignment [500,20k]. This option
approximately controls the maximum gap size.
.BI -r \ NUM1 [, NUM2 ]
Bandwidth for chaining and base alignment [500,20k].
.I NUM1
is used for initial chaining and alignment extension;
.I NUM2
for RMQ-based re-chaining and closing gaps in alignments.
.TP
.BI -n \ INT
Discard chains consisting of
Expand Down
5 changes: 5 additions & 0 deletions options.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)

int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo)
{
if (mo->bw > mo->bw_long) {
if (mm_verbose >= 1)
fprintf(stderr, "[ERROR]\033[1;31m with '-rNUM1,NUM2', NUM1 (%d) can't be larger than NUM2 (%d)\033[0m\n", mo->bw, mo->bw_long);
return -8;
}
if ((mo->flag & MM_F_RMQ) && (mo->flag & (MM_F_SR|MM_F_SPLICE))) {
if (mm_verbose >= 1)
fprintf(stderr, "[ERROR]\033[1;31m --rmq doesn't work with --sr or --splice\033[0m\n");
Expand Down

0 comments on commit ca19463

Please sign in to comment.