Skip to content

Commit

Permalink
r190: default -k to 15; added -x map-ont
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Jul 19, 2017
1 parent 470021f commit 4aff301
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 40 deletions.
22 changes: 13 additions & 9 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include "minimap.h"
#include "mmpriv.h"

#define MM_VERSION "2.0-r189-dirty"
#define MM_VERSION "2.0-r190-dirty"

void liftrlimit()
{
Expand Down Expand Up @@ -63,7 +63,7 @@ static struct option long_options[] = {
int main(int argc, char *argv[])
{
mm_mapopt_t opt;
int i, c, k = 17, w = -1, bucket_bits = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx, is_hpc = 0, long_idx;
int i, c, k = 15, w = -1, bucket_bits = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx, is_hpc = 0, long_idx;
int minibatch_size = 200000000;
uint64_t batch_size = 4000000000ULL;
mm_bseq_file_t *fp = 0;
Expand Down Expand Up @@ -135,8 +135,10 @@ int main(int argc, char *argv[])
opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.max_gap = 10000, opt.max_chain_skip = 25;
minibatch_size = 500000000;
is_hpc = 1, k = 19, w = 5;
} else if (strcmp(optarg, "map10k") == 0) {
} else if (strcmp(optarg, "map10k") == 0 || strcmp(optarg, "map-pb") == 0) {
is_hpc = 1, k = 19;
} else if (strcmp(optarg, "map-ont") == 0) {
is_hpc = 0, k = 15;
} else if (strcmp(optarg, "asm5") == 0) {
k = 19, w = 19;
opt.a = 1, opt.b = 19, opt.q = 39, opt.q2 = 81, opt.e = 3, opt.e2 = 1, opt.zdrop = 200;
Expand Down Expand Up @@ -172,12 +174,6 @@ int main(int argc, char *argv[])
fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n");
fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n);
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n");
fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n");
fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n");
fprintf(stderr, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n");
fprintf(stderr, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n");
fprintf(stderr, " Alignment:\n");
fprintf(stderr, " -A INT matching score [%d]\n", opt.a);
fprintf(stderr, " -B INT mismatch penalty [%d]\n", opt.b);
Expand All @@ -193,6 +189,14 @@ int main(int argc, char *argv[])
fprintf(stderr, " -K NUM minibatch size [200M]\n");
// fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose);
fprintf(stderr, " -V show version number\n");
fprintf(stderr, " Preset:\n");
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
fprintf(stderr, " map10k/map-pb: -Hk19 (PacBio/ONT vs reference mapping)\n");
fprintf(stderr, " map-ont: -k15 (slightly more sensitive than 'map10k' for ONT vs reference)\n");
fprintf(stderr, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n");
fprintf(stderr, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n");
fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n");
fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n");
fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");
return 1;
}
Expand Down
78 changes: 47 additions & 31 deletions minimap2.1
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.TH minimap2 1 "18 July 2017" "minimap2-2.0-r180-dirty" "Bioinformatics tools"
.TH minimap2 1 "19 July 2017" "minimap2-2.0-r190-dirty" "Bioinformatics tools"
.SH NAME
.PP
minimap2 - mapping and alignment between collections of DNA sequences
Expand Down Expand Up @@ -74,7 +74,7 @@ SAM format.
.SS Indexing options
.TP 10
.BI -k \ INT
Minimizer k-mer length [17]
Minimizer k-mer length [15]
.TP
.BI -w \ INT
Minimizer window size [2/3 of k-mer length]. A minimizer is the smallest k-mer
Expand Down Expand Up @@ -164,35 +164,6 @@ secondary alignments [5]. This option has no effect when
.B -X
is applied.
.TP
.BI -x \ STR
Preset []. This option applies multiple options at the same time. It should be
applied before other options because options applied later will overwrite the
values set by
.BR -x .
Available
.I STR
are:
.RS
.TP 8
.B ava-pb
PacBio all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25)
.TP 8
.B ava-ont
Oxford Nanopore all-vs-all overlap mapping (-k15 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25)
.TP
.B map10k
PacBio/Oxford Nanopore read to reference mapping (-Hk19)
.TP
.B asm5
Long assembly to reference mapping (-k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200).
Typically, the alignment will not extend to regions with 5% or higher sequence
divergence. Only use this preset if the average divergence is far below 5%.
.TP
.B asm10
Long assembly to reference mapping (-k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200). Up
to 10% sequence divergence.
.RE
.TP
.BI --max-chain-skip \ INT
A heuristics that stops chaining early [50]. Minimap2 uses dynamic programming
for chaining. The time complexity is quadratic in the number of seeds. This
Expand Down Expand Up @@ -265,6 +236,51 @@ use
.TP
.B -V
Print version number to stdout
.SS Preset options
.TP 10
.BI -x \ STR
Preset []. This option applies multiple options at the same time. It should be
applied before other options because options applied later will overwrite the
values set by
.BR -x .
Available
.I STR
are:
.RS
.TP 8
.B map-pb
PacBio/Oxford Nanopore read to reference mapping (-Hk19)
.TP
.B map10k
The same as
.B map-pb
(-Hk19)
.TP
.B map-ont
Slightly more sensitive for Oxford Nanopore to reference mapping (-k15). For
PacBio reads, HPC minimizers consistently leads to faster performance and more
sensitive results in comparison to normal minimizers. For Oxford Nanopore data,
normal minimizers are better, though not much. The effectiveness of HPC is
determined by the sequencing error mode.
.TP
.B asm5
Long assembly to reference mapping (-k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200).
Typically, the alignment will not extend to regions with 5% or higher sequence
divergence. Only use this preset if the average divergence is far below 5%.
.TP
.B asm10
Long assembly to reference mapping (-k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200). Up
to 10% sequence divergence.
.TP 8
.B ava-pb
PacBio all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25)
.TP 8
.B ava-ont
Oxford Nanopore all-vs-all overlap mapping (-k15 -w5 -Xp0 -m100 -K500m -g10000
--max-chain-skip 25). Similarly, the major difference from
.B ava-pb
is that this preset is not using HPC minimizers.
.RE
.SS Miscellaneous options
.TP 10
.B --no-kalloc
Expand Down

0 comments on commit 4aff301

Please sign in to comment.