Skip to content

Commit

Permalink
Release minimap2-2.25 (r1173)
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Apr 25, 2023
1 parent f1b3c7a commit 4483f89
Show file tree
Hide file tree
Showing 13 changed files with 52 additions and 74 deletions.
38 changes: 38 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,41 @@
Release 2.25-r1173 (25 April 2023)
----------------------------------

Notable changes:

* Improvement: use the miniprot splice model for RNA-seq alignment by default.
This model considers non-GT-AG splice sites and leads to slightly higher
(<0.1%) accuracy and sensitivity on real human data.

* Change: increased the default `-I` to `8G` such that minimap2 would create a
uni-part index for a pair of mammalian genomes. This change may increase the
memory for all-vs-all read overlap alignment given large datasets.

* New feature: output the sequences in secondary alignments with option
`--secondary-seq` (#687).

* Bugfix: `--rmq` was not parsed correctly (#1010).

* Bugfix: possibly incorrect coordinate when applying end bonus to the target
sequence (#1025). This is a ksw2 bug. It does not affect minimap2 as
minimap2 is not using the affected feature.

* Improvement: incorporated several changes for better compatibility with
Windows (#1051) and for minimap2 integration at Oxford Nanopore Technologies
(#1048 and #1033).

* Improvement: output the HD-line in SAM output (#1019).

* Improvement: check minimap2 index file in mappy to prevent segmentation
fault for certain indices (#1008).

For genomic sequences, minimap2 should give identical output to v2.24.
Long-read RNA-seq alignment may occasionally differ from previous versions.

(2.25: 25 April 2023, r1173)



Release 2.24-r1122 (26 December 2021)
-------------------------------------

Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ Detailed evaluations are available from the [minimap2 paper][doi] or the
Minimap2 is optimized for x86-64 CPUs. You can acquire precompiled binaries from
the [release page][release] with:
```sh
curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar -jxvf -
./minimap2-2.24_x64-linux/minimap2
curl -L https://github.com/lh3/minimap2/releases/download/v2.25/minimap2-2.25_x64-linux.tar.bz2 | tar -jxvf -
./minimap2-2.25_x64-linux/minimap2
```
If you want to compile from the source, you need to have a C compiler, GNU make
and zlib development files installed. Then type `make` in the source code
Expand Down
4 changes: 2 additions & 2 deletions cookbook.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ To acquire the data used in this cookbook and to install minimap2 and paftools,
please follow the command lines below:
```sh
# install minimap2 executables
curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar jxf -
cp minimap2-2.24_x64-linux/{minimap2,k8,paftools.js} . # copy executables
curl -L https://github.com/lh3/minimap2/releases/download/v2.25/minimap2-2.25_x64-linux.tar.bz2 | tar jxf -
cp minimap2-2.25_x64-linux/{minimap2,k8,paftools.js} . # copy executables
export PATH="$PATH:"`pwd` # put the current directory on PATH
# download example datasets
curl -L https://github.com/lh3/minimap2/releases/download/v2.10/cookbook-data.tgz | tar zxf -
Expand Down
4 changes: 2 additions & 2 deletions index.c
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ static void *worker_pipeline(void *shared, int step, void *in)
for (i = 0; i < s->n_seq; ++i) {
mm_bseq1_t *t = &s->seq[i];
if (t->l_seq > 0)
mm_sketch2(0, t->seq, t->l_seq, p->mi->w, p->mi->k, t->rid, p->mi->flag&MM_I_HPC, p->mi->flag&MM_I_SYNCMER, &s->a);
mm_sketch(0, t->seq, t->l_seq, p->mi->w, p->mi->k, t->rid, p->mi->flag&MM_I_HPC, &s->a);
else if (mm_verbose >= 2)
fprintf(stderr, "[WARNING] the length database sequence '%s' is 0\n", t->name);
free(t->seq); free(t->name);
Expand Down Expand Up @@ -446,7 +446,7 @@ mm_idx_t *mm_idx_str(int w, int k, int is_hpc, int bucket_bits, int n, const cha
sum_len += p->len;
if (p->len > 0) {
a.n = 0;
mm_sketch2(0, s, p->len, w, k, i, is_hpc, 0, &a); // TODO: mm_idx_str() doesn't support syncmer
mm_sketch(0, s, p->len, w, k, i, is_hpc, &a);
mm_idx_add(mi, a.n, a.a);
}
}
Expand Down
6 changes: 2 additions & 4 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const

int main(int argc, char *argv[])
{
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:j:J:";
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:J:";
ketopt_t o = KETOPT_INIT;
mm_mapopt_t opt;
mm_idxopt_t ipt;
Expand Down Expand Up @@ -152,8 +152,7 @@ int main(int argc, char *argv[])
o = KETOPT_INIT;

while ((c = ketopt(&o, argc, argv, 1, opt_str, long_options)) >= 0) {
if (c == 'w') ipt.w = atoi(o.arg), ipt.flag &= ~MM_I_SYNCMER;
else if (c == 'j') ipt.w = atoi(o.arg), ipt.flag |= MM_I_SYNCMER;
if (c == 'w') ipt.w = atoi(o.arg);
else if (c == 'k') ipt.k = atoi(o.arg);
else if (c == 'H') ipt.flag |= MM_I_HPC;
else if (c == 'd') fnw = o.arg; // the above are indexing related options, except -I
Expand Down Expand Up @@ -329,7 +328,6 @@ int main(int argc, char *argv[])
fprintf(fp_help, " -H use homopolymer-compressed k-mer (preferrable for PacBio)\n");
fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", ipt.k);
fprintf(fp_help, " -w INT minimizer window size [%d]\n", ipt.w);
// fprintf(fp_help, " -j INT syncmer submer size (overriding -w) []\n");
fprintf(fp_help, " -I NUM split index for every ~NUM input bases [8G]\n");
fprintf(fp_help, " -d FILE dump index to FILE []\n");
fprintf(fp_help, " Mapping:\n");
Expand Down
2 changes: 1 addition & 1 deletion map.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ static void collect_minimizers(void *km, const mm_mapopt_t *opt, const mm_idx_t
mv->n = 0;
for (i = n = 0; i < n_segs; ++i) {
size_t j;
mm_sketch2(km, seqs[i], qlens[i], mi->w, mi->k, i, mi->flag&MM_I_HPC, mi->flag&MM_I_SYNCMER, mv);
mm_sketch(km, seqs[i], qlens[i], mi->w, mi->k, i, mi->flag&MM_I_HPC, mv);
for (j = n; j < mv->n; ++j)
mv->a[j].y += sum << 1;
if (opt->sdust_thres > 0) // mask low-complexity minimizers
Expand Down
3 changes: 1 addition & 2 deletions minimap.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include <stdio.h>
#include <sys/types.h>

#define MM_VERSION "2.24-r1171-dirty"
#define MM_VERSION "2.25-r1173"

#define MM_F_NO_DIAG (0x001LL) // no exact diagonal hit
#define MM_F_NO_DUAL (0x002LL) // skip pairs where query name is lexicographically larger than target name
Expand Down Expand Up @@ -48,7 +48,6 @@
#define MM_I_HPC 0x1
#define MM_I_NO_SEQ 0x2
#define MM_I_NO_NAME 0x4
#define MM_I_SYNCMER 0x8

#define MM_IDX_MAGIC "MMI\2"

Expand Down
2 changes: 1 addition & 1 deletion minimap2.1
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.TH minimap2 1 "18 December 2021" "minimap2-2.24 (r1122)" "Bioinformatics tools"
.TH minimap2 1 "25 April 2023" "minimap2-2.25 (r1173)" "Bioinformatics tools"
.SH NAME
.PP
minimap2 - mapping and alignment between collections of DNA sequences
Expand Down
2 changes: 1 addition & 1 deletion misc/paftools.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env k8

var paftools_version = '2.24-r1152-dirty';
var paftools_version = '2.25-r1173';

/*****************************
***** Library functions *****
Expand Down
2 changes: 0 additions & 2 deletions mmpriv.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ void radix_sort_64(uint64_t *beg, uint64_t *end);
uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk);

void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, mm128_v *p);
void mm_sketch_syncmer(void *km, const char *str, int len, int smer, int k, uint32_t rid, int is_hpc, mm128_v *p);
void mm_sketch2(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, int is_syncmer, mm128_v *p);

mm_seed_t *mm_collect_matches(void *km, int *_n_m, int qlen, int max_occ, int max_max_occ, int dist, const mm_idx_t *mi, const mm128_v *mv, int64_t *n_a, int *rep_len, int *n_mini_pos, uint64_t **mini_pos);
void mm_seed_mz_flt(void *km, mm128_v *mv, int32_t q_occ_max, float q_occ_frac);
Expand Down
2 changes: 1 addition & 1 deletion python/mappy.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ from libc.stdlib cimport free
cimport cmappy
import sys

__version__ = '2.24'
__version__ = '2.25'

cmappy.mm_reset_timer()

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def compile_libminimap2(*args, **kwargs):

setup(
name = 'mappy',
version = '2.24',
version = '2.25',
url = 'https://github.com/lh3/minimap2',
description = 'Minimap2 python binding',
long_description = readme(),
Expand Down
55 changes: 0 additions & 55 deletions sketch.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,58 +141,3 @@ void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, i
if (min.x != UINT64_MAX)
kv_push(mm128_t, km, *p, min);
}

/**
 * Sketch a sequence by syncmer-style selection and append the selected k-mers to p.
 *
 * For every valid k-mer the strand with the smaller 2-bit encoding is hashed
 * with hash64(). The k-mer is kept when the lowest 2*smer bits of the hash are
 * the minimum among the (k - smer + 1) windows of 2*smer bits obtained by
 * shifting the hash right in 2-bit steps.
 *
 * Each appended record is encoded as:
 *   x = hash << 8 | kmer_span
 *   y = rid << 32 | last_position << 1 | strand
 *
 * @param km      memory handle forwarded to kv_resize()/kv_push()
 * @param str     input sequence; bases mapped to >=4 by seq_nt4_table reset the current k-mer
 * @param len     length of str; must be positive
 * @param smer    submer size; must satisfy 0 < smer <= k
 * @param k       k-mer size; must satisfy 0 < k <= 28
 * @param rid     sequence ID stored in the upper 32 bits of y
 * @param is_hpc  non-zero to treat each homopolymer run as a single base
 * @param p       output vector; existing content is preserved and new records are appended
 */
void mm_sketch_syncmer(void *km, const char *str, int len, int smer, int k, uint32_t rid, int is_hpc, mm128_v *p)
{
	uint64_t shift1 = 2 * (k - 1), mask = (1ULL<<2*k) - 1, smask = (1ULL<<2*smer) - 1, kmer[2] = {0,0};
	int i, j, l, denom, kmer_span = 0;
	tiny_queue_t tq;

	assert(len > 0 && (smer > 0 && smer <= k) && (k > 0 && k <= 28)); // 56 bits for k-mer; could use long k-mers, but 28 enough in practice
	memset(&tq, 0, sizeof(tiny_queue_t));
	// smer == k is a legal input, so never divide by (k - smer) directly: that would be a division by zero.
	denom = k - smer > 0? k - smer : 1;
	kv_resize(mm128_t, km, *p, p->n + len/denom); // pre-allocation only; kv_push() below still appends one record at a time

	for (i = l = 0; i < len; ++i) {
		int c = seq_nt4_table[(uint8_t)str[i]];
		if (c < 4) { // not an ambiguous base
			int z;
			if (is_hpc) {
				int skip_len = 1;
				if (i + 1 < len && seq_nt4_table[(uint8_t)str[i + 1]] == c) {
					for (skip_len = 2; i + skip_len < len; ++skip_len)
						if (seq_nt4_table[(uint8_t)str[i + skip_len]] != c)
							break;
					i += skip_len - 1; // put $i at the end of the current homopolymer run
				}
				tq_push(&tq, skip_len);
				kmer_span += skip_len;
				if (tq.count > k) kmer_span -= tq_shift(&tq);
			} else kmer_span = l + 1 < k? l + 1 : k;
			kmer[0] = (kmer[0] << 2 | c) & mask;           // forward k-mer
			kmer[1] = (kmer[1] >> 2) | (3ULL^c) << shift1; // reverse k-mer
			if (kmer[0] == kmer[1]) continue; // skip "symmetric k-mers" as we don't know their strand
			z = kmer[0] < kmer[1]? 0 : 1; // strand
			++l;
			if (l >= k && kmer_span < 256) { // kmer_span must fit in the low 8 bits of t.x
				uint64_t x, min = UINT64_MAX;
				x = hash64(kmer[z], mask);
				for (j = 0; j <= k - smer; ++j) { // scan every 2*smer-bit window of the hash
					uint64_t y = x >> (j + j) & smask;
					min = min < y? min : y;
				}
				if ((x & smask) == min) { // the lowest window is a minimum: select this k-mer
					mm128_t t;
					t.x = x << 8 | kmer_span;
					t.y = (uint64_t)rid<<32 | (uint32_t)i<<1 | z;
					kv_push(mm128_t, km, *p, t);
				}
			}
		} else l = 0, tq.count = tq.front = 0, kmer_span = 0; // ambiguous base: restart k-mer accumulation
	}
}

/**
 * Sketch a sequence with either mm_sketch() or mm_sketch_syncmer().
 *
 * All arguments are forwarded unchanged. When is_syncmer is non-zero, w is
 * handed to mm_sketch_syncmer() as its submer-size argument; otherwise it is
 * handed to mm_sketch() as w.
 */
void mm_sketch2(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, int is_syncmer, mm128_v *p)
{
	if (!is_syncmer) {
		mm_sketch(km, str, len, w, k, rid, is_hpc, p);
		return;
	}
	mm_sketch_syncmer(km, str, len, w, k, rid, is_hpc, p);
}

0 comments on commit 4483f89

Please sign in to comment.