diff --git a/bseq.c b/bseq.c index e499ddae..e3e576fe 100644 --- a/bseq.c +++ b/bseq.c @@ -7,43 +7,43 @@ #include "kseq.h" KSEQ_INIT(gzFile, gzread) -struct bseq_file_s { +struct mm_bseq_file_s { int is_eof; gzFile fp; kseq_t *ks; }; -bseq_file_t *bseq_open(const char *fn) +mm_bseq_file_t *mm_bseq_open(const char *fn) { - bseq_file_t *fp; + mm_bseq_file_t *fp; gzFile f; f = fn && strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); if (f == 0) return 0; - fp = (bseq_file_t*)calloc(1, sizeof(bseq_file_t)); + fp = (mm_bseq_file_t*)calloc(1, sizeof(mm_bseq_file_t)); fp->fp = f; fp->ks = kseq_init(fp->fp); return fp; } -void bseq_close(bseq_file_t *fp) +void mm_bseq_close(mm_bseq_file_t *fp) { kseq_destroy(fp->ks); gzclose(fp->fp); free(fp); } -bseq1_t *bseq_read(bseq_file_t *fp, int chunk_size, int with_qual, int *n_) +mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int chunk_size, int with_qual, int *n_) { int size = 0, m, n; - bseq1_t *seqs; + mm_bseq1_t *seqs; kseq_t *ks = fp->ks; m = n = 0; seqs = 0; while (kseq_read(ks) >= 0) { - bseq1_t *s; + mm_bseq1_t *s; assert(ks->seq.l <= INT32_MAX); if (n >= m) { m = m? m<<1 : 256; - seqs = (bseq1_t*)realloc(seqs, m * sizeof(bseq1_t)); + seqs = (mm_bseq1_t*)realloc(seqs, m * sizeof(mm_bseq1_t)); } s = &seqs[n]; s->name = strdup(ks->name.s); @@ -58,7 +58,7 @@ bseq1_t *bseq_read(bseq_file_t *fp, int chunk_size, int with_qual, int *n_) return seqs; } -int bseq_eof(bseq_file_t *fp) +int mm_bseq_eof(mm_bseq_file_t *fp) { return fp->is_eof; } diff --git a/bseq.h b/bseq.h index df9d5bbf..2d065f9e 100644 --- a/bseq.h +++ b/bseq.h @@ -3,19 +3,27 @@ #include -struct bseq_file_s; -typedef struct bseq_file_s bseq_file_t; +#ifdef __cplusplus +extern "C" { +#endif + +struct mm_bseq_file_s; +typedef struct mm_bseq_file_s mm_bseq_file_t; typedef struct { int l_seq, rid; char *name, *seq, *qual; -} bseq1_t; +} mm_bseq1_t; -bseq_file_t *bseq_open(const char *fn); -void bseq_close(bseq_file_t *fp); -bseq1_t *bseq_read(bseq_file_t *fp, int chunk_size, int with_qual, int *n_); -int bseq_eof(bseq_file_t *fp); +mm_bseq_file_t *mm_bseq_open(const char *fn); +void mm_bseq_close(mm_bseq_file_t *fp); +mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int chunk_size, int with_qual, int *n_); +int mm_bseq_eof(mm_bseq_file_t *fp); extern unsigned char seq_nt4_table[256]; +#ifdef __cplusplus +} +#endif + #endif diff --git a/format.c b/format.c index 048ca3eb..c927adb3 100644 --- a/format.c +++ b/format.c @@ -62,7 +62,7 @@ static inline void write_tags(kstring_t *s, const mm_reg1_t *r) if (r->p) mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi); } -void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const bseq1_t *t, const mm_reg1_t *r) +void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r) { s->l = 0; mm_sprintf_lite(s, "%s\t%d\t%d\t%d\t%c\t", t->name, t->l_seq, r->qs, r->qe, "+-"[r->rev]); @@ -105,7 +105,7 @@ static void sam_write_sq(kstring_t *s, char *seq, int l, int rev, int comp) } else str_copy(s, seq, seq + l); } -void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const bseq1_t *t, const mm_reg1_t *r) +void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r) { int flag = 0; s->l = 0; diff --git a/index.c b/index.c index 066a8915..6ec29a0b 100644 --- a/index.c +++ b/index.c @@ -190,13 +190,13 @@ static void mm_idx_post(mm_idx_t *mi, int n_threads) typedef struct { int mini_batch_size, keep_name; uint64_t batch_size, sum_len; - bseq_file_t *fp; + mm_bseq_file_t *fp; mm_idx_t *mi; } pipeline_t; typedef struct { int n_seq; - bseq1_t *seq; + mm_bseq1_t *seq; mm128_v a; } step_t; @@ -217,7 +217,7 @@ static void *worker_pipeline(void *shared, int step, void *in) step_t *s; if (p->sum_len > p->batch_size) return 0; s = (step_t*)calloc(1, sizeof(step_t)); - s->seq = bseq_read(p->fp, p->mini_batch_size, 0, &s->n_seq); // read a mini-batch + s->seq = mm_bseq_read(p->fp, p->mini_batch_size, 0, &s->n_seq); // read a mini-batch if (s->seq) { uint32_t old_m, m; uint64_t sum_len, old_max_len, max_len; @@ -261,7 +261,7 @@ static void *worker_pipeline(void *shared, int step, void *in) } else if (step == 1) { // step 1: compute sketch step_t *s = (step_t*)in; for (i = 0; i < s->n_seq; ++i) { - bseq1_t *t = &s->seq[i]; + mm_bseq1_t *t = &s->seq[i]; mm_sketch(0, t->seq, t->l_seq, p->mi->w, p->mi->k, t->rid, p->mi->is_hpc, &s->a); free(t->seq); free(t->name); } @@ -275,7 +275,7 @@ static void *worker_pipeline(void *shared, int step, void *in) return 0; } -mm_idx_t *mm_idx_gen(bseq_file_t *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name) +mm_idx_t *mm_idx_gen(mm_bseq_file_t *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name) { pipeline_t pl; memset(&pl, 0, sizeof(pipeline_t)); @@ -299,12 +299,12 @@ mm_idx_t *mm_idx_gen(bseq_file_t *fp, int w, int k, int b, int is_hpc, int mini_ mm_idx_t *mm_idx_build(const char *fn, int w, int k, int is_hpc, int n_threads) // a simpler interface { - bseq_file_t *fp; + mm_bseq_file_t *fp; mm_idx_t *mi; - fp = bseq_open(fn); + fp = mm_bseq_open(fn); if (fp == 0) return 0; mi = mm_idx_gen(fp, w, k, MM_IDX_DEF_B, is_hpc, 1<<18, n_threads, UINT64_MAX, 1); - bseq_close(fp); + mm_bseq_close(fp); return mi; } diff --git a/ksw2.h b/ksw2.h index 41c0d9d7..a9256429 100644 --- a/ksw2.h +++ b/ksw2.h @@ -13,6 +13,10 @@ #define KSW_EZ_EXTZ_ONLY 0x40 // only perform extension #define KSW_EZ_REV_CIGAR 0x80 // reverse CIGAR in the output +#ifdef __cplusplus +extern "C" { +#endif + typedef struct { uint32_t max:31, zdropped:1; int max_q, max_t; // max extension coordinate @@ -23,10 +27,6 @@ typedef struct { uint32_t *cigar; } ksw_extz_t; -#ifdef __cplusplus -extern "C" { -#endif - /** * NW-like extension * diff --git a/main.c b/main.c index 68b96910..c554bd9f 100644 --- a/main.c +++ b/main.c @@ -10,7 +10,7 @@ #include "minimap.h" #include "mmpriv.h" -#define MM_VERSION "2.0-r187-dirty" +#define MM_VERSION "2.0-r188-dirty" void liftrlimit() { @@ -66,7 +66,7 @@ int main(int argc, char *argv[]) int i, c, k = 17, w = -1, bucket_bits = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx, is_hpc = 0, long_idx; int minibatch_size = 200000000; uint64_t batch_size = 4000000000ULL; - bseq_file_t *fp = 0; + mm_bseq_file_t *fp = 0; char *fnw = 0, *s; FILE *fpr = 0, *fpw = 0; @@ -203,12 +203,12 @@ int main(int argc, char *argv[]) return 1; } if (is_idx) fpr = fopen(argv[optind], "rb"); - else fp = bseq_open(argv[optind]); + else fp = mm_bseq_open(argv[optind]); if (fnw) fpw = fopen(fnw, "wb"); for (;;) { mm_idx_t *mi = 0; if (fpr) mi = mm_idx_load(fpr); - else if (!bseq_eof(fp)) + else if (!mm_bseq_eof(fp)) mi = mm_idx_gen(fp, w, k, bucket_bits, is_hpc, minibatch_size, n_threads, batch_size, keep_name); if (mi == 0) break; if (mm_verbose >= 3) @@ -227,7 +227,7 @@ int main(int argc, char *argv[]) } if (fpw) fclose(fpw); if (fpr) fclose(fpr); - if (fp) bseq_close(fp); + if (fp) mm_bseq_close(fp); fprintf(stderr, "[M::%s] Version: %s\n", __func__, MM_VERSION); fprintf(stderr, "[M::%s] CMD:", __func__); diff --git a/map.c b/map.c index 0d200574..81f7db83 100644 --- a/map.c +++ b/map.c @@ -281,7 +281,7 @@ mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, m typedef struct { int mini_batch_size, n_processed, n_threads; const mm_mapopt_t *opt; - bseq_file_t *fp; + mm_bseq_file_t *fp; const mm_idx_t *mi; kstring_t str; } pipeline_t; @@ -289,7 +289,7 @@ typedef struct { typedef struct { const pipeline_t *p; int n_seq; - bseq1_t *seq; + mm_bseq1_t *seq; int *n_reg; mm_reg1_t **reg; mm_tbuf_t **buf; @@ -311,7 +311,7 @@ static void *worker_pipeline(void *shared, int step, void *in) int with_qual = (!!(p->opt->flag & MM_F_OUT_SAM) && !(p->opt->flag & MM_F_NO_QUAL)); step_t *s; s = (step_t*)calloc(1, sizeof(step_t)); - s->seq = bseq_read(p->fp, p->mini_batch_size, with_qual, &s->n_seq); + s->seq = mm_bseq_read(p->fp, p->mini_batch_size, with_qual, &s->n_seq); if (s->seq) { s->p = p; for (i = 0; i < s->n_seq; ++i) @@ -332,7 +332,7 @@ static void *worker_pipeline(void *shared, int step, void *in) for (i = 0; i < p->n_threads; ++i) mm_tbuf_destroy(s->buf[i]); free(s->buf); for (i = 0; i < s->n_seq; ++i) { - bseq1_t *t = &s->seq[i]; + mm_bseq1_t *t = &s->seq[i]; for (j = 0; j < s->n_reg[i]; ++j) { mm_reg1_t *r = &s->reg[i][j]; if (p->opt->flag & MM_F_OUT_SAM) mm_write_sam(&p->str, mi, t, r); @@ -360,7 +360,7 @@ int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int { pipeline_t pl; memset(&pl, 0, sizeof(pipeline_t)); - pl.fp = bseq_open(fn); + pl.fp = mm_bseq_open(fn); if (pl.fp == 0) return -1; pl.opt = opt, pl.mi = idx; pl.n_threads = n_threads, pl.mini_batch_size = mini_batch_size; @@ -371,6 +371,6 @@ int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int } kt_pipeline(n_threads == 1? 1 : 2, worker_pipeline, &pl, 3); free(pl.str.s); - bseq_close(pl.fp); + mm_bseq_close(pl.fp); return 0; } diff --git a/minimap.h b/minimap.h index 6c68c626..14fc4470 100644 --- a/minimap.h +++ b/minimap.h @@ -15,6 +15,10 @@ #define MM_IDX_MAGIC "MMI\2" +#ifdef __cplusplus +extern "C" { +#endif + typedef struct { uint64_t x, y; } mm128_t; @@ -100,22 +104,18 @@ extern double mm_realtime0; struct mm_tbuf_s; typedef struct mm_tbuf_s mm_tbuf_t; -struct bseq_file_s; +struct mm_bseq_file_s; #define mm_seq4_set(s, i, c) ((s)[(i)>>3] |= (uint32_t)(c) << (((i)&7)<<2)) #define mm_seq4_get(s, i) ((s)[(i)>>3] >> (((i)&7)<<2) & 0xf) -#ifdef __cplusplus -extern "C" { -#endif - // compute minimizers void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, mm128_v *p); // minimizer indexing mm_idx_t *mm_idx_init(int w, int k, int b, int is_hpc); void mm_idx_destroy(mm_idx_t *mi); -mm_idx_t *mm_idx_gen(struct bseq_file_s *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name); +mm_idx_t *mm_idx_gen(struct mm_bseq_file_s *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name); uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f); void mm_idx_stat(const mm_idx_t *idx); const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n); diff --git a/mmpriv.h b/mmpriv.h index 6fff0973..242c7990 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -16,6 +16,10 @@ #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #endif +#ifdef __cplusplus +extern "C" { +#endif + #ifndef KSTRING_T #define KSTRING_T kstring_t typedef struct __kstring_t { @@ -24,10 +28,6 @@ typedef struct __kstring_t { } kstring_t; #endif -#ifdef __cplusplus -extern "C" { -#endif - double cputime(void); double realtime(void); @@ -35,8 +35,8 @@ void radix_sort_128x(mm128_t *beg, mm128_t *end); void radix_sort_64(uint64_t *beg, uint64_t *end); uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk); -void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const bseq1_t *t, const mm_reg1_t *r); -void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const bseq1_t *t, const mm_reg1_t *r); +void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r); +void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r); int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int64_t n, mm128_t *a, uint64_t **_u, void *km); mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a); diff --git a/sdust.h b/sdust.h index 47a71651..a12cab28 100644 --- a/sdust.h +++ b/sdust.h @@ -3,13 +3,13 @@ #include -struct sdust_buf_s; -typedef struct sdust_buf_s sdust_buf_t; - #ifdef __cplusplus extern "C" { #endif +struct sdust_buf_s; +typedef struct sdust_buf_s sdust_buf_t; + // the simple interface uint64_t *sdust(void *km, const uint8_t *seq, int l_seq, int T, int W, int *n);