Skip to content

Commit

Permalink
fix memory issue
Browse files Browse the repository at this point in the history
  • Loading branch information
chhylp123 committed Oct 13, 2024
1 parent e8b1856 commit 49dd83d
Show file tree
Hide file tree
Showing 6 changed files with 235 additions and 33 deletions.
2 changes: 1 addition & 1 deletion CommandLines.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include <pthread.h>
#include <stdint.h>

#define HA_VERSION "0.20.0-r631"
#define HA_VERSION "0.20.0-r639"

#define VERBOSE 0

Expand Down
21 changes: 18 additions & 3 deletions Correct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16753,12 +16753,14 @@ char* qstr, char* tstr, bit_extz_t *exz, double e_rate, int64_t ql, int64_t tl,
uint64_t gen_hc_fast_cigar0(overlap_region *z, Candidates_list *cl, uint64_t wl, All_reads *rref, char* qstr, UC_Read *tu, bit_extz_t *exz, overlap_region *aux_o, double e_rate, int64_t ql, uint64_t rid, int64_t h_khit, int64_t *re)
{
int64_t ch_idx = z->shared_seed, ch_n;
int64_t i, tl, id = z->y_id, m, tot_e, aln;
int64_t i, tl, id = z->y_id, m, tot_e, aln, xe, ye;
k_mer_hit *ch_a = cl->list + ch_idx;
tl = Get_READ_LENGTH((*rref), id);
for (i = ch_idx; i < cl->length && cl->list[i].readID == cl->list[ch_idx].readID; i++); ch_n = i-ch_idx;
if(ch_n <= 0) return 0;


///debug for memory
// snprintf(NULL, 0, "dwn::%u\tdcn::%u", (uint32_t)aux_o->w_list.n, (uint32_t)aux_o->w_list.c.n);

// fprintf(stderr, "[M::%s::rid->%ld] utg%.6dl(%c), z::[%u, %u)\n",
// __func__, rid, (int32_t)z->y_id+1, "+-"[z->y_pos_strand], z->x_pos_s, z->x_pos_e+1);
Expand Down Expand Up @@ -16799,7 +16801,9 @@ uint64_t gen_hc_fast_cigar0(overlap_region *z, Candidates_list *cl, uint64_t wl,
aux_n = z->w_list.n;
for (i = tot_e = aln = 0; i < aux_n; i++) {
if(is_ualn_win(z->w_list.a[i])) {
tot_e += z->w_list.a[i].x_end + 1 - z->w_list.a[i].x_start;
xe = z->w_list.a[i].x_end + 1 - z->w_list.a[i].x_start;
ye = z->w_list.a[i].y_end + 1 - z->w_list.a[i].y_start;
tot_e += ((xe >= ye)?(xe):(ye));
} else {
tot_e += z->w_list.a[i].error; aln += z->w_list.a[i].x_end + 1 - z->w_list.a[i].x_start;
}
Expand Down Expand Up @@ -23495,6 +23499,9 @@ void gen_hc_r_alin_nec(overlap_region_alloc* ol, Candidates_list *cl, All_reads
resize_UC_Read(tu, bs<<1);
// fprintf(stderr, "[M::%s] window_length::%lld\n", __func__, w.window_length);

///debug for memory
// snprintf(NULL, 0, "dwn::%u\tdcn::%u", (uint32_t)aux_o->w_list.n, (uint32_t)aux_o->w_list.c.n);

for (i = k = 0; i < ol->length; i++) {
z = &(ol->list[i]);
if(z->is_match != 1) {
Expand All @@ -23514,11 +23521,19 @@ void gen_hc_r_alin_nec(overlap_region_alloc* ol, Candidates_list *cl, All_reads
if (rr > err) continue;
z->non_homopolymer_errors = re;

///debug for memory
// snprintf(NULL, 0, "dwn::%u\tdcn::%u", (uint32_t)aux_o->w_list.n, (uint32_t)aux_o->w_list.c.n);

if(!gen_hc_fast_cigar(z, cl, rref, w.window_length, qu->seq, tu, exz, aux_o, e_rate, ql, rid, khit, &re)) continue;

// if(z->x_id == 3196 && z->y_id == 3199) fprintf(stderr, "-1-[M::%s] tid::%u\t%.*s\trr::%f\tre::%ld\n", __func__, z->y_id, (int)Get_NAME_LENGTH(R_INF, z->y_id), Get_NAME(R_INF, z->y_id), rr, re);
///debug for memory
// snprintf(NULL, 0, "dwn::%u\tdcn::%u", (uint32_t)aux_o->w_list.n, (uint32_t)aux_o->w_list.c.n);

reassign_gaps(z, aux_o, qu->seq, ql, NULL, -1, rref, tu, buf);

///debug for memory
// snprintf(NULL, 0, "dwn::%u\tdcn::%u", (uint32_t)aux_o->w_list.n, (uint32_t)aux_o->w_list.c.n);
}

// if(z->x_id == 3196 && z->y_id == 3199) fprintf(stderr, "-2-[M::%s] tid::%u\t%.*s\trr::%f\tre::%u\n", __func__, z->y_id, (int)Get_NAME_LENGTH(R_INF, z->y_id), Get_NAME(R_INF, z->y_id), rr, z->non_homopolymer_errors);
Expand Down
22 changes: 14 additions & 8 deletions Hash_Table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2108,7 +2108,7 @@ uint64_t lchain_qdp_mcopy_fast(Candidates_list *cl, int64_t a_idx, int64_t a_n,
Chain_Data* dp, overlap_region_alloc* res, int64_t max_skip, int64_t max_iter,
int64_t max_dis, double chn_pen_gap, double chn_pen_skip, double bw_rate,
uint32_t xid, int64_t xl, int64_t yl, int64_t quick_check, uint32_t apend_be,
int64_t gen_cigar, int64_t enable_mcopy, double mcopy_rate, int64_t mcopy_khit_cutoff,
int64_t gen_cigar, int64_t mcopy_num, double mcopy_rate, int64_t mcopy_khit_cutoff,
int64_t khit_n)
{
if(a_n <= 0) return 0;
Expand Down Expand Up @@ -2186,19 +2186,21 @@ uint64_t lchain_qdp_mcopy_fast(Candidates_list *cl, int64_t a_idx, int64_t a_n,
}

for (i = msc_i, cL = 0; i >= 0; i = p[i]) { ii[i] = 1; t[cL++] = i;}///label the best chain
if((movl < xl) && enable_mcopy/**(movl < yl)**/) {


if(mcopy_num > 1) {
if(cL >= mcopy_khit_cutoff) {///if there are too few k-mers, disable mcopy
msc -= plus; min_sc = msc*mcopy_rate/**0.2**/; ii[msc_i] = 0;
for (i = ch_n = 0; i < a_n; ++i) {///make all f[] positive
f[i] -= plus; if(i >= ch_n) t[i] = 0;
if((!(ii[i])) && (f[i] >= min_sc)) {
if((!(ii[i])) && (f[i] >= min_sc)) {///!(ii[i]): skip the best chain
t[ch_n] = ((uint64_t)f[i])<<32; t[ch_n] += (i<<1); ch_n++;
}
}
if(ch_n > 1) {
int64_t n_v, n_v0, ni, n_u, n_u0 = res->length;
radix_sort_hc64i(t, t + ch_n);
for (k = ch_n-1, n_v = n_u = 0; k >= 0; --k) {
for (k = ch_n-1, n_v = n_u = 0; k >= 0 && n_u < mcopy_num; --k) {
n_v0 = n_v;
for (i = ((uint32_t)t[k])>>1; i >= 0 && (t[i]&1) == 0; ) {
ii[n_v++] = i; t[i] |= 1; i = p[i];
Expand All @@ -2209,19 +2211,20 @@ uint64_t lchain_qdp_mcopy_fast(Candidates_list *cl, int64_t a_idx, int64_t a_n,
kv_pushp_ol(overlap_region, (*res), &z);
push_ovlp_chain_qgen(z, xid, xl, yl, sc+plus, &(a[ii[n_v-1]]), &(a[ii[n_v0]]));
///mcopy_khit_cutoff <= 1: disable the mcopy_khit_cutoff filtering, for the realignment
if((mcopy_khit_cutoff <= 1) || ((z->x_pos_e+1-z->x_pos_s) <= (movl<<2))) {
// if((mcopy_khit_cutoff <= 1) || ((z->x_pos_e+1-z->x_pos_s) <= (movl<<2))) {
if((!n_u) || (n_v - n_v0 > 1)) {
z->align_length = n_v-n_v0; z->x_id = n_v0;
n_u++;
} else {///non-best is too long
} else {///non-best is tiny
res->length--; n_v = n_v0;
}
} else {
n_v = n_v0;
}
}

if(n_u > 1) ks_introsort_or_sss(n_u, res->list + n_u0);
res->length = n_u0 + filter_non_ovlp_xchains(res->list + n_u0, n_u, &n_v);
// if(n_u > 1) ks_introsort_or_sss(n_u, res->list + n_u0);
// res->length = n_u0 + filter_non_ovlp_xchains(res->list + n_u0, n_u, &n_v);
n_u = res->length;
if(n_u > n_u0 + 1) {
kv_resize_cl(k_mer_hit, (*cl), (n_v+cl->length));
Expand Down Expand Up @@ -2264,6 +2267,9 @@ uint64_t lchain_qdp_mcopy_fast(Candidates_list *cl, int64_t a_idx, int64_t a_n,
}
}
}



///a[] has been sorted by self_offset
// i = msc_i; cL = 0;
// while (i >= 0) {t[cL++] = i; i = p[i];}
Expand Down
8 changes: 4 additions & 4 deletions anchor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1920,7 +1920,7 @@ void lchain_qgen_mcopy(Candidates_list* cl, overlap_region_alloc* ol, uint32_t r
void lchain_qgen_mcopy_fast(Candidates_list* cl, overlap_region_alloc* ol, uint32_t rid, uint64_t rl, All_reads* rdb,
uint32_t apend_be, uint64_t max_n_chain, int64_t max_skip, int64_t max_iter,
int64_t max_dis, double chn_pen_gap, double chn_pen_skip, double bw_rate, int64_t quick_check,
uint32_t gen_off, int64_t enable_mcopy, double mcopy_rate, uint32_t chain_cutoff, uint32_t mcopy_khit_cut, st_mt_t *sp)
uint32_t gen_off, int64_t mcopy_num, double mcopy_rate, uint32_t chain_cutoff, uint32_t mcopy_khit_cut, st_mt_t *sp)
{
// fprintf(stderr, "+[M::%s] chain_cutoff::%u\n", __func__, chain_cutoff);
uint64_t i, k, l, m, cn = cl->length, yid, ol0, lch; overlap_region *r, t; ///srt = 0
Expand All @@ -1931,7 +1931,7 @@ void lchain_qgen_mcopy_fast(Candidates_list* cl, overlap_region_alloc* ol, uint3
if(cl->list[l].readID != rid) {
yid = cl->list[l].readID; ol0 = ol->length;
m += lchain_qdp_mcopy_fast(cl, l, k-l, m, &(cl->chainDP), ol, max_skip, max_iter, max_dis, chn_pen_gap, chn_pen_skip, bw_rate,
rid, rl, Get_READ_LENGTH((*rdb), yid), quick_check, apend_be, gen_off, enable_mcopy, mcopy_rate, mcopy_khit_cut, 1);
rid, rl, Get_READ_LENGTH((*rdb), yid), quick_check, apend_be, gen_off, mcopy_num, mcopy_rate, mcopy_khit_cut, 1);
if((chain_cutoff >= 2) && (!lch)) {
for (i = ol0; (i<ol->length) && (!lch); i++) {
if(ol->list[i].align_length < chain_cutoff) lch = 1;
Expand Down Expand Up @@ -2224,7 +2224,7 @@ void ul_map_lchain(ha_abufl_t *ab, uint32_t rid, char* rs, uint64_t rl, uint64_t
}

void h_ec_lchain(ha_abuf_t *ab, uint32_t rid, char* rs, uint64_t rl, uint64_t mz_w, uint64_t mz_k, All_reads *rref, overlap_region_alloc *overlap_list, Candidates_list *cl, double bw_thres,
int max_n_chain, int apend_be, kvec_t_u8_warp* k_flag, kvec_t_u64_warp* dbg_ct, st_mt_t *sp, uint32_t *high_occ, uint32_t *low_occ, uint32_t is_accurate, uint32_t gen_off, int64_t enable_mcopy, double mcopy_rate, uint32_t chain_cutoff, uint32_t mcopy_khit_cut)
int max_n_chain, int apend_be, kvec_t_u8_warp* k_flag, kvec_t_u64_warp* dbg_ct, st_mt_t *sp, uint32_t *high_occ, uint32_t *low_occ, uint32_t is_accurate, uint32_t gen_off, int64_t mcopy_num, double mcopy_rate, uint32_t chain_cutoff, uint32_t mcopy_khit_cut)
{
extern void *ha_flt_tab;
extern ha_pt_t *ha_idx;
Expand All @@ -2235,7 +2235,7 @@ void h_ec_lchain(ha_abuf_t *ab, uint32_t rid, char* rs, uint64_t rl, uint64_t mz
// lchain_gen(cl, overlap_list, rid, rl, NULL, uref, apend_be, f_cigar, max_n_chain, max_skip, max_iter, max_dis, chn_pen_gap, chn_pen_skip, bw_thres, quick_check, gen_off);
// lchain_qgen(cl, overlap_list, rid, rl, NULL, uref, apend_be, f_cigar, max_n_chain, max_skip, max_iter, max_dis, chn_pen_gap, chn_pen_skip, bw_thres, quick_check, gen_off);
///no need to sort here, overlap_list has been sorted at lchain_gen
lchain_qgen_mcopy_fast(cl, overlap_list, rid, rl, rref, apend_be, max_n_chain, max_skip, max_iter, max_dis, chn_pen_gap, chn_pen_skip, bw_thres, quick_check, gen_off, enable_mcopy, mcopy_rate, chain_cutoff, mcopy_khit_cut, sp);
lchain_qgen_mcopy_fast(cl, overlap_list, rid, rl, rref, apend_be, max_n_chain, max_skip, max_iter, max_dis, chn_pen_gap, chn_pen_skip, bw_thres, quick_check, gen_off, mcopy_num, mcopy_rate, chain_cutoff, mcopy_khit_cut, sp);
}

void h_ec_lchain_amz(ha_abuf_t *ab, uint32_t rid, char* rs, uint64_t rl, uint64_t mz_w, uint64_t mz_k, All_reads *rref, overlap_region_alloc *overlap_list, Candidates_list *cl, double bw_thres,
Expand Down
Loading

0 comments on commit 49dd83d

Please sign in to comment.