Skip to content

Commit

Permalink
Systems Check NB
Browse files Browse the repository at this point in the history
  • Loading branch information
straussmaximilian committed Mar 27, 2022
1 parent 24f0bf2 commit 0959205
Show file tree
Hide file tree
Showing 3 changed files with 4,757 additions and 4,769 deletions.
128 changes: 67 additions & 61 deletions alphapept/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -1011,97 +1011,103 @@ def search_fasta_block(to_process:tuple) -> (list, int):
int: Number of new peptides that were generated in this iteration.
"""

try:
fasta_index, fasta_block, ms_files, settings = to_process

settings_ = settings[0]
spectra_block = settings_['fasta']['spectra_block']
to_add = List()

fasta_index, fasta_block, ms_files, settings = to_process
psms_container = [list() for _ in ms_files]

settings_ = settings[0]
spectra_block = settings_['fasta']['spectra_block']
to_add = List()
f_index = 0

psms_container = [list() for _ in ms_files]
pept_dict = {}
for element in fasta_block:
sequence = element["sequence"]
mod_peptides = generate_peptides(sequence, **settings_['fasta'])

f_index = 0
pept_dict, added_peptides = add_to_pept_dict(pept_dict, mod_peptides, fasta_index+f_index)

pept_dict = {}
for element in fasta_block:
sequence = element["sequence"]
mod_peptides = generate_peptides(sequence, **settings_['fasta'])
if len(added_peptides) > 0:
to_add.extend(added_peptides)

pept_dict, added_peptides = add_to_pept_dict(pept_dict, mod_peptides, fasta_index+f_index)
f_index += 1

if len(added_peptides) > 0:
to_add.extend(added_peptides)

f_index += 1
if len(to_add) > 0:
for seq_block in blocks(to_add, spectra_block):

spectra = generate_spectra(seq_block, mass_dict)

if len(to_add) > 0:
for seq_block in blocks(to_add, spectra_block):
precmasses, seqs, fragmasses, fragtypes = zip(*spectra)
sortindex = np.argsort(precmasses)

spectra = generate_spectra(seq_block, mass_dict)
fragmasses = np.array(fragmasses, dtype=object)[sortindex]
fragtypes = np.array(fragtypes, dtype=object)[sortindex]

precmasses, seqs, fragmasses, fragtypes = zip(*spectra)
sortindex = np.argsort(precmasses)
lens = [len(_) for _ in fragmasses]

fragmasses = np.array(fragmasses, dtype=object)[sortindex]
fragtypes = np.array(fragtypes, dtype=object)[sortindex]
n_frags = sum(lens)

lens = [len(_) for _ in fragmasses]
frags = np.zeros(n_frags, dtype=fragmasses[0].dtype)
frag_types = np.zeros(n_frags, dtype=fragtypes[0].dtype)

n_frags = sum(lens)
indices = np.zeros(len(lens) + 1, np.int64)
indices[1:] = lens
indices = np.cumsum(indices)

frags = np.zeros(n_frags, dtype=fragmasses[0].dtype)
frag_types = np.zeros(n_frags, dtype=fragtypes[0].dtype)
#Fill data

indices = np.zeros(len(lens) + 1, np.int64)
indices[1:] = lens
indices = np.cumsum(indices)
for _ in range(len(indices)-1):
start = indices[_]
end = indices[_+1]
frags[start:end] = fragmasses[_]
frag_types[start:end] = fragtypes[_]

#Fill data
db_data = {}

for _ in range(len(indices)-1):
start = indices[_]
end = indices[_+1]
frags[start:end] = fragmasses[_]
frag_types[start:end] = fragtypes[_]
db_data["precursors"] = np.array(precmasses)[sortindex]
db_data["seqs"] = np.array(seqs)[sortindex]

db_data = {}
db_data["fragmasses"] = frags
db_data["fragtypes"] = frag_types
db_data["indices"] = indices

db_data["precursors"] = np.array(precmasses)[sortindex]
db_data["seqs"] = np.array(seqs)[sortindex]
for file_idx, ms_file in enumerate(ms_files):
query_data = alphapept.io.MS_Data_File(
f"{ms_file}"
).read_DDA_query_data(swmr=True)

db_data["fragmasses"] = frags
db_data["fragtypes"] = frag_types
db_data["indices"] = indices
try:
features = alphapept.io.MS_Data_File(
ms_file
).read(dataset_name="features",swmr=True)
except FileNotFoundError:
features = None
except KeyError:
features = None

for file_idx, ms_file in enumerate(ms_files):
query_data = alphapept.io.MS_Data_File(
f"{ms_file}"
).read_DDA_query_data(swmr=True)
psms, num_specs_compared = get_psms(query_data, db_data, features, **settings[file_idx]["search"])

try:
features = alphapept.io.MS_Data_File(
ms_file
).read(dataset_name="features",swmr=True)
except FileNotFoundError:
features = None
except KeyError:
features = None
if len(psms) > 0:
#This could be speed up..
psms, fragment_ions = get_score_columns(psms, query_data, db_data, features, **settings[file_idx]["search"])

psms, num_specs_compared = get_psms(query_data, db_data, features, **settings[file_idx]["search"])
fasta_indices = [set(x for x in pept_dict[_]) for _ in psms['sequence']]

if len(psms) > 0:
#This could be speed up..
psms, fragment_ions = get_score_columns(psms, query_data, db_data, features, **settings[file_idx]["search"])
psms_df = pd.DataFrame(psms)
psms_df['fasta_index'] = fasta_indices

fasta_indices = [set(x for x in pept_dict[_]) for _ in psms['sequence']]
psms_container[file_idx].append(psms_df)

psms_df = pd.DataFrame(psms)
psms_df['fasta_index'] = fasta_indices
success = True

psms_container[file_idx].append(psms_df)
except Exception as e:
logging.error(f'Search of block {fasta_index} failed. Exception {e}.')
success = f"{e}"

return psms_container, len(to_add)
return psms_container, len(to_add), success

# Cell

Expand Down
130 changes: 68 additions & 62 deletions nbs/05_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1638,97 +1638,103 @@
" int: Number of new peptides that were generated in this iteration.\n",
" \"\"\" \n",
"\n",
" try:\n",
" fasta_index, fasta_block, ms_files, settings = to_process\n",
"\n",
" fasta_index, fasta_block, ms_files, settings = to_process\n",
"\n",
" settings_ = settings[0]\n",
" spectra_block = settings_['fasta']['spectra_block']\n",
" to_add = List()\n",
" settings_ = settings[0]\n",
" spectra_block = settings_['fasta']['spectra_block']\n",
" to_add = List()\n",
"\n",
" psms_container = [list() for _ in ms_files]\n",
" psms_container = [list() for _ in ms_files]\n",
"\n",
" f_index = 0\n",
" f_index = 0\n",
"\n",
" pept_dict = {}\n",
" for element in fasta_block:\n",
" sequence = element[\"sequence\"]\n",
" mod_peptides = generate_peptides(sequence, **settings_['fasta'])\n",
" pept_dict = {}\n",
" for element in fasta_block:\n",
" sequence = element[\"sequence\"]\n",
" mod_peptides = generate_peptides(sequence, **settings_['fasta'])\n",
"\n",
" pept_dict, added_peptides = add_to_pept_dict(pept_dict, mod_peptides, fasta_index+f_index)\n",
" pept_dict, added_peptides = add_to_pept_dict(pept_dict, mod_peptides, fasta_index+f_index)\n",
"\n",
" if len(added_peptides) > 0:\n",
" to_add.extend(added_peptides)\n",
" if len(added_peptides) > 0:\n",
" to_add.extend(added_peptides)\n",
"\n",
" f_index += 1\n",
" f_index += 1\n",
"\n",
"\n",
" if len(to_add) > 0:\n",
" for seq_block in blocks(to_add, spectra_block):\n",
" if len(to_add) > 0:\n",
" for seq_block in blocks(to_add, spectra_block):\n",
"\n",
" spectra = generate_spectra(seq_block, mass_dict)\n",
" spectra = generate_spectra(seq_block, mass_dict)\n",
"\n",
" precmasses, seqs, fragmasses, fragtypes = zip(*spectra)\n",
" sortindex = np.argsort(precmasses)\n",
" precmasses, seqs, fragmasses, fragtypes = zip(*spectra)\n",
" sortindex = np.argsort(precmasses)\n",
"\n",
" fragmasses = np.array(fragmasses, dtype=object)[sortindex]\n",
" fragtypes = np.array(fragtypes, dtype=object)[sortindex]\n",
" fragmasses = np.array(fragmasses, dtype=object)[sortindex]\n",
" fragtypes = np.array(fragtypes, dtype=object)[sortindex]\n",
"\n",
" lens = [len(_) for _ in fragmasses]\n",
" lens = [len(_) for _ in fragmasses]\n",
"\n",
" n_frags = sum(lens)\n",
" n_frags = sum(lens)\n",
"\n",
" frags = np.zeros(n_frags, dtype=fragmasses[0].dtype)\n",
" frag_types = np.zeros(n_frags, dtype=fragtypes[0].dtype)\n",
" frags = np.zeros(n_frags, dtype=fragmasses[0].dtype)\n",
" frag_types = np.zeros(n_frags, dtype=fragtypes[0].dtype)\n",
"\n",
" indices = np.zeros(len(lens) + 1, np.int64)\n",
" indices[1:] = lens\n",
" indices = np.cumsum(indices)\n",
" indices = np.zeros(len(lens) + 1, np.int64)\n",
" indices[1:] = lens\n",
" indices = np.cumsum(indices)\n",
"\n",
" #Fill data\n",
" #Fill data\n",
"\n",
" for _ in range(len(indices)-1):\n",
" start = indices[_]\n",
" end = indices[_+1]\n",
" frags[start:end] = fragmasses[_]\n",
" frag_types[start:end] = fragtypes[_]\n",
" for _ in range(len(indices)-1):\n",
" start = indices[_]\n",
" end = indices[_+1]\n",
" frags[start:end] = fragmasses[_]\n",
" frag_types[start:end] = fragtypes[_]\n",
"\n",
" db_data = {}\n",
" db_data = {}\n",
"\n",
" db_data[\"precursors\"] = np.array(precmasses)[sortindex]\n",
" db_data[\"seqs\"] = np.array(seqs)[sortindex]\n",
" db_data[\"precursors\"] = np.array(precmasses)[sortindex]\n",
" db_data[\"seqs\"] = np.array(seqs)[sortindex]\n",
"\n",
" db_data[\"fragmasses\"] = frags\n",
" db_data[\"fragtypes\"] = frag_types\n",
" db_data[\"indices\"] = indices\n",
" db_data[\"fragmasses\"] = frags\n",
" db_data[\"fragtypes\"] = frag_types\n",
" db_data[\"indices\"] = indices\n",
"\n",
" for file_idx, ms_file in enumerate(ms_files):\n",
" query_data = alphapept.io.MS_Data_File(\n",
" f\"{ms_file}\"\n",
" ).read_DDA_query_data(swmr=True)\n",
" for file_idx, ms_file in enumerate(ms_files):\n",
" query_data = alphapept.io.MS_Data_File(\n",
" f\"{ms_file}\"\n",
" ).read_DDA_query_data(swmr=True)\n",
"\n",
" try:\n",
" features = alphapept.io.MS_Data_File(\n",
" ms_file\n",
" ).read(dataset_name=\"features\",swmr=True)\n",
" except FileNotFoundError:\n",
" features = None\n",
" except KeyError:\n",
" features = None\n",
" try:\n",
" features = alphapept.io.MS_Data_File(\n",
" ms_file\n",
" ).read(dataset_name=\"features\",swmr=True)\n",
" except FileNotFoundError:\n",
" features = None\n",
" except KeyError:\n",
" features = None\n",
"\n",
" psms, num_specs_compared = get_psms(query_data, db_data, features, **settings[file_idx][\"search\"])\n",
" psms, num_specs_compared = get_psms(query_data, db_data, features, **settings[file_idx][\"search\"])\n",
"\n",
" if len(psms) > 0:\n",
" #This could be speed up..\n",
" psms, fragment_ions = get_score_columns(psms, query_data, db_data, features, **settings[file_idx][\"search\"])\n",
" if len(psms) > 0:\n",
" #This could be speed up..\n",
" psms, fragment_ions = get_score_columns(psms, query_data, db_data, features, **settings[file_idx][\"search\"])\n",
"\n",
" fasta_indices = [set(x for x in pept_dict[_]) for _ in psms['sequence']]\n",
" fasta_indices = [set(x for x in pept_dict[_]) for _ in psms['sequence']]\n",
"\n",
" psms_df = pd.DataFrame(psms)\n",
" psms_df['fasta_index'] = fasta_indices\n",
" psms_df = pd.DataFrame(psms)\n",
" psms_df['fasta_index'] = fasta_indices\n",
"\n",
" psms_container[file_idx].append(psms_df)\n",
" psms_container[file_idx].append(psms_df)\n",
" \n",
" success = True\n",
"\n",
" return psms_container, len(to_add)"
" except Exception as e:\n",
" logging.error(f'Search of block {fasta_index} failed. Exception {e}.')\n",
" success = f\"{e}\"\n",
" \n",
" return psms_container, len(to_add), success"
]
},
{
Expand Down
Loading

0 comments on commit 0959205

Please sign in to comment.