Skip to content

Commit

Permalink
Merge pull request #494 from MannLabs/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
straussmaximilian authored Aug 17, 2022
2 parents d1ce3a0 + defeb45 commit 51b80f5
Show file tree
Hide file tree
Showing 43 changed files with 8,206 additions and 6,842 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.4.7
current_version = 0.4.8
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ jobs:
run: |
pip install -U setuptools
pip install -U wheel
pip install nbdev jupyter
pip install "nbdev<=1.2.11"
pip install jupyter
pip install ".[gui-stable]"
- name: Read all notebooks
run: |
Expand Down
2 changes: 1 addition & 1 deletion alphapept/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.4.7"
__version__ = "0.4.8"

__requirements__ = {
"": "requirements/requirements.txt",
Expand Down
2 changes: 1 addition & 1 deletion alphapept/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
AUTHOR_EMAIL = "straussmaximilian@gmail.com"
COPYRIGHT = "Mann Labs"
BRANCH = "master"
VERSION_NO = "0.4.7"
VERSION_NO = "0.4.8"
MIN_PYTHON = "3.6"
MAX_PYTHON = "4"
AUDIENCE = "Developers"
Expand Down
4 changes: 4 additions & 0 deletions alphapept/_nbdev.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@
"add_column": "05_search.ipynb",
"remove_column": "05_search.ipynb",
"get_hits": "05_search.ipynb",
"FRAG_DTYPE": "05_search.ipynb",
"LOSS_DICT": "05_search.ipynb",
"LOSSES": "05_search.ipynb",
"get_sequences": "05_search.ipynb",
Expand All @@ -189,6 +190,8 @@
"cut_global_fdr": "06_score.ipynb",
"get_x_tandem_score": "06_score.ipynb",
"score_x_tandem": "06_score.ipynb",
"get_generic_score": "06_score.ipynb",
"score_generic": "06_score.ipynb",
"filter_with_x_tandem": "06_score.ipynb",
"filter_with_score": "06_score.ipynb",
"score_psms": "06_score.ipynb",
Expand All @@ -212,6 +215,7 @@
"chunks": "07_recalibration.ipynb",
"density_scatter": "07_recalibration.ipynb",
"save_fragment_calibration": "07_recalibration.ipynb",
"save_precursor_calibration": "07_recalibration.ipynb",
"calibrate_fragments_nn": "07_recalibration.ipynb",
"calibrate_hdf": "07_recalibration.ipynb",
"get_db_targets": "07_recalibration.ipynb",
Expand Down
2 changes: 1 addition & 1 deletion alphapept/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def get_mass_dict(modfile:str, aasfile: str, verbose:bool=True):
mass_dict["OH"] = mass_dict["Oxygen"] + mass_dict["Hydrogen"] # OH mass
mass_dict["H2O"] = mass_dict["Oxygen"] + 2 * mass_dict["Hydrogen"] # H2O mass

mass_dict["NH3"] = 17.03052
mass_dict["NH3"] = 17.02654910112
mass_dict["delta_M"] = 1.00286864
mass_dict["delta_S"] = 0.0109135

Expand Down
1 change: 1 addition & 0 deletions alphapept/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -1258,6 +1258,7 @@ def parallel_execute(
memory_available = psutil.virtual_memory().available/1024**3
n_processes_temp = max((int(memory_available //8 ), 1)) # 8 gb per file: Todo: make this better
n_processes = min((n_processes, n_processes_temp))
n_processes = min((n_processes, n_files)) #not more processes than files.
logging.info(f'Searching. Setting Process limit to {n_processes}.')


Expand Down
121 changes: 103 additions & 18 deletions alphapept/recalibration.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/07_recalibration.ipynb (unless otherwise specified).

__all__ = ['remove_outliers', 'transform', 'kneighbors_calibration', 'get_calibration', 'chunks', 'density_scatter',
'save_fragment_calibration', 'calibrate_fragments_nn', 'calibrate_hdf', 'get_db_targets', 'align_run_to_db',
'calibrate_fragments']
'save_fragment_calibration', 'save_precursor_calibration', 'calibrate_fragments_nn', 'calibrate_hdf',
'get_db_targets', 'align_run_to_db', 'calibrate_fragments']

# Cell

Expand Down Expand Up @@ -131,9 +131,11 @@ def kneighbors_calibration(df: pd.DataFrame, features: pd.DataFrame, cols: list,
def get_calibration(
df: pd.DataFrame,
features:pd.DataFrame,
file_name = '',
settings = None,
outlier_std: float = 3,
calib_n_neighbors: int = 100,
calib_mz_range: int = 20,
calib_mz_range: int = 100,
calib_rt_range: float = 0.5,
calib_mob_range: float = 0.3,
**kwargs) -> (np.ndarray, float):
Expand All @@ -157,7 +159,6 @@ def get_calibration(
"""



target = 'prec_offset_ppm'
cols = ['mz','rt']

Expand All @@ -173,16 +174,29 @@ def get_calibration(

if len(df_sub) > calib_n_neighbors:

y_hat = kneighbors_calibration(df_sub, features, cols, target, scaling_dict, calib_n_neighbors)
y_hat_ = kneighbors_calibration(df_sub, features, cols, target, scaling_dict, calib_n_neighbors) #ppm
corrected_mass = (1-y_hat_/1e6) * features['mass_matched']

feature_lookup_dict = features['feature_idx'].to_dict()
feature_lookup_dict_r = {v:k for k,v in feature_lookup_dict.items()}
features.iloc[df_sub['feature_idx'].apply(lambda x: feature_lookup_dict_r[x]).values]
y_hat = y_hat_[df_sub['feature_idx'].apply(lambda x: feature_lookup_dict_r[x]).values]

corrected_mass = (1-y_hat/1e6) * features['mass_matched']
#Correction
correction = df_sub['prec_offset_ppm'].values - y_hat

y_hat_std = y_hat.std()
y_hat_std = correction.std()

mad_offset = np.median(np.absolute(y_hat - np.median(y_hat)))
mad_offset = np.median(np.absolute(correction - np.median(correction)))

logging.info(f'Precursor calibration std {y_hat_std:.2f}, {mad_offset:.2f}')

if settings is not None:
logging.info(f'Saving precursor calibration')
df_sub['delta_ppm'] = df_sub['prec_offset_ppm']

save_precursor_calibration(df_sub, correction, y_hat_std, file_name, settings)

return corrected_mass, y_hat_std, mad_offset


Expand Down Expand Up @@ -245,8 +259,8 @@ def save_fragment_calibration(fragment_ions, corrected, std_offset, file_name, s
ax2 = density_scatter(fragment_ions['rt'].values, corrected.values, ax = ax2)
ax1.axhline(0, color='w', linestyle='-', alpha=0.5)
ax2.axhline(0, color='w', linestyle='-', alpha=0.5)
ax2.axhline(0+std_offset*settings['search']['calibration_std_frag'], color='w', linestyle=':', alpha=0.5)
ax2.axhline(0-std_offset*settings['search']['calibration_std_frag'], color='w', linestyle=':', alpha=0.5)
ax2.axhline(0+std_offset*settings['search']['calibration_std_frag'], color='r', linestyle=':', alpha=0.5)
ax2.axhline(0-std_offset*settings['search']['calibration_std_frag'], color='r', linestyle=':', alpha=0.5)

ax2.set_title('Fragment error after correction')
ax2.set_ylabel('Error (ppm)')
Expand All @@ -268,17 +282,71 @@ def save_fragment_calibration(fragment_ions, corrected, std_offset, file_name, s
ax4.set_title('Fragment error after correction')

ax4.axhline(0, color='w', linestyle='-', alpha=0.5)
ax4.axhline(0+std_offset*settings['search']['calibration_std_frag'], color='w', linestyle=':', alpha=0.5)
ax4.axhline(0-std_offset*settings['search']['calibration_std_frag'], color='w', linestyle=':', alpha=0.5)
ax4.axhline(0+std_offset*settings['search']['calibration_std_frag'], color='r', linestyle=':', alpha=0.5)
ax4.axhline(0-std_offset*settings['search']['calibration_std_frag'], color='r', linestyle=':', alpha=0.5)

base, ext = os.path.splitext(file_name)

plt.suptitle(os.path.split(file_name)[1])
plt.suptitle(f"Fragment {os.path.split(file_name)[1]}")

for ax in [ax1, ax2, ax3, ax4]:
ax.set_ylim([-settings['search']['frag_tol'], settings['search']['frag_tol']])

plt.savefig(base+'_calibration.png')
plt.savefig(base+'_frag_calib.png')

def save_precursor_calibration(df, corrected, std_offset, file_name, settings):
    """Plot precursor mass error before/after calibration and save as PNG.

    Creates a 2x2 figure: error vs. retention time (left column) and error
    vs. m/z (right column), each shown before (top row) and after (bottom
    row) correction. The "after" panels also draw the +/- tolerance bands at
    std_offset * settings['search']['calibration_std_prec']. The figure is
    written next to the input file with the suffix '_prec_calib.png'.

    Args:
        df: DataFrame with columns 'rt', 'mz' and 'delta_ppm'
            (ppm error before correction), aligned with `corrected`.
        corrected: array-like of ppm errors after correction.
        std_offset: standard deviation (ppm) of the corrected error,
            used to position the tolerance band lines.
        file_name: path of the processed raw file; only its base name and
            directory are used to build the output image path.
        settings: settings dict; reads settings['search']['calibration_std_prec']
            and settings['search']['prec_tol'] (y-axis limit).
    """
    f, axes = plt.subplots(2, 2, figsize=(20, 10))

    ax1 = axes[0, 0]  # RT, before correction
    ax2 = axes[1, 0]  # RT, after correction
    ax3 = axes[0, 1]  # m/z, before correction
    ax4 = axes[1, 1]  # m/z, after correction

    ax1 = density_scatter(df['rt'].values, df['delta_ppm'].values, ax = ax1)
    ax1.set_title('Precursor error before correction')
    ax1.axhline(0, color='w', linestyle='-', alpha=0.5)
    ax1.set_ylabel('Error (ppm)')
    ax1.set_xlabel('RT (min)')

    ax2 = density_scatter(df['rt'].values, corrected, ax = ax2)
    # Fix: the original redundantly redrew the zero line on ax1 here
    # (copy-paste from the fragment calibration plot); only ax2 needs it.
    ax2.axhline(0, color='w', linestyle='-', alpha=0.5)
    ax2.axhline(0+std_offset*settings['search']['calibration_std_prec'], color='r', linestyle=':', alpha=0.5)
    ax2.axhline(0-std_offset*settings['search']['calibration_std_prec'], color='r', linestyle=':', alpha=0.5)

    ax2.set_title('Precursor error after correction')
    ax2.set_ylabel('Error (ppm)')
    ax2.set_xlabel('RT (min)')

    ax3 = density_scatter(df['mz'].values, df['delta_ppm'].values, bins=50, ax = ax3)
    ax3.axhline(0, color='w', linestyle='-', alpha=0.5)

    ax3.set_ylabel('Error (ppm)')
    ax3.set_xlabel('m/z')
    ax3.set_xlim([100,1500])
    ax3.set_title('Precursor error before correction')

    ax4 = density_scatter(df['mz'].values, corrected, bins=50, ax = ax4)

    ax4.set_ylabel('Error (ppm)')
    ax4.set_xlabel('m/z')
    ax4.set_xlim([100, 1500])
    ax4.set_title('Precursor error after correction')

    ax4.axhline(0, color='w', linestyle='-', alpha=0.5)
    ax4.axhline(0+std_offset*settings['search']['calibration_std_prec'], color='r', linestyle=':', alpha=0.5)
    ax4.axhline(0-std_offset*settings['search']['calibration_std_prec'], color='r', linestyle=':', alpha=0.5)

    base, ext = os.path.splitext(file_name)

    plt.suptitle(f"Precursor {os.path.split(file_name)[1]}")

    # Clamp all panels to the precursor search tolerance for comparability.
    for ax in [ax1, ax2, ax3, ax4]:
        ax.set_ylim([-settings['search']['prec_tol'], settings['search']['prec_tol']])

    plt.savefig(base+'_prec_calib.png')


def calibrate_fragments_nn(ms_file_, file_name, settings):
logging.info('Starting fragment calibration.')
Expand All @@ -296,14 +364,30 @@ def calibrate_fragments_nn(ms_file_, file_name, settings):
psms = ms_file_.read(dataset_name='first_search')
psms['psms_index'] = np.arange(len(psms))

df = score_x_tandem(
df = score_generic(
psms,
fdr_level=settings["search"]["peptide_fdr"],
plot=False,
verbose=False,
**settings["search"]
)

#Only calibrate on b & y
fragment_ions = fragment_ions[fragment_ions['fragment_ion_type'] == 0]
fragment_ions['delta_ppm'] = ((fragment_ions['db_mass'] - fragment_ions['fragment_ion_mass'])/((fragment_ions['db_mass'] + fragment_ions['fragment_ion_mass'])/2)*1e6).values


##Outlier removal

calib_std = settings["calibration"]['outlier_std']

delta_ppm = fragment_ions['delta_ppm']

upper_bound = delta_ppm[delta_ppm>0].std()*calib_std
lower_bound = -delta_ppm[delta_ppm<0].std()*calib_std

fragment_ions = fragment_ions[(delta_ppm<upper_bound)&(delta_ppm>lower_bound)]

#Calculate offset
psms['keep'] = False
psms.loc[df['psms_index'].tolist(),'keep'] = True
Expand All @@ -319,7 +403,6 @@ def calibrate_fragments_nn(ms_file_, file_name, settings):
logging.info(f'Minimum hits for fragments after score {min_score:.2f}.')

fragment_ions['rt'] = psms['rt'][fragment_ions['psms_idx'].values.astype('int')].values
fragment_ions['delta_ppm'] = ((fragment_ions['db_mass'] - fragment_ions['fragment_ion_mass'])/((fragment_ions['db_mass'] + fragment_ions['fragment_ion_mass'])/2)*1e6).values

if len(fragment_ions) >= calib_n_neighbors:

Expand Down Expand Up @@ -377,7 +460,7 @@ def calibrate_fragments_nn(ms_file_, file_name, settings):

from typing import Union
import alphapept.io
from .score import score_x_tandem
from .score import score_generic
import os


Expand Down Expand Up @@ -413,7 +496,7 @@ def calibrate_hdf(
df = None

if len(psms) > 0 :
df = score_x_tandem(
df = score_generic(
psms,
fdr_level=settings["search"]["peptide_fdr"],
plot=False,
Expand All @@ -424,6 +507,8 @@ def calibrate_hdf(
corrected_mass, prec_offset_ppm_std, prec_offset_ppm_mad = get_calibration(
df,
features,
file_name,
settings,
**settings["calibration"]
)
ms_file_.write(
Expand Down
Loading

0 comments on commit 51b80f5

Please sign in to comment.