From 7dcacd2bb39608c6f4ee56fb6827c14e4b926e0f Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Mon, 29 Jan 2024 13:17:57 +0100
Subject: [PATCH 01/18] Add Ella's charge prediction

---
 peptdeep/model/charge.py | 108 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 peptdeep/model/charge.py

diff --git a/peptdeep/model/charge.py b/peptdeep/model/charge.py
new file mode 100644
index 00000000..b57c6266
--- /dev/null
+++ b/peptdeep/model/charge.py
@@ -0,0 +1,108 @@
+import pandas as pd
+import numpy as np
+
+
+from peptdeep.model.generic_property_prediction import (
+    ModelInterface_for_Generic_AASeq_MultiLabelClassification,
+    Model_for_Generic_AASeq_BinaryClassification_Transformer,
+    ModelInterface_for_Generic_ModAASeq_MultiLabelClassification,
+    Model_for_Generic_ModAASeq_BinaryClassification_Transformer,
+)
+
+class ChargeModelForAASeq(
+    ModelInterface_for_Generic_AASeq_MultiLabelClassification
+):
+    def __init__(self, min_charge:int, max_charge:int):
+        super().__init__(
+            num_target_values=max_charge-min_charge+1,
+            model_class=Model_for_Generic_AASeq_BinaryClassification_Transformer,
+            nlayers=4, hidden_dim=128, dropout=0.1
+        )
+
+        self.target_column_to_predict = "charge_probs"
+        self.target_column_to_train = "charge_indicators"
+        self.min_charge = min_charge
+        self.max_charge = max_charge
+        self.charge_range = np.arange(min_charge, max_charge+1, dtype=np.int8)
+        
+    def predict_charges_for_pep_df(self, 
+        pep_df:pd.DataFrame, 
+        charge_prob=0.3,
+        drop_probs_column=True
+    ):
+        df = self.predict(pep_df)
+        df["charge"] = self.charge_probs.apply(
+            lambda x: self.charge_range[x>charge_prob]
+        )
+        df = df.explode("charge").dropna(subset=["charge"])
+        if drop_probs_column:
+            df.drop(columns="charge_probs", inplace=True)
+        df["charge"] = df.charge.astype(np.int8)
+        return df
+    
+class ChargeModelForModAASeq(
+    ModelInterface_for_Generic_ModAASeq_MultiLabelClassification
+):
+    def __init__(self, min_charge:int, max_charge:int):
+        super().__init__(
+            num_target_values=max_charge-min_charge+1,
+            model_class=Model_for_Generic_ModAASeq_BinaryClassification_Transformer,
+            nlayers=4, hidden_dim=128, dropout=0.1
+        )
+
+        self.target_column_to_predict = "charge_probs"
+        self.target_column_to_train = "charge_indicators"
+        self.min_charge = min_charge
+        self.max_charge = max_charge
+        self.charge_range = np.arange(
+            min_charge, max_charge+1, dtype=np.int8
+        )
+        
+    def predict_charges_for_pep_df(self, 
+        pep_df:pd.DataFrame, 
+        charge_prob=0.3,
+        drop_probs_column=True
+    ):
+        df = self.predict(pep_df)
+        df["charge"] = self.charge_probs.apply(
+            lambda x: self.charge_range[x>charge_prob]
+        )
+        df = df.explode("charge").dropna(subset=["charge"])
+        if drop_probs_column:
+            df.drop(columns="charge_probs", inplace=True)
+        df["charge"] = df.charge.astype(np.int8)
+        return df
+
+def group_psm_df_by_sequence(
+    psm_df: pd.DataFrame,
+    min_charge:int,
+    max_charge:int,
+):
+    return psm_df.groupby("sequence")["charge"].apply(
+        lambda x: get_charge_indicators(set(x),
+            min_charge=min_charge, max_charge=max_charge
+        )
+    ).reset_index(drop=False).rename(columns={"charge":"charge_indicators"})
+
+
+def group_psm_df_by_modseq(
+    psm_df: pd.DataFrame,
+    min_charge:int,
+    max_charge:int,
+):
+    return psm_df.groupby(["sequence","mods","mod_sites"])["charge"].apply(
+        lambda x: get_charge_indicators(set(x),
+            min_charge=min_charge, max_charge=max_charge
+        )
+    ).reset_index(drop=False).rename(columns={"charge":"charge_indicators"})
+
+def get_charge_indicators(
+    charge_list,
+    min_charge:int,
+    max_charge:int,
+):
+    charge_indicators = np.zeros(max_charge-min_charge+1)
+    for charge in charge_list:
+        if charge <= max_charge and charge >= min_charge:
+            charge_indicators[charge-min_charge] = 1.0
+    return charge_indicators
\ No newline at end of file

From d9b5ad2251ab3e4aaaac04a8726248efad768311 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Mon, 29 Jan 2024 15:46:37 +0100
Subject: [PATCH 02/18] test nbs for charge pred

---
 nbdev_nbs/model/charge.ipynb | 244 +++++++++++++++++++++++++++++++++++
 peptdeep/model/charge.py     |  34 +++--
 2 files changed, 264 insertions(+), 14 deletions(-)
 create mode 100644 nbdev_nbs/model/charge.ipynb

diff --git a/nbdev_nbs/model/charge.ipynb b/nbdev_nbs/model/charge.ipynb
new file mode 100644
index 00000000..daadf88e
--- /dev/null
+++ b/nbdev_nbs/model/charge.ipynb
@@ -0,0 +1,244 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%reload_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>charge_indicators</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>charge_probs</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>ABCDE</td>\n",
+       "      <td>[1, 0]</td>\n",
+       "      <td>5</td>\n",
+       "      <td>[0.7503374, 0.18496446]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>FGHIJK</td>\n",
+       "      <td>[0, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>[0.29431552, 0.5896796]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>LMNOPQ</td>\n",
+       "      <td>[1, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>[0.7241462, 0.6150697]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>RSTUVWXYZ</td>\n",
+       "      <td>[0, 0]</td>\n",
+       "      <td>9</td>\n",
+       "      <td>[0.30657992, 0.22709145]</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    sequence charge_indicators  nAA              charge_probs\n",
+       "0      ABCDE            [1, 0]    5   [0.7503374, 0.18496446]\n",
+       "1     FGHIJK            [0, 1]    6   [0.29431552, 0.5896796]\n",
+       "2     LMNOPQ            [1, 1]    6    [0.7241462, 0.6150697]\n",
+       "3  RSTUVWXYZ            [0, 0]    9  [0.30657992, 0.22709145]"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from peptdeep.model.charge import *\n",
+    "import pandas as pd\n",
+    "\n",
+    "model = ChargeModelForAASeq(min_charge=1, max_charge=2)\n",
+    "\n",
+    "df = pd.DataFrame({\n",
+    "    'sequence': ['ABCDE','FGHIJK','LMNOPQ','RSTUVWXYZ'],\n",
+    "    'charge_indicators': [[1,0],[0,1],[1,1],[0,0]],\n",
+    "})\n",
+    "model.train(df)\n",
+    "model.predict(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>mods</th>\n",
+       "      <th>mod_sites</th>\n",
+       "      <th>charge_indicators</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>charge_probs</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>ABCDE</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>[1, 0]</td>\n",
+       "      <td>5</td>\n",
+       "      <td>[0.80226785, 0.19673407]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>FGHIJK</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>[0, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>[0.3100456, 0.6222909]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>LMNOPQ</td>\n",
+       "      <td>Oxidation@M</td>\n",
+       "      <td>2</td>\n",
+       "      <td>[1, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>[0.7553099, 0.66014636]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>RSTUVWXYZ</td>\n",
+       "      <td>Phospho@T</td>\n",
+       "      <td>3</td>\n",
+       "      <td>[0, 0]</td>\n",
+       "      <td>9</td>\n",
+       "      <td>[0.28392678, 0.25133142]</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    sequence         mods mod_sites charge_indicators  nAA  \\\n",
+       "0      ABCDE                                   [1, 0]    5   \n",
+       "1     FGHIJK                                   [0, 1]    6   \n",
+       "2     LMNOPQ  Oxidation@M         2            [1, 1]    6   \n",
+       "3  RSTUVWXYZ    Phospho@T         3            [0, 0]    9   \n",
+       "\n",
+       "               charge_probs  \n",
+       "0  [0.80226785, 0.19673407]  \n",
+       "1    [0.3100456, 0.6222909]  \n",
+       "2   [0.7553099, 0.66014636]  \n",
+       "3  [0.28392678, 0.25133142]  "
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from peptdeep.model.charge import *\n",
+    "import pandas as pd\n",
+    "\n",
+    "model = ChargeModelForModAASeq(min_charge=1, max_charge=2)\n",
+    "\n",
+    "df = pd.DataFrame({\n",
+    "    'sequence': ['ABCDE','FGHIJK','LMNOPQ','RSTUVWXYZ'],\n",
+    "    'mods': ['', '', 'Oxidation@M', 'Phospho@T'],\n",
+    "    'mod_sites': ['', '', '2', '3'],\n",
+    "    'charge_indicators': [[1,0],[0,1],[1,1],[0,0]],\n",
+    "})\n",
+    "model.train(df)\n",
+    "model.predict(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/peptdeep/model/charge.py b/peptdeep/model/charge.py
index b57c6266..5c0fc631 100644
--- a/peptdeep/model/charge.py
+++ b/peptdeep/model/charge.py
@@ -8,14 +8,17 @@
     ModelInterface_for_Generic_ModAASeq_MultiLabelClassification,
     Model_for_Generic_ModAASeq_BinaryClassification_Transformer,
 )
-
-class ChargeModelForAASeq(
-    ModelInterface_for_Generic_AASeq_MultiLabelClassification
+    
+class ChargeModelForModAASeq(
+    ModelInterface_for_Generic_ModAASeq_MultiLabelClassification
 ):
-    def __init__(self, min_charge:int, max_charge:int):
+    """
+    ModelInterface for charge prediction for modified peptides
+    """
+    def __init__(self, min_charge:int=1, max_charge:int=6):
         super().__init__(
             num_target_values=max_charge-min_charge+1,
-            model_class=Model_for_Generic_AASeq_BinaryClassification_Transformer,
+            model_class=Model_for_Generic_ModAASeq_BinaryClassification_Transformer,
             nlayers=4, hidden_dim=128, dropout=0.1
         )
 
@@ -23,7 +26,9 @@ def __init__(self, min_charge:int, max_charge:int):
         self.target_column_to_train = "charge_indicators"
         self.min_charge = min_charge
         self.max_charge = max_charge
-        self.charge_range = np.arange(min_charge, max_charge+1, dtype=np.int8)
+        self.charge_range = np.arange(
+            min_charge, max_charge+1, dtype=np.int8
+        )
         
     def predict_charges_for_pep_df(self, 
         pep_df:pd.DataFrame, 
@@ -39,14 +44,17 @@ def predict_charges_for_pep_df(self,
             df.drop(columns="charge_probs", inplace=True)
         df["charge"] = df.charge.astype(np.int8)
         return df
-    
-class ChargeModelForModAASeq(
-    ModelInterface_for_Generic_ModAASeq_MultiLabelClassification
+
+class ChargeModelForAASeq(
+    ModelInterface_for_Generic_AASeq_MultiLabelClassification
 ):
-    def __init__(self, min_charge:int, max_charge:int):
+    """
+    ModelInterface for charge prediction for amino acid sequence
+    """
+    def __init__(self, min_charge:int=1, max_charge:int=6):
         super().__init__(
             num_target_values=max_charge-min_charge+1,
-            model_class=Model_for_Generic_ModAASeq_BinaryClassification_Transformer,
+            model_class=Model_for_Generic_AASeq_BinaryClassification_Transformer,
             nlayers=4, hidden_dim=128, dropout=0.1
         )
 
@@ -54,9 +62,7 @@ def __init__(self, min_charge:int, max_charge:int):
         self.target_column_to_train = "charge_indicators"
         self.min_charge = min_charge
         self.max_charge = max_charge
-        self.charge_range = np.arange(
-            min_charge, max_charge+1, dtype=np.int8
-        )
+        self.charge_range = np.arange(min_charge, max_charge+1, dtype=np.int8)
         
     def predict_charges_for_pep_df(self, 
         pep_df:pd.DataFrame, 

From c0b9b0e38c3f89fb15701b796aee6af63e070acb Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Mon, 29 Jan 2024 15:56:17 +0100
Subject: [PATCH 03/18] test predict_charges_for_pep_df

---
 nbdev_nbs/model/charge.ipynb | 111 +++++++++++++++++++++++------------
 peptdeep/model/charge.py     |   4 +-
 2 files changed, 74 insertions(+), 41 deletions(-)

diff --git a/nbdev_nbs/model/charge.ipynb b/nbdev_nbs/model/charge.ipynb
index daadf88e..72b7a0cc 100644
--- a/nbdev_nbs/model/charge.ipynb
+++ b/nbdev_nbs/model/charge.ipynb
@@ -40,6 +40,7 @@
        "      <th>charge_indicators</th>\n",
        "      <th>nAA</th>\n",
        "      <th>charge_probs</th>\n",
+       "      <th>charge</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -48,39 +49,52 @@
        "      <td>ABCDE</td>\n",
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
-       "      <td>[0.7503374, 0.18496446]</td>\n",
+       "      <td>[0.7461448, 0.2694278]</td>\n",
+       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>FGHIJK</td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>[0.29431552, 0.5896796]</td>\n",
+       "      <td>[0.32061976, 0.63410914]</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>FGHIJK</td>\n",
+       "      <td>[0, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>[0.32061976, 0.63410914]</td>\n",
+       "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>LMNOPQ</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>[0.7241462, 0.6150697]</td>\n",
+       "      <td>[0.6874503, 0.56040055]</td>\n",
+       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>RSTUVWXYZ</td>\n",
-       "      <td>[0, 0]</td>\n",
-       "      <td>9</td>\n",
-       "      <td>[0.30657992, 0.22709145]</td>\n",
+       "      <th>2</th>\n",
+       "      <td>LMNOPQ</td>\n",
+       "      <td>[1, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>[0.6874503, 0.56040055]</td>\n",
+       "      <td>2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "    sequence charge_indicators  nAA              charge_probs\n",
-       "0      ABCDE            [1, 0]    5   [0.7503374, 0.18496446]\n",
-       "1     FGHIJK            [0, 1]    6   [0.29431552, 0.5896796]\n",
-       "2     LMNOPQ            [1, 1]    6    [0.7241462, 0.6150697]\n",
-       "3  RSTUVWXYZ            [0, 0]    9  [0.30657992, 0.22709145]"
+       "  sequence charge_indicators  nAA              charge_probs  charge\n",
+       "0    ABCDE            [1, 0]    5    [0.7461448, 0.2694278]       1\n",
+       "1   FGHIJK            [0, 1]    6  [0.32061976, 0.63410914]       1\n",
+       "1   FGHIJK            [0, 1]    6  [0.32061976, 0.63410914]       2\n",
+       "2   LMNOPQ            [1, 1]    6   [0.6874503, 0.56040055]       1\n",
+       "2   LMNOPQ            [1, 1]    6   [0.6874503, 0.56040055]       2"
       ]
      },
      "execution_count": 2,
@@ -94,12 +108,13 @@
     "\n",
     "model = ChargeModelForAASeq(min_charge=1, max_charge=2)\n",
     "\n",
-    "df = pd.DataFrame({\n",
+    "seq_df = pd.DataFrame({\n",
     "    'sequence': ['ABCDE','FGHIJK','LMNOPQ','RSTUVWXYZ'],\n",
     "    'charge_indicators': [[1,0],[0,1],[1,1],[0,0]],\n",
     "})\n",
-    "model.train(df)\n",
-    "model.predict(df)"
+    "model.train(seq_df)\n",
+    "model.predict(seq_df)\n",
+    "model.predict_charges_for_pep_df(seq_df, drop_probs_column=False)"
    ]
   },
   {
@@ -134,6 +149,7 @@
        "      <th>charge_indicators</th>\n",
        "      <th>nAA</th>\n",
        "      <th>charge_probs</th>\n",
+       "      <th>charge</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -144,7 +160,18 @@
        "      <td></td>\n",
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
-       "      <td>[0.80226785, 0.19673407]</td>\n",
+       "      <td>[0.7292267, 0.24495421]</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>FGHIJK</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>[0, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>[0.30077943, 0.5916298]</td>\n",
+       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -153,7 +180,8 @@
        "      <td></td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>[0.3100456, 0.6222909]</td>\n",
+       "      <td>[0.30077943, 0.5916298]</td>\n",
+       "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -162,33 +190,37 @@
        "      <td>2</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>[0.7553099, 0.66014636]</td>\n",
+       "      <td>[0.7352803, 0.60003597]</td>\n",
+       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>RSTUVWXYZ</td>\n",
-       "      <td>Phospho@T</td>\n",
-       "      <td>3</td>\n",
-       "      <td>[0, 0]</td>\n",
-       "      <td>9</td>\n",
-       "      <td>[0.28392678, 0.25133142]</td>\n",
+       "      <th>2</th>\n",
+       "      <td>LMNOPQ</td>\n",
+       "      <td>Oxidation@M</td>\n",
+       "      <td>2</td>\n",
+       "      <td>[1, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>[0.7352803, 0.60003597]</td>\n",
+       "      <td>2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "    sequence         mods mod_sites charge_indicators  nAA  \\\n",
-       "0      ABCDE                                   [1, 0]    5   \n",
-       "1     FGHIJK                                   [0, 1]    6   \n",
-       "2     LMNOPQ  Oxidation@M         2            [1, 1]    6   \n",
-       "3  RSTUVWXYZ    Phospho@T         3            [0, 0]    9   \n",
+       "  sequence         mods mod_sites charge_indicators  nAA  \\\n",
+       "0    ABCDE                                   [1, 0]    5   \n",
+       "1   FGHIJK                                   [0, 1]    6   \n",
+       "1   FGHIJK                                   [0, 1]    6   \n",
+       "2   LMNOPQ  Oxidation@M         2            [1, 1]    6   \n",
+       "2   LMNOPQ  Oxidation@M         2            [1, 1]    6   \n",
        "\n",
-       "               charge_probs  \n",
-       "0  [0.80226785, 0.19673407]  \n",
-       "1    [0.3100456, 0.6222909]  \n",
-       "2   [0.7553099, 0.66014636]  \n",
-       "3  [0.28392678, 0.25133142]  "
+       "              charge_probs  charge  \n",
+       "0  [0.7292267, 0.24495421]       1  \n",
+       "1  [0.30077943, 0.5916298]       1  \n",
+       "1  [0.30077943, 0.5916298]       2  \n",
+       "2  [0.7352803, 0.60003597]       1  \n",
+       "2  [0.7352803, 0.60003597]       2  "
       ]
      },
      "execution_count": 3,
@@ -202,14 +234,15 @@
     "\n",
     "model = ChargeModelForModAASeq(min_charge=1, max_charge=2)\n",
     "\n",
-    "df = pd.DataFrame({\n",
+    "modseq_df = pd.DataFrame({\n",
     "    'sequence': ['ABCDE','FGHIJK','LMNOPQ','RSTUVWXYZ'],\n",
     "    'mods': ['', '', 'Oxidation@M', 'Phospho@T'],\n",
     "    'mod_sites': ['', '', '2', '3'],\n",
     "    'charge_indicators': [[1,0],[0,1],[1,1],[0,0]],\n",
     "})\n",
-    "model.train(df)\n",
-    "model.predict(df)"
+    "model.train(modseq_df)\n",
+    "model.predict(modseq_df)\n",
+    "model.predict_charges_for_pep_df(modseq_df, drop_probs_column=False)"
    ]
   },
   {
diff --git a/peptdeep/model/charge.py b/peptdeep/model/charge.py
index 5c0fc631..5c0bea65 100644
--- a/peptdeep/model/charge.py
+++ b/peptdeep/model/charge.py
@@ -36,7 +36,7 @@ def predict_charges_for_pep_df(self,
         drop_probs_column=True
     ):
         df = self.predict(pep_df)
-        df["charge"] = self.charge_probs.apply(
+        df["charge"] = df.charge_probs.apply(
             lambda x: self.charge_range[x>charge_prob]
         )
         df = df.explode("charge").dropna(subset=["charge"])
@@ -70,7 +70,7 @@ def predict_charges_for_pep_df(self,
         drop_probs_column=True
     ):
         df = self.predict(pep_df)
-        df["charge"] = self.charge_probs.apply(
+        df["charge"] = df.charge_probs.apply(
             lambda x: self.charge_range[x>charge_prob]
         )
         df = df.explode("charge").dropna(subset=["charge"])

From d912e570eeacdb45fa448e746044318ff8562205 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Mon, 29 Jan 2024 16:02:12 +0100
Subject: [PATCH 04/18] nbdev_clean

---
 nbdev_nbs/model/charge.ipynb | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/nbdev_nbs/model/charge.ipynb b/nbdev_nbs/model/charge.ipynb
index 72b7a0cc..302041c7 100644
--- a/nbdev_nbs/model/charge.ipynb
+++ b/nbdev_nbs/model/charge.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -12,7 +12,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -97,7 +97,7 @@
        "2   LMNOPQ            [1, 1]    6   [0.6874503, 0.56040055]       2"
       ]
      },
-     "execution_count": 2,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -119,7 +119,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -223,7 +223,7 @@
        "2  [0.7352803, 0.60003597]       2  "
       ]
      },
-     "execution_count": 3,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -258,18 +258,6 @@
    "display_name": "base",
    "language": "python",
    "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.12"
   }
  },
  "nbformat": 4,

From 58e7e66f05698a41902897b119cdcbb0e50b554b Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Mon, 29 Jan 2024 22:17:25 +0100
Subject: [PATCH 05/18] add_charge for speclib prediction

---
 nbdev_nbs/model/charge.ipynb                 |  256 +++-
 nbdev_nbs/protein/fasta.ipynb                | 1321 +++++++++---------
 nbs_tests/test_fasta_lib_precursor_lib.ipynb |  109 +-
 peptdeep/constants/default_settings.yaml     |    7 +
 peptdeep/model/charge.py                     |   91 +-
 peptdeep/pretrained_models.py                |   16 +-
 peptdeep/protein/fasta.py                    |   16 +
 7 files changed, 1007 insertions(+), 809 deletions(-)

diff --git a/nbdev_nbs/model/charge.ipynb b/nbdev_nbs/model/charge.ipynb
index 302041c7..7755d736 100644
--- a/nbdev_nbs/model/charge.ipynb
+++ b/nbdev_nbs/model/charge.ipynb
@@ -15,6 +15,13 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
@@ -39,7 +46,7 @@
        "      <th>sequence</th>\n",
        "      <th>charge_indicators</th>\n",
        "      <th>nAA</th>\n",
-       "      <th>charge_probs</th>\n",
+       "      <th>charge_prob</th>\n",
        "      <th>charge</th>\n",
        "    </tr>\n",
        "  </thead>\n",
@@ -49,7 +56,7 @@
        "      <td>ABCDE</td>\n",
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
-       "      <td>[0.7461448, 0.2694278]</td>\n",
+       "      <td>0.739165</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -57,7 +64,7 @@
        "      <td>FGHIJK</td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>[0.32061976, 0.63410914]</td>\n",
+       "      <td>0.439334</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -65,7 +72,7 @@
        "      <td>FGHIJK</td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>[0.32061976, 0.63410914]</td>\n",
+       "      <td>0.627932</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -73,7 +80,7 @@
        "      <td>LMNOPQ</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>[0.6874503, 0.56040055]</td>\n",
+       "      <td>0.628110</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -81,7 +88,7 @@
        "      <td>LMNOPQ</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>[0.6874503, 0.56040055]</td>\n",
+       "      <td>0.587332</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -89,12 +96,12 @@
        "</div>"
       ],
       "text/plain": [
-       "  sequence charge_indicators  nAA              charge_probs  charge\n",
-       "0    ABCDE            [1, 0]    5    [0.7461448, 0.2694278]       1\n",
-       "1   FGHIJK            [0, 1]    6  [0.32061976, 0.63410914]       1\n",
-       "1   FGHIJK            [0, 1]    6  [0.32061976, 0.63410914]       2\n",
-       "2   LMNOPQ            [1, 1]    6   [0.6874503, 0.56040055]       1\n",
-       "2   LMNOPQ            [1, 1]    6   [0.6874503, 0.56040055]       2"
+       "  sequence charge_indicators  nAA  charge_prob  charge\n",
+       "0    ABCDE            [1, 0]    5     0.739165       1\n",
+       "1   FGHIJK            [0, 1]    6     0.439334       1\n",
+       "1   FGHIJK            [0, 1]    6     0.627932       2\n",
+       "2   LMNOPQ            [1, 1]    6     0.628110       1\n",
+       "2   LMNOPQ            [1, 1]    6     0.587332       2"
       ]
      },
      "execution_count": null,
@@ -114,7 +121,7 @@
     "})\n",
     "model.train(seq_df)\n",
     "model.predict(seq_df)\n",
-    "model.predict_charges_for_pep_df(seq_df, drop_probs_column=False)"
+    "model.predict_and_clip_charges(seq_df, charge_prob_cutoff=0.3)"
    ]
   },
   {
@@ -144,11 +151,9 @@
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>sequence</th>\n",
-       "      <th>mods</th>\n",
-       "      <th>mod_sites</th>\n",
        "      <th>charge_indicators</th>\n",
        "      <th>nAA</th>\n",
-       "      <th>charge_probs</th>\n",
+       "      <th>charge_prob</th>\n",
        "      <th>charge</th>\n",
        "    </tr>\n",
        "  </thead>\n",
@@ -156,21 +161,100 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>ABCDE</td>\n",
-       "      <td></td>\n",
-       "      <td></td>\n",
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
-       "      <td>[0.7292267, 0.24495421]</td>\n",
-       "      <td>1</td>\n",
+       "      <td>0.249596</td>\n",
+       "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>FGHIJK</td>\n",
-       "      <td></td>\n",
-       "      <td></td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>[0.30077943, 0.5916298]</td>\n",
+       "      <td>0.627932</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>LMNOPQ</td>\n",
+       "      <td>[1, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0.587332</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>RSTUVWXYZ</td>\n",
+       "      <td>[0, 0]</td>\n",
+       "      <td>9</td>\n",
+       "      <td>0.260932</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    sequence charge_indicators  nAA  charge_prob  charge\n",
+       "0      ABCDE            [1, 0]    5     0.249596       2\n",
+       "1     FGHIJK            [0, 1]    6     0.627932       2\n",
+       "2     LMNOPQ            [1, 1]    6     0.587332       2\n",
+       "3  RSTUVWXYZ            [0, 0]    9     0.260932       2"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.predict_charges_as_prob(seq_df, 2, 4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>mods</th>\n",
+       "      <th>mod_sites</th>\n",
+       "      <th>charge_indicators</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>charge_prob</th>\n",
+       "      <th>charge</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>ABCDE</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>[1, 0]</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.814867</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -180,7 +264,7 @@
        "      <td></td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>[0.30077943, 0.5916298]</td>\n",
+       "      <td>0.708186</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -190,7 +274,7 @@
        "      <td>2</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>[0.7352803, 0.60003597]</td>\n",
+       "      <td>0.712738</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -200,7 +284,7 @@
        "      <td>2</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>[0.7352803, 0.60003597]</td>\n",
+       "      <td>0.534221</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -208,19 +292,11 @@
        "</div>"
       ],
       "text/plain": [
-       "  sequence         mods mod_sites charge_indicators  nAA  \\\n",
-       "0    ABCDE                                   [1, 0]    5   \n",
-       "1   FGHIJK                                   [0, 1]    6   \n",
-       "1   FGHIJK                                   [0, 1]    6   \n",
-       "2   LMNOPQ  Oxidation@M         2            [1, 1]    6   \n",
-       "2   LMNOPQ  Oxidation@M         2            [1, 1]    6   \n",
-       "\n",
-       "              charge_probs  charge  \n",
-       "0  [0.7292267, 0.24495421]       1  \n",
-       "1  [0.30077943, 0.5916298]       1  \n",
-       "1  [0.30077943, 0.5916298]       2  \n",
-       "2  [0.7352803, 0.60003597]       1  \n",
-       "2  [0.7352803, 0.60003597]       2  "
+       "  sequence         mods mod_sites charge_indicators  nAA  charge_prob  charge\n",
+       "0    ABCDE                                   [1, 0]    5     0.814867       1\n",
+       "1   FGHIJK                                   [0, 1]    6     0.708186       2\n",
+       "2   LMNOPQ  Oxidation@M         2            [1, 1]    6     0.712738       1\n",
+       "2   LMNOPQ  Oxidation@M         2            [1, 1]    6     0.534221       2"
       ]
      },
      "execution_count": null,
@@ -242,15 +318,111 @@
     "})\n",
     "model.train(modseq_df)\n",
     "model.predict(modseq_df)\n",
-    "model.predict_charges_for_pep_df(modseq_df, drop_probs_column=False)"
+    "model.predict_and_clip_charges(modseq_df, charge_prob_cutoff=0.3)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>mods</th>\n",
+       "      <th>mod_sites</th>\n",
+       "      <th>charge_indicators</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>charge_prob</th>\n",
+       "      <th>charge</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>ABCDE</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>[1, 0]</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.224946</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>FGHIJK</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>[0, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0.708186</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>LMNOPQ</td>\n",
+       "      <td>Oxidation@M</td>\n",
+       "      <td>2</td>\n",
+       "      <td>[1, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0.534221</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>RSTUVWXYZ</td>\n",
+       "      <td>Phospho@T</td>\n",
+       "      <td>3</td>\n",
+       "      <td>[0, 0]</td>\n",
+       "      <td>9</td>\n",
+       "      <td>0.278221</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    sequence         mods mod_sites charge_indicators  nAA  charge_prob  \\\n",
+       "0      ABCDE                                   [1, 0]    5     0.224946   \n",
+       "1     FGHIJK                                   [0, 1]    6     0.708186   \n",
+       "2     LMNOPQ  Oxidation@M         2            [1, 1]    6     0.534221   \n",
+       "3  RSTUVWXYZ    Phospho@T         3            [0, 0]    9     0.278221   \n",
+       "\n",
+       "   charge  \n",
+       "0       2  \n",
+       "1       2  \n",
+       "2       2  \n",
+       "3       2  "
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.predict_charges_as_prob(modseq_df, 2, 4)"
+   ]
   }
  ],
  "metadata": {
diff --git a/nbdev_nbs/protein/fasta.ipynb b/nbdev_nbs/protein/fasta.ipynb
index fb7436e2..3332879e 100644
--- a/nbdev_nbs/protein/fasta.ipynb
+++ b/nbdev_nbs/protein/fasta.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -18,16 +18,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n"
+     ]
+    }
+   ],
    "source": [
     "from peptdeep.protein.fasta import *"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -166,8 +174,8 @@
        "</div>"
       ],
       "text/plain": [
-       "               sequence protein_idxes  miss_cleavage  is_prot_nterm   \n",
-       "0               MABCDEK             0              0           True  \\\n",
+       "               sequence protein_idxes  miss_cleavage  is_prot_nterm  \\\n",
+       "0               MABCDEK             0              0           True   \n",
        "1               LMNOPQR           0;1              0          False   \n",
        "2             LMNOPQRST             0              1          False   \n",
        "3          ABCDEKFGHIJK             0              1           True   \n",
@@ -189,7 +197,7 @@
        "8          False                  20  "
       ]
      },
-     "execution_count": null,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -216,7 +224,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -268,7 +276,7 @@
        "1         yy      gene           FGHIJKLMNOPQR"
       ]
      },
-     "execution_count": null,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -279,7 +287,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -438,8 +446,8 @@
        "</div>"
       ],
       "text/plain": [
-       "               sequence protein_idxes  miss_cleavage  is_prot_nterm   \n",
-       "0               MABCDEK             0              0           True  \\\n",
+       "               sequence protein_idxes  miss_cleavage  is_prot_nterm  \\\n",
+       "0               MABCDEK             0              0           True   \n",
        "1               LMNOPQR           0;1              0          False   \n",
        "2             LMNOPQRST             0              1          False   \n",
        "3          ABCDEKFGHIJK             0              1           True   \n",
@@ -461,7 +469,7 @@
        "8          False                  20       xx        "
       ]
      },
-     "execution_count": null,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -474,7 +482,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -506,7 +514,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -550,8 +558,8 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M</td>\n",
-       "      <td>4;1</td>\n",
+       "      <td>Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>1;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -576,8 +584,8 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;1</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;1;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -589,8 +597,8 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>4;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -667,8 +675,8 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>3;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;3</td>\n",
        "      <td>12</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -680,8 +688,8 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M</td>\n",
-       "      <td>4;1</td>\n",
+       "      <td>Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>1;4</td>\n",
        "      <td>13</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -706,8 +714,8 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;1</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;1;4</td>\n",
        "      <td>13</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -719,8 +727,8 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>4;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;4</td>\n",
        "      <td>13</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -810,8 +818,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M</td>\n",
-       "      <td>3;14</td>\n",
+       "      <td>Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>14;3</td>\n",
        "      <td>19</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -836,8 +844,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>3;0;14</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;14;3</td>\n",
        "      <td>19</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -849,8 +857,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>3;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;3</td>\n",
        "      <td>19</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -862,8 +870,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M</td>\n",
-       "      <td>4;1</td>\n",
+       "      <td>Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>1;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -875,8 +883,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M</td>\n",
-       "      <td>4;15</td>\n",
+       "      <td>Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>15;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -888,8 +896,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M;Oxidation@M</td>\n",
-       "      <td>4;1;15</td>\n",
+       "      <td>Oxidation@M;Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>1;15;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -914,8 +922,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;1</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;1;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -927,8 +935,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;15</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;15;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -940,8 +948,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;1;15</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...</td>\n",
+       "      <td>0;1;15;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -953,8 +961,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>4;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -964,8 +972,8 @@
        "</div>"
       ],
       "text/plain": [
-       "                sequence protein_idxes  miss_cleavage  is_prot_nterm   \n",
-       "0                MABCDEK             0              0           True  \\\n",
+       "                sequence protein_idxes  miss_cleavage  is_prot_nterm  \\\n",
+       "0                MABCDEK             0              0           True   \n",
        "1                MABCDEK             0              0           True   \n",
        "2                MABCDEK             0              0           True   \n",
        "3                MABCDEK             0              0           True   \n",
@@ -998,76 +1006,76 @@
        "30  MABCDEKFGHIJKLMNOPQR             0              2           True   \n",
        "31  MABCDEKFGHIJKLMNOPQR             0              2           True   \n",
        "\n",
-       "    is_prot_cterm                                               mods   \n",
-       "0           False                      Carbamidomethyl@C;Oxidation@M  \\\n",
+       "    is_prot_cterm                                               mods  \\\n",
+       "0           False                      Oxidation@M;Carbamidomethyl@C   \n",
        "1           False                                  Carbamidomethyl@C   \n",
-       "2           False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "3           False            Carbamidomethyl@C;Acetyl@Protein N-term   \n",
+       "2           False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
+       "3           False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
        "4            True                                        Oxidation@M   \n",
        "5            True                                                      \n",
        "6            True                                        Oxidation@M   \n",
        "7            True                                                      \n",
        "8           False                                  Carbamidomethyl@C   \n",
-       "9           False            Carbamidomethyl@C;Acetyl@Protein N-term   \n",
-       "10          False                      Carbamidomethyl@C;Oxidation@M   \n",
+       "9           False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "10          False                      Oxidation@M;Carbamidomethyl@C   \n",
        "11          False                                  Carbamidomethyl@C   \n",
-       "12          False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "13          False            Carbamidomethyl@C;Acetyl@Protein N-term   \n",
+       "12          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
+       "13          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
        "14           True                                        Oxidation@M   \n",
        "15           True                                                      \n",
        "16           True                  Acetyl@Protein N-term;Oxidation@M   \n",
        "17           True                              Acetyl@Protein N-term   \n",
        "18           True                                        Oxidation@M   \n",
        "19           True                                                      \n",
-       "20          False                      Carbamidomethyl@C;Oxidation@M   \n",
+       "20          False                      Oxidation@M;Carbamidomethyl@C   \n",
        "21          False                                  Carbamidomethyl@C   \n",
-       "22          False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "23          False            Carbamidomethyl@C;Acetyl@Protein N-term   \n",
-       "24          False                      Carbamidomethyl@C;Oxidation@M   \n",
-       "25          False                      Carbamidomethyl@C;Oxidation@M   \n",
-       "26          False          Carbamidomethyl@C;Oxidation@M;Oxidation@M   \n",
+       "22          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
+       "23          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "24          False                      Oxidation@M;Carbamidomethyl@C   \n",
+       "25          False                      Oxidation@M;Carbamidomethyl@C   \n",
+       "26          False          Oxidation@M;Oxidation@M;Carbamidomethyl@C   \n",
        "27          False                                  Carbamidomethyl@C   \n",
-       "28          False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "29          False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "30          False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "31          False            Carbamidomethyl@C;Acetyl@Protein N-term   \n",
+       "28          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
+       "29          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
+       "30          False  Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...   \n",
+       "31          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
        "\n",
        "   mod_sites  nAA proteins genes  \n",
-       "0        4;1    7       xx        \n",
+       "0        1;4    7       xx        \n",
        "1          4    7       xx        \n",
-       "2      4;0;1    7       xx        \n",
-       "3        4;0    7       xx        \n",
+       "2      0;1;4    7       xx        \n",
+       "3        0;4    7       xx        \n",
        "4          2    7    xx;yy  gene  \n",
        "5               7    xx;yy  gene  \n",
        "6          2    9       xx        \n",
        "7               9       xx        \n",
        "8          3   12       xx        \n",
-       "9        3;0   12       xx        \n",
-       "10       4;1   13       xx        \n",
+       "9        0;3   12       xx        \n",
+       "10       1;4   13       xx        \n",
        "11         4   13       xx        \n",
-       "12     4;0;1   13       xx        \n",
-       "13       4;0   13       xx        \n",
+       "12     0;1;4   13       xx        \n",
+       "13       0;4   13       xx        \n",
        "14         8   13    xx;yy  gene  \n",
        "15             13    xx;yy  gene  \n",
        "16       0;8   13    xx;yy  gene  \n",
        "17         0   13    xx;yy  gene  \n",
        "18         8   15       xx        \n",
        "19             15       xx        \n",
-       "20      3;14   19       xx        \n",
+       "20      14;3   19       xx        \n",
        "21         3   19       xx        \n",
-       "22    3;0;14   19       xx        \n",
-       "23       3;0   19       xx        \n",
-       "24       4;1   20       xx        \n",
-       "25      4;15   20       xx        \n",
-       "26    4;1;15   20       xx        \n",
+       "22    0;14;3   19       xx        \n",
+       "23       0;3   19       xx        \n",
+       "24       1;4   20       xx        \n",
+       "25      15;4   20       xx        \n",
+       "26    1;15;4   20       xx        \n",
        "27         4   20       xx        \n",
-       "28     4;0;1   20       xx        \n",
-       "29    4;0;15   20       xx        \n",
-       "30  4;0;1;15   20       xx        \n",
-       "31       4;0   20       xx        "
+       "28     0;1;4   20       xx        \n",
+       "29    0;15;4   20       xx        \n",
+       "30  0;1;15;4   20       xx        \n",
+       "31       0;4   20       xx        "
       ]
      },
-     "execution_count": null,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1079,7 +1087,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1125,7 +1133,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -1169,8 +1177,8 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M</td>\n",
-       "      <td>4;1</td>\n",
+       "      <td>Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>1;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1195,8 +1203,8 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;1</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;1;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1208,8 +1216,8 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>4;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1338,8 +1346,8 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>3;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;3</td>\n",
        "      <td>12</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1351,8 +1359,8 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M</td>\n",
-       "      <td>4;1</td>\n",
+       "      <td>Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>1;4</td>\n",
        "      <td>13</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1377,8 +1385,8 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;1</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;1;4</td>\n",
        "      <td>13</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1390,8 +1398,8 @@
        "      <td>1</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>4;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;4</td>\n",
        "      <td>13</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1533,8 +1541,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M</td>\n",
-       "      <td>3;14</td>\n",
+       "      <td>Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>14;3</td>\n",
        "      <td>19</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1559,8 +1567,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>3;0;14</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;14;3</td>\n",
        "      <td>19</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1572,8 +1580,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>3;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;3</td>\n",
        "      <td>19</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1585,8 +1593,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M</td>\n",
-       "      <td>4;1</td>\n",
+       "      <td>Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>1;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1598,8 +1606,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M</td>\n",
-       "      <td>4;15</td>\n",
+       "      <td>Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>15;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1611,8 +1619,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M;Oxidation@M</td>\n",
-       "      <td>4;1;15</td>\n",
+       "      <td>Oxidation@M;Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>1;15;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1637,8 +1645,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;1</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;1;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1650,8 +1658,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;15</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;15;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1663,8 +1671,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;1;15</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...</td>\n",
+       "      <td>0;1;15;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1676,8 +1684,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>4;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;4</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1687,8 +1695,8 @@
        "</div>"
       ],
       "text/plain": [
-       "                sequence protein_idxes  miss_cleavage  is_prot_nterm   \n",
-       "0                MABCDEK             0              0           True  \\\n",
+       "                sequence protein_idxes  miss_cleavage  is_prot_nterm  \\\n",
+       "0                MABCDEK             0              0           True   \n",
        "1                MABCDEK             0              0           True   \n",
        "2                MABCDEK             0              0           True   \n",
        "3                MABCDEK             0              0           True   \n",
@@ -1729,11 +1737,11 @@
        "38  MABCDEKFGHIJKLMNOPQR             0              2           True   \n",
        "39  MABCDEKFGHIJKLMNOPQR             0              2           True   \n",
        "\n",
-       "    is_prot_cterm                                               mods   \n",
-       "0           False                      Carbamidomethyl@C;Oxidation@M  \\\n",
+       "    is_prot_cterm                                               mods  \\\n",
+       "0           False                      Oxidation@M;Carbamidomethyl@C   \n",
        "1           False                                  Carbamidomethyl@C   \n",
-       "2           False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "3           False            Carbamidomethyl@C;Acetyl@Protein N-term   \n",
+       "2           False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
+       "3           False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
        "4            True                                        Oxidation@M   \n",
        "5            True                                                      \n",
        "6            True                              Oxidation@M;Phospho@S   \n",
@@ -1743,11 +1751,11 @@
        "10           True                                          Phospho@T   \n",
        "11           True                                                      \n",
        "12          False                                  Carbamidomethyl@C   \n",
-       "13          False            Carbamidomethyl@C;Acetyl@Protein N-term   \n",
-       "14          False                      Carbamidomethyl@C;Oxidation@M   \n",
+       "13          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "14          False                      Oxidation@M;Carbamidomethyl@C   \n",
        "15          False                                  Carbamidomethyl@C   \n",
-       "16          False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "17          False            Carbamidomethyl@C;Acetyl@Protein N-term   \n",
+       "16          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
+       "17          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
        "18           True                                        Oxidation@M   \n",
        "19           True                                                      \n",
        "20           True                  Acetyl@Protein N-term;Oxidation@M   \n",
@@ -1758,24 +1766,24 @@
        "25           True                                          Phospho@S   \n",
        "26           True                                          Phospho@T   \n",
        "27           True                                                      \n",
-       "28          False                      Carbamidomethyl@C;Oxidation@M   \n",
+       "28          False                      Oxidation@M;Carbamidomethyl@C   \n",
        "29          False                                  Carbamidomethyl@C   \n",
-       "30          False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "31          False            Carbamidomethyl@C;Acetyl@Protein N-term   \n",
-       "32          False                      Carbamidomethyl@C;Oxidation@M   \n",
-       "33          False                      Carbamidomethyl@C;Oxidation@M   \n",
-       "34          False          Carbamidomethyl@C;Oxidation@M;Oxidation@M   \n",
+       "30          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
+       "31          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
+       "32          False                      Oxidation@M;Carbamidomethyl@C   \n",
+       "33          False                      Oxidation@M;Carbamidomethyl@C   \n",
+       "34          False          Oxidation@M;Oxidation@M;Carbamidomethyl@C   \n",
        "35          False                                  Carbamidomethyl@C   \n",
-       "36          False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "37          False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "38          False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "39          False            Carbamidomethyl@C;Acetyl@Protein N-term   \n",
+       "36          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
+       "37          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
+       "38          False  Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...   \n",
+       "39          False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
        "\n",
        "   mod_sites  nAA proteins genes  \n",
-       "0        4;1    7       xx        \n",
+       "0        1;4    7       xx        \n",
        "1          4    7       xx        \n",
-       "2      4;0;1    7       xx        \n",
-       "3        4;0    7       xx        \n",
+       "2      0;1;4    7       xx        \n",
+       "3        0;4    7       xx        \n",
        "4          2    7    xx;yy  gene  \n",
        "5               7    xx;yy  gene  \n",
        "6        2;8    9       xx        \n",
@@ -1785,11 +1793,11 @@
        "10         9    9       xx        \n",
        "11              9       xx        \n",
        "12         3   12       xx        \n",
-       "13       3;0   12       xx        \n",
-       "14       4;1   13       xx        \n",
+       "13       0;3   12       xx        \n",
+       "14       1;4   13       xx        \n",
        "15         4   13       xx        \n",
-       "16     4;0;1   13       xx        \n",
-       "17       4;0   13       xx        \n",
+       "16     0;1;4   13       xx        \n",
+       "17       0;4   13       xx        \n",
        "18         8   13    xx;yy  gene  \n",
        "19             13    xx;yy  gene  \n",
        "20       0;8   13    xx;yy  gene  \n",
@@ -1800,21 +1808,21 @@
        "25        14   15       xx        \n",
        "26        15   15       xx        \n",
        "27             15       xx        \n",
-       "28      3;14   19       xx        \n",
+       "28      14;3   19       xx        \n",
        "29         3   19       xx        \n",
-       "30    3;0;14   19       xx        \n",
-       "31       3;0   19       xx        \n",
-       "32       4;1   20       xx        \n",
-       "33      4;15   20       xx        \n",
-       "34    4;1;15   20       xx        \n",
+       "30    0;14;3   19       xx        \n",
+       "31       0;3   19       xx        \n",
+       "32       1;4   20       xx        \n",
+       "33      15;4   20       xx        \n",
+       "34    1;15;4   20       xx        \n",
        "35         4   20       xx        \n",
-       "36     4;0;1   20       xx        \n",
-       "37    4;0;15   20       xx        \n",
-       "38  4;0;1;15   20       xx        \n",
-       "39       4;0   20       xx        "
+       "36     0;1;4   20       xx        \n",
+       "37    0;15;4   20       xx        \n",
+       "38  0;1;15;4   20       xx        \n",
+       "39       0;4   20       xx        "
       ]
      },
-     "execution_count": null,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1828,7 +1836,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -1873,8 +1881,8 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M</td>\n",
-       "      <td>4;1</td>\n",
+       "      <td>Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>1;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1901,8 +1909,8 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;1</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;1;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1915,8 +1923,8 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>4;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;4</td>\n",
        "      <td>7</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1971,8 +1979,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;1;7;13</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;1;4;7;13</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1985,8 +1993,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;15;7;13</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;15;4;7;13</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -1999,8 +2007,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>4;0;1;15;7;13</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...</td>\n",
+       "      <td>0;1;15;4;7;13</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -2013,8 +2021,8 @@
        "      <td>2</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Dimeth...</td>\n",
-       "      <td>4;0;7;13</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C;Dimeth...</td>\n",
+       "      <td>0;4;7;13</td>\n",
        "      <td>20</td>\n",
        "      <td>xx</td>\n",
        "      <td></td>\n",
@@ -2026,8 +2034,8 @@
        "</div>"
       ],
       "text/plain": [
-       "                 sequence protein_idxes  miss_cleavage  is_prot_nterm   \n",
-       "0                 MABCDEK             0              0           True  \\\n",
+       "                 sequence protein_idxes  miss_cleavage  is_prot_nterm  \\\n",
+       "0                 MABCDEK             0              0           True   \n",
        "1                 MABCDEK             0              0           True   \n",
        "2                 MABCDEK             0              0           True   \n",
        "3                 MABCDEK             0              0           True   \n",
@@ -2039,36 +2047,36 @@
        "118  MABCDEKFGHIJKLMNOPQR             0              2           True   \n",
        "119  MABCDEKFGHIJKLMNOPQR             0              2           True   \n",
        "\n",
-       "     is_prot_cterm                                               mods   \n",
-       "0            False                      Carbamidomethyl@C;Oxidation@M  \\\n",
+       "     is_prot_cterm                                               mods  \\\n",
+       "0            False                      Oxidation@M;Carbamidomethyl@C   \n",
        "1            False                                  Carbamidomethyl@C   \n",
-       "2            False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "3            False            Carbamidomethyl@C;Acetyl@Protein N-term   \n",
+       "2            False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
+       "3            False            Acetyl@Protein N-term;Carbamidomethyl@C   \n",
        "4             True                                        Oxidation@M   \n",
        "..             ...                                                ...   \n",
        "115          False  Carbamidomethyl@C;Dimethyl:2H(6)13C(2)@Any N-t...   \n",
-       "116          False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "117          False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "118          False  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...   \n",
-       "119          False  Carbamidomethyl@C;Acetyl@Protein N-term;Dimeth...   \n",
+       "116          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
+       "117          False  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...   \n",
+       "118          False  Acetyl@Protein N-term;Oxidation@M;Oxidation@M;...   \n",
+       "119          False  Acetyl@Protein N-term;Carbamidomethyl@C;Dimeth...   \n",
        "\n",
        "         mod_sites  nAA proteins genes labeling_channel  \n",
-       "0              4;1    7       xx                   none  \n",
+       "0              1;4    7       xx                   none  \n",
        "1                4    7       xx                   none  \n",
-       "2            4;0;1    7       xx                   none  \n",
-       "3              4;0    7       xx                   none  \n",
+       "2            0;1;4    7       xx                   none  \n",
+       "3              0;4    7       xx                   none  \n",
        "4                2    7    xx;yy  gene             none  \n",
        "..             ...  ...      ...   ...              ...  \n",
        "115       4;0;7;13   20       xx                  heavy  \n",
-       "116     4;0;1;7;13   20       xx                  heavy  \n",
-       "117    4;0;15;7;13   20       xx                  heavy  \n",
-       "118  4;0;1;15;7;13   20       xx                  heavy  \n",
-       "119       4;0;7;13   20       xx                  heavy  \n",
+       "116     0;1;4;7;13   20       xx                  heavy  \n",
+       "117    0;15;4;7;13   20       xx                  heavy  \n",
+       "118  0;1;15;4;7;13   20       xx                  heavy  \n",
+       "119       0;4;7;13   20       xx                  heavy  \n",
        "\n",
        "[120 rows x 11 columns]"
       ]
      },
-     "execution_count": null,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2085,7 +2093,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2094,7 +2102,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -2129,16 +2137,16 @@
        "      <th>decoy</th>\n",
        "      <th>charge</th>\n",
        "      <th>...</th>\n",
-       "      <th>isotope_apex_mz</th>\n",
-       "      <th>isotope_right_most_mz</th>\n",
+       "      <th>i_5</th>\n",
+       "      <th>mono_isotope_idx</th>\n",
        "      <th>rt_pred</th>\n",
        "      <th>rt_norm_pred</th>\n",
        "      <th>ccs_pred</th>\n",
        "      <th>mobility_pred</th>\n",
-       "      <th>frag_stop_idx</th>\n",
        "      <th>nce</th>\n",
        "      <th>instrument</th>\n",
        "      <th>frag_start_idx</th>\n",
+       "      <th>frag_stop_idx</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -2155,16 +2163,16 @@
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>481.739834</td>\n",
-       "      <td>482.241484</td>\n",
+       "      <td>0.001232</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.021263</td>\n",
        "      <td>0.021263</td>\n",
        "      <td>318.941895</td>\n",
        "      <td>0.785035</td>\n",
-       "      <td>7</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>0</td>\n",
+       "      <td>7</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -2179,16 +2187,16 @@
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>473.742377</td>\n",
-       "      <td>474.244027</td>\n",
+       "      <td>0.001173</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.092409</td>\n",
        "      <td>0.092409</td>\n",
        "      <td>317.660034</td>\n",
        "      <td>0.781693</td>\n",
-       "      <td>14</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>7</td>\n",
+       "      <td>14</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -2203,16 +2211,16 @@
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>487.200207</td>\n",
-       "      <td>487.701857</td>\n",
-       "      <td>0.032798</td>\n",
-       "      <td>0.032798</td>\n",
-       "      <td>329.176941</td>\n",
+       "      <td>0.001409</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.032797</td>\n",
+       "      <td>0.032797</td>\n",
+       "      <td>329.177002</td>\n",
        "      <td>0.810355</td>\n",
-       "      <td>21</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>14</td>\n",
+       "      <td>21</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -2221,22 +2229,22 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>2;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;2</td>\n",
        "      <td>8</td>\n",
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>508.205490</td>\n",
-       "      <td>509.208790</td>\n",
+       "      <td>0.001604</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.109105</td>\n",
        "      <td>0.109105</td>\n",
-       "      <td>342.048767</td>\n",
+       "      <td>342.048706</td>\n",
        "      <td>0.842529</td>\n",
-       "      <td>28</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>21</td>\n",
+       "      <td>28</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -2251,16 +2259,16 @@
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>481.739834</td>\n",
-       "      <td>482.241484</td>\n",
+       "      <td>0.001232</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.044289</td>\n",
        "      <td>0.044289</td>\n",
-       "      <td>321.865784</td>\n",
+       "      <td>321.865723</td>\n",
        "      <td>0.792231</td>\n",
-       "      <td>35</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>28</td>\n",
+       "      <td>35</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
@@ -2275,16 +2283,16 @@
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>473.742377</td>\n",
-       "      <td>474.244027</td>\n",
+       "      <td>0.001173</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.158330</td>\n",
        "      <td>0.158330</td>\n",
        "      <td>323.465607</td>\n",
        "      <td>0.795979</td>\n",
-       "      <td>42</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>35</td>\n",
+       "      <td>42</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
@@ -2299,16 +2307,16 @@
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>487.200207</td>\n",
-       "      <td>487.701857</td>\n",
-       "      <td>0.016275</td>\n",
-       "      <td>0.016275</td>\n",
+       "      <td>0.001409</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.016274</td>\n",
+       "      <td>0.016274</td>\n",
        "      <td>328.831970</td>\n",
        "      <td>0.809506</td>\n",
-       "      <td>49</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>42</td>\n",
+       "      <td>49</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
@@ -2317,22 +2325,22 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>6;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;6</td>\n",
        "      <td>8</td>\n",
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>508.205490</td>\n",
-       "      <td>509.208790</td>\n",
+       "      <td>0.001604</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.119288</td>\n",
        "      <td>0.119288</td>\n",
-       "      <td>339.180786</td>\n",
+       "      <td>339.180847</td>\n",
        "      <td>0.835465</td>\n",
-       "      <td>56</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>49</td>\n",
+       "      <td>56</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
@@ -2341,22 +2349,22 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M</td>\n",
-       "      <td>3;1</td>\n",
+       "      <td>Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>1;3</td>\n",
        "      <td>9</td>\n",
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>560.717907</td>\n",
-       "      <td>561.721207</td>\n",
+       "      <td>0.003490</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.048364</td>\n",
        "      <td>0.048364</td>\n",
-       "      <td>351.815094</td>\n",
+       "      <td>351.815063</td>\n",
        "      <td>0.867675</td>\n",
-       "      <td>64</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>56</td>\n",
+       "      <td>64</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
@@ -2371,16 +2379,16 @@
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>552.720450</td>\n",
-       "      <td>553.723750</td>\n",
+       "      <td>0.003395</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.081848</td>\n",
        "      <td>0.081848</td>\n",
        "      <td>353.857971</td>\n",
        "      <td>0.872560</td>\n",
-       "      <td>72</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>64</td>\n",
+       "      <td>72</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
@@ -2389,22 +2397,22 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>3;0;1</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;1;3</td>\n",
        "      <td>9</td>\n",
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>581.723190</td>\n",
-       "      <td>582.726490</td>\n",
+       "      <td>0.003824</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.204708</td>\n",
        "      <td>0.204708</td>\n",
-       "      <td>362.488403</td>\n",
+       "      <td>362.488342</td>\n",
        "      <td>0.894392</td>\n",
-       "      <td>80</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>72</td>\n",
+       "      <td>80</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>11</th>\n",
@@ -2413,22 +2421,22 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>3;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;3</td>\n",
        "      <td>9</td>\n",
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>573.725732</td>\n",
-       "      <td>574.729032</td>\n",
+       "      <td>0.003724</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.279585</td>\n",
        "      <td>0.279585</td>\n",
        "      <td>361.742859</td>\n",
        "      <td>0.892406</td>\n",
-       "      <td>88</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>80</td>\n",
+       "      <td>88</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>12</th>\n",
@@ -2437,22 +2445,22 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Oxidation@M</td>\n",
-       "      <td>6;8</td>\n",
+       "      <td>Oxidation@M;Carbamidomethyl@C</td>\n",
+       "      <td>8;6</td>\n",
        "      <td>9</td>\n",
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>560.717907</td>\n",
-       "      <td>561.721207</td>\n",
+       "      <td>0.003490</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.015205</td>\n",
        "      <td>0.015205</td>\n",
-       "      <td>354.745117</td>\n",
+       "      <td>354.745087</td>\n",
        "      <td>0.874901</td>\n",
-       "      <td>96</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>88</td>\n",
+       "      <td>96</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>13</th>\n",
@@ -2467,16 +2475,16 @@
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>552.720450</td>\n",
-       "      <td>553.723750</td>\n",
+       "      <td>0.003395</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.084342</td>\n",
        "      <td>0.084342</td>\n",
-       "      <td>355.178955</td>\n",
+       "      <td>355.178925</td>\n",
        "      <td>0.875817</td>\n",
-       "      <td>104</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>96</td>\n",
+       "      <td>104</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>14</th>\n",
@@ -2485,22 +2493,22 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...</td>\n",
-       "      <td>6;0;8</td>\n",
+       "      <td>Acetyl@Protein N-term;Oxidation@M;Carbamidomet...</td>\n",
+       "      <td>0;8;6</td>\n",
        "      <td>9</td>\n",
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>581.723190</td>\n",
-       "      <td>582.726490</td>\n",
+       "      <td>0.003824</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.134268</td>\n",
        "      <td>0.134268</td>\n",
        "      <td>363.174927</td>\n",
        "      <td>0.896086</td>\n",
-       "      <td>112</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>104</td>\n",
+       "      <td>112</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>15</th>\n",
@@ -2509,22 +2517,22 @@
        "      <td>0</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term</td>\n",
-       "      <td>6;0</td>\n",
+       "      <td>Acetyl@Protein N-term;Carbamidomethyl@C</td>\n",
+       "      <td>0;6</td>\n",
        "      <td>9</td>\n",
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>573.725732</td>\n",
-       "      <td>574.729032</td>\n",
+       "      <td>0.003724</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.263092</td>\n",
        "      <td>0.263092</td>\n",
-       "      <td>366.395264</td>\n",
+       "      <td>366.395203</td>\n",
        "      <td>0.903884</td>\n",
-       "      <td>120</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>112</td>\n",
+       "      <td>120</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>16</th>\n",
@@ -2539,16 +2547,16 @@
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>678.863889</td>\n",
-       "      <td>679.867189</td>\n",
+       "      <td>0.003411</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.243750</td>\n",
        "      <td>0.243750</td>\n",
        "      <td>401.572327</td>\n",
        "      <td>0.992497</td>\n",
-       "      <td>130</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>120</td>\n",
+       "      <td>130</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>17</th>\n",
@@ -2563,16 +2571,16 @@
        "      <td>0</td>\n",
        "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>452.911685</td>\n",
-       "      <td>453.580551</td>\n",
+       "      <td>0.003411</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.243750</td>\n",
        "      <td>0.243750</td>\n",
        "      <td>470.914978</td>\n",
        "      <td>0.775925</td>\n",
-       "      <td>140</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>130</td>\n",
+       "      <td>140</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>18</th>\n",
@@ -2587,16 +2595,16 @@
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>670.866431</td>\n",
-       "      <td>671.869731</td>\n",
+       "      <td>0.003305</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.299989</td>\n",
        "      <td>0.299989</td>\n",
-       "      <td>402.828186</td>\n",
+       "      <td>402.828217</td>\n",
        "      <td>0.995481</td>\n",
-       "      <td>150</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>140</td>\n",
+       "      <td>150</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>19</th>\n",
@@ -2611,16 +2619,16 @@
        "      <td>0</td>\n",
        "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>447.580046</td>\n",
-       "      <td>448.248913</td>\n",
+       "      <td>0.003305</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.299989</td>\n",
        "      <td>0.299989</td>\n",
-       "      <td>474.472504</td>\n",
+       "      <td>474.472473</td>\n",
        "      <td>0.781693</td>\n",
-       "      <td>160</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>150</td>\n",
+       "      <td>160</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>20</th>\n",
@@ -2635,16 +2643,16 @@
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>699.869171</td>\n",
-       "      <td>700.872471</td>\n",
+       "      <td>0.003771</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.416815</td>\n",
        "      <td>0.416815</td>\n",
        "      <td>406.307281</td>\n",
        "      <td>1.004504</td>\n",
-       "      <td>170</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>160</td>\n",
+       "      <td>170</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>21</th>\n",
@@ -2659,16 +2667,16 @@
        "      <td>0</td>\n",
        "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>466.915206</td>\n",
-       "      <td>467.584073</td>\n",
+       "      <td>0.003771</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.416815</td>\n",
        "      <td>0.416815</td>\n",
-       "      <td>463.901062</td>\n",
+       "      <td>463.901123</td>\n",
        "      <td>0.764600</td>\n",
-       "      <td>180</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>170</td>\n",
+       "      <td>180</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>22</th>\n",
@@ -2683,16 +2691,16 @@
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>691.871714</td>\n",
-       "      <td>692.875014</td>\n",
+       "      <td>0.003660</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.498515</td>\n",
        "      <td>0.498515</td>\n",
-       "      <td>407.171875</td>\n",
+       "      <td>407.171814</td>\n",
        "      <td>1.006527</td>\n",
-       "      <td>190</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>180</td>\n",
+       "      <td>190</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>23</th>\n",
@@ -2707,16 +2715,16 @@
        "      <td>0</td>\n",
        "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>461.583568</td>\n",
-       "      <td>462.252435</td>\n",
+       "      <td>0.003660</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.498515</td>\n",
        "      <td>0.498515</td>\n",
-       "      <td>468.311951</td>\n",
+       "      <td>468.311920</td>\n",
        "      <td>0.771782</td>\n",
-       "      <td>200</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>190</td>\n",
+       "      <td>200</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>24</th>\n",
@@ -2731,16 +2739,16 @@
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>678.863889</td>\n",
-       "      <td>679.867189</td>\n",
+       "      <td>0.003411</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.339134</td>\n",
        "      <td>0.339134</td>\n",
-       "      <td>400.909943</td>\n",
-       "      <td>0.990860</td>\n",
-       "      <td>210</td>\n",
+       "      <td>400.909912</td>\n",
+       "      <td>0.990859</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>200</td>\n",
+       "      <td>210</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>25</th>\n",
@@ -2755,16 +2763,16 @@
        "      <td>1</td>\n",
        "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>452.911685</td>\n",
-       "      <td>453.580551</td>\n",
+       "      <td>0.003411</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.339134</td>\n",
        "      <td>0.339134</td>\n",
        "      <td>478.989624</td>\n",
        "      <td>0.789230</td>\n",
-       "      <td>220</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>210</td>\n",
+       "      <td>220</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>26</th>\n",
@@ -2779,16 +2787,16 @@
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>670.866431</td>\n",
-       "      <td>671.869731</td>\n",
+       "      <td>0.003305</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.352144</td>\n",
        "      <td>0.352144</td>\n",
-       "      <td>402.555054</td>\n",
+       "      <td>402.555023</td>\n",
        "      <td>0.994806</td>\n",
-       "      <td>230</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>220</td>\n",
+       "      <td>230</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>27</th>\n",
@@ -2803,16 +2811,16 @@
        "      <td>1</td>\n",
        "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>447.580046</td>\n",
-       "      <td>448.248913</td>\n",
+       "      <td>0.003305</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.352144</td>\n",
        "      <td>0.352144</td>\n",
        "      <td>482.206787</td>\n",
        "      <td>0.794435</td>\n",
-       "      <td>240</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>230</td>\n",
+       "      <td>240</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>28</th>\n",
@@ -2827,16 +2835,16 @@
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>699.869171</td>\n",
-       "      <td>700.872471</td>\n",
+       "      <td>0.003771</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.406691</td>\n",
        "      <td>0.406691</td>\n",
-       "      <td>414.260376</td>\n",
+       "      <td>414.260437</td>\n",
        "      <td>1.024166</td>\n",
-       "      <td>250</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>240</td>\n",
+       "      <td>250</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>29</th>\n",
@@ -2851,16 +2859,16 @@
        "      <td>1</td>\n",
        "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>466.915206</td>\n",
-       "      <td>467.584073</td>\n",
+       "      <td>0.003771</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.406691</td>\n",
        "      <td>0.406691</td>\n",
-       "      <td>470.269623</td>\n",
+       "      <td>470.269653</td>\n",
        "      <td>0.775096</td>\n",
-       "      <td>260</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>250</td>\n",
+       "      <td>260</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>30</th>\n",
@@ -2875,16 +2883,16 @@
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>691.871714</td>\n",
-       "      <td>692.875014</td>\n",
+       "      <td>0.003660</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.462864</td>\n",
        "      <td>0.462864</td>\n",
        "      <td>417.726074</td>\n",
        "      <td>1.032617</td>\n",
-       "      <td>270</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>260</td>\n",
+       "      <td>270</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>31</th>\n",
@@ -2899,16 +2907,16 @@
        "      <td>1</td>\n",
        "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>461.583568</td>\n",
-       "      <td>462.252435</td>\n",
+       "      <td>0.003660</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.462864</td>\n",
        "      <td>0.462864</td>\n",
-       "      <td>469.226746</td>\n",
+       "      <td>469.226685</td>\n",
        "      <td>0.773290</td>\n",
-       "      <td>280</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>270</td>\n",
+       "      <td>280</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>32</th>\n",
@@ -2923,16 +2931,16 @@
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>772.903742</td>\n",
-       "      <td>773.907042</td>\n",
+       "      <td>0.004945</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.277093</td>\n",
        "      <td>0.277093</td>\n",
        "      <td>421.076538</td>\n",
        "      <td>1.041983</td>\n",
-       "      <td>292</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>280</td>\n",
+       "      <td>292</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>33</th>\n",
@@ -2947,16 +2955,16 @@
        "      <td>1</td>\n",
        "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>515.604920</td>\n",
-       "      <td>516.273787</td>\n",
+       "      <td>0.004945</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.277093</td>\n",
        "      <td>0.277093</td>\n",
-       "      <td>490.627563</td>\n",
+       "      <td>490.627533</td>\n",
        "      <td>0.809400</td>\n",
-       "      <td>304</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>292</td>\n",
+       "      <td>304</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>34</th>\n",
@@ -2971,16 +2979,16 @@
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>764.906285</td>\n",
-       "      <td>765.909585</td>\n",
+       "      <td>0.004819</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.336550</td>\n",
        "      <td>0.336550</td>\n",
-       "      <td>423.214264</td>\n",
+       "      <td>423.214233</td>\n",
        "      <td>1.047176</td>\n",
-       "      <td>316</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>304</td>\n",
+       "      <td>316</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>35</th>\n",
@@ -2995,16 +3003,16 @@
        "      <td>1</td>\n",
        "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>510.273282</td>\n",
-       "      <td>510.942149</td>\n",
+       "      <td>0.004819</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.336550</td>\n",
        "      <td>0.336550</td>\n",
        "      <td>487.170013</td>\n",
        "      <td>0.803621</td>\n",
-       "      <td>328</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>316</td>\n",
+       "      <td>328</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>36</th>\n",
@@ -3019,16 +3027,16 @@
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>772.903742</td>\n",
-       "      <td>773.907042</td>\n",
+       "      <td>0.004945</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.218114</td>\n",
        "      <td>0.218114</td>\n",
        "      <td>415.696411</td>\n",
        "      <td>1.028670</td>\n",
-       "      <td>340</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>328</td>\n",
+       "      <td>340</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>37</th>\n",
@@ -3043,16 +3051,16 @@
        "      <td>0</td>\n",
        "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>515.604920</td>\n",
-       "      <td>516.273787</td>\n",
+       "      <td>0.004945</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.218114</td>\n",
        "      <td>0.218114</td>\n",
        "      <td>473.192200</td>\n",
        "      <td>0.780636</td>\n",
-       "      <td>352</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>340</td>\n",
+       "      <td>352</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>38</th>\n",
@@ -3067,16 +3075,16 @@
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>764.906285</td>\n",
-       "      <td>765.909585</td>\n",
+       "      <td>0.004819</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.252718</td>\n",
        "      <td>0.252718</td>\n",
        "      <td>416.934204</td>\n",
        "      <td>1.031637</td>\n",
-       "      <td>364</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>352</td>\n",
+       "      <td>364</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>39</th>\n",
@@ -3091,25 +3099,25 @@
        "      <td>0</td>\n",
        "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>510.273282</td>\n",
-       "      <td>510.942149</td>\n",
+       "      <td>0.004819</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.252718</td>\n",
        "      <td>0.252718</td>\n",
-       "      <td>477.759796</td>\n",
+       "      <td>477.759888</td>\n",
        "      <td>0.788098</td>\n",
-       "      <td>376</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>364</td>\n",
+       "      <td>376</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>40 rows × 27 columns</p>\n",
+       "<p>40 rows × 26 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
-       "         sequence protein_idxes  miss_cleavage  is_prot_nterm  is_prot_cterm   \n",
-       "0        LMNPQRST             0              1          False           True  \\\n",
+       "         sequence protein_idxes  miss_cleavage  is_prot_nterm  is_prot_cterm  \\\n",
+       "0        LMNPQRST             0              1          False           True   \n",
        "1        LMNPQRST             0              1          False           True   \n",
        "2        ACDESTYK             0              0           True          False   \n",
        "3        ACDESTYK             0              0           True          False   \n",
@@ -3150,23 +3158,23 @@
        "38  FGHIKLMNPQRST             0              2          False           True   \n",
        "39  FGHIKLMNPQRST             0              2          False           True   \n",
        "\n",
-       "                                                 mods mod_sites  nAA  decoy   \n",
-       "0                                         Oxidation@M         2    8      0  \\\n",
+       "                                                 mods mod_sites  nAA  decoy  \\\n",
+       "0                                         Oxidation@M         2    8      0   \n",
        "1                                                                  8      0   \n",
        "2                                   Carbamidomethyl@C         2    8      0   \n",
-       "3             Carbamidomethyl@C;Acetyl@Protein N-term       2;0    8      0   \n",
+       "3             Acetyl@Protein N-term;Carbamidomethyl@C       0;2    8      0   \n",
        "4                                         Oxidation@M         6    8      1   \n",
        "5                                                                  8      1   \n",
        "6                                   Carbamidomethyl@C         6    8      1   \n",
-       "7             Carbamidomethyl@C;Acetyl@Protein N-term       6;0    8      1   \n",
-       "8                       Carbamidomethyl@C;Oxidation@M       3;1    9      0   \n",
+       "7             Acetyl@Protein N-term;Carbamidomethyl@C       0;6    8      1   \n",
+       "8                       Oxidation@M;Carbamidomethyl@C       1;3    9      0   \n",
        "9                                   Carbamidomethyl@C         3    9      0   \n",
-       "10  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...     3;0;1    9      0   \n",
-       "11            Carbamidomethyl@C;Acetyl@Protein N-term       3;0    9      0   \n",
-       "12                      Carbamidomethyl@C;Oxidation@M       6;8    9      1   \n",
+       "10  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...     0;1;3    9      0   \n",
+       "11            Acetyl@Protein N-term;Carbamidomethyl@C       0;3    9      0   \n",
+       "12                      Oxidation@M;Carbamidomethyl@C       8;6    9      1   \n",
        "13                                  Carbamidomethyl@C         6    9      1   \n",
-       "14  Carbamidomethyl@C;Acetyl@Protein N-term;Oxidat...     6;0;8    9      1   \n",
-       "15            Carbamidomethyl@C;Acetyl@Protein N-term       6;0    9      1   \n",
+       "14  Acetyl@Protein N-term;Oxidation@M;Carbamidomet...     0;8;6    9      1   \n",
+       "15            Acetyl@Protein N-term;Carbamidomethyl@C       0;6    9      1   \n",
        "16                                        Oxidation@M         7   11      0   \n",
        "17                                        Oxidation@M         7   11      0   \n",
        "18                                                                11      0   \n",
@@ -3192,136 +3200,94 @@
        "38                                                                13      0   \n",
        "39                                                                13      0   \n",
        "\n",
-       "    charge  ...  isotope_apex_mz  isotope_right_most_mz   rt_pred   \n",
-       "0        2  ...       481.739834             482.241484  0.021263  \\\n",
-       "1        2  ...       473.742377             474.244027  0.092409   \n",
-       "2        2  ...       487.200207             487.701857  0.032798   \n",
-       "3        2  ...       508.205490             509.208790  0.109105   \n",
-       "4        2  ...       481.739834             482.241484  0.044289   \n",
-       "5        2  ...       473.742377             474.244027  0.158330   \n",
-       "6        2  ...       487.200207             487.701857  0.016275   \n",
-       "7        2  ...       508.205490             509.208790  0.119288   \n",
-       "8        2  ...       560.717907             561.721207  0.048364   \n",
-       "9        2  ...       552.720450             553.723750  0.081848   \n",
-       "10       2  ...       581.723190             582.726490  0.204708   \n",
-       "11       2  ...       573.725732             574.729032  0.279585   \n",
-       "12       2  ...       560.717907             561.721207  0.015205   \n",
-       "13       2  ...       552.720450             553.723750  0.084342   \n",
-       "14       2  ...       581.723190             582.726490  0.134268   \n",
-       "15       2  ...       573.725732             574.729032  0.263092   \n",
-       "16       2  ...       678.863889             679.867189  0.243750   \n",
-       "17       3  ...       452.911685             453.580551  0.243750   \n",
-       "18       2  ...       670.866431             671.869731  0.299989   \n",
-       "19       3  ...       447.580046             448.248913  0.299989   \n",
-       "20       2  ...       699.869171             700.872471  0.416815   \n",
-       "21       3  ...       466.915206             467.584073  0.416815   \n",
-       "22       2  ...       691.871714             692.875014  0.498515   \n",
-       "23       3  ...       461.583568             462.252435  0.498515   \n",
-       "24       2  ...       678.863889             679.867189  0.339134   \n",
-       "25       3  ...       452.911685             453.580551  0.339134   \n",
-       "26       2  ...       670.866431             671.869731  0.352144   \n",
-       "27       3  ...       447.580046             448.248913  0.352144   \n",
-       "28       2  ...       699.869171             700.872471  0.406691   \n",
-       "29       3  ...       466.915206             467.584073  0.406691   \n",
-       "30       2  ...       691.871714             692.875014  0.462864   \n",
-       "31       3  ...       461.583568             462.252435  0.462864   \n",
-       "32       2  ...       772.903742             773.907042  0.277093   \n",
-       "33       3  ...       515.604920             516.273787  0.277093   \n",
-       "34       2  ...       764.906285             765.909585  0.336550   \n",
-       "35       3  ...       510.273282             510.942149  0.336550   \n",
-       "36       2  ...       772.903742             773.907042  0.218114   \n",
-       "37       3  ...       515.604920             516.273787  0.218114   \n",
-       "38       2  ...       764.906285             765.909585  0.252718   \n",
-       "39       3  ...       510.273282             510.942149  0.252718   \n",
-       "\n",
-       "    rt_norm_pred    ccs_pred  mobility_pred  frag_stop_idx   nce  instrument   \n",
-       "0       0.021263  318.941895       0.785035              7  30.0       Lumos  \\\n",
-       "1       0.092409  317.660034       0.781693             14  30.0       Lumos   \n",
-       "2       0.032798  329.176941       0.810355             21  30.0       Lumos   \n",
-       "3       0.109105  342.048767       0.842529             28  30.0       Lumos   \n",
-       "4       0.044289  321.865784       0.792231             35  30.0       Lumos   \n",
-       "5       0.158330  323.465607       0.795979             42  30.0       Lumos   \n",
-       "6       0.016275  328.831970       0.809506             49  30.0       Lumos   \n",
-       "7       0.119288  339.180786       0.835465             56  30.0       Lumos   \n",
-       "8       0.048364  351.815094       0.867675             64  30.0       Lumos   \n",
-       "9       0.081848  353.857971       0.872560             72  30.0       Lumos   \n",
-       "10      0.204708  362.488403       0.894392             80  30.0       Lumos   \n",
-       "11      0.279585  361.742859       0.892406             88  30.0       Lumos   \n",
-       "12      0.015205  354.745117       0.874901             96  30.0       Lumos   \n",
-       "13      0.084342  355.178955       0.875817            104  30.0       Lumos   \n",
-       "14      0.134268  363.174927       0.896086            112  30.0       Lumos   \n",
-       "15      0.263092  366.395264       0.903884            120  30.0       Lumos   \n",
-       "16      0.243750  401.572327       0.992497            130  30.0       Lumos   \n",
-       "17      0.243750  470.914978       0.775925            140  30.0       Lumos   \n",
-       "18      0.299989  402.828186       0.995481            150  30.0       Lumos   \n",
-       "19      0.299989  474.472504       0.781693            160  30.0       Lumos   \n",
-       "20      0.416815  406.307281       1.004504            170  30.0       Lumos   \n",
-       "21      0.416815  463.901062       0.764600            180  30.0       Lumos   \n",
-       "22      0.498515  407.171875       1.006527            190  30.0       Lumos   \n",
-       "23      0.498515  468.311951       0.771782            200  30.0       Lumos   \n",
-       "24      0.339134  400.909943       0.990860            210  30.0       Lumos   \n",
-       "25      0.339134  478.989624       0.789230            220  30.0       Lumos   \n",
-       "26      0.352144  402.555054       0.994806            230  30.0       Lumos   \n",
-       "27      0.352144  482.206787       0.794435            240  30.0       Lumos   \n",
-       "28      0.406691  414.260376       1.024166            250  30.0       Lumos   \n",
-       "29      0.406691  470.269623       0.775096            260  30.0       Lumos   \n",
-       "30      0.462864  417.726074       1.032617            270  30.0       Lumos   \n",
-       "31      0.462864  469.226746       0.773290            280  30.0       Lumos   \n",
-       "32      0.277093  421.076538       1.041983            292  30.0       Lumos   \n",
-       "33      0.277093  490.627563       0.809400            304  30.0       Lumos   \n",
-       "34      0.336550  423.214264       1.047176            316  30.0       Lumos   \n",
-       "35      0.336550  487.170013       0.803621            328  30.0       Lumos   \n",
-       "36      0.218114  415.696411       1.028670            340  30.0       Lumos   \n",
-       "37      0.218114  473.192200       0.780636            352  30.0       Lumos   \n",
-       "38      0.252718  416.934204       1.031637            364  30.0       Lumos   \n",
-       "39      0.252718  477.759796       0.788098            376  30.0       Lumos   \n",
+       "    charge  ...       i_5  mono_isotope_idx   rt_pred  rt_norm_pred  \\\n",
+       "0        2  ...  0.001232                 0  0.021263      0.021263   \n",
+       "1        2  ...  0.001173                 0  0.092409      0.092409   \n",
+       "2        2  ...  0.001409                 0  0.032797      0.032797   \n",
+       "3        2  ...  0.001604                 0  0.109105      0.109105   \n",
+       "4        2  ...  0.001232                 0  0.044289      0.044289   \n",
+       "5        2  ...  0.001173                 0  0.158330      0.158330   \n",
+       "6        2  ...  0.001409                 0  0.016274      0.016274   \n",
+       "7        2  ...  0.001604                 0  0.119288      0.119288   \n",
+       "8        2  ...  0.003490                 0  0.048364      0.048364   \n",
+       "9        2  ...  0.003395                 0  0.081848      0.081848   \n",
+       "10       2  ...  0.003824                 0  0.204708      0.204708   \n",
+       "11       2  ...  0.003724                 0  0.279585      0.279585   \n",
+       "12       2  ...  0.003490                 0  0.015205      0.015205   \n",
+       "13       2  ...  0.003395                 0  0.084342      0.084342   \n",
+       "14       2  ...  0.003824                 0  0.134268      0.134268   \n",
+       "15       2  ...  0.003724                 0  0.263092      0.263092   \n",
+       "16       2  ...  0.003411                 0  0.243750      0.243750   \n",
+       "17       3  ...  0.003411                 0  0.243750      0.243750   \n",
+       "18       2  ...  0.003305                 0  0.299989      0.299989   \n",
+       "19       3  ...  0.003305                 0  0.299989      0.299989   \n",
+       "20       2  ...  0.003771                 0  0.416815      0.416815   \n",
+       "21       3  ...  0.003771                 0  0.416815      0.416815   \n",
+       "22       2  ...  0.003660                 0  0.498515      0.498515   \n",
+       "23       3  ...  0.003660                 0  0.498515      0.498515   \n",
+       "24       2  ...  0.003411                 0  0.339134      0.339134   \n",
+       "25       3  ...  0.003411                 0  0.339134      0.339134   \n",
+       "26       2  ...  0.003305                 0  0.352144      0.352144   \n",
+       "27       3  ...  0.003305                 0  0.352144      0.352144   \n",
+       "28       2  ...  0.003771                 0  0.406691      0.406691   \n",
+       "29       3  ...  0.003771                 0  0.406691      0.406691   \n",
+       "30       2  ...  0.003660                 0  0.462864      0.462864   \n",
+       "31       3  ...  0.003660                 0  0.462864      0.462864   \n",
+       "32       2  ...  0.004945                 0  0.277093      0.277093   \n",
+       "33       3  ...  0.004945                 0  0.277093      0.277093   \n",
+       "34       2  ...  0.004819                 0  0.336550      0.336550   \n",
+       "35       3  ...  0.004819                 0  0.336550      0.336550   \n",
+       "36       2  ...  0.004945                 0  0.218114      0.218114   \n",
+       "37       3  ...  0.004945                 0  0.218114      0.218114   \n",
+       "38       2  ...  0.004819                 0  0.252718      0.252718   \n",
+       "39       3  ...  0.004819                 0  0.252718      0.252718   \n",
        "\n",
-       "    frag_start_idx  \n",
-       "0                0  \n",
-       "1                7  \n",
-       "2               14  \n",
-       "3               21  \n",
-       "4               28  \n",
-       "5               35  \n",
-       "6               42  \n",
-       "7               49  \n",
-       "8               56  \n",
-       "9               64  \n",
-       "10              72  \n",
-       "11              80  \n",
-       "12              88  \n",
-       "13              96  \n",
-       "14             104  \n",
-       "15             112  \n",
-       "16             120  \n",
-       "17             130  \n",
-       "18             140  \n",
-       "19             150  \n",
-       "20             160  \n",
-       "21             170  \n",
-       "22             180  \n",
-       "23             190  \n",
-       "24             200  \n",
-       "25             210  \n",
-       "26             220  \n",
-       "27             230  \n",
-       "28             240  \n",
-       "29             250  \n",
-       "30             260  \n",
-       "31             270  \n",
-       "32             280  \n",
-       "33             292  \n",
-       "34             304  \n",
-       "35             316  \n",
-       "36             328  \n",
-       "37             340  \n",
-       "38             352  \n",
-       "39             364  \n",
+       "      ccs_pred  mobility_pred   nce  instrument  frag_start_idx  frag_stop_idx  \n",
+       "0   318.941895       0.785035  30.0       Lumos               0              7  \n",
+       "1   317.660034       0.781693  30.0       Lumos               7             14  \n",
+       "2   329.177002       0.810355  30.0       Lumos              14             21  \n",
+       "3   342.048706       0.842529  30.0       Lumos              21             28  \n",
+       "4   321.865723       0.792231  30.0       Lumos              28             35  \n",
+       "5   323.465607       0.795979  30.0       Lumos              35             42  \n",
+       "6   328.831970       0.809506  30.0       Lumos              42             49  \n",
+       "7   339.180847       0.835465  30.0       Lumos              49             56  \n",
+       "8   351.815063       0.867675  30.0       Lumos              56             64  \n",
+       "9   353.857971       0.872560  30.0       Lumos              64             72  \n",
+       "10  362.488342       0.894392  30.0       Lumos              72             80  \n",
+       "11  361.742859       0.892406  30.0       Lumos              80             88  \n",
+       "12  354.745087       0.874901  30.0       Lumos              88             96  \n",
+       "13  355.178925       0.875817  30.0       Lumos              96            104  \n",
+       "14  363.174927       0.896086  30.0       Lumos             104            112  \n",
+       "15  366.395203       0.903884  30.0       Lumos             112            120  \n",
+       "16  401.572327       0.992497  30.0       Lumos             120            130  \n",
+       "17  470.914978       0.775925  30.0       Lumos             130            140  \n",
+       "18  402.828217       0.995481  30.0       Lumos             140            150  \n",
+       "19  474.472473       0.781693  30.0       Lumos             150            160  \n",
+       "20  406.307281       1.004504  30.0       Lumos             160            170  \n",
+       "21  463.901123       0.764600  30.0       Lumos             170            180  \n",
+       "22  407.171814       1.006527  30.0       Lumos             180            190  \n",
+       "23  468.311920       0.771782  30.0       Lumos             190            200  \n",
+       "24  400.909912       0.990859  30.0       Lumos             200            210  \n",
+       "25  478.989624       0.789230  30.0       Lumos             210            220  \n",
+       "26  402.555023       0.994806  30.0       Lumos             220            230  \n",
+       "27  482.206787       0.794435  30.0       Lumos             230            240  \n",
+       "28  414.260437       1.024166  30.0       Lumos             240            250  \n",
+       "29  470.269653       0.775096  30.0       Lumos             250            260  \n",
+       "30  417.726074       1.032617  30.0       Lumos             260            270  \n",
+       "31  469.226685       0.773290  30.0       Lumos             270            280  \n",
+       "32  421.076538       1.041983  30.0       Lumos             280            292  \n",
+       "33  490.627533       0.809400  30.0       Lumos             292            304  \n",
+       "34  423.214233       1.047176  30.0       Lumos             304            316  \n",
+       "35  487.170013       0.803621  30.0       Lumos             316            328  \n",
+       "36  415.696411       1.028670  30.0       Lumos             328            340  \n",
+       "37  473.192200       0.780636  30.0       Lumos             340            352  \n",
+       "38  416.934204       1.031637  30.0       Lumos             352            364  \n",
+       "39  477.759888       0.788098  30.0       Lumos             364            376  \n",
        "\n",
-       "[40 rows x 27 columns]"
+       "[40 rows x 26 columns]"
       ]
      },
-     "execution_count": null,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3352,15 +3318,15 @@
     "_lib.predict_all()\n",
     "assert (_lib.precursor_df.decoy==1).any()\n",
     "assert ('MACDESTY'[::-1]+'K') in _lib.precursor_df.sequence.values\n",
-    "assert 'isotope_apex_offset' in _lib.precursor_df.columns\n",
-    "assert 'isotope_apex_intensity' in _lib.precursor_df.columns\n",
+    "assert 'i_0' in _lib.precursor_df.columns\n",
+    "assert 'i_1' in _lib.precursor_df.columns\n",
     "assert ~_lib.precursor_df.sequence.str.contains('B').any()\n",
     "_lib.precursor_df"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -3395,16 +3361,16 @@
        "      <th>decoy</th>\n",
        "      <th>charge</th>\n",
        "      <th>...</th>\n",
-       "      <th>isotope_apex_mz</th>\n",
-       "      <th>isotope_right_most_mz</th>\n",
+       "      <th>i_5</th>\n",
+       "      <th>mono_isotope_idx</th>\n",
        "      <th>rt_pred</th>\n",
        "      <th>rt_norm_pred</th>\n",
        "      <th>ccs_pred</th>\n",
        "      <th>mobility_pred</th>\n",
-       "      <th>frag_stop_idx</th>\n",
        "      <th>nce</th>\n",
        "      <th>instrument</th>\n",
        "      <th>frag_start_idx</th>\n",
+       "      <th>frag_stop_idx</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -3421,64 +3387,64 @@
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>495.755484</td>\n",
-       "      <td>496.257134</td>\n",
+       "      <td>0.001352</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.242660</td>\n",
        "      <td>0.242660</td>\n",
-       "      <td>345.390869</td>\n",
-       "      <td>0.850475</td>\n",
-       "      <td>7</td>\n",
+       "      <td>345.390839</td>\n",
+       "      <td>0.850135</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>0</td>\n",
+       "      <td>7</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>YTSEDCAK</td>\n",
-       "      <td>0</td>\n",
+       "      <td>LMNPQRST</td>\n",
        "      <td>0</td>\n",
-       "      <td>True</td>\n",
+       "      <td>1</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Acetyl@Protein N-term;Dimeth...</td>\n",
-       "      <td>6;0;8</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Dimethyl:2H(6)13C(2)@Any N-term</td>\n",
+       "      <td>0</td>\n",
        "      <td>8</td>\n",
-       "      <td>1</td>\n",
+       "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>526.243325</td>\n",
-       "      <td>526.744975</td>\n",
-       "      <td>0.106988</td>\n",
-       "      <td>0.106988</td>\n",
-       "      <td>347.019043</td>\n",
-       "      <td>0.855165</td>\n",
-       "      <td>14</td>\n",
+       "      <td>0.027430</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0.063860</td>\n",
+       "      <td>0.063860</td>\n",
+       "      <td>313.133270</td>\n",
+       "      <td>0.770554</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>7</td>\n",
+       "      <td>14</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>YTSEDCAK</td>\n",
-       "      <td>0</td>\n",
+       "      <td>LMNPQRST</td>\n",
        "      <td>0</td>\n",
-       "      <td>True</td>\n",
+       "      <td>1</td>\n",
        "      <td>False</td>\n",
-       "      <td>Carbamidomethyl@C;Dimethyl:2H(6)13C(2)@Any N-t...</td>\n",
-       "      <td>6;0;8</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term</td>\n",
+       "      <td>2;0</td>\n",
        "      <td>8</td>\n",
-       "      <td>1</td>\n",
+       "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>523.275878</td>\n",
-       "      <td>523.777528</td>\n",
-       "      <td>0.009153</td>\n",
-       "      <td>0.009153</td>\n",
-       "      <td>331.465332</td>\n",
-       "      <td>0.816775</td>\n",
-       "      <td>21</td>\n",
+       "      <td>0.027954</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0.017637</td>\n",
+       "      <td>0.017637</td>\n",
+       "      <td>314.302277</td>\n",
+       "      <td>0.773615</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>14</td>\n",
+       "      <td>21</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -3487,22 +3453,22 @@
        "      <td>1</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Dimethyl:2H(6)13C(2)@Any N-term</td>\n",
-       "      <td>0</td>\n",
+       "      <td>Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term</td>\n",
+       "      <td>6;0</td>\n",
        "      <td>8</td>\n",
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>491.780212</td>\n",
-       "      <td>492.281862</td>\n",
-       "      <td>0.152593</td>\n",
-       "      <td>0.152593</td>\n",
-       "      <td>320.333069</td>\n",
-       "      <td>0.788686</td>\n",
-       "      <td>28</td>\n",
+       "      <td>0.027954</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0.040846</td>\n",
+       "      <td>0.040846</td>\n",
+       "      <td>319.400330</td>\n",
+       "      <td>0.786163</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>21</td>\n",
+       "      <td>28</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -3511,22 +3477,22 @@
        "      <td>1</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term</td>\n",
-       "      <td>6;0</td>\n",
+       "      <td>Dimethyl:2H(6)13C(2)@Any N-term</td>\n",
+       "      <td>0</td>\n",
        "      <td>8</td>\n",
        "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>499.777669</td>\n",
-       "      <td>500.279319</td>\n",
-       "      <td>0.040845</td>\n",
-       "      <td>0.040845</td>\n",
-       "      <td>319.400391</td>\n",
-       "      <td>0.786564</td>\n",
-       "      <td>35</td>\n",
+       "      <td>0.027430</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0.152593</td>\n",
+       "      <td>0.152593</td>\n",
+       "      <td>320.333069</td>\n",
+       "      <td>0.788271</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
        "      <td>28</td>\n",
+       "      <td>35</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
@@ -3553,103 +3519,103 @@
        "      <td>...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>83</th>\n",
-       "      <td>FGHIKLMNPQRST</td>\n",
+       "      <th>75</th>\n",
+       "      <td>SRQPNMLKIHGFT</td>\n",
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
        "      <td>Dimethyl@Any N-term;Dimethyl@K</td>\n",
-       "      <td>0;5</td>\n",
+       "      <td>0;8</td>\n",
        "      <td>13</td>\n",
-       "      <td>0</td>\n",
+       "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>792.937585</td>\n",
-       "      <td>793.940885</td>\n",
-       "      <td>0.636318</td>\n",
-       "      <td>0.636318</td>\n",
-       "      <td>428.658142</td>\n",
-       "      <td>1.060983</td>\n",
-       "      <td>792</td>\n",
+       "      <td>0.005469</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.620949</td>\n",
+       "      <td>0.620949</td>\n",
+       "      <td>430.461273</td>\n",
+       "      <td>1.065108</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
-       "      <td>780</td>\n",
+       "      <td>692</td>\n",
+       "      <td>704</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>84</th>\n",
-       "      <td>FGHIKLMNPQRST</td>\n",
+       "      <th>76</th>\n",
+       "      <td>SRQPNMLKIHGFT</td>\n",
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Dimethyl@Any N-term;Dimethyl@K</td>\n",
-       "      <td>0;5</td>\n",
+       "      <td>Oxidation@M;Dimethyl@Any N-term;Dimethyl@K</td>\n",
+       "      <td>6;0;8</td>\n",
        "      <td>13</td>\n",
-       "      <td>0</td>\n",
+       "      <td>1</td>\n",
        "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>528.960816</td>\n",
-       "      <td>529.629682</td>\n",
-       "      <td>0.636318</td>\n",
-       "      <td>0.636318</td>\n",
-       "      <td>482.273010</td>\n",
-       "      <td>0.795796</td>\n",
-       "      <td>804</td>\n",
+       "      <td>0.005604</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.468698</td>\n",
+       "      <td>0.468698</td>\n",
+       "      <td>482.796692</td>\n",
+       "      <td>0.796481</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
-       "      <td>792</td>\n",
+       "      <td>704</td>\n",
+       "      <td>716</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>85</th>\n",
-       "      <td>FGHIKLMNPQRST</td>\n",
+       "      <th>77</th>\n",
+       "      <td>SRQPNMLKIHGFT</td>\n",
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)...</td>\n",
-       "      <td>0;5</td>\n",
+       "      <td>Oxidation@M;Dimethyl@Any N-term;Dimethyl@K</td>\n",
+       "      <td>6;0;8</td>\n",
        "      <td>13</td>\n",
-       "      <td>0</td>\n",
-       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>534.323729</td>\n",
-       "      <td>534.992596</td>\n",
-       "      <td>0.206957</td>\n",
-       "      <td>0.206957</td>\n",
-       "      <td>478.660187</td>\n",
-       "      <td>0.789903</td>\n",
-       "      <td>816</td>\n",
+       "      <td>0.005604</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.468698</td>\n",
+       "      <td>0.468698</td>\n",
+       "      <td>428.150757</td>\n",
+       "      <td>1.059489</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
-       "      <td>804</td>\n",
+       "      <td>716</td>\n",
+       "      <td>728</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>86</th>\n",
-       "      <td>SRQPNMLKIHGFT</td>\n",
+       "      <th>78</th>\n",
+       "      <td>FGHIKLMNPQRST</td>\n",
        "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>False</td>\n",
        "      <td>True</td>\n",
-       "      <td>Oxidation@M;Dimethyl@Any N-term;Dimethyl@K</td>\n",
-       "      <td>6;0;8</td>\n",
+       "      <td>Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)...</td>\n",
+       "      <td>0;5</td>\n",
        "      <td>13</td>\n",
-       "      <td>1</td>\n",
+       "      <td>0</td>\n",
        "      <td>2</td>\n",
        "      <td>...</td>\n",
-       "      <td>800.935042</td>\n",
-       "      <td>801.938342</td>\n",
-       "      <td>0.468698</td>\n",
-       "      <td>0.468698</td>\n",
-       "      <td>428.150787</td>\n",
-       "      <td>1.059819</td>\n",
-       "      <td>828</td>\n",
+       "      <td>0.058123</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0.206957</td>\n",
+       "      <td>0.206957</td>\n",
+       "      <td>412.858307</td>\n",
+       "      <td>1.021552</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
-       "      <td>816</td>\n",
+       "      <td>728</td>\n",
+       "      <td>740</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>87</th>\n",
+       "      <th>79</th>\n",
        "      <td>FGHIKLMNPQRST</td>\n",
        "      <td>0</td>\n",
        "      <td>2</td>\n",
@@ -3659,94 +3625,81 @@
        "      <td>0;5</td>\n",
        "      <td>13</td>\n",
        "      <td>0</td>\n",
-       "      <td>4</td>\n",
+       "      <td>3</td>\n",
        "      <td>...</td>\n",
-       "      <td>400.994616</td>\n",
-       "      <td>401.496266</td>\n",
+       "      <td>0.058123</td>\n",
+       "      <td>2</td>\n",
        "      <td>0.206957</td>\n",
        "      <td>0.206957</td>\n",
-       "      <td>605.993408</td>\n",
-       "      <td>0.750029</td>\n",
-       "      <td>840</td>\n",
+       "      <td>478.660187</td>\n",
+       "      <td>0.789583</td>\n",
        "      <td>30.0</td>\n",
        "      <td>Lumos</td>\n",
-       "      <td>828</td>\n",
+       "      <td>740</td>\n",
+       "      <td>752</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>88 rows × 28 columns</p>\n",
+       "<p>80 rows × 27 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
-       "         sequence protein_idxes  miss_cleavage  is_prot_nterm  is_prot_cterm   \n",
-       "0        LMNPQRST             0              1          False           True  \\\n",
-       "1        YTSEDCAK             0              0           True          False   \n",
-       "2        YTSEDCAK             0              0           True          False   \n",
+       "         sequence protein_idxes  miss_cleavage  is_prot_nterm  is_prot_cterm  \\\n",
+       "0        LMNPQRST             0              1          False           True   \n",
+       "1        LMNPQRST             0              1          False           True   \n",
+       "2        LMNPQRST             0              1          False           True   \n",
        "3        SRQPNMLT             0              1          False           True   \n",
        "4        SRQPNMLT             0              1          False           True   \n",
        "..            ...           ...            ...            ...            ...   \n",
-       "83  FGHIKLMNPQRST             0              2          False           True   \n",
-       "84  FGHIKLMNPQRST             0              2          False           True   \n",
-       "85  FGHIKLMNPQRST             0              2          False           True   \n",
-       "86  SRQPNMLKIHGFT             0              2          False           True   \n",
-       "87  FGHIKLMNPQRST             0              2          False           True   \n",
+       "75  SRQPNMLKIHGFT             0              2          False           True   \n",
+       "76  SRQPNMLKIHGFT             0              2          False           True   \n",
+       "77  SRQPNMLKIHGFT             0              2          False           True   \n",
+       "78  FGHIKLMNPQRST             0              2          False           True   \n",
+       "79  FGHIKLMNPQRST             0              2          False           True   \n",
        "\n",
-       "                                                 mods mod_sites  nAA  decoy   \n",
-       "0                     Oxidation@M;Dimethyl@Any N-term       2;0    8      0  \\\n",
-       "1   Carbamidomethyl@C;Acetyl@Protein N-term;Dimeth...     6;0;8    8      1   \n",
-       "2   Carbamidomethyl@C;Dimethyl:2H(6)13C(2)@Any N-t...     6;0;8    8      1   \n",
-       "3                     Dimethyl:2H(6)13C(2)@Any N-term         0    8      1   \n",
-       "4         Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term       6;0    8      1   \n",
+       "                                                 mods mod_sites  nAA  decoy  \\\n",
+       "0                     Oxidation@M;Dimethyl@Any N-term       2;0    8      0   \n",
+       "1                     Dimethyl:2H(6)13C(2)@Any N-term         0    8      0   \n",
+       "2         Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term       2;0    8      0   \n",
+       "3         Oxidation@M;Dimethyl:2H(6)13C(2)@Any N-term       6;0    8      1   \n",
+       "4                     Dimethyl:2H(6)13C(2)@Any N-term         0    8      1   \n",
        "..                                                ...       ...  ...    ...   \n",
-       "83                     Dimethyl@Any N-term;Dimethyl@K       0;5   13      0   \n",
-       "84                     Dimethyl@Any N-term;Dimethyl@K       0;5   13      0   \n",
-       "85  Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)...       0;5   13      0   \n",
-       "86         Oxidation@M;Dimethyl@Any N-term;Dimethyl@K     6;0;8   13      1   \n",
-       "87  Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)...       0;5   13      0   \n",
+       "75                     Dimethyl@Any N-term;Dimethyl@K       0;8   13      1   \n",
+       "76         Oxidation@M;Dimethyl@Any N-term;Dimethyl@K     6;0;8   13      1   \n",
+       "77         Oxidation@M;Dimethyl@Any N-term;Dimethyl@K     6;0;8   13      1   \n",
+       "78  Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)...       0;5   13      0   \n",
+       "79  Dimethyl:2H(6)13C(2)@Any N-term;Dimethyl:2H(6)...       0;5   13      0   \n",
        "\n",
-       "    charge  ... isotope_apex_mz  isotope_right_most_mz   rt_pred   \n",
-       "0        2  ...      495.755484             496.257134  0.242660  \\\n",
-       "1        2  ...      526.243325             526.744975  0.106988   \n",
-       "2        2  ...      523.275878             523.777528  0.009153   \n",
-       "3        2  ...      491.780212             492.281862  0.152593   \n",
-       "4        2  ...      499.777669             500.279319  0.040845   \n",
-       "..     ...  ...             ...                    ...       ...   \n",
-       "83       2  ...      792.937585             793.940885  0.636318   \n",
-       "84       3  ...      528.960816             529.629682  0.636318   \n",
-       "85       3  ...      534.323729             534.992596  0.206957   \n",
-       "86       2  ...      800.935042             801.938342  0.468698   \n",
-       "87       4  ...      400.994616             401.496266  0.206957   \n",
+       "    charge  ...       i_5 mono_isotope_idx   rt_pred  rt_norm_pred  \\\n",
+       "0        2  ...  0.001352                0  0.242660      0.242660   \n",
+       "1        2  ...  0.027430                2  0.063860      0.063860   \n",
+       "2        2  ...  0.027954                2  0.017637      0.017637   \n",
+       "3        2  ...  0.027954                2  0.040846      0.040846   \n",
+       "4        2  ...  0.027430                2  0.152593      0.152593   \n",
+       "..     ...  ...       ...              ...       ...           ...   \n",
+       "75       2  ...  0.005469                0  0.620949      0.620949   \n",
+       "76       3  ...  0.005604                0  0.468698      0.468698   \n",
+       "77       2  ...  0.005604                0  0.468698      0.468698   \n",
+       "78       2  ...  0.058123                2  0.206957      0.206957   \n",
+       "79       3  ...  0.058123                2  0.206957      0.206957   \n",
        "\n",
-       "    rt_norm_pred    ccs_pred  mobility_pred  frag_stop_idx   nce  instrument   \n",
-       "0       0.242660  345.390869       0.850475              7  30.0       Lumos  \\\n",
-       "1       0.106988  347.019043       0.855165             14  30.0       Lumos   \n",
-       "2       0.009153  331.465332       0.816775             21  30.0       Lumos   \n",
-       "3       0.152593  320.333069       0.788686             28  30.0       Lumos   \n",
-       "4       0.040845  319.400391       0.786564             35  30.0       Lumos   \n",
-       "..           ...         ...            ...            ...   ...         ...   \n",
-       "83      0.636318  428.658142       1.060983            792  30.0       Lumos   \n",
-       "84      0.636318  482.273010       0.795796            804  30.0       Lumos   \n",
-       "85      0.206957  478.660187       0.789903            816  30.0       Lumos   \n",
-       "86      0.468698  428.150787       1.059819            828  30.0       Lumos   \n",
-       "87      0.206957  605.993408       0.750029            840  30.0       Lumos   \n",
+       "      ccs_pred  mobility_pred   nce  instrument  frag_start_idx  frag_stop_idx  \n",
+       "0   345.390839       0.850135  30.0       Lumos               0              7  \n",
+       "1   313.133270       0.770554  30.0       Lumos               7             14  \n",
+       "2   314.302277       0.773615  30.0       Lumos              14             21  \n",
+       "3   319.400330       0.786163  30.0       Lumos              21             28  \n",
+       "4   320.333069       0.788271  30.0       Lumos              28             35  \n",
+       "..         ...            ...   ...         ...             ...            ...  \n",
+       "75  430.461273       1.065108  30.0       Lumos             692            704  \n",
+       "76  482.796692       0.796481  30.0       Lumos             704            716  \n",
+       "77  428.150757       1.059489  30.0       Lumos             716            728  \n",
+       "78  412.858307       1.021552  30.0       Lumos             728            740  \n",
+       "79  478.660187       0.789583  30.0       Lumos             740            752  \n",
        "\n",
-       "    frag_start_idx  \n",
-       "0                0  \n",
-       "1                7  \n",
-       "2               14  \n",
-       "3               21  \n",
-       "4               28  \n",
-       "..             ...  \n",
-       "83             780  \n",
-       "84             792  \n",
-       "85             804  \n",
-       "86             816  \n",
-       "87             828  \n",
-       "\n",
-       "[88 rows x 28 columns]"
+       "[80 rows x 27 columns]"
       ]
      },
-     "execution_count": null,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3760,8 +3713,8 @@
     "_lib.predict_all()\n",
     "assert (_lib.precursor_df.decoy==1).any()\n",
     "assert ('MACDESTY'[::-1]+'K') in _lib.precursor_df.sequence.values\n",
-    "assert 'isotope_apex_offset' in _lib.precursor_df.columns\n",
-    "assert 'isotope_apex_intensity' in _lib.precursor_df.columns\n",
+    "assert 'i_0' in _lib.precursor_df.columns\n",
+    "assert 'i_1' in _lib.precursor_df.columns\n",
     "assert ~_lib.precursor_df.sequence.str.contains('B').any()\n",
     "_lib.precursor_df"
    ]
@@ -3779,6 +3732,18 @@
    "display_name": "Python 3.8.3 ('base')",
    "language": "python",
    "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
   }
  },
  "nbformat": 4,
diff --git a/nbs_tests/test_fasta_lib_precursor_lib.ipynb b/nbs_tests/test_fasta_lib_precursor_lib.ipynb
index 5e8e76a0..d1ee398c 100644
--- a/nbs_tests/test_fasta_lib_precursor_lib.ipynb
+++ b/nbs_tests/test_fasta_lib_precursor_lib.ipynb
@@ -5,13 +5,20 @@
    "execution_count": 1,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2024-01-24 16:54:02> WARNING: Temp mmap arrays are written to /var/folders/fh/hf8t3l1x02d42ggk3b304_rh0000gn/T/temp_mmap_cbynx21w. Cleanup of this folder is OS dependant, and might need to be triggered manually! Current space: 640,486,821,888\n",
-      "2024-01-24 16:54:02> WARNING: No Bruker libraries are available for this operating system. Mobility and m/z values need to be estimated. While this estimation often returns acceptable results with errors < 0.02 Th, huge errors (e.g. offsets of 6 Th) have already been observed for some samples!\n",
-      "2024-01-24 16:54:02> \n"
+      "2024-01-29 22:16:22> WARNING: Temp mmap arrays are written to /var/folders/fh/hf8t3l1x02d42ggk3b304_rh0000gn/T/temp_mmap_oan6nfyd. Cleanup of this folder is OS dependant, and might need to be triggered manually! Current space: 638,180,737,024\n",
+      "2024-01-29 22:16:22> WARNING: No Bruker libraries are available for this operating system. Mobility and m/z values need to be estimated. While this estimation often returns acceptable results with errors < 0.02 Th, huge errors (e.g. offsets of 6 Th) have already been observed for some samples!\n",
+      "2024-01-29 22:16:22> \n"
      ]
     }
    ],
@@ -31,59 +38,59 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2024-01-24 16:54:03> [PeptDeep] Running library task ...\n",
-      "2024-01-24 16:54:03> Input files (precursor_table): ['precursor_table.csv']\n",
-      "2024-01-24 16:54:03> Platform information:\n",
-      "2024-01-24 16:54:03> system        - Darwin\n",
-      "2024-01-24 16:54:03> release       - 23.2.0\n",
-      "2024-01-24 16:54:03> version       - 14.2.1\n",
-      "2024-01-24 16:54:03> machine       - arm64\n",
-      "2024-01-24 16:54:03> processor     - arm\n",
-      "2024-01-24 16:54:03> cpu count     - 10\n",
-      "2024-01-24 16:54:03> ram           - 34.6/64.0 Gb (available/total)\n",
-      "2024-01-24 16:54:03> \n",
-      "2024-01-24 16:54:03> Python information:\n",
-      "2024-01-24 16:54:03> alphabase        - 1.2.0\n",
-      "2024-01-24 16:54:03> alphabase>       - \n",
-      "2024-01-24 16:54:03> alpharaw         - 0.2.0\n",
-      "2024-01-24 16:54:03> alpharaw>        - \n",
-      "2024-01-24 16:54:03> biopython        - 1.79\n",
-      "2024-01-24 16:54:03> click            - 8.1.3\n",
-      "2024-01-24 16:54:03> lxml             - 4.9.1\n",
-      "2024-01-24 16:54:03> numba            - 0.58.1\n",
-      "2024-01-24 16:54:03> numpy            - 1.26.3\n",
-      "2024-01-24 16:54:03> pandas           - 2.1.4\n",
-      "2024-01-24 16:54:03> peptdeep         - 1.1.4\n",
-      "2024-01-24 16:54:03> psutil           - 5.9.2\n",
-      "2024-01-24 16:54:03> pyteomics        - 4.5.6\n",
-      "2024-01-24 16:54:03> python           - 3.9.12\n",
-      "2024-01-24 16:54:03> scikit-learn     - 1.1.2\n",
-      "2024-01-24 16:54:03> streamlit        - 1.30.0\n",
-      "2024-01-24 16:54:03> streamlit-aggrid - 0.3.3\n",
-      "2024-01-24 16:54:03> streamlit>       - \n",
-      "2024-01-24 16:54:03> torch            - 2.0.0\n",
-      "2024-01-24 16:54:03> tqdm             - 4.64.0\n",
-      "2024-01-24 16:54:03> transformers     - 4.28.1\n",
-      "2024-01-24 16:54:03> \n",
-      "2024-01-24 16:54:05> xxx/library.tsv does not exist, use default IRT_PEPTIDE_DF to translate irt\n",
-      "2024-01-24 16:54:05> Generating the spectral library ...\n",
-      "2024-01-24 16:54:05> Loaded 3 precursors.\n",
-      "2024-01-24 16:54:05> Predicting RT/IM/MS2 for 3 precursors ...\n",
-      "2024-01-24 16:54:05> Predicting RT ...\n"
+      "2024-01-29 22:16:23> [PeptDeep] Running library task ...\n",
+      "2024-01-29 22:16:23> Input files (precursor_table): ['precursor_table.csv']\n",
+      "2024-01-29 22:16:23> Platform information:\n",
+      "2024-01-29 22:16:23> system        - Darwin\n",
+      "2024-01-29 22:16:23> release       - 23.2.0\n",
+      "2024-01-29 22:16:23> version       - 14.2.1\n",
+      "2024-01-29 22:16:23> machine       - arm64\n",
+      "2024-01-29 22:16:23> processor     - arm\n",
+      "2024-01-29 22:16:23> cpu count     - 10\n",
+      "2024-01-29 22:16:23> ram           - 35.3/64.0 Gb (available/total)\n",
+      "2024-01-29 22:16:23> \n",
+      "2024-01-29 22:16:23> Python information:\n",
+      "2024-01-29 22:16:23> alphabase        - 1.2.0\n",
+      "2024-01-29 22:16:23> alphabase>       - \n",
+      "2024-01-29 22:16:23> alpharaw         - 0.2.0\n",
+      "2024-01-29 22:16:23> alpharaw>        - \n",
+      "2024-01-29 22:16:23> biopython        - 1.79\n",
+      "2024-01-29 22:16:23> click            - 8.1.3\n",
+      "2024-01-29 22:16:23> lxml             - 4.9.1\n",
+      "2024-01-29 22:16:23> numba            - 0.58.1\n",
+      "2024-01-29 22:16:23> numpy            - 1.26.3\n",
+      "2024-01-29 22:16:23> pandas           - 2.1.4\n",
+      "2024-01-29 22:16:23> peptdeep         - 1.1.4\n",
+      "2024-01-29 22:16:23> psutil           - 5.9.2\n",
+      "2024-01-29 22:16:23> pyteomics        - 4.5.6\n",
+      "2024-01-29 22:16:23> python           - 3.9.12\n",
+      "2024-01-29 22:16:23> scikit-learn     - 1.1.2\n",
+      "2024-01-29 22:16:23> streamlit        - 1.30.0\n",
+      "2024-01-29 22:16:23> streamlit-aggrid - 0.3.3\n",
+      "2024-01-29 22:16:23> streamlit>       - \n",
+      "2024-01-29 22:16:23> torch            - 2.0.0\n",
+      "2024-01-29 22:16:23> tqdm             - 4.64.0\n",
+      "2024-01-29 22:16:23> transformers     - 4.28.1\n",
+      "2024-01-29 22:16:23> \n",
+      "2024-01-29 22:16:25> xxx/library.tsv does not exist, use default IRT_PEPTIDE_DF to translate irt\n",
+      "2024-01-29 22:16:25> Generating the spectral library ...\n",
+      "2024-01-29 22:16:25> Loaded 3 precursors.\n",
+      "2024-01-29 22:16:25> Predicting RT/IM/MS2 for 3 precursors ...\n",
+      "2024-01-29 22:16:25> Predicting RT ...\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 3/3 [00:00<00:00, 238.62it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 319.48it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2024-01-24 16:54:05> Predicting mobility ...\n"
+      "2024-01-29 22:16:25> Predicting mobility ...\n"
      ]
     },
     {
@@ -91,14 +98,14 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "100%|██████████| 3/3 [00:00<00:00, 393.66it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 387.66it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2024-01-24 16:54:05> Predicting MS2 ...\n"
+      "2024-01-29 22:16:25> Predicting MS2 ...\n"
      ]
     },
     {
@@ -106,17 +113,17 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "100%|██████████| 3/3 [00:00<00:00, 176.88it/s]"
+      "100%|██████████| 3/3 [00:00<00:00, 175.87it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2024-01-24 16:54:05> End predicting RT/IM/MS2\n",
-      "2024-01-24 16:54:05> Predicting the spectral library with 3 precursors and 0.00M fragments used 0.5398 GB memory\n",
-      "2024-01-24 16:54:05> Saving HDF library to /Users/wenfengzeng/peptdeep/spec_libs/predict.speclib.hdf ...\n",
-      "2024-01-24 16:54:05> Library generated!!\n"
+      "2024-01-29 22:16:25> End predicting RT/IM/MS2\n",
+      "2024-01-29 22:16:25> Predicting the spectral library with 3 precursors and 0.00M fragments used 0.5076 GB memory\n",
+      "2024-01-29 22:16:25> Saving HDF library to /Users/wenfengzeng/peptdeep/spec_libs/predict.speclib.hdf ...\n",
+      "2024-01-29 22:16:25> Library generated!!\n"
      ]
     },
     {
diff --git a/peptdeep/constants/default_settings.yaml b/peptdeep/constants/default_settings.yaml
index 31ed1fa1..a90c1f10 100644
--- a/peptdeep/constants/default_settings.yaml
+++ b/peptdeep/constants/default_settings.yaml
@@ -72,6 +72,13 @@ model_mgr:
   external_ms2_model: ''
   external_rt_model: ''
   external_ccs_model: ''
+  charge_model_type: seq
+  charge_model_choices:
+  - seq
+  - modseq
+  charge_model_file: ''
+  charge_prob_cutoff: 0.3
+  use_predicted_charge_in_speclib: True # if True, it ignores min/max_precursor_charge in `library`
   instrument_group:
     ThermoTOF: ThermoTOF
     Astral: ThermoTOF
diff --git a/peptdeep/model/charge.py b/peptdeep/model/charge.py
index 5c0bea65..281a0c5d 100644
--- a/peptdeep/model/charge.py
+++ b/peptdeep/model/charge.py
@@ -1,16 +1,60 @@
 import pandas as pd
 import numpy as np
 
-
 from peptdeep.model.generic_property_prediction import (
     ModelInterface_for_Generic_AASeq_MultiLabelClassification,
     Model_for_Generic_AASeq_BinaryClassification_Transformer,
     ModelInterface_for_Generic_ModAASeq_MultiLabelClassification,
     Model_for_Generic_ModAASeq_BinaryClassification_Transformer,
 )
+
+class ChargeModelInterface:
+    def predict_charges_as_prob(self,
+        pep_df:pd.DataFrame, 
+        min_precursor_charge:int,
+        max_precursor_charge:int,
+    ):
+        df = self.predict(pep_df.copy())
+        df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
+        df["charge"] = [self.charge_range[
+            min_precursor_charge-self.min_predict_charge:
+            max_precursor_charge-self.min_predict_charge+1
+        ]]*len(df)
+        df["charge_prob"] = df.charge_prob.apply(
+            lambda x: x[
+                min_precursor_charge-self.min_predict_charge:
+                max_precursor_charge-self.min_predict_charge+1
+            ]
+        )
+        df = df.explode(
+            ["charge","charge_prob"], ignore_index=True
+        ).dropna(subset=["charge"])
+        df["charge"] = df.charge.astype(np.int8)
+        df["charge_prob"] = df.charge_prob.astype(np.float32)
+        return df
+
+    def predict_and_clip_charges(self, 
+        pep_df:pd.DataFrame, 
+        charge_prob_cutoff:float,
+    ):
+        df = self.predict(pep_df.copy())
+        df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
+        df["charge"] = df.charge_prob.apply(
+            lambda x: self.charge_range[x>charge_prob_cutoff]
+        )
+        df["charge_prob"] = df.charge_prob.apply(
+            lambda x: x[x>charge_prob_cutoff]
+        )
+        df = df.explode(
+            ["charge","charge_prob"], ignore_index=True
+        ).dropna(subset=["charge"])
+        df["charge"] = df.charge.astype(np.int8)
+        df["charge_prob"] = df.charge_prob.astype(np.float32)
+        return df
     
 class ChargeModelForModAASeq(
-    ModelInterface_for_Generic_ModAASeq_MultiLabelClassification
+    ModelInterface_for_Generic_ModAASeq_MultiLabelClassification,
+    ChargeModelInterface
 ):
     """
     ModelInterface for charge prediction for modified peptides
@@ -24,29 +68,15 @@ def __init__(self, min_charge:int=1, max_charge:int=6):
 
         self.target_column_to_predict = "charge_probs"
         self.target_column_to_train = "charge_indicators"
-        self.min_charge = min_charge
-        self.max_charge = max_charge
+        self.min_predict_charge = min_charge
+        self.max_predict_charge = max_charge
         self.charge_range = np.arange(
             min_charge, max_charge+1, dtype=np.int8
         )
-        
-    def predict_charges_for_pep_df(self, 
-        pep_df:pd.DataFrame, 
-        charge_prob=0.3,
-        drop_probs_column=True
-    ):
-        df = self.predict(pep_df)
-        df["charge"] = df.charge_probs.apply(
-            lambda x: self.charge_range[x>charge_prob]
-        )
-        df = df.explode("charge").dropna(subset=["charge"])
-        if drop_probs_column:
-            df.drop(columns="charge_probs", inplace=True)
-        df["charge"] = df.charge.astype(np.int8)
-        return df
 
 class ChargeModelForAASeq(
-    ModelInterface_for_Generic_AASeq_MultiLabelClassification
+    ModelInterface_for_Generic_AASeq_MultiLabelClassification,
+    ChargeModelInterface
 ):
     """
     ModelInterface for charge prediction for amino acid sequence
@@ -60,24 +90,11 @@ def __init__(self, min_charge:int=1, max_charge:int=6):
 
         self.target_column_to_predict = "charge_probs"
         self.target_column_to_train = "charge_indicators"
-        self.min_charge = min_charge
-        self.max_charge = max_charge
-        self.charge_range = np.arange(min_charge, max_charge+1, dtype=np.int8)
-        
-    def predict_charges_for_pep_df(self, 
-        pep_df:pd.DataFrame, 
-        charge_prob=0.3,
-        drop_probs_column=True
-    ):
-        df = self.predict(pep_df)
-        df["charge"] = df.charge_probs.apply(
-            lambda x: self.charge_range[x>charge_prob]
+        self.min_predict_charge = min_charge
+        self.max_predict_charge = max_charge
+        self.charge_range = np.arange(
+            min_charge, max_charge+1, dtype=np.int8
         )
-        df = df.explode("charge").dropna(subset=["charge"])
-        if drop_probs_column:
-            df.drop(columns="charge_probs", inplace=True)
-        df["charge"] = df.charge.astype(np.int8)
-        return df
 
 def group_psm_df_by_sequence(
     psm_df: pd.DataFrame,
diff --git a/peptdeep/pretrained_models.py b/peptdeep/pretrained_models.py
index 704ea711..d119362a 100644
--- a/peptdeep/pretrained_models.py
+++ b/peptdeep/pretrained_models.py
@@ -9,6 +9,7 @@
 import logging
 import shutil
 import ssl
+import typing
 from pickle import UnpicklingError
 import torch.multiprocessing as mp
 if sys.platform.lower().startswith("linux"):
@@ -43,6 +44,7 @@
 )
 from peptdeep.model.rt import AlphaRTModel
 from peptdeep.model.ccs import AlphaCCSModel
+from peptdeep.model.charge import ChargeModelForAASeq, ChargeModelForModAASeq
 from peptdeep.utils import (
     uniform_sampling, evaluate_linear_regression
 )
@@ -299,12 +301,25 @@ def __init__(self,
         self.rt_model:AlphaRTModel = AlphaRTModel(device=device)
         self.ccs_model:AlphaCCSModel = AlphaCCSModel(device=device)
         self.load_installed_models()
+
+        self.charge_model:typing.Union[ChargeModelForAASeq,ChargeModelForModAASeq] = None
+
         self.reset_by_global_settings(reload_models=False)
 
     def reset_by_global_settings(self,
         reload_models=True,
     ):
         mgr_settings = global_settings['model_mgr']
+
+        if os.path.isfile(mgr_settings['charge_model_file']):
+            if mgr_settings['charge_model_type'] == 'modseq':
+                self.charge_model = ChargeModelForModAASeq()
+            else:
+                self.charge_model = ChargeModelForAASeq()
+            self.charge_model.load(mgr_settings['charge_model_file'])
+        self.charge_prob_cutoff = mgr_settings['charge_prob_cutoff']
+        self.use_predicted_charge_in_speclib = mgr_settings['use_predicted_charge_in_speclib']
+
         if reload_models:
             self.load_installed_models(mgr_settings['model_type'])
             self.load_external_models(
@@ -1141,4 +1156,3 @@ def refine_df(df):
                 process_num = process_num,
                 mp_batch_size=mp_batch_size,
             )
-
diff --git a/peptdeep/protein/fasta.py b/peptdeep/protein/fasta.py
index aede97b1..eaefd0c9 100644
--- a/peptdeep/protein/fasta.py
+++ b/peptdeep/protein/fasta.py
@@ -172,3 +172,19 @@ def __init__(self,
             else:
                 print("Oops, `PredictSpecLibFasta.model_manager` is None, while it should not happen")
                 self.model_manager = model_manager
+
+    def add_charge(self):
+        if self.model_manager.charge_model is None:
+            super().add_charge()
+        else:
+            if self.model_manager.use_predicted_charge_in_speclib:
+                self._precursor_df = self.model_manager.charge_model.predict_and_clip_charges(
+                    self.precursor_df, 
+                    charge_prob_cutoff=self.model_manager.charge_prob_cutoff
+                )
+            else:
+                self._precursor_df = self.model_manager.charge_model.predict_charges_as_prob(
+                    self.precursor_df, 
+                    min_precursor_charge=self.min_precursor_charge,
+                    max_precursor_charge=self.max_precursor_charge
+                )

From 83bf54e65fb7ada6787f5d0edd1957827eb6364c Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Tue, 30 Jan 2024 00:13:52 +0100
Subject: [PATCH 06/18] multilabel must predict_in_order

---
 nbdev_nbs/model/charge.ipynb                  | 308 +++++++++++++++---
 nbdev_nbs/protein/fasta.ipynb                 |  54 ++-
 peptdeep/model/charge.py                      |  14 +
 peptdeep/model/generic_property_prediction.py |  44 ++-
 4 files changed, 311 insertions(+), 109 deletions(-)

diff --git a/nbdev_nbs/model/charge.ipynb b/nbdev_nbs/model/charge.ipynb
index 7755d736..7522bd5c 100644
--- a/nbdev_nbs/model/charge.ipynb
+++ b/nbdev_nbs/model/charge.ipynb
@@ -56,39 +56,31 @@
        "      <td>ABCDE</td>\n",
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
-       "      <td>0.739165</td>\n",
+       "      <td>0.693502</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>FGHIJK</td>\n",
-       "      <td>[0, 1]</td>\n",
-       "      <td>6</td>\n",
-       "      <td>0.439334</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
+       "      <th>2</th>\n",
        "      <td>FGHIJK</td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.627932</td>\n",
+       "      <td>0.462107</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2</th>\n",
+       "      <th>3</th>\n",
        "      <td>LMNOPQ</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.628110</td>\n",
+       "      <td>0.544402</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2</th>\n",
+       "      <th>4</th>\n",
        "      <td>LMNOPQ</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.587332</td>\n",
+       "      <td>0.394243</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -97,11 +89,10 @@
       ],
       "text/plain": [
        "  sequence charge_indicators  nAA  charge_prob  charge\n",
-       "0    ABCDE            [1, 0]    5     0.739165       1\n",
-       "1   FGHIJK            [0, 1]    6     0.439334       1\n",
-       "1   FGHIJK            [0, 1]    6     0.627932       2\n",
-       "2   LMNOPQ            [1, 1]    6     0.628110       1\n",
-       "2   LMNOPQ            [1, 1]    6     0.587332       2"
+       "0    ABCDE            [1, 0]    5     0.693502       1\n",
+       "2   FGHIJK            [0, 1]    6     0.462107       2\n",
+       "3   LMNOPQ            [1, 1]    6     0.544402       1\n",
+       "4   LMNOPQ            [1, 1]    6     0.394243       2"
       ]
      },
      "execution_count": null,
@@ -116,11 +107,10 @@
     "model = ChargeModelForAASeq(min_charge=1, max_charge=2)\n",
     "\n",
     "seq_df = pd.DataFrame({\n",
-    "    'sequence': ['ABCDE','FGHIJK','LMNOPQ','RSTUVWXYZ'],\n",
-    "    'charge_indicators': [[1,0],[0,1],[1,1],[0,0]],\n",
+    "    'sequence': ['ABCDE','FGHIJK','LMNOPQ','RSTUVWXYZ','HIJKL'],\n",
+    "    'charge_indicators': [[1,0],[0,1],[1,1],[0,0],[0,0]],\n",
     "})\n",
     "model.train(seq_df)\n",
-    "model.predict(seq_df)\n",
     "model.predict_and_clip_charges(seq_df, charge_prob_cutoff=0.3)"
    ]
   },
@@ -163,31 +153,39 @@
        "      <td>ABCDE</td>\n",
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
-       "      <td>0.249596</td>\n",
+       "      <td>0.186966</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
+       "      <td>HIJKL</td>\n",
+       "      <td>[0, 0]</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.253555</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
        "      <td>FGHIJK</td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.627932</td>\n",
+       "      <td>0.462107</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2</th>\n",
+       "      <th>3</th>\n",
        "      <td>LMNOPQ</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.587332</td>\n",
+       "      <td>0.394243</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3</th>\n",
+       "      <th>4</th>\n",
        "      <td>RSTUVWXYZ</td>\n",
        "      <td>[0, 0]</td>\n",
        "      <td>9</td>\n",
-       "      <td>0.260932</td>\n",
+       "      <td>0.129340</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -196,10 +194,11 @@
       ],
       "text/plain": [
        "    sequence charge_indicators  nAA  charge_prob  charge\n",
-       "0      ABCDE            [1, 0]    5     0.249596       2\n",
-       "1     FGHIJK            [0, 1]    6     0.627932       2\n",
-       "2     LMNOPQ            [1, 1]    6     0.587332       2\n",
-       "3  RSTUVWXYZ            [0, 0]    9     0.260932       2"
+       "0      ABCDE            [1, 0]    5     0.186966       2\n",
+       "1      HIJKL            [0, 0]    5     0.253555       2\n",
+       "2     FGHIJK            [0, 1]    6     0.462107       2\n",
+       "3     LMNOPQ            [1, 1]    6     0.394243       2\n",
+       "4  RSTUVWXYZ            [0, 0]    9     0.129340       2"
       ]
      },
      "execution_count": null,
@@ -211,6 +210,103 @@
     "model.predict_charges_as_prob(seq_df, 2, 4)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>charge_indicators</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>charge</th>\n",
+       "      <th>charge_prob</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>ABCDE</td>\n",
+       "      <td>[1, 0]</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.693502</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>HIJKL</td>\n",
+       "      <td>[0, 0]</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.196651</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>FGHIJK</td>\n",
+       "      <td>[0, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.148395</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>LMNOPQ</td>\n",
+       "      <td>[1, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.544402</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>RSTUVWXYZ</td>\n",
+       "      <td>[0, 0]</td>\n",
+       "      <td>9</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.132826</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    sequence charge_indicators  nAA  charge  charge_prob\n",
+       "0      ABCDE            [1, 0]    5       1     0.693502\n",
+       "1      HIJKL            [0, 0]    5       1     0.196651\n",
+       "2     FGHIJK            [0, 1]    6       1     0.148395\n",
+       "3     LMNOPQ            [1, 1]    6       1     0.544402\n",
+       "4  RSTUVWXYZ            [0, 0]    9       1     0.132826"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "seq_df['charge'] = 1\n",
+    "model.predict_prob_for_charge(seq_df)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -254,7 +350,7 @@
        "      <td></td>\n",
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
-       "      <td>0.814867</td>\n",
+       "      <td>0.794292</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -264,7 +360,7 @@
        "      <td></td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.708186</td>\n",
+       "      <td>0.654079</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -274,17 +370,17 @@
        "      <td>2</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.712738</td>\n",
+       "      <td>0.747879</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2</th>\n",
+       "      <th>3</th>\n",
        "      <td>LMNOPQ</td>\n",
        "      <td>Oxidation@M</td>\n",
        "      <td>2</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.534221</td>\n",
+       "      <td>0.691985</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -293,10 +389,10 @@
       ],
       "text/plain": [
        "  sequence         mods mod_sites charge_indicators  nAA  charge_prob  charge\n",
-       "0    ABCDE                                   [1, 0]    5     0.814867       1\n",
-       "1   FGHIJK                                   [0, 1]    6     0.708186       2\n",
-       "2   LMNOPQ  Oxidation@M         2            [1, 1]    6     0.712738       1\n",
-       "2   LMNOPQ  Oxidation@M         2            [1, 1]    6     0.534221       2"
+       "0    ABCDE                                   [1, 0]    5     0.794292       1\n",
+       "1   FGHIJK                                   [0, 1]    6     0.654079       2\n",
+       "2   LMNOPQ  Oxidation@M         2            [1, 1]    6     0.747879       1\n",
+       "3   LMNOPQ  Oxidation@M         2            [1, 1]    6     0.691985       2"
       ]
      },
      "execution_count": null,
@@ -317,7 +413,6 @@
     "    'charge_indicators': [[1,0],[0,1],[1,1],[0,0]],\n",
     "})\n",
     "model.train(modseq_df)\n",
-    "model.predict(modseq_df)\n",
     "model.predict_and_clip_charges(modseq_df, charge_prob_cutoff=0.3)"
    ]
   },
@@ -364,7 +459,7 @@
        "      <td></td>\n",
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
-       "      <td>0.224946</td>\n",
+       "      <td>0.199697</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -374,7 +469,7 @@
        "      <td></td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.708186</td>\n",
+       "      <td>0.654079</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -384,7 +479,7 @@
        "      <td>2</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.534221</td>\n",
+       "      <td>0.691985</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -394,7 +489,7 @@
        "      <td>3</td>\n",
        "      <td>[0, 0]</td>\n",
        "      <td>9</td>\n",
-       "      <td>0.278221</td>\n",
+       "      <td>0.282058</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -403,10 +498,10 @@
       ],
       "text/plain": [
        "    sequence         mods mod_sites charge_indicators  nAA  charge_prob  \\\n",
-       "0      ABCDE                                   [1, 0]    5     0.224946   \n",
-       "1     FGHIJK                                   [0, 1]    6     0.708186   \n",
-       "2     LMNOPQ  Oxidation@M         2            [1, 1]    6     0.534221   \n",
-       "3  RSTUVWXYZ    Phospho@T         3            [0, 0]    9     0.278221   \n",
+       "0      ABCDE                                   [1, 0]    5     0.199697   \n",
+       "1     FGHIJK                                   [0, 1]    6     0.654079   \n",
+       "2     LMNOPQ  Oxidation@M         2            [1, 1]    6     0.691985   \n",
+       "3  RSTUVWXYZ    Phospho@T         3            [0, 0]    9     0.282058   \n",
        "\n",
        "   charge  \n",
        "0       2  \n",
@@ -423,6 +518,117 @@
    "source": [
     "model.predict_charges_as_prob(modseq_df, 2, 4)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sequence</th>\n",
+       "      <th>mods</th>\n",
+       "      <th>mod_sites</th>\n",
+       "      <th>charge_indicators</th>\n",
+       "      <th>nAA</th>\n",
+       "      <th>charge</th>\n",
+       "      <th>charge_prob</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>ABCDE</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>[1, 0]</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.794292</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>FGHIJK</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>[0, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.191645</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>LMNOPQ</td>\n",
+       "      <td>Oxidation@M</td>\n",
+       "      <td>2</td>\n",
+       "      <td>[1, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.747879</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>RSTUVWXYZ</td>\n",
+       "      <td>Phospho@T</td>\n",
+       "      <td>3</td>\n",
+       "      <td>[0, 0]</td>\n",
+       "      <td>9</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.188159</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    sequence         mods mod_sites charge_indicators  nAA  charge  \\\n",
+       "0      ABCDE                                   [1, 0]    5       1   \n",
+       "1     FGHIJK                                   [0, 1]    6       1   \n",
+       "2     LMNOPQ  Oxidation@M         2            [1, 1]    6       1   \n",
+       "3  RSTUVWXYZ    Phospho@T         3            [0, 0]    9       1   \n",
+       "\n",
+       "   charge_prob  \n",
+       "0     0.794292  \n",
+       "1     0.191645  \n",
+       "2     0.747879  \n",
+       "3     0.188159  "
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "modseq_df['charge'] = 1\n",
+    "model.predict_prob_for_charge(modseq_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
diff --git a/nbdev_nbs/protein/fasta.ipynb b/nbdev_nbs/protein/fasta.ipynb
index 3332879e..1256ad16 100644
--- a/nbdev_nbs/protein/fasta.ipynb
+++ b/nbdev_nbs/protein/fasta.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -18,7 +18,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -35,7 +35,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -197,7 +197,7 @@
        "8          False                  20  "
       ]
      },
-     "execution_count": 3,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -224,7 +224,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -276,7 +276,7 @@
        "1         yy      gene           FGHIJKLMNOPQR"
       ]
      },
-     "execution_count": 4,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -287,7 +287,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -469,7 +469,7 @@
        "8          False                  20       xx        "
       ]
      },
-     "execution_count": 5,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -482,7 +482,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -514,7 +514,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1075,7 +1075,7 @@
        "31       0;4   20       xx        "
       ]
      },
-     "execution_count": 7,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1087,7 +1087,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1133,7 +1133,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1822,7 +1822,7 @@
        "39       0;4   20       xx        "
       ]
      },
-     "execution_count": 9,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1836,7 +1836,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -2076,7 +2076,7 @@
        "[120 rows x 11 columns]"
       ]
      },
-     "execution_count": 10,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2093,7 +2093,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2102,7 +2102,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -3287,7 +3287,7 @@
        "[40 rows x 26 columns]"
       ]
      },
-     "execution_count": 13,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3326,7 +3326,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -3699,7 +3699,7 @@
        "[80 rows x 27 columns]"
       ]
      },
-     "execution_count": 14,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3732,18 +3732,6 @@
    "display_name": "Python 3.8.3 ('base')",
    "language": "python",
    "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.12"
   }
  },
  "nbformat": 4,
diff --git a/peptdeep/model/charge.py b/peptdeep/model/charge.py
index 281a0c5d..ded7df06 100644
--- a/peptdeep/model/charge.py
+++ b/peptdeep/model/charge.py
@@ -9,6 +9,8 @@
 )
 
 class ChargeModelInterface:
+    def __init__(self):
+        raise TypeError("The abstract interface class cannot be initialized")
     def predict_charges_as_prob(self,
         pep_df:pd.DataFrame, 
         min_precursor_charge:int,
@@ -32,6 +34,18 @@ def predict_charges_as_prob(self,
         df["charge"] = df.charge.astype(np.int8)
         df["charge_prob"] = df.charge_prob.astype(np.float32)
         return df
+    
+    def predict_prob_for_charge(self,
+        precursor_df:pd.DataFrame,
+    ):
+        if "charge" not in precursor_df.columns:
+            raise KeyError("precursor_df must contain `charge` column")
+        precursor_df = self.predict(precursor_df)
+        precursor_df["charge_prob"] = precursor_df[["charge_probs","charge"]].apply(
+            lambda x: x.iloc[0][x.iloc[1]-self.min_predict_charge], axis=1
+        ).astype(np.float32)
+        precursor_df.drop(columns="charge_probs", inplace=True)
+        return precursor_df
 
     def predict_and_clip_charges(self, 
         pep_df:pd.DataFrame, 
diff --git a/peptdeep/model/generic_property_prediction.py b/peptdeep/model/generic_property_prediction.py
index 3777143d..0d7b04c6 100644
--- a/peptdeep/model/generic_property_prediction.py
+++ b/peptdeep/model/generic_property_prediction.py
@@ -3,7 +3,7 @@
 import numpy as np
 
 import peptdeep.model.building_block as building_block
-from peptdeep.model.model_interface import ModelInterface
+from peptdeep.model.model_interface import ModelInterface, is_precursor_sorted
 
 ASCII_NUM=128
 
@@ -435,6 +435,12 @@ def _get_targets_from_batch_df(self, batch_df, **kwargs):
             np.stack(batch_df[self.target_column_to_train].values), 
             dtype=torch.float32
         )
+    
+    def _check_predict_in_order(self, precursor_df:pd.DataFrame):
+        if not is_precursor_sorted(precursor_df):
+            # multilabel prediction can only predict in order
+            precursor_df.sort_values("nAA", inplace=True)
+            precursor_df.reset_index(drop=True, inplace=True)
 
     def _prepare_predict_data_df(self, precursor_df, **kwargs):
         precursor_df[self.target_column_to_predict] = [
@@ -443,18 +449,9 @@ def _prepare_predict_data_df(self, precursor_df, **kwargs):
         self.predict_df = precursor_df
 
     def _set_batch_predict_data(self, batch_df, predict_values, **kwargs):
-        if self._predict_in_order:
-            self.predict_df.loc[:,self.target_column_to_predict].values[
-                batch_df.index.values[0]:batch_df.index.values[-1]+1
-            ] = list(predict_values)
-        else:
-            # self.predict_df.loc[
-            #     batch_df.index,self.target_column_to_predict
-            # ] = [val.tolist() for val in predict_values]
-
-            # fail to assign list of list/ndarray by .loc, use for loop instead (slow)
-            for idx,val in zip(batch_df.index.values,predict_values):
-                self.predict_df.loc[idx,self.target_column_to_predict] = val
+        self.predict_df.loc[:,self.target_column_to_predict].values[
+            batch_df.index.values[0]:batch_df.index.values[-1]+1
+        ] = list(predict_values)
 
 class ModelInterface_for_Generic_ModAASeq_MultiLabelClassification(
     ModelInterface_for_Generic_ModAASeq_BinaryClassification
@@ -481,6 +478,12 @@ def _get_targets_from_batch_df(self, batch_df, **kwargs):
             np.stack(batch_df[self.target_column_to_train].values), 
             dtype=torch.float32
         )
+    
+    def _check_predict_in_order(self, precursor_df:pd.DataFrame):
+        if not is_precursor_sorted(precursor_df):
+            # multilabel prediction can only predict in order
+            precursor_df.sort_values("nAA", inplace=True)
+            precursor_df.reset_index(drop=True, inplace=True)
 
     def _prepare_predict_data_df(self, precursor_df, **kwargs):
         precursor_df[self.target_column_to_predict] = [
@@ -489,18 +492,9 @@ def _prepare_predict_data_df(self, precursor_df, **kwargs):
         self.predict_df = precursor_df
 
     def _set_batch_predict_data(self, batch_df, predict_values, **kwargs):
-        if self._predict_in_order:
-            self.predict_df.loc[:,self.target_column_to_predict].values[
-                batch_df.index.values[0]:batch_df.index.values[-1]+1
-            ] = list(predict_values)
-        else:
-            # self.predict_df.loc[
-            #     batch_df.index,self.target_column_to_predict
-            # ] = [val.tolist() for val in predict_values]
-
-            # fail to assign list of list/ndarray by .loc, use for loop instead (slow)
-            for idx,val in zip(batch_df.index.values,predict_values):
-                self.predict_df.loc[idx,self.target_column_to_predict] = val
+        self.predict_df.loc[:,self.target_column_to_predict].values[
+            batch_df.index.values[0]:batch_df.index.values[-1]+1
+        ] = list(predict_values)
 
 # alias
 ModelInterface_for_Generic_AASeq_MultiTargetClassification = ModelInterface_for_Generic_AASeq_MultiLabelClassification

From 830ce15de65be17a714e288036832144ccc6208d Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Tue, 30 Jan 2024 00:26:03 +0100
Subject: [PATCH 07/18] remove interface class

---
 peptdeep/model/charge.py | 102 +++++++++++++++++++++++++++++----------
 1 file changed, 76 insertions(+), 26 deletions(-)

diff --git a/peptdeep/model/charge.py b/peptdeep/model/charge.py
index ded7df06..7952d22d 100644
--- a/peptdeep/model/charge.py
+++ b/peptdeep/model/charge.py
@@ -7,10 +7,28 @@
     ModelInterface_for_Generic_ModAASeq_MultiLabelClassification,
     Model_for_Generic_ModAASeq_BinaryClassification_Transformer,
 )
+    
+class ChargeModelForModAASeq(
+    ModelInterface_for_Generic_ModAASeq_MultiLabelClassification,
+):
+    """
+    ModelInterface for charge prediction for modified peptides
+    """
+    def __init__(self, min_charge:int=1, max_charge:int=6):
+        super().__init__(
+            num_target_values=max_charge-min_charge+1,
+            model_class=Model_for_Generic_ModAASeq_BinaryClassification_Transformer,
+            nlayers=4, hidden_dim=128, dropout=0.1
+        )
+
+        self.target_column_to_predict = "charge_probs"
+        self.target_column_to_train = "charge_indicators"
+        self.min_predict_charge = min_charge
+        self.max_predict_charge = max_charge
+        self.charge_range = np.arange(
+            min_charge, max_charge+1, dtype=np.int8
+        )
 
-class ChargeModelInterface:
-    def __init__(self):
-        raise TypeError("The abstract interface class cannot be initialized")
     def predict_charges_as_prob(self,
         pep_df:pd.DataFrame, 
         min_precursor_charge:int,
@@ -65,32 +83,9 @@ def predict_and_clip_charges(self,
         df["charge"] = df.charge.astype(np.int8)
         df["charge_prob"] = df.charge_prob.astype(np.float32)
         return df
-    
-class ChargeModelForModAASeq(
-    ModelInterface_for_Generic_ModAASeq_MultiLabelClassification,
-    ChargeModelInterface
-):
-    """
-    ModelInterface for charge prediction for modified peptides
-    """
-    def __init__(self, min_charge:int=1, max_charge:int=6):
-        super().__init__(
-            num_target_values=max_charge-min_charge+1,
-            model_class=Model_for_Generic_ModAASeq_BinaryClassification_Transformer,
-            nlayers=4, hidden_dim=128, dropout=0.1
-        )
-
-        self.target_column_to_predict = "charge_probs"
-        self.target_column_to_train = "charge_indicators"
-        self.min_predict_charge = min_charge
-        self.max_predict_charge = max_charge
-        self.charge_range = np.arange(
-            min_charge, max_charge+1, dtype=np.int8
-        )
 
 class ChargeModelForAASeq(
     ModelInterface_for_Generic_AASeq_MultiLabelClassification,
-    ChargeModelInterface
 ):
     """
     ModelInterface for charge prediction for amino acid sequence
@@ -110,6 +105,61 @@ def __init__(self, min_charge:int=1, max_charge:int=6):
             min_charge, max_charge+1, dtype=np.int8
         )
 
+    def predict_charges_as_prob(self,
+        pep_df:pd.DataFrame, 
+        min_precursor_charge:int,
+        max_precursor_charge:int,
+    ):
+        df = self.predict(pep_df.copy())
+        df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
+        df["charge"] = [self.charge_range[
+            min_precursor_charge-self.min_predict_charge:
+            max_precursor_charge-self.min_predict_charge+1
+        ]]*len(df)
+        df["charge_prob"] = df.charge_prob.apply(
+            lambda x: x[
+                min_precursor_charge-self.min_predict_charge:
+                max_precursor_charge-self.min_predict_charge+1
+            ]
+        )
+        df = df.explode(
+            ["charge","charge_prob"], ignore_index=True
+        ).dropna(subset=["charge"])
+        df["charge"] = df.charge.astype(np.int8)
+        df["charge_prob"] = df.charge_prob.astype(np.float32)
+        return df
+    
+    def predict_prob_for_charge(self,
+        precursor_df:pd.DataFrame,
+    ):
+        if "charge" not in precursor_df.columns:
+            raise KeyError("precursor_df must contain `charge` column")
+        precursor_df = self.predict(precursor_df)
+        precursor_df["charge_prob"] = precursor_df[["charge_probs","charge"]].apply(
+            lambda x: x.iloc[0][x.iloc[1]-self.min_predict_charge], axis=1
+        ).astype(np.float32)
+        precursor_df.drop(columns="charge_probs", inplace=True)
+        return precursor_df
+
+    def predict_and_clip_charges(self, 
+        pep_df:pd.DataFrame, 
+        charge_prob_cutoff:float,
+    ):
+        df = self.predict(pep_df.copy())
+        df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
+        df["charge"] = df.charge_prob.apply(
+            lambda x: self.charge_range[x>charge_prob_cutoff]
+        )
+        df["charge_prob"] = df.charge_prob.apply(
+            lambda x: x[x>charge_prob_cutoff]
+        )
+        df = df.explode(
+            ["charge","charge_prob"], ignore_index=True
+        ).dropna(subset=["charge"])
+        df["charge"] = df.charge.astype(np.int8)
+        df["charge_prob"] = df.charge_prob.astype(np.float32)
+        return df
+
 def group_psm_df_by_sequence(
     psm_df: pd.DataFrame,
     min_charge:int,

From 549ef6a19ce322ab2a25cef0493a88431a04fca2 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Tue, 30 Jan 2024 00:40:28 +0100
Subject: [PATCH 08/18] add batch_size and verbose

---
 peptdeep/model/charge.py | 43 ++++++++++++++++++++++++++++++++++------
 1 file changed, 37 insertions(+), 6 deletions(-)

diff --git a/peptdeep/model/charge.py b/peptdeep/model/charge.py
index 7952d22d..5912a608 100644
--- a/peptdeep/model/charge.py
+++ b/peptdeep/model/charge.py
@@ -33,8 +33,14 @@ def predict_charges_as_prob(self,
         pep_df:pd.DataFrame, 
         min_precursor_charge:int,
         max_precursor_charge:int,
+        batch_size=1024,
+        verbose=False,
     ):
-        df = self.predict(pep_df.copy())
+        df = self.predict(
+            pep_df.copy(), 
+            batch_size=batch_size,
+            verbose=verbose,
+        )
         df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
         df["charge"] = [self.charge_range[
             min_precursor_charge-self.min_predict_charge:
@@ -55,10 +61,15 @@ def predict_charges_as_prob(self,
     
     def predict_prob_for_charge(self,
         precursor_df:pd.DataFrame,
+        batch_size=1024,
+        verbose=False,
     ):
         if "charge" not in precursor_df.columns:
             raise KeyError("precursor_df must contain `charge` column")
-        precursor_df = self.predict(precursor_df)
+        precursor_df = self.predict(precursor_df,
+            batch_size=batch_size,
+            verbose=verbose,
+        )
         precursor_df["charge_prob"] = precursor_df[["charge_probs","charge"]].apply(
             lambda x: x.iloc[0][x.iloc[1]-self.min_predict_charge], axis=1
         ).astype(np.float32)
@@ -68,8 +79,13 @@ def predict_prob_for_charge(self,
     def predict_and_clip_charges(self, 
         pep_df:pd.DataFrame, 
         charge_prob_cutoff:float,
+        batch_size=1024,
+        verbose=False,
     ):
-        df = self.predict(pep_df.copy())
+        df = self.predict(pep_df.copy(),
+            batch_size=batch_size,
+            verbose=verbose,
+        )
         df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
         df["charge"] = df.charge_prob.apply(
             lambda x: self.charge_range[x>charge_prob_cutoff]
@@ -109,8 +125,13 @@ def predict_charges_as_prob(self,
         pep_df:pd.DataFrame, 
         min_precursor_charge:int,
         max_precursor_charge:int,
+        batch_size=1024,
+        verbose=False,
     ):
-        df = self.predict(pep_df.copy())
+        df = self.predict(pep_df.copy(),
+            batch_size=batch_size,
+            verbose=verbose,
+        )
         df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
         df["charge"] = [self.charge_range[
             min_precursor_charge-self.min_predict_charge:
@@ -131,10 +152,15 @@ def predict_charges_as_prob(self,
     
     def predict_prob_for_charge(self,
         precursor_df:pd.DataFrame,
+        batch_size=1024,
+        verbose=False,
     ):
         if "charge" not in precursor_df.columns:
             raise KeyError("precursor_df must contain `charge` column")
-        precursor_df = self.predict(precursor_df)
+        precursor_df = self.predict(precursor_df,
+            batch_size=batch_size,
+            verbose=verbose,
+        )
         precursor_df["charge_prob"] = precursor_df[["charge_probs","charge"]].apply(
             lambda x: x.iloc[0][x.iloc[1]-self.min_predict_charge], axis=1
         ).astype(np.float32)
@@ -144,8 +170,13 @@ def predict_prob_for_charge(self,
     def predict_and_clip_charges(self, 
         pep_df:pd.DataFrame, 
         charge_prob_cutoff:float,
+        batch_size=1024,
+        verbose=False,
     ):
-        df = self.predict(pep_df.copy())
+        df = self.predict(pep_df.copy(),
+            batch_size=batch_size,
+            verbose=verbose,
+        )
         df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
         df["charge"] = df.charge_prob.apply(
             lambda x: self.charge_range[x>charge_prob_cutoff]

From edfc759f7e5f516387c96a4a35af7a8ab840743d Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Tue, 30 Jan 2024 00:46:47 +0100
Subject: [PATCH 09/18] batch_size and verbose as properties

---
 peptdeep/model/charge.py | 55 +++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 29 deletions(-)

diff --git a/peptdeep/model/charge.py b/peptdeep/model/charge.py
index 5912a608..344f56ae 100644
--- a/peptdeep/model/charge.py
+++ b/peptdeep/model/charge.py
@@ -28,18 +28,18 @@ def __init__(self, min_charge:int=1, max_charge:int=6):
         self.charge_range = np.arange(
             min_charge, max_charge+1, dtype=np.int8
         )
+        self.predict_batch_size = 1024
+        self.predict_verbose = False
 
     def predict_charges_as_prob(self,
         pep_df:pd.DataFrame, 
         min_precursor_charge:int,
         max_precursor_charge:int,
-        batch_size=1024,
-        verbose=False,
     ):
         df = self.predict(
             pep_df.copy(), 
-            batch_size=batch_size,
-            verbose=verbose,
+            batch_size=self.predict_batch_size,
+            verbose=self.predict_verbose,
         )
         df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
         df["charge"] = [self.charge_range[
@@ -61,14 +61,13 @@ def predict_charges_as_prob(self,
     
     def predict_prob_for_charge(self,
         precursor_df:pd.DataFrame,
-        batch_size=1024,
-        verbose=False,
     ):
         if "charge" not in precursor_df.columns:
             raise KeyError("precursor_df must contain `charge` column")
-        precursor_df = self.predict(precursor_df,
-            batch_size=batch_size,
-            verbose=verbose,
+        precursor_df = self.predict(
+            precursor_df,
+            batch_size=self.predict_batch_size,
+            verbose=self.predict_verbose,
         )
         precursor_df["charge_prob"] = precursor_df[["charge_probs","charge"]].apply(
             lambda x: x.iloc[0][x.iloc[1]-self.min_predict_charge], axis=1
@@ -79,12 +78,11 @@ def predict_prob_for_charge(self,
     def predict_and_clip_charges(self, 
         pep_df:pd.DataFrame, 
         charge_prob_cutoff:float,
-        batch_size=1024,
-        verbose=False,
     ):
-        df = self.predict(pep_df.copy(),
-            batch_size=batch_size,
-            verbose=verbose,
+        df = self.predict(
+            pep_df.copy(),
+            batch_size=self.predict_batch_size,
+            verbose=self.predict_verbose,
         )
         df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
         df["charge"] = df.charge_prob.apply(
@@ -120,17 +118,18 @@ def __init__(self, min_charge:int=1, max_charge:int=6):
         self.charge_range = np.arange(
             min_charge, max_charge+1, dtype=np.int8
         )
+        self.predict_batch_size = 1024
+        self.predict_verbose = False
 
     def predict_charges_as_prob(self,
         pep_df:pd.DataFrame, 
         min_precursor_charge:int,
         max_precursor_charge:int,
-        batch_size=1024,
-        verbose=False,
     ):
-        df = self.predict(pep_df.copy(),
-            batch_size=batch_size,
-            verbose=verbose,
+        df = self.predict(
+            pep_df.copy(),
+            batch_size=self.predict_batch_size,
+            verbose=self.predict_verbose,
         )
         df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
         df["charge"] = [self.charge_range[
@@ -152,14 +151,13 @@ def predict_charges_as_prob(self,
     
     def predict_prob_for_charge(self,
         precursor_df:pd.DataFrame,
-        batch_size=1024,
-        verbose=False,
     ):
         if "charge" not in precursor_df.columns:
             raise KeyError("precursor_df must contain `charge` column")
-        precursor_df = self.predict(precursor_df,
-            batch_size=batch_size,
-            verbose=verbose,
+        precursor_df = self.predict(
+            precursor_df,
+            batch_size=self.predict_batch_size,
+            verbose=self.predict_verbose,
         )
         precursor_df["charge_prob"] = precursor_df[["charge_probs","charge"]].apply(
             lambda x: x.iloc[0][x.iloc[1]-self.min_predict_charge], axis=1
@@ -170,12 +168,11 @@ def predict_prob_for_charge(self,
     def predict_and_clip_charges(self, 
         pep_df:pd.DataFrame, 
         charge_prob_cutoff:float,
-        batch_size=1024,
-        verbose=False,
     ):
-        df = self.predict(pep_df.copy(),
-            batch_size=batch_size,
-            verbose=verbose,
+        df = self.predict(
+            pep_df.copy(),
+            batch_size=self.predict_batch_size,
+            verbose=self.predict_verbose,
         )
         df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
         df["charge"] = df.charge_prob.apply(

From 8f9039634a9ac1da2931d113af307829adfe3932 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Tue, 30 Jan 2024 01:51:33 +0100
Subject: [PATCH 10/18] predict_mp for charges

---
 nbdev_nbs/model/charge.ipynb             | 218 +++++++++++++++++------
 peptdeep/constants/default_settings.yaml |   1 +
 peptdeep/model/charge.py                 |  20 +--
 peptdeep/model/model_interface.py        |   2 +-
 peptdeep/pretrained_models.py            |   1 +
 peptdeep/protein/fasta.py                |   1 +
 6 files changed, 174 insertions(+), 69 deletions(-)

diff --git a/nbdev_nbs/model/charge.ipynb b/nbdev_nbs/model/charge.ipynb
index 7522bd5c..176de010 100644
--- a/nbdev_nbs/model/charge.ipynb
+++ b/nbdev_nbs/model/charge.ipynb
@@ -22,6 +22,21 @@
       "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n"
      ]
     },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predicting with multiprocessing ...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "  0%|          | 0/1 [00:00<?, ?it/s]OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n",
+      "100%|██████████| 1/1 [00:03<00:00,  3.26s/it]\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
@@ -56,15 +71,23 @@
        "      <td>ABCDE</td>\n",
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
-       "      <td>0.693502</td>\n",
+       "      <td>0.725907</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>HIJKL</td>\n",
+       "      <td>[0, 0]</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.304970</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>FGHIJK</td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.462107</td>\n",
+       "      <td>0.376047</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -72,7 +95,7 @@
        "      <td>LMNOPQ</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.544402</td>\n",
+       "      <td>0.611807</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -80,7 +103,7 @@
        "      <td>LMNOPQ</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.394243</td>\n",
+       "      <td>0.434347</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -89,10 +112,11 @@
       ],
       "text/plain": [
        "  sequence charge_indicators  nAA  charge_prob  charge\n",
-       "0    ABCDE            [1, 0]    5     0.693502       1\n",
-       "2   FGHIJK            [0, 1]    6     0.462107       2\n",
-       "3   LMNOPQ            [1, 1]    6     0.544402       1\n",
-       "4   LMNOPQ            [1, 1]    6     0.394243       2"
+       "0    ABCDE            [1, 0]    5     0.725907       1\n",
+       "1    HIJKL            [0, 0]    5     0.304970       2\n",
+       "2   FGHIJK            [0, 1]    6     0.376047       2\n",
+       "3   LMNOPQ            [1, 1]    6     0.611807       1\n",
+       "4   LMNOPQ            [1, 1]    6     0.434347       2"
       ]
      },
      "execution_count": null,
@@ -119,6 +143,21 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predicting with multiprocessing ...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "  0%|          | 0/1 [00:00<?, ?it/s]OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n",
+      "100%|██████████| 1/1 [00:03<00:00,  3.14s/it]\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
@@ -153,7 +192,7 @@
        "      <td>ABCDE</td>\n",
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
-       "      <td>0.186966</td>\n",
+       "      <td>0.297068</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -161,7 +200,7 @@
        "      <td>HIJKL</td>\n",
        "      <td>[0, 0]</td>\n",
        "      <td>5</td>\n",
-       "      <td>0.253555</td>\n",
+       "      <td>0.304970</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -169,7 +208,7 @@
        "      <td>FGHIJK</td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.462107</td>\n",
+       "      <td>0.376047</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -177,7 +216,7 @@
        "      <td>LMNOPQ</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.394243</td>\n",
+       "      <td>0.434347</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -185,7 +224,7 @@
        "      <td>RSTUVWXYZ</td>\n",
        "      <td>[0, 0]</td>\n",
        "      <td>9</td>\n",
-       "      <td>0.129340</td>\n",
+       "      <td>0.206633</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -194,11 +233,11 @@
       ],
       "text/plain": [
        "    sequence charge_indicators  nAA  charge_prob  charge\n",
-       "0      ABCDE            [1, 0]    5     0.186966       2\n",
-       "1      HIJKL            [0, 0]    5     0.253555       2\n",
-       "2     FGHIJK            [0, 1]    6     0.462107       2\n",
-       "3     LMNOPQ            [1, 1]    6     0.394243       2\n",
-       "4  RSTUVWXYZ            [0, 0]    9     0.129340       2"
+       "0      ABCDE            [1, 0]    5     0.297068       2\n",
+       "1      HIJKL            [0, 0]    5     0.304970       2\n",
+       "2     FGHIJK            [0, 1]    6     0.376047       2\n",
+       "3     LMNOPQ            [1, 1]    6     0.434347       2\n",
+       "4  RSTUVWXYZ            [0, 0]    9     0.206633       2"
       ]
      },
      "execution_count": null,
@@ -215,6 +254,21 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predicting with multiprocessing ...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "  0%|          | 0/1 [00:00<?, ?it/s]OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n",
+      "100%|██████████| 1/1 [00:03<00:00,  3.18s/it]\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
@@ -250,7 +304,7 @@
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.693502</td>\n",
+       "      <td>0.725907</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -258,7 +312,7 @@
        "      <td>[0, 0]</td>\n",
        "      <td>5</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.196651</td>\n",
+       "      <td>0.131404</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -266,7 +320,7 @@
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.148395</td>\n",
+       "      <td>0.126947</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -274,7 +328,7 @@
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.544402</td>\n",
+       "      <td>0.611807</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -282,7 +336,7 @@
        "      <td>[0, 0]</td>\n",
        "      <td>9</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.132826</td>\n",
+       "      <td>0.162039</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -290,11 +344,11 @@
       ],
       "text/plain": [
        "    sequence charge_indicators  nAA  charge  charge_prob\n",
-       "0      ABCDE            [1, 0]    5       1     0.693502\n",
-       "1      HIJKL            [0, 0]    5       1     0.196651\n",
-       "2     FGHIJK            [0, 1]    6       1     0.148395\n",
-       "3     LMNOPQ            [1, 1]    6       1     0.544402\n",
-       "4  RSTUVWXYZ            [0, 0]    9       1     0.132826"
+       "0      ABCDE            [1, 0]    5       1     0.725907\n",
+       "1      HIJKL            [0, 0]    5       1     0.131404\n",
+       "2     FGHIJK            [0, 1]    6       1     0.126947\n",
+       "3     LMNOPQ            [1, 1]    6       1     0.611807\n",
+       "4  RSTUVWXYZ            [0, 0]    9       1     0.162039"
       ]
      },
      "execution_count": null,
@@ -312,6 +366,21 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predicting with multiprocessing ...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "  0%|          | 0/1 [00:00<?, ?it/s]OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n",
+      "100%|██████████| 1/1 [00:03<00:00,  3.20s/it]\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
@@ -350,7 +419,7 @@
        "      <td></td>\n",
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
-       "      <td>0.794292</td>\n",
+       "      <td>0.733565</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -360,27 +429,37 @@
        "      <td></td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.654079</td>\n",
-       "      <td>2</td>\n",
+       "      <td>0.350178</td>\n",
+       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
+       "      <td>FGHIJK</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>[0, 1]</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0.650386</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
        "      <td>LMNOPQ</td>\n",
        "      <td>Oxidation@M</td>\n",
        "      <td>2</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.747879</td>\n",
+       "      <td>0.742781</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3</th>\n",
+       "      <th>4</th>\n",
        "      <td>LMNOPQ</td>\n",
        "      <td>Oxidation@M</td>\n",
        "      <td>2</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.691985</td>\n",
+       "      <td>0.617950</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -389,10 +468,11 @@
       ],
       "text/plain": [
        "  sequence         mods mod_sites charge_indicators  nAA  charge_prob  charge\n",
-       "0    ABCDE                                   [1, 0]    5     0.794292       1\n",
-       "1   FGHIJK                                   [0, 1]    6     0.654079       2\n",
-       "2   LMNOPQ  Oxidation@M         2            [1, 1]    6     0.747879       1\n",
-       "3   LMNOPQ  Oxidation@M         2            [1, 1]    6     0.691985       2"
+       "0    ABCDE                                   [1, 0]    5     0.733565       1\n",
+       "1   FGHIJK                                   [0, 1]    6     0.350178       1\n",
+       "2   FGHIJK                                   [0, 1]    6     0.650386       2\n",
+       "3   LMNOPQ  Oxidation@M         2            [1, 1]    6     0.742781       1\n",
+       "4   LMNOPQ  Oxidation@M         2            [1, 1]    6     0.617950       2"
       ]
      },
      "execution_count": null,
@@ -421,6 +501,21 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predicting with multiprocessing ...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "  0%|          | 0/1 [00:00<?, ?it/s]OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n",
+      "100%|██████████| 1/1 [00:03<00:00,  3.23s/it]\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
@@ -459,7 +554,7 @@
        "      <td></td>\n",
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
-       "      <td>0.199697</td>\n",
+       "      <td>0.244911</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -469,7 +564,7 @@
        "      <td></td>\n",
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.654079</td>\n",
+       "      <td>0.650386</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -479,7 +574,7 @@
        "      <td>2</td>\n",
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
-       "      <td>0.691985</td>\n",
+       "      <td>0.617950</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -489,7 +584,7 @@
        "      <td>3</td>\n",
        "      <td>[0, 0]</td>\n",
        "      <td>9</td>\n",
-       "      <td>0.282058</td>\n",
+       "      <td>0.228520</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -498,10 +593,10 @@
       ],
       "text/plain": [
        "    sequence         mods mod_sites charge_indicators  nAA  charge_prob  \\\n",
-       "0      ABCDE                                   [1, 0]    5     0.199697   \n",
-       "1     FGHIJK                                   [0, 1]    6     0.654079   \n",
-       "2     LMNOPQ  Oxidation@M         2            [1, 1]    6     0.691985   \n",
-       "3  RSTUVWXYZ    Phospho@T         3            [0, 0]    9     0.282058   \n",
+       "0      ABCDE                                   [1, 0]    5     0.244911   \n",
+       "1     FGHIJK                                   [0, 1]    6     0.650386   \n",
+       "2     LMNOPQ  Oxidation@M         2            [1, 1]    6     0.617950   \n",
+       "3  RSTUVWXYZ    Phospho@T         3            [0, 0]    9     0.228520   \n",
        "\n",
        "   charge  \n",
        "0       2  \n",
@@ -524,6 +619,21 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predicting with multiprocessing ...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "  0%|          | 0/1 [00:00<?, ?it/s]OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n",
+      "100%|██████████| 1/1 [00:03<00:00,  3.35s/it]\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
@@ -563,7 +673,7 @@
        "      <td>[1, 0]</td>\n",
        "      <td>5</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.794292</td>\n",
+       "      <td>0.733565</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -573,7 +683,7 @@
        "      <td>[0, 1]</td>\n",
        "      <td>6</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.191645</td>\n",
+       "      <td>0.350178</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -583,7 +693,7 @@
        "      <td>[1, 1]</td>\n",
        "      <td>6</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.747879</td>\n",
+       "      <td>0.742781</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -593,7 +703,7 @@
        "      <td>[0, 0]</td>\n",
        "      <td>9</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.188159</td>\n",
+       "      <td>0.294222</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -607,10 +717,10 @@
        "3  RSTUVWXYZ    Phospho@T         3            [0, 0]    9       1   \n",
        "\n",
        "   charge_prob  \n",
-       "0     0.794292  \n",
-       "1     0.191645  \n",
-       "2     0.747879  \n",
-       "3     0.188159  "
+       "0     0.733565  \n",
+       "1     0.350178  \n",
+       "2     0.742781  \n",
+       "3     0.294222  "
       ]
      },
      "execution_count": null,
diff --git a/peptdeep/constants/default_settings.yaml b/peptdeep/constants/default_settings.yaml
index a90c1f10..a7e7845d 100644
--- a/peptdeep/constants/default_settings.yaml
+++ b/peptdeep/constants/default_settings.yaml
@@ -100,6 +100,7 @@ model_mgr:
   predict:
     batch_size_ms2: 512
     batch_size_rt_ccs: 1024
+    batch_size_charge: 1024
     verbose: True
     multiprocessing: True
   transfer:
diff --git a/peptdeep/model/charge.py b/peptdeep/model/charge.py
index 344f56ae..74e4871a 100644
--- a/peptdeep/model/charge.py
+++ b/peptdeep/model/charge.py
@@ -29,17 +29,15 @@ def __init__(self, min_charge:int=1, max_charge:int=6):
             min_charge, max_charge+1, dtype=np.int8
         )
         self.predict_batch_size = 1024
-        self.predict_verbose = False
 
     def predict_charges_as_prob(self,
         pep_df:pd.DataFrame, 
         min_precursor_charge:int,
         max_precursor_charge:int,
     ):
-        df = self.predict(
+        df = self.predict_mp(
             pep_df.copy(), 
             batch_size=self.predict_batch_size,
-            verbose=self.predict_verbose,
         )
         df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
         df["charge"] = [self.charge_range[
@@ -64,10 +62,9 @@ def predict_prob_for_charge(self,
     ):
         if "charge" not in precursor_df.columns:
             raise KeyError("precursor_df must contain `charge` column")
-        precursor_df = self.predict(
+        precursor_df = self.predict_mp(
             precursor_df,
             batch_size=self.predict_batch_size,
-            verbose=self.predict_verbose,
         )
         precursor_df["charge_prob"] = precursor_df[["charge_probs","charge"]].apply(
             lambda x: x.iloc[0][x.iloc[1]-self.min_predict_charge], axis=1
@@ -79,10 +76,9 @@ def predict_and_clip_charges(self,
         pep_df:pd.DataFrame, 
         charge_prob_cutoff:float,
     ):
-        df = self.predict(
+        df = self.predict_mp(
             pep_df.copy(),
             batch_size=self.predict_batch_size,
-            verbose=self.predict_verbose,
         )
         df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
         df["charge"] = df.charge_prob.apply(
@@ -119,17 +115,15 @@ def __init__(self, min_charge:int=1, max_charge:int=6):
             min_charge, max_charge+1, dtype=np.int8
         )
         self.predict_batch_size = 1024
-        self.predict_verbose = False
 
     def predict_charges_as_prob(self,
         pep_df:pd.DataFrame, 
         min_precursor_charge:int,
         max_precursor_charge:int,
     ):
-        df = self.predict(
+        df = self.predict_mp(
             pep_df.copy(),
             batch_size=self.predict_batch_size,
-            verbose=self.predict_verbose,
         )
         df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
         df["charge"] = [self.charge_range[
@@ -154,10 +148,9 @@ def predict_prob_for_charge(self,
     ):
         if "charge" not in precursor_df.columns:
             raise KeyError("precursor_df must contain `charge` column")
-        precursor_df = self.predict(
+        precursor_df = self.predict_mp(
             precursor_df,
             batch_size=self.predict_batch_size,
-            verbose=self.predict_verbose,
         )
         precursor_df["charge_prob"] = precursor_df[["charge_probs","charge"]].apply(
             lambda x: x.iloc[0][x.iloc[1]-self.min_predict_charge], axis=1
@@ -169,10 +162,9 @@ def predict_and_clip_charges(self,
         pep_df:pd.DataFrame, 
         charge_prob_cutoff:float,
     ):
-        df = self.predict(
+        df = self.predict_mp(
             pep_df.copy(),
             batch_size=self.predict_batch_size,
-            verbose=self.predict_verbose,
         )
         df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
         df["charge"] = df.charge_prob.apply(
diff --git a/peptdeep/model/model_interface.py b/peptdeep/model/model_interface.py
index 9d88bc7d..f8f65b13 100644
--- a/peptdeep/model/model_interface.py
+++ b/peptdeep/model/model_interface.py
@@ -421,7 +421,7 @@ def predict_mp(self,
             return self.predict(
                 precursor_df, 
                 batch_size=batch_size,
-                verbose=False,
+                verbose=True,
                 **kwargs
             )
             
diff --git a/peptdeep/pretrained_models.py b/peptdeep/pretrained_models.py
index d119362a..cb733d54 100644
--- a/peptdeep/pretrained_models.py
+++ b/peptdeep/pretrained_models.py
@@ -317,6 +317,7 @@ def reset_by_global_settings(self,
             else:
                 self.charge_model = ChargeModelForAASeq()
             self.charge_model.load(mgr_settings['charge_model_file'])
+            self.charge_model.predict_batch_size = mgr_settings['predict']['batch_size_charge']
         self.charge_prob_cutoff = mgr_settings['charge_prob_cutoff']
         self.use_predicted_charge_in_speclib = mgr_settings['use_predicted_charge_in_speclib']
 
diff --git a/peptdeep/protein/fasta.py b/peptdeep/protein/fasta.py
index eaefd0c9..b2a9ba90 100644
--- a/peptdeep/protein/fasta.py
+++ b/peptdeep/protein/fasta.py
@@ -177,6 +177,7 @@ def add_charge(self):
         if self.model_manager.charge_model is None:
             super().add_charge()
         else:
+            print("Predicting charge states ...")
             if self.model_manager.use_predicted_charge_in_speclib:
                 self._precursor_df = self.model_manager.charge_model.predict_and_clip_charges(
                     self.precursor_df, 

From 3e868d49313af59d2bd0786039a92cdfce0c6f1b Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Tue, 30 Jan 2024 10:17:05 +0100
Subject: [PATCH 11/18] _ChargeModelInterface for common methods

---
 peptdeep/model/charge.py | 113 ++++++++++-----------------------------
 1 file changed, 27 insertions(+), 86 deletions(-)

diff --git a/peptdeep/model/charge.py b/peptdeep/model/charge.py
index 74e4871a..429a2e95 100644
--- a/peptdeep/model/charge.py
+++ b/peptdeep/model/charge.py
@@ -7,29 +7,10 @@
     ModelInterface_for_Generic_ModAASeq_MultiLabelClassification,
     Model_for_Generic_ModAASeq_BinaryClassification_Transformer,
 )
-    
-class ChargeModelForModAASeq(
-    ModelInterface_for_Generic_ModAASeq_MultiLabelClassification,
-):
-    """
-    ModelInterface for charge prediction for modified peptides
-    """
-    def __init__(self, min_charge:int=1, max_charge:int=6):
-        super().__init__(
-            num_target_values=max_charge-min_charge+1,
-            model_class=Model_for_Generic_ModAASeq_BinaryClassification_Transformer,
-            nlayers=4, hidden_dim=128, dropout=0.1
-        )
-
-        self.target_column_to_predict = "charge_probs"
-        self.target_column_to_train = "charge_indicators"
-        self.min_predict_charge = min_charge
-        self.max_predict_charge = max_charge
-        self.charge_range = np.arange(
-            min_charge, max_charge+1, dtype=np.int8
-        )
-        self.predict_batch_size = 1024
 
+class _ChargeModelInterface:
+    def __init__(self, *args, **kwargs):
+        raise TypeError("Interface class cannot be instantiated.")
     def predict_charges_as_prob(self,
         pep_df:pd.DataFrame, 
         min_precursor_charge:int,
@@ -93,9 +74,33 @@ def predict_and_clip_charges(self,
         df["charge"] = df.charge.astype(np.int8)
         df["charge_prob"] = df.charge_prob.astype(np.float32)
         return df
+    
+class ChargeModelForModAASeq(
+    ModelInterface_for_Generic_ModAASeq_MultiLabelClassification,
+    _ChargeModelInterface
+):
+    """
+    ModelInterface for charge prediction for modified peptides
+    """
+    def __init__(self, min_charge:int=1, max_charge:int=6):
+        super().__init__(
+            num_target_values=max_charge-min_charge+1,
+            model_class=Model_for_Generic_ModAASeq_BinaryClassification_Transformer,
+            nlayers=4, hidden_dim=128, dropout=0.1
+        )
+
+        self.target_column_to_predict = "charge_probs"
+        self.target_column_to_train = "charge_indicators"
+        self.min_predict_charge = min_charge
+        self.max_predict_charge = max_charge
+        self.charge_range = np.arange(
+            min_charge, max_charge+1, dtype=np.int8
+        )
+        self.predict_batch_size = 1024
 
 class ChargeModelForAASeq(
     ModelInterface_for_Generic_AASeq_MultiLabelClassification,
+    _ChargeModelInterface
 ):
     """
     ModelInterface for charge prediction for amino acid sequence
@@ -116,70 +121,6 @@ def __init__(self, min_charge:int=1, max_charge:int=6):
         )
         self.predict_batch_size = 1024
 
-    def predict_charges_as_prob(self,
-        pep_df:pd.DataFrame, 
-        min_precursor_charge:int,
-        max_precursor_charge:int,
-    ):
-        df = self.predict_mp(
-            pep_df.copy(),
-            batch_size=self.predict_batch_size,
-        )
-        df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
-        df["charge"] = [self.charge_range[
-            min_precursor_charge-self.min_predict_charge:
-            max_precursor_charge-self.min_predict_charge+1
-        ]]*len(df)
-        df["charge_prob"] = df.charge_prob.apply(
-            lambda x: x[
-                min_precursor_charge-self.min_predict_charge:
-                max_precursor_charge-self.min_predict_charge+1
-            ]
-        )
-        df = df.explode(
-            ["charge","charge_prob"], ignore_index=True
-        ).dropna(subset=["charge"])
-        df["charge"] = df.charge.astype(np.int8)
-        df["charge_prob"] = df.charge_prob.astype(np.float32)
-        return df
-    
-    def predict_prob_for_charge(self,
-        precursor_df:pd.DataFrame,
-    ):
-        if "charge" not in precursor_df.columns:
-            raise KeyError("precursor_df must contain `charge` column")
-        precursor_df = self.predict_mp(
-            precursor_df,
-            batch_size=self.predict_batch_size,
-        )
-        precursor_df["charge_prob"] = precursor_df[["charge_probs","charge"]].apply(
-            lambda x: x.iloc[0][x.iloc[1]-self.min_predict_charge], axis=1
-        ).astype(np.float32)
-        precursor_df.drop(columns="charge_probs", inplace=True)
-        return precursor_df
-
-    def predict_and_clip_charges(self, 
-        pep_df:pd.DataFrame, 
-        charge_prob_cutoff:float,
-    ):
-        df = self.predict_mp(
-            pep_df.copy(),
-            batch_size=self.predict_batch_size,
-        )
-        df.rename(columns={"charge_probs":"charge_prob"}, inplace=True)
-        df["charge"] = df.charge_prob.apply(
-            lambda x: self.charge_range[x>charge_prob_cutoff]
-        )
-        df["charge_prob"] = df.charge_prob.apply(
-            lambda x: x[x>charge_prob_cutoff]
-        )
-        df = df.explode(
-            ["charge","charge_prob"], ignore_index=True
-        ).dropna(subset=["charge"])
-        df["charge"] = df.charge.astype(np.int8)
-        df["charge_prob"] = df.charge_prob.astype(np.float32)
-        return df
-
 def group_psm_df_by_sequence(
     psm_df: pd.DataFrame,
     min_charge:int,

From b1b673ce93cce223bb059ba6743ccad26a636afe Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Wed, 31 Jan 2024 10:42:03 +0100
Subject: [PATCH 12/18] CHORE: logging for MS files

---
 peptdeep/pipeline_api.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/peptdeep/pipeline_api.py b/peptdeep/pipeline_api.py
index db91d9b4..d6849975 100644
--- a/peptdeep/pipeline_api.py
+++ b/peptdeep/pipeline_api.py
@@ -192,6 +192,15 @@ def match_psms()->Tuple[
         ms2_file_list
     )
 
+    logging.info(
+        f"{len(ms2_file_dict)} MS files for fragment extraction: \n" +
+        "\n".join([
+            f"  - {raw_name} : {_path}" for raw_name, _path 
+            in ms2_file_dict.items()
+        ])
+        + "\n"
+    )
+
     psm_df = psm_df[
         psm_df.raw_name.isin(ms2_file_dict)
     ].reset_index(drop=True)

From 00815b3b0585e7f2e671d74d1bd191c3d6095749 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Wed, 31 Jan 2024 13:49:03 +0100
Subject: [PATCH 13/18] CHORE: logging charge pred

---
 peptdeep/protein/fasta.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/peptdeep/protein/fasta.py b/peptdeep/protein/fasta.py
index b2a9ba90..7e849ef9 100644
--- a/peptdeep/protein/fasta.py
+++ b/peptdeep/protein/fasta.py
@@ -177,7 +177,7 @@ def add_charge(self):
         if self.model_manager.charge_model is None:
             super().add_charge()
         else:
-            print("Predicting charge states ...")
+            print(f"Predicting charge states for {len(self.precursor_df)} peptides ...")
             if self.model_manager.use_predicted_charge_in_speclib:
                 self._precursor_df = self.model_manager.charge_model.predict_and_clip_charges(
                     self.precursor_df, 

From c697a69f45c2ad10cd8edaa2360d453b5925c032 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Wed, 31 Jan 2024 20:22:40 +0100
Subject: [PATCH 14/18] min/max charge for charge pred in speclib

---
 nbdev_nbs/model/charge.ipynb | 14 ++++++++++++--
 peptdeep/model/charge.py     |  7 ++++++-
 peptdeep/protein/fasta.py    |  2 ++
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/nbdev_nbs/model/charge.ipynb b/nbdev_nbs/model/charge.ipynb
index 176de010..6f9fc898 100644
--- a/nbdev_nbs/model/charge.ipynb
+++ b/nbdev_nbs/model/charge.ipynb
@@ -135,7 +135,12 @@
     "    'charge_indicators': [[1,0],[0,1],[1,1],[0,0],[0,0]],\n",
     "})\n",
     "model.train(seq_df)\n",
-    "model.predict_and_clip_charges(seq_df, charge_prob_cutoff=0.3)"
+    "model.predict_and_clip_charges(\n",
+    "    seq_df, \n",
+    "    min_precursor_charge=model.min_predict_charge,\n",
+    "    max_precursor_charge=model.max_predict_charge,\n",
+    "    charge_prob_cutoff=0.3\n",
+    ")"
    ]
   },
   {
@@ -493,7 +498,12 @@
     "    'charge_indicators': [[1,0],[0,1],[1,1],[0,0]],\n",
     "})\n",
     "model.train(modseq_df)\n",
-    "model.predict_and_clip_charges(modseq_df, charge_prob_cutoff=0.3)"
+    "model.predict_and_clip_charges(\n",
+    "    modseq_df, \n",
+    "    min_precursor_charge=model.min_predict_charge,\n",
+    "    max_precursor_charge=model.max_predict_charge,\n",
+    "    charge_prob_cutoff=0.3\n",
+    ")"
    ]
   },
   {
diff --git a/peptdeep/model/charge.py b/peptdeep/model/charge.py
index 429a2e95..ba459c73 100644
--- a/peptdeep/model/charge.py
+++ b/peptdeep/model/charge.py
@@ -55,6 +55,8 @@ def predict_prob_for_charge(self,
 
     def predict_and_clip_charges(self, 
         pep_df:pd.DataFrame, 
+        min_precursor_charge:int,
+        max_precursor_charge:int,
         charge_prob_cutoff:float,
     ):
         df = self.predict_mp(
@@ -69,9 +71,12 @@ def predict_and_clip_charges(self,
             lambda x: x[x>charge_prob_cutoff]
         )
         df = df.explode(
-            ["charge","charge_prob"], ignore_index=True
+            ["charge","charge_prob"]
         ).dropna(subset=["charge"])
         df["charge"] = df.charge.astype(np.int8)
+        df = df.query(
+            f"charge>={min_precursor_charge} and charge<={max_precursor_charge}"
+        ).reset_index(drop=True)
         df["charge_prob"] = df.charge_prob.astype(np.float32)
         return df
     
diff --git a/peptdeep/protein/fasta.py b/peptdeep/protein/fasta.py
index 7e849ef9..49ccd270 100644
--- a/peptdeep/protein/fasta.py
+++ b/peptdeep/protein/fasta.py
@@ -181,6 +181,8 @@ def add_charge(self):
             if self.model_manager.use_predicted_charge_in_speclib:
                 self._precursor_df = self.model_manager.charge_model.predict_and_clip_charges(
                     self.precursor_df, 
+                    min_precursor_charge=self.min_precursor_charge,
+                    max_precursor_charge=self.max_precursor_charge,
                     charge_prob_cutoff=self.model_manager.charge_prob_cutoff
                 )
             else:

From 61b6a3fd751fa791a3febca0d8adca94bfb02605 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Wed, 31 Jan 2024 22:17:53 +0100
Subject: [PATCH 15/18] irt_pep_df for irt translation

---
 peptdeep/model/rt.py             | 21 +++++++++++++--------
 peptdeep/spec_lib/predict_lib.py |  6 ++++--
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/peptdeep/model/rt.py b/peptdeep/model/rt.py
index 9254a014..c863b125 100644
--- a/peptdeep/model/rt.py
+++ b/peptdeep/model/rt.py
@@ -162,18 +162,23 @@ def _get_features_from_batch_df(self,
         )
 
     def add_irt_column_to_precursor_df(self,
-        precursor_df: pd.DataFrame
+        precursor_df: pd.DataFrame,
+        irt_pep_df:pd.DataFrame = None,
     ):
-        print(f"Predict RT for {len(IRT_PEPTIDE_DF)} iRT precursors.")
-        self.predict(IRT_PEPTIDE_DF)
-        eval_df = evaluate_linear_regression(IRT_PEPTIDE_DF, "rt_pred", y="irt")
+        if irt_pep_df is None:
+            irt_pep_df = IRT_PEPTIDE_DF
+        print(f"Predict RT for {len(irt_pep_df)} iRT precursors.")
+        self.predict(irt_pep_df)
+        if "irt" not in irt_pep_df.columns:
+            irt_pep_df["irt"] = irt_pep_df["rt"]
+        eval_df = evaluate_linear_regression(irt_pep_df, "rt_pred", y="irt")
         print("Linear regression of `rt_pred` to `irt`:")
         print(eval_df)
         # simple linear regression
-        # rt_pred_mean = IRT_PEPTIDE_DF.rt_pred.mean()
-        # irt_mean = IRT_PEPTIDE_DF.irt.mean()
-        # x = IRT_PEPTIDE_DF.rt_pred.values - rt_pred_mean
-        # y = IRT_PEPTIDE_DF.irt.values - irt_mean
+        # rt_pred_mean = irt_pep_df.rt_pred.mean()
+        # irt_mean = irt_pep_df.irt.mean()
+        # x = irt_pep_df.rt_pred.values - rt_pred_mean
+        # y = irt_pep_df.irt.values - irt_mean
         # slope = np.sum(x*y)/np.sum(x*x)
         # intercept = irt_mean - slope*rt_pred_mean
         # end linear regression
diff --git a/peptdeep/spec_lib/predict_lib.py b/peptdeep/spec_lib/predict_lib.py
index e43068ee..b136890e 100644
--- a/peptdeep/spec_lib/predict_lib.py
+++ b/peptdeep/spec_lib/predict_lib.py
@@ -90,9 +90,11 @@ def set_precursor_and_fragment(self,
             if col not in self.charged_frag_types
         ], inplace=True)
 
-    def translate_rt_to_irt_pred(self):
+    def translate_rt_to_irt_pred(self, irt_pep_df:pd.DataFrame = None):
         """ Add 'irt_pred' into columns based on 'rt_pred' """
-        return self.model_manager.rt_model.add_irt_column_to_precursor_df(self._precursor_df)
+        return self.model_manager.rt_model.add_irt_column_to_precursor_df(
+            self._precursor_df, irt_pep_df=irt_pep_df
+        )
 
     def predict_all(self, 
         min_required_precursor_num_for_mp:int=2000,

From 17a2ad7d57e470d38e2616689965fce4aba68707 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Thu, 22 Feb 2024 22:19:13 +0100
Subject: [PATCH 16/18] FIX match_psms()

---
 peptdeep/pipeline_api.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/peptdeep/pipeline_api.py b/peptdeep/pipeline_api.py
index d6849975..46e50889 100644
--- a/peptdeep/pipeline_api.py
+++ b/peptdeep/pipeline_api.py
@@ -135,8 +135,9 @@ def match_psms()->Tuple[
 
     Returns
     -------
-    Tuple[pd.DataFrame,pd.DataFrame]
-        pd.DataFrame: the PSM DataFrame, and
+    Tuple[pd.DataFrame,pd.DataFrame,pd.DataFrame]
+        pd.DataFrame: the PSM DataFrame
+        pd.DataFrame: the fragment mz DataFrame
         pd.DataFrame: the matched fragment intensity DataFrame
     """
     mgr_settings = global_settings['model_mgr']
@@ -325,7 +326,7 @@ def transfer_learn(verbose=True):
                 dfs, frag_inten_dfs
             )
         elif len(mgr_settings['transfer']['ms_files'])>0:
-            psm_df, frag_df = match_psms()
+            psm_df, _, frag_df = match_psms()
         else:
             psm_df = import_psm_df(
                 mgr_settings['transfer']['psm_files'],

From 516ffe99016985dcaec1e5c8985cb367d3162a37 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Thu, 22 Feb 2024 22:19:27 +0100
Subject: [PATCH 17/18] =?UTF-8?q?Bump=20version:=201.1.5=20=E2=86=92=201.1?=
 =?UTF-8?q?.6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg                                          | 2 +-
 docs/conf.py                                              | 2 +-
 peptdeep/__init__.py                                      | 2 +-
 release/one_click_linux_gui/control                       | 2 +-
 release/one_click_linux_gui/create_installer_linux.sh     | 2 +-
 release/one_click_macos_gui/Info.plist                    | 4 ++--
 release/one_click_macos_gui/create_installer_macos.sh     | 4 ++--
 release/one_click_macos_gui/distribution.xml              | 2 +-
 release/one_click_windows_gui/create_installer_windows.sh | 2 +-
 release/one_click_windows_gui/peptdeep_innoinstaller.iss  | 2 +-
 settings.ini                                              | 2 +-
 11 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 7060a748..fe93a21e 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.1.5
+current_version = 1.1.6
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
diff --git a/docs/conf.py b/docs/conf.py
index e70be6ba..8b82fc4b 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -23,7 +23,7 @@
 copyright = '2022, Mann Labs, MPIB'
 author = 'Mann Labs, MPIB'
 
-release = "1.1.5"
+release = "1.1.6"
 
 # -- General configuration ---------------------------------------------------
 
diff --git a/peptdeep/__init__.py b/peptdeep/__init__.py
index 659c11c4..61c5edb3 100644
--- a/peptdeep/__init__.py
+++ b/peptdeep/__init__.py
@@ -11,7 +11,7 @@
 #     pass
 
 __project__ = "peptdeep"
-__version__ = "1.1.5"
+__version__ = "1.1.6"
 __license__ = "Apache 2.0"
 __description__ = "The AlphaX deep learning framework for Proteomics"
 __author__ = "Mann Labs"
diff --git a/release/one_click_linux_gui/control b/release/one_click_linux_gui/control
index 808fbc4f..16d6fa36 100644
--- a/release/one_click_linux_gui/control
+++ b/release/one_click_linux_gui/control
@@ -1,5 +1,5 @@
 Package: peptdeep
-Version: 1.1.5
+Version: 1.1.6
 Architecture: all
 Maintainer: Mann Labs <opensource@alphapept.com>
 Description: peptdeep
diff --git a/release/one_click_linux_gui/create_installer_linux.sh b/release/one_click_linux_gui/create_installer_linux.sh
index 12048b49..e8820f69 100644
--- a/release/one_click_linux_gui/create_installer_linux.sh
+++ b/release/one_click_linux_gui/create_installer_linux.sh
@@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel
 # Setting up the local package
 cd release/one_click_linux_gui
 # Make sure you include the required extra packages and always use the stable or very-stable options!
-pip install "../../dist/peptdeep-1.1.5-py3-none-any.whl[stable]"
+pip install "../../dist/peptdeep-1.1.6-py3-none-any.whl[stable]"
 
 if [ "$1" == "CPU" ]; then
     pip install torch -U --extra-index-url https://download.pytorch.org/whl/cpu
diff --git a/release/one_click_macos_gui/Info.plist b/release/one_click_macos_gui/Info.plist
index 12f67d9b..fd3a740b 100644
--- a/release/one_click_macos_gui/Info.plist
+++ b/release/one_click_macos_gui/Info.plist
@@ -9,9 +9,9 @@
 	<key>CFBundleIconFile</key>
 	<string>alpha_logo.icns</string>
 	<key>CFBundleIdentifier</key>
-	<string>peptdeep.1.1.5</string>
+	<string>peptdeep.1.1.6</string>
 	<key>CFBundleShortVersionString</key>
-	<string>1.1.5</string>
+	<string>1.1.6</string>
 	<key>CFBundleInfoDictionaryVersion</key>
 	<string>6.0</string>
 	<key>CFBundleName</key>
diff --git a/release/one_click_macos_gui/create_installer_macos.sh b/release/one_click_macos_gui/create_installer_macos.sh
index 5792fef5..42fcd8a0 100644
--- a/release/one_click_macos_gui/create_installer_macos.sh
+++ b/release/one_click_macos_gui/create_installer_macos.sh
@@ -20,7 +20,7 @@ python setup.py sdist bdist_wheel
 
 # Setting up the local package
 cd release/one_click_macos_gui
-pip install "../../dist/peptdeep-1.1.5-py3-none-any.whl[stable]"
+pip install "../../dist/peptdeep-1.1.6-py3-none-any.whl[stable]"
 
 # Creating the stand-alone pyinstaller folder
 pip install pyinstaller
@@ -40,5 +40,5 @@ cp ../../LICENSE.txt Resources/LICENSE.txt
 cp ../logos/alpha_logo.png Resources/alpha_logo.png
 chmod 777 scripts/*
 
-pkgbuild --root dist/peptdeep --identifier de.mpg.biochem.peptdeep.app --version 1.1.5 --install-location /Applications/peptdeep.app --scripts scripts peptdeep.pkg
+pkgbuild --root dist/peptdeep --identifier de.mpg.biochem.peptdeep.app --version 1.1.6 --install-location /Applications/peptdeep.app --scripts scripts peptdeep.pkg
 productbuild --distribution distribution.xml --resources Resources --package-path peptdeep.pkg dist/peptdeep_gui_installer_macos.pkg
diff --git a/release/one_click_macos_gui/distribution.xml b/release/one_click_macos_gui/distribution.xml
index acfb6db2..a65bc9b1 100644
--- a/release/one_click_macos_gui/distribution.xml
+++ b/release/one_click_macos_gui/distribution.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="utf-8" standalone="no"?>
 <installer-script minSpecVersion="1.000000">
-    <title>peptdeep 1.1.5</title>
+    <title>peptdeep 1.1.6</title>
     <background mime-type="image/png" file="alpha_logo.png" scaling="proportional"/>
     <welcome file="welcome.html" mime-type="text/html" />
     <conclusion file="conclusion.html" mime-type="text/html" />
diff --git a/release/one_click_windows_gui/create_installer_windows.sh b/release/one_click_windows_gui/create_installer_windows.sh
index 31658a88..f6675c8d 100644
--- a/release/one_click_windows_gui/create_installer_windows.sh
+++ b/release/one_click_windows_gui/create_installer_windows.sh
@@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel
 # Setting up the local package
 cd release/one_click_windows_gui
 # Make sure you include the required extra packages and always use the stable or very-stable options!
-pip install "../../dist/peptdeep-1.1.5-py3-none-any.whl[stable]"
+pip install "../../dist/peptdeep-1.1.6-py3-none-any.whl[stable]"
 
 # Creating the stand-alone pyinstaller folder
 pip install pyinstaller
diff --git a/release/one_click_windows_gui/peptdeep_innoinstaller.iss b/release/one_click_windows_gui/peptdeep_innoinstaller.iss
index 8e7f2e3d..7be43673 100644
--- a/release/one_click_windows_gui/peptdeep_innoinstaller.iss
+++ b/release/one_click_windows_gui/peptdeep_innoinstaller.iss
@@ -2,7 +2,7 @@
 ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
 
 #define MyAppName "peptdeep"
-#define MyAppVersion "1.1.5"
+#define MyAppVersion "1.1.6"
 #define MyAppPublisher "Max Planck Institute of Biochemistry and the University of Copenhagen, Mann Labs"
 #define MyAppURL "https://github.com/MannLabs/peptdeep"
 #define MyAppExeName "peptdeep_gui.exe"
diff --git a/settings.ini b/settings.ini
index b46b3b6e..51805e24 100644
--- a/settings.ini
+++ b/settings.ini
@@ -5,7 +5,7 @@
 ### Python library ###
 repo = alphapeptdeep
 lib_name = peptdeep
-version = 1.1.5
+version = 1.1.6
 min_python = 3.7
 license = apache2
 

From 439cee2624ff541afc1c2ac600897464c4295a8b Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Thu, 22 Feb 2024 22:27:50 +0100
Subject: [PATCH 18/18] FIX #138

---
 peptdeep/pipeline_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/peptdeep/pipeline_api.py b/peptdeep/pipeline_api.py
index 46e50889..4e500afc 100644
--- a/peptdeep/pipeline_api.py
+++ b/peptdeep/pipeline_api.py
@@ -326,7 +326,7 @@ def transfer_learn(verbose=True):
                 dfs, frag_inten_dfs
             )
         elif len(mgr_settings['transfer']['ms_files'])>0:
-            psm_df, _, frag_df = match_psms()
+            psm_df, frag_mz_df, frag_df = match_psms()
         else:
             psm_df = import_psm_df(
                 mgr_settings['transfer']['psm_files'],