Skip to content

Commit

Permalink
Merge pull request #181 from MannLabs/179-peptdeep-hla
Browse files Browse the repository at this point in the history
#179 ADD HLA module
  • Loading branch information
jalew188 authored Jul 3, 2024
2 parents 8c65e6e + 57d2aef commit 7fed088
Show file tree
Hide file tree
Showing 7 changed files with 923 additions and 8 deletions.
329 changes: 329 additions & 0 deletions nbs_tests/hla/hla_class1.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,329 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install -q pydivsufsort"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n"
]
}
],
"source": [
"# HLA class I binding classifier from peptdeep's hla module.\n",
"from peptdeep.hla.hla_class1 import HLA1_Binding_Classifier\n",
"\n",
"# Every later cell uses this `model` object, so build it once here and\n",
"# load the pretrained HLA model before any prediction is requested.\n",
"model = HLA1_Binding_Classifier()\n",
"model.load_pretrained_hla_model()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Toy protein table keyed by protein id. prot2 is a substring of prot1,\n",
"# so both entries can yield the same candidate peptides.\n",
"prot1 = 'MABCDEKFGHIJKLMNOPQRST'\n",
"prot2 = 'FGHIJKLMNOPQR'\n",
"protein_dict = {\n",
"    protein_id: {'protein_id': protein_id, 'gene_name': gene_name, 'sequence': sequence}\n",
"    for protein_id, gene_name, sequence in (('xx', '', prot1), ('yy', 'gene', prot2))\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 14.32it/s]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>start_pos</th>\n",
" <th>end_pos</th>\n",
" <th>nAA</th>\n",
" <th>HLA_prob_pred</th>\n",
" <th>sequence</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" <td>0.124847</td>\n",
" <td>MABCDEKF</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>14</td>\n",
" <td>22</td>\n",
" <td>8</td>\n",
" <td>0.040122</td>\n",
" <td>LMNOPQRS</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13</td>\n",
" <td>21</td>\n",
" <td>8</td>\n",
" <td>0.674667</td>\n",
" <td>KLMNOPQR</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>12</td>\n",
" <td>20</td>\n",
" <td>8</td>\n",
" <td>0.119722</td>\n",
" <td>JKLMNOPQ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>11</td>\n",
" <td>19</td>\n",
" <td>8</td>\n",
" <td>0.104152</td>\n",
" <td>IJKLMNOP</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>5</td>\n",
" <td>19</td>\n",
" <td>14</td>\n",
" <td>0.163758</td>\n",
" <td>DEKFGHIJKLMNOP</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>4</td>\n",
" <td>18</td>\n",
" <td>14</td>\n",
" <td>0.000618</td>\n",
" <td>CDEKFGHIJKLMNO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>3</td>\n",
" <td>17</td>\n",
" <td>14</td>\n",
" <td>0.000773</td>\n",
" <td>BCDEKFGHIJKLMN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>9</td>\n",
" <td>23</td>\n",
" <td>14</td>\n",
" <td>0.525840</td>\n",
" <td>GHIJKLMNOPQRST</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>6</td>\n",
" <td>20</td>\n",
" <td>14</td>\n",
" <td>0.156962</td>\n",
" <td>EKFGHIJKLMNOPQ</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>84 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" start_pos end_pos nAA HLA_prob_pred sequence\n",
"0 1 9 8 0.124847 MABCDEKF\n",
"1 14 22 8 0.040122 LMNOPQRS\n",
"2 13 21 8 0.674667 KLMNOPQR\n",
"3 12 20 8 0.119722 JKLMNOPQ\n",
"4 11 19 8 0.104152 IJKLMNOP\n",
".. ... ... ... ... ...\n",
"79 5 19 14 0.163758 DEKFGHIJKLMNOP\n",
"80 4 18 14 0.000618 CDEKFGHIJKLMNO\n",
"81 3 17 14 0.000773 BCDEKFGHIJKLMN\n",
"82 9 23 14 0.525840 GHIJKLMNOPQRST\n",
"83 6 20 14 0.156962 EKFGHIJKLMNOPQ\n",
"\n",
"[84 rows x 5 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Score the candidate peptides derived from the toy proteins.\n",
"# NOTE(review): prob_threshold=0.0 presumably disables score filtering so every\n",
"# candidate is returned - confirm against predict_from_proteins.\n",
"hla_pred_df = model.predict_from_proteins(protein_data=protein_dict, prob_threshold=0.0)\n",
"\n",
"# This notebook lives under nbs_tests/, so check the result instead of only\n",
"# displaying it. The invariants below match the recorded output of this cell.\n",
"expected_columns = {'start_pos', 'end_pos', 'nAA', 'HLA_prob_pred', 'sequence'}\n",
"assert expected_columns <= set(hla_pred_df.columns), hla_pred_df.columns\n",
"assert (hla_pred_df['end_pos'] - hla_pred_df['start_pos'] == hla_pred_df['nAA']).all()\n",
"assert (hla_pred_df['sequence'].str.len() == hla_pred_df['nAA']).all()\n",
"hla_pred_df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sequence</th>\n",
" <th>nAA</th>\n",
" <th>HLA_prob_pred</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>MABCDEKF</td>\n",
" <td>8</td>\n",
" <td>0.124847</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>KLMNOPQR</td>\n",
" <td>8</td>\n",
" <td>0.674667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>DEKFGHIJKLMNOP</td>\n",
" <td>14</td>\n",
" <td>0.163758</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sequence nAA HLA_prob_pred\n",
"0 MABCDEKF 8 0.124847\n",
"1 KLMNOPQR 8 0.674667\n",
"2 DEKFGHIJKLMNOP 14 0.163758"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Score a hand-written peptide list. predict_peptide_df_ is called for its effect\n",
"# on `peptide_df`: the frame displayed below carries the extra `nAA` and\n",
"# `HLA_prob_pred` columns although it was created with `sequence` only.\n",
"input_sequences = ['MABCDEKF', 'KLMNOPQR', 'DEKFGHIJKLMNOP']\n",
"peptide_df = pd.DataFrame({'sequence': input_sequences})\n",
"model.predict_peptide_df_(peptide_df=peptide_df)\n",
"\n",
"# Test-notebook checks: prediction columns were added, rows were neither dropped\n",
"# nor reordered, and the scores are valid probabilities.\n",
"assert {'nAA', 'HLA_prob_pred'} <= set(peptide_df.columns), peptide_df.columns\n",
"assert peptide_df['sequence'].tolist() == input_sequences\n",
"assert peptide_df['HLA_prob_pred'].between(0, 1).all()\n",
"peptide_df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 2 additions & 0 deletions peptdeep/constants/default_settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ model:
PEPTDEEP_HOME: "~/peptdeep" # ~ refers to user folder (e.g. C:/Users/username)

local_model_zip_name: "pretrained_models.zip"
local_hla_model_zip_name: "hla_model.zip"

# overwritable config
model_url: "https://github.com/MannLabs/alphapeptdeep/releases/download/pre-trained-models/pretrained_models.zip"
hla_model_url: "https://github.com/MannLabs/alphapeptdeep/releases/download/pre-trained-models/hla_model.zip"

task_workflow: [library]
task_choices:
Expand Down
Empty file added peptdeep/hla/__init__.py
Empty file.
Loading

0 comments on commit 7fed088

Please sign in to comment.