-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #181 from MannLabs/179-peptdeep-hla
#179 ADD HLA module
- Loading branch information
Showing
7 changed files
with
923 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,329 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%reload_ext autoreload\n", | ||
"%autoreload 2" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Note: you may need to restart the kernel to use updated packages.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"%pip install -q pydivsufsort" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from peptdeep.hla.hla_class1 import HLA1_Binding_Classifier\n", | ||
"\n", | ||
"model = HLA1_Binding_Classifier()\n", | ||
"model.load_pretrained_hla_model()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"prot1 = 'MABCDEKFGHIJKLMNOPQRST'\n", | ||
"prot2 = 'FGHIJKLMNOPQR'\n", | ||
"protein_dict = {\n", | ||
" 'xx': {\n", | ||
" 'protein_id': 'xx',\n", | ||
" 'gene_name': '',\n", | ||
" 'sequence': prot1\n", | ||
" },\n", | ||
" 'yy': {\n", | ||
" 'protein_id': 'yy',\n", | ||
" 'gene_name': 'gene',\n", | ||
" 'sequence': prot2\n", | ||
" }\n", | ||
"}" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"100%|██████████| 1/1 [00:00<00:00, 14.32it/s]\n" | ||
] | ||
}, | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>start_pos</th>\n", | ||
" <th>end_pos</th>\n", | ||
" <th>nAA</th>\n", | ||
" <th>HLA_prob_pred</th>\n", | ||
" <th>sequence</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>0</th>\n", | ||
" <td>1</td>\n", | ||
" <td>9</td>\n", | ||
" <td>8</td>\n", | ||
" <td>0.124847</td>\n", | ||
" <td>MABCDEKF</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
" <td>14</td>\n", | ||
" <td>22</td>\n", | ||
" <td>8</td>\n", | ||
" <td>0.040122</td>\n", | ||
" <td>LMNOPQRS</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2</th>\n", | ||
" <td>13</td>\n", | ||
" <td>21</td>\n", | ||
" <td>8</td>\n", | ||
" <td>0.674667</td>\n", | ||
" <td>KLMNOPQR</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>3</th>\n", | ||
" <td>12</td>\n", | ||
" <td>20</td>\n", | ||
" <td>8</td>\n", | ||
" <td>0.119722</td>\n", | ||
" <td>JKLMNOPQ</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>4</th>\n", | ||
" <td>11</td>\n", | ||
" <td>19</td>\n", | ||
" <td>8</td>\n", | ||
" <td>0.104152</td>\n", | ||
" <td>IJKLMNOP</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>...</th>\n", | ||
" <td>...</td>\n", | ||
" <td>...</td>\n", | ||
" <td>...</td>\n", | ||
" <td>...</td>\n", | ||
" <td>...</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>79</th>\n", | ||
" <td>5</td>\n", | ||
" <td>19</td>\n", | ||
" <td>14</td>\n", | ||
" <td>0.163758</td>\n", | ||
" <td>DEKFGHIJKLMNOP</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>80</th>\n", | ||
" <td>4</td>\n", | ||
" <td>18</td>\n", | ||
" <td>14</td>\n", | ||
" <td>0.000618</td>\n", | ||
" <td>CDEKFGHIJKLMNO</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>81</th>\n", | ||
" <td>3</td>\n", | ||
" <td>17</td>\n", | ||
" <td>14</td>\n", | ||
" <td>0.000773</td>\n", | ||
" <td>BCDEKFGHIJKLMN</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>82</th>\n", | ||
" <td>9</td>\n", | ||
" <td>23</td>\n", | ||
" <td>14</td>\n", | ||
" <td>0.525840</td>\n", | ||
" <td>GHIJKLMNOPQRST</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>83</th>\n", | ||
" <td>6</td>\n", | ||
" <td>20</td>\n", | ||
" <td>14</td>\n", | ||
" <td>0.156962</td>\n", | ||
" <td>EKFGHIJKLMNOPQ</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"<p>84 rows × 5 columns</p>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" start_pos end_pos nAA HLA_prob_pred sequence\n", | ||
"0 1 9 8 0.124847 MABCDEKF\n", | ||
"1 14 22 8 0.040122 LMNOPQRS\n", | ||
"2 13 21 8 0.674667 KLMNOPQR\n", | ||
"3 12 20 8 0.119722 JKLMNOPQ\n", | ||
"4 11 19 8 0.104152 IJKLMNOP\n", | ||
".. ... ... ... ... ...\n", | ||
"79 5 19 14 0.163758 DEKFGHIJKLMNOP\n", | ||
"80 4 18 14 0.000618 CDEKFGHIJKLMNO\n", | ||
"81 3 17 14 0.000773 BCDEKFGHIJKLMN\n", | ||
"82 9 23 14 0.525840 GHIJKLMNOPQRST\n", | ||
"83 6 20 14 0.156962 EKFGHIJKLMNOPQ\n", | ||
"\n", | ||
"[84 rows x 5 columns]" | ||
] | ||
}, | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"model.predict_from_proteins(protein_data=protein_dict, prob_threshold=0.0)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>sequence</th>\n", | ||
" <th>nAA</th>\n", | ||
" <th>HLA_prob_pred</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>0</th>\n", | ||
" <td>MABCDEKF</td>\n", | ||
" <td>8</td>\n", | ||
" <td>0.124847</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
" <td>KLMNOPQR</td>\n", | ||
" <td>8</td>\n", | ||
" <td>0.674667</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2</th>\n", | ||
" <td>DEKFGHIJKLMNOP</td>\n", | ||
" <td>14</td>\n", | ||
" <td>0.163758</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" sequence nAA HLA_prob_pred\n", | ||
"0 MABCDEKF 8 0.124847\n", | ||
"1 KLMNOPQR 8 0.674667\n", | ||
"2 DEKFGHIJKLMNOP 14 0.163758" | ||
] | ||
}, | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"peptide_df = pd.DataFrame({\n", | ||
" \"sequence\": [\"MABCDEKF\",\"KLMNOPQR\",\"DEKFGHIJKLMNOP\"]\n", | ||
"})\n", | ||
"model.predict_peptide_df_(peptide_df=peptide_df)\n", | ||
"peptide_df" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "base", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Oops, something went wrong.