Skip to content

Commit

Permalink
Replace settings.OPENPREDICT_DATA_DIR with the get_openpredict_dir() function
Browse files Browse the repository at this point in the history
  • Loading branch information
vemonet committed Jul 25, 2023
1 parent 9684b6c commit 98de256
Show file tree
Hide file tree
Showing 15 changed files with 305 additions and 55 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/publish-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ name: Publish package
# Publish to PyPI when a new release is created on GitHub, if tests pass
on:
workflow_dispatch:
release:
types: [created]
# release:
# types: [created]

jobs:

Expand Down
2 changes: 1 addition & 1 deletion src/drkg_model/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import os

from trapi_predict_kit.utils import get_openpredict_dir
from openpredict_model.utils import get_openpredict_dir


# Downloading the 500M kgpredict external dependency to avoid having to commit it to DVC
Expand Down
3 changes: 2 additions & 1 deletion src/drkg_model/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
import pandas as pd
import torch as th
import torch.nn.functional as fn
from trapi_predict_kit.utils import get_entities_labels

from drkg_model.download import download
from trapi_predict_kit.utils import get_entities_labels, get_openpredict_dir
from openpredict_model.utils import get_openpredict_dir

# Predict drug repurposing based on the DRKG (drug repurposing KG) by Arif Yilmaz
# TODO: cannot be integrated into TRAPI because it returns negative scores
Expand Down
2 changes: 1 addition & 1 deletion src/openpredict_model/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@

from fastapi import APIRouter, File, Query, UploadFile
from rdflib import Graph
from trapi_predict_kit.rdf_utils import retrieve_features, retrieve_models

from openpredict_model.evidence_path.predict import do_evidence_path
from openpredict_model.explain_shap.explain_shap import get_explanations
from openpredict_model.train import add_embedding
from openpredict_model.utils import retrieve_features, retrieve_models


class FeatureTypesDrugs(str, Enum):
Expand Down
14 changes: 7 additions & 7 deletions src/openpredict_model/evidence_path/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
import pandas as pd
from gensim.models import KeyedVectors

from trapi_predict_kit.config import settings
from openpredict_model.evidence_path.train import getQuantiles, path_weight_product
from openpredict_model.utils import get_openpredict_dir

## Evidence path for OpenPredict model by Elif

df_op = pd.read_csv(f"{settings.OPENPREDICT_DATA_DIR}/resources/openpredict-omim-drug.csv")
df_op = pd.read_csv(f"{get_openpredict_dir()}/resources/openpredict-omim-drug.csv")

drug_fp_vectors = KeyedVectors.load_word2vec_format(
f'{settings.OPENPREDICT_DATA_DIR}/embedding/drugs_fp_embed.txt', binary=False)
f'{get_openpredict_dir()}/embedding/drugs_fp_embed.txt', binary=False)
disease_hp_vectors = KeyedVectors.load_word2vec_format(
f'{settings.OPENPREDICT_DATA_DIR}/embedding/disease_hp_embed.txt', binary=False)
f'{get_openpredict_dir()}/embedding/disease_hp_embed.txt', binary=False)

df_op = df_op.rename(columns={'omimid': 'disease_id', 'drugid': 'drug_id'})
df_op.disease_id = df_op.disease_id.astype(str)
Expand All @@ -32,7 +32,7 @@ def generate_paths_for_apair(drug, disease, drug_emb_vectors, disease_emb_vector
(threshold_drug,threshold_disease) =getQuantiles(drug_emb_vectors, disease_emb_vectors, threshold_drugs)

if(features_drug is not None) :
filtered_embedding_drugs = KeyedVectors.load_word2vec_format(f'{settings.OPENPREDICT_DATA_DIR}/evidence-path-model/feature_{str(features_drug)}.txt', binary=False)
filtered_embedding_drugs = KeyedVectors.load_word2vec_format(f'{get_openpredict_dir()}/evidence-path-model/feature_{str(features_drug)}.txt', binary=False)
similarDrugs = filtered_embedding_drugs.most_similar(drug, topn=100)
(threshold_drug,threshold_disease) =getQuantiles(filtered_embedding_drugs, disease_emb_vectors, threshold_drugs)
else :
Expand All @@ -54,9 +54,9 @@ def generate_paths_for_apair(drug, disease, drug_emb_vectors, disease_emb_vector
(threshold_drug,threshold_disease) =getQuantiles(drug_emb_vectors, disease_emb_vectors, threshold_diseases)


# TODO: USE settings.OPENPREDICT_DATA_DIR instead of lucky relative path
# TODO: use get_openpredict_dir() instead of a lucky relative path (the loader below already does; the commented-out relative path can be removed)
if(features_disease is not None) :
filtered_embedding_diseases = KeyedVectors.load_word2vec_format(f'{settings.OPENPREDICT_DATA_DIR}/evidence-path-model/feature_{str(features_disease)}.txt', binary=False)
filtered_embedding_diseases = KeyedVectors.load_word2vec_format(f'{get_openpredict_dir()}/evidence-path-model/feature_{str(features_disease)}.txt', binary=False)
# filtered_embedding_diseases = KeyedVectors.load_word2vec_format(f'openpredict/data/embedding/feature_specific_embeddings_KG/feature_{str(features_disease)}.txt', binary=False)
similarDiseases = filtered_embedding_diseases.most_similar(disease, topn=100)
(threshold_drug,threshold_disease) =getQuantiles(drug_fp_vectors, filtered_embedding_diseases, threshold_diseases)
Expand Down
7 changes: 3 additions & 4 deletions src/openpredict_model/evidence_path/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,15 @@
import pandas as pd
from gensim.models import KeyedVectors

from trapi_predict_kit.config import settings
from openpredict_model.utils import load_features_embeddings
from openpredict_model.utils import load_features_embeddings, get_openpredict_dir

## Evidence path for OpenPredict model by Elif


drug_fp_vectors = KeyedVectors.load_word2vec_format(
f'{settings.OPENPREDICT_DATA_DIR}/embedding/drugs_fp_embed.txt', binary=False)
f'{get_openpredict_dir()}/embedding/drugs_fp_embed.txt', binary=False)
disease_hp_vectors = KeyedVectors.load_word2vec_format(
f'{settings.OPENPREDICT_DATA_DIR}/embedding/disease_hp_embed.txt', binary=False)
f'{get_openpredict_dir()}/embedding/disease_hp_embed.txt', binary=False)


###############################################
Expand Down
3 changes: 2 additions & 1 deletion src/openpredict_model/explain_shap/explain_shap.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
import pandas as pd
import shap as shap
import sklearn
from trapi_predict_kit.utils import get_entities_labels

from trapi_predict_kit.utils import get_entities_labels, get_openpredict_dir
from openpredict_model.predict import query_omim_drugbank_classifier
from openpredict_model.utils import get_openpredict_dir

# XPREDICT framework may be used to add explanation features to drug repositioning applications

Expand Down
4 changes: 2 additions & 2 deletions src/openpredict_model/generate_disease_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np
import pandas as pd

from trapi_predict_kit.config import settings
from openpredict_model.utils import get_openpredict_dir

# Standalone script to generate diseases features

Expand All @@ -16,7 +16,7 @@ def fasta2seq(lines):

def download():
"""Download a jar file required for the processing in data/lib/"""
if not os.path.exists(settings.OPENPREDICT_DATA_DIR / "lib" / "sml-toolkit-0.9.jar" ):
if not os.path.exists(get_openpredict_dir() / "lib" / "sml-toolkit-0.9.jar" ):
print("sml-toolkit-0.9.jar not present, downloading it")
try:
os.system(f'mkdir -p data/lib')
Expand Down
5 changes: 2 additions & 3 deletions src/openpredict_model/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@
import pandas as pd

from trapi_predict_kit import load
from trapi_predict_kit.config import settings
from trapi_predict_kit.decorators import trapi_predict
from trapi_predict_kit.predict_output import PredictOptions, PredictOutput
from trapi_predict_kit.utils import get_entities_labels, get_entity_types, log
from openpredict_model.train import createFeaturesSparkOrDF
from openpredict_model.utils import load_features_embeddings, similarity_embeddings
from openpredict_model.utils import load_features_embeddings, similarity_embeddings, get_openpredict_dir

trapi_nodes = {
"biolink:Disease": {
Expand Down Expand Up @@ -151,7 +150,7 @@ def query_omim_drugbank_classifier(input_curie, model_id):

# TODO: should we update this file too when we create new runs?
drugDiseaseKnown = pd.read_csv(
os.path.join(settings.OPENPREDICT_DATA_DIR, 'resources', 'openpredict-omim-drug.csv'), delimiter=',')
os.path.join(get_openpredict_dir(), 'resources', 'openpredict-omim-drug.csv'), delimiter=',')
drugDiseaseKnown.rename(
columns={'drugid': 'Drug', 'omimid': 'Disease'}, inplace=True)
drugDiseaseKnown.Disease = drugDiseaseKnown.Disease.astype(str)
Expand Down
21 changes: 11 additions & 10 deletions src/openpredict_model/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,18 @@

from trapi_predict_kit import save
from trapi_predict_kit.config import settings
from trapi_predict_kit.rdf_utils import get_run_id
from trapi_predict_kit.utils import log

from openpredict_model.utils import get_run_id, get_openpredict_dir

cli = typer.Typer(help="Training for OpenPredict model")



# @is_fairstep(label='Prepare known drug-disease associations dictionary')
def get_known_associations():
drugDiseaseKnown = pd.read_csv(
os.path.join(settings.OPENPREDICT_DATA_DIR, 'resources', 'openpredict-omim-drug.csv'),
os.path.join(get_openpredict_dir(), 'resources', 'openpredict-omim-drug.csv'),
delimiter=','
)
drugDiseaseKnown.rename(
Expand All @@ -46,20 +47,20 @@ def get_drug_disease_features(from_model_id: str):
drugfeatfiles = ['drugs-fingerprint-sim.csv', 'drugs-se-sim.csv',
'drugs-ppi-sim.csv', 'drugs-target-go-sim.csv', 'drugs-target-seq-sim.csv']
diseasefeatfiles = ['diseases-hpo-sim.csv', 'diseases-pheno-sim.csv']
drugfeatfiles = [os.path.join(settings.OPENPREDICT_DATA_DIR,
drugfeatfiles = [os.path.join(get_openpredict_dir(),
baseline_features_folder, fn) for fn in drugfeatfiles]
diseasefeatfiles = [os.path.join(settings.OPENPREDICT_DATA_DIR,
diseasefeatfiles = [os.path.join(get_openpredict_dir(),
baseline_features_folder, fn) for fn in diseasefeatfiles]
# baseline_features_folder = "data/baseline_features/"
# TODO: Translator IDs version (MONDO & CHEBI)
drug_df, disease_df = mergeFeatureMatrix(
drugfeatfiles, diseasefeatfiles)
else:
print(type(from_model_id))
print(f"📥 Loading the features tensor from {settings.OPENPREDICT_DATA_DIR}/features/{str(from_model_id)}_features.pickle")
print(f"📥 Loading the features tensor from {get_openpredict_dir()}/features/{str(from_model_id)}_features.pickle")

(drug_df, disease_df) = pickle.load(open(
f"{settings.OPENPREDICT_DATA_DIR}/features/{str(from_model_id)}_features.pickle",
f"{get_openpredict_dir()}/features/{str(from_model_id)}_features.pickle",
"rb"
))
print("Drug Features ", drug_df.columns.levels[0])
Expand Down Expand Up @@ -656,7 +657,7 @@ def train_model(from_model_id: str = 'openpredict_baseline'):
# pickle.dump(
# (drug_df, disease_df),
# open(
# f"{settings.OPENPREDICT_DATA_DIR}/features/{from_model_id}_features.pickle",
# f"{get_openpredict_dir()}/features/{from_model_id}_features.pickle",
# "wb"
# )
# )
Expand Down Expand Up @@ -708,9 +709,9 @@ def add_embedding(
# TODO: now also save the feature dataframe for each run to be able to add embedding to any run?
# Or can we just use the models/run_id.pickle file instead of having 2 files for 1 run?
print('📥 Loading features file: ' +
f"{settings.OPENPREDICT_DATA_DIR}/features/{from_model_id}_features.pickle")
f"{get_openpredict_dir()}/features/{from_model_id}_features.pickle")
(drug_df, disease_df) = pickle.load(open(
f"{settings.OPENPREDICT_DATA_DIR}/features/{from_model_id}_features.pickle",
f"{get_openpredict_dir()}/features/{from_model_id}_features.pickle",
"rb"
))

Expand Down Expand Up @@ -761,7 +762,7 @@ def add_embedding(
pickle.dump(
(drug_df, disease_df),
open(
f"{settings.OPENPREDICT_DATA_DIR}/features/{run_id}_features.pickle",
f"{get_openpredict_dir()}/features/{run_id}_features.pickle",
"wb"
)
)
Expand Down
Loading

0 comments on commit 98de256

Please sign in to comment.