-
Notifications
You must be signed in to change notification settings - Fork 0
/
merge.yaml
225 lines (224 loc) · 7.33 KB
/
merge.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
---
# Settings that apply to the merge run as a whole (not to any single source).
configuration:
  # Directory that receives the merge output.
  output_directory: data/merged
  checkpoint: false
  property_types:
    # define the type for non-canonical node/edge properties
    # Every property listed below is declared as xsd:float.
    combined_score: 'xsd:float'
    confidence_score: 'xsd:float'
    neighborhood: 'xsd:float'
    neighborhood_transferred: 'xsd:float'
    fusion: 'xsd:float'
    # NOTE(review): spelled with a single "r"; presumably this matches the
    # column name in the input data -- verify before renaming.
    cooccurence: 'xsd:float'
    homology: 'xsd:float'
    coexpression: 'xsd:float'
    coexpression_transferred: 'xsd:float'
    experiments: 'xsd:float'
    experiments_transferred: 'xsd:float'
    database: 'xsd:float'
    database_transferred: 'xsd:float'
    textmining: 'xsd:float'
    textmining_transferred: 'xsd:float'
# Definition of the merged graph: the input sources to combine, the
# operations to run on the merged result, and the output destinations.
merged_graph:
  name: IDG graph

  # One entry per input source. Each entry has a display name and an `input`
  # section listing a nodes TSV and an edges TSV under data/transformed/.
  source:
    atc:
      name: "Anatomical Therapeutic Chemical Classifications"
      input:
        format: tsv
        filename:
          - data/transformed/atc/atc-classes_nodes.tsv
          - data/transformed/atc/atc-classes_edges.tsv
    drug-central-dti:
      name: "DrugCentral Drug-Target Interactions"
      input:
        format: tsv
        filename:
          - data/transformed/drug_central/drugcentral-dti_nodes.tsv
          - data/transformed/drug_central/drugcentral-dti_edges.tsv
    drug-central-atc:
      name: "DrugCentral ATC Codes and Drug Dosages"
      input:
        format: tsv
        filename:
          - data/transformed/drug_central/drugcentral-atc_ddd_nodes.tsv
          - data/transformed/drug_central/drugcentral-atc_ddd_edges.tsv
    drug-central-reference:
      name: "DrugCentral Drug References"
      input:
        format: tsv
        filename:
          - data/transformed/drug_central/drugcentral-reference_nodes.tsv
          - data/transformed/drug_central/drugcentral-reference_edges.tsv
    drug-central-property:
      name: "DrugCentral Drug Properties and References (inc. mentions)"
      input:
        format: tsv
        filename:
          - data/transformed/drug_central/drugcentral-property_nodes.tsv
          - data/transformed/drug_central/drugcentral-property_edges.tsv
    drug-central-approval:
      name: "DrugCentral Drug Regulatory Approval Status"
      input:
        format: tsv
        filename:
          - data/transformed/drug_central/drugcentral-approval_nodes.tsv
          - data/transformed/drug_central/drugcentral-approval_edges.tsv
    drug-central-identifier:
      name: "DrugCentral Drug Identifiers"
      input:
        format: tsv
        filename:
          - data/transformed/drug_central/drugcentral-identifier_nodes.tsv
          - data/transformed/drug_central/drugcentral-identifier_edges.tsv
    drug-central-indications:
      name: "DrugCentral Indications and Other Relationships"
      input:
        format: tsv
        filename:
          - data/transformed/drug_central/drugcentral-omop_relationship_nodes.tsv
          - data/transformed/drug_central/drugcentral-omop_relationship_edges.tsv
    gocams:
      name: "Gene Ontology Causal Activity Modeling"
      input:
        format: tsv
        filename:
          - data/transformed/gocams/gocams-fix-nodes_nodes.tsv
          - data/transformed/gocams/gocams-fix-edges_edges.tsv
    hpa:
      # NOTE(review): the resource is usually called the "Human Protein
      # Atlas"; confirm whether anything depends on this display name
      # before correcting it.
      name: "Human Proteome Atlas"
      input:
        format: tsv
        filename:
          - data/transformed/hpa/hpa-data_nodes.tsv
          - data/transformed/hpa/hpa-data_edges.tsv
    omim:
      name: "OMIM Gene to Disease relationships"
      input:
        format: tsv
        filename:
          - data/transformed/omim/omim_nodes.tsv
          - data/transformed/omim/omim_edges.tsv
    chebi:
      name: "Chemical Entities of Biological Interest"
      input:
        format: tsv
        filename:
          - data/transformed/ontologies/chebi_kgx_tsv_nodes.tsv
          - data/transformed/ontologies/chebi_kgx_tsv_edges.tsv
    go:
      name: "Gene Ontology"
      input:
        format: tsv
        filename:
          - data/transformed/ontologies/go_kgx_tsv_nodes.tsv
          - data/transformed/ontologies/go_kgx_tsv_edges.tsv
    hp:
      name: "Human Phenotype Ontology"
      input:
        format: tsv
        filename:
          - data/transformed/ontologies/hp_kgx_tsv_nodes.tsv
          - data/transformed/ontologies/hp_kgx_tsv_edges.tsv
    mondo:
      name: "Mondo Disease Ontology"
      input:
        format: tsv
        filename:
          - data/transformed/ontologies/mondo_kgx_tsv_nodes.tsv
          - data/transformed/ontologies/mondo_kgx_tsv_edges.tsv
    ogms:
      name: "Ontology for General Medical Science"
      input:
        format: tsv
        filename:
          - data/transformed/ontologies/ogms_kgx_tsv_nodes.tsv
          - data/transformed/ontologies/ogms_kgx_tsv_edges.tsv
    orphanet-gene:
      name: "OrphaNet gene to disease"
      input:
        format: tsv
        filename:
          - data/transformed/orphanet/orphanet_gene_nodes.tsv
          - data/transformed/orphanet/orphanet_gene_edges.tsv
    orphanet-phenotype:
      name: "OrphaNet phenotype to disease"
      input:
        format: tsv
        filename:
          - data/transformed/orphanet/orphanet_pheno_nodes.tsv
          - data/transformed/orphanet/orphanet_pheno_edges.tsv
    reactomepathways:
      name: "Reactome - all pathways"
      input:
        format: tsv
        filename:
          - data/transformed/reactome/reactomepathways_nodes.tsv
          - data/transformed/reactome/reactomepathways_edges.tsv
    reactomepathwaysrelation:
      name: "Reactome - relationships between pathways"
      input:
        format: tsv
        filename:
          - data/transformed/reactome/reactomepathwaysrelation_nodes.tsv
          - data/transformed/reactome/reactomepathwaysrelation_edges.tsv
    chebi2reactome:
      name: "Reactome - CHEBI maps"
      input:
        format: tsv
        filename:
          - data/transformed/reactome/chebi2reactome_nodes.tsv
          - data/transformed/reactome/chebi2reactome_edges.tsv
    uniprot2reactome:
      name: "Reactome - UniProtKB maps"
      input:
        format: tsv
        filename:
          - data/transformed/reactome/uniprot2reactome_nodes.tsv
          - data/transformed/reactome/uniprot2reactome_edges.tsv
    tcrd-ids:
      name: "Target Central Resource Database - IDs"
      input:
        format: tsv
        filename:
          - data/transformed/tcrd/tcrd-ids_nodes.tsv
          - data/transformed/tcrd/tcrd-ids_edges.tsv
    # Disabled source, kept for reference.
    # tcrd-protein:
    #   name: "Target Central Resource Database - Proteins"
    #   input:
    #     format: tsv
    #     filename:
    #       - data/transformed/tcrd/tcrd-protein_nodes.tsv
    #       - data/transformed/tcrd/tcrd-protein_edges.tsv
    string:
      name: "STRING"
      input:
        format: tsv
        # NOTE(review): these names carry a doubled ".tsv_nodes.tsv" /
        # ".tsv_edges.tsv" suffix, unlike every other source; confirm they
        # match the files the transform step actually writes.
        filename:
          - data/transformed/string/string_nodes.tsv_nodes.tsv
          - data/transformed/string/string_edges.tsv_edges.tsv
    # Disabled source, kept for reference.
    # upheno2:
    #   name: "uPheno2"
    #   input:
    #     format: tsv
    #     filename:
    #       - data/transformed/upheno2/upheno2_edges.tsv
    #       - data/transformed/upheno2/upheno2_nodes.tsv

  # Operations applied to the merged graph. The single entry calls
  # generate_graph_stats and writes merged_graph_stats.yaml, faceted on the
  # properties listed below.
  operations:
    - name: kgx.graph_operations.summarize_graph.generate_graph_stats
      args:
        # NOTE(review): "IDG Graph" here vs "IDG graph" in merged_graph.name
        # differ only in case; confirm whether that is intentional.
        graph_name: IDG Graph
        filename: merged_graph_stats.yaml
        node_facet_properties:
          - provided_by
        edge_facet_properties:
          - provided_by
          - source

  # Output targets for the merged graph. Only the TSV target is active.
  destination:
    merged-kg-tsv:
      format: tsv
      filename: merged-kg
    # merged-kg-nt:
    #   format: nt
    #   compression: gz
    #   filename: KG-IDG.nt.gz