-
Notifications
You must be signed in to change notification settings - Fork 1
/
web.py
162 lines (126 loc) · 4.61 KB
/
web.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import json
import os
from string import Template
from flask import request, jsonify
from helpers import query, update, log, generate_uuid
from escape_helpers import sparql_escape_uri, sparql_escape_string, sparql_escape_int, sparql_escape_datetime
import pandas as pd
from .file_handler import postfile
def store_json(data):
    """
    Serialize data to a JSON dump file and register it in the triple store.

    :param data: JSON-serializable data
    :return: response from storing the dump file in the triple store,
             as returned by postfile
    """
    virtual_id = generate_uuid()
    dump_name = f"{virtual_id}.json"
    dump_path = f'/share/ai-files/{dump_name}'
    # Write the dump first, then hand the path over to the file handler.
    with open(dump_path, 'w') as out:
        json.dump(data, out)
    return postfile(dump_path, dump_name)
@app.route("/data/query", methods=["GET"])
def query_data():
    """
    Endpoint for loading data from the triple store using a query file and
    converting the result bindings to json.

    Accepted request arguments (each only honoured when the corresponding
    environment flag is set):
    - filename: filename (under /config) that contains the query
              (requires ACCEPT_FILENAME)
    - limit: amount of data retrieved per query execution, allows for
             possible pagination (requires ACCEPT_OPTIONS)
    - global_limit: total amount of items to be retrieved
                    (requires ACCEPT_OPTIONS)
    :return: response from storing data in the triple store, contains the
             virtual file id and uri
    """
    # env arguments to restrict option usage
    acceptFilename = os.environ.get('ACCEPT_FILENAME') or False
    acceptOptions = os.environ.get('ACCEPT_OPTIONS') or False
    # default filename
    filename = "/config/input.sparql"
    if acceptFilename:
        f = request.args.get("filename")
        # Reject path-traversal segments: the requested file must stay
        # inside /config (user input is untrusted).
        if f and ".." not in f:
            filename = "/config/" + f
    # default amount of items to retrieve per request
    limit = 1000
    globalLimit = float('inf')
    if acceptOptions:
        limit = int(request.args.get("limit") or 1000)
        globalLimit = float(request.args.get("global_limit") or float("inf"))
        if globalLimit < limit:
            # Cast back to int: a float page size would render as e.g.
            # "LIMIT 5.0", which is invalid SPARQL syntax.
            limit = int(globalLimit)
    # load query
    q = ""
    if os.path.isfile(filename):
        with open(filename) as f:
            q = f.read()
    else:
        return "Requested filename does not exist", 204
    # iteratively retrieve requested amount of data
    ret = {}
    if q:
        stop = False
        index = 0
        while not stop and (limit * index) <= globalLimit - 1:
            # Assume the loop is done unless this page returns any rows.
            stop = True
            offset = limit * index
            formatted = (q + f" LIMIT {limit} OFFSET {offset}")
            resp = query(formatted)["results"]["bindings"]
            # convert data to json: one list per result variable
            for val in resp:
                stop = False
                for k, v in val.items():
                    if k not in ret:
                        ret[k] = []
                    ret[k].append(v["value"])
            index += 1
    # store json data to file and in triple store
    storeResp = store_json(ret)
    return jsonify(storeResp)
@app.route("/data/file", methods=["GET"])
def file_data():
    """
    Endpoint for loading data from a csv file and converting it to json.

    Accepted request arguments (each only honoured when the corresponding
    environment flag is set):
    - filename: filename (under /share) that contains the data
              (requires ACCEPT_FILENAME)
    - columns: comma-separated csv data columns to use
               (requires ACCEPT_OPTIONS)
    :return: response from storing data in the triple store, contains the
             virtual file id and uri
    """
    # env arguments to restrict option usage
    acceptFilename = os.environ.get('ACCEPT_FILENAME') or False
    acceptOptions = os.environ.get('ACCEPT_OPTIONS') or False
    # default filename
    filename = "/share/input.csv"
    if acceptFilename:
        f = request.args.get("filename")
        # Reject path-traversal segments: the requested file must stay
        # inside /share (user input is untrusted).
        if f and ".." not in f:
            filename = "/share/" + f
    columns = None
    if acceptOptions:
        columns = request.args.get("columns") or None
    if not os.path.isfile(filename):
        return "Data inaccessible", 204
    # Load everything as strings so the json output is uniform.
    data = pd.read_csv(filename).astype(str)
    # select requested columns, all if not specified
    if columns:
        columns = columns.split(",")
        dataColumns = list(data.columns)
        for col in columns:
            if col not in dataColumns:
                return f"Invalid column {col} requested", 204
        data = data[columns]
    # one list of values per column
    ret = {}
    for col in data:
        ret[col] = data[col].tolist()
    # store json data to file and in triple store
    storeResp = store_json(ret)
    return jsonify(storeResp)
@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def catch_all(path):
    """
    Catch-all fallback for any route not handled above.

    :param path: the requested (unmatched) path
    :return: a debug message echoing the path, with a 404 status
    """
    message = 'You want path: %s' % path
    return message, 404
if __name__ == '__main__':
    # Enable Flask debug mode only when the MODE env var asks for it.
    is_development = os.environ.get('MODE') == "development"
    app.run(debug=is_development, host='0.0.0.0', port=80)