Skip to content

Commit

Permalink
Import prometheus client to collect metrics
Browse files Browse the repository at this point in the history
Signed-off-by: Kaiyuan Hu <kaiyuan.hu@zilliz.com>
  • Loading branch information
Chiiizzzy committed Aug 16, 2023
1 parent 5da7d04 commit 3cd3e9f
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 6 deletions.
Binary file added .coverage
Binary file not shown.
103 changes: 97 additions & 6 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import argparse
import os
import argparse
import time

import uvicorn
from fastapi import FastAPI, UploadFile
from fastapi.encoders import jsonable_encoder
from prometheus_client import start_http_server, Counter, Histogram, Gauge

from config import TEMP_DIR


os.makedirs(TEMP_DIR, exist_ok=True)

# Specify mode
Expand All @@ -31,17 +32,52 @@
app = FastAPI()
origins = ['*']

# Define metrics
# Raw latency samples in seconds, used by the handlers to compute running
# averages: one list for all requests plus one per instrumented endpoint.
# NOTE(review): the visible handlers only ever append to these lists, so they
# grow for the lifetime of the process — confirm that is acceptable.
latencies, answer_latencies, add_latencies, drop_latencies = [], [], [], []

# Service-wide request counters.
requests_total = Counter(
    'requests_total',
    'Cumulative requests',
)
requests_success_total = Counter(
    'requests_success_total',
    'Cumulative successful requests',
)
requests_failed_total = Counter(
    'requests_failed_total',
    'Cumulative failed requests',
)

# The same counters, broken down by the 'endpoint' label.
endpoint_requests_total = Counter(
    'endpoint_requests_total',
    'Cumulative requests of each endpoint',
    labelnames=['endpoint'],
)
endpoint_requests_success_total = Counter(
    'endpoint_requests_success_total',
    'Cumulative successful requests of each endpoint',
    labelnames=['endpoint'],
)
endpoint_requests_failed_total = Counter(
    'endpoint_requests_failed_total',
    'Cumulative failed requests of each endpoint',
    labelnames=['endpoint'],
)

# Latency distributions: overall and per endpoint.
process_latency_seconds = Histogram(
    'process_latency_seconds',
    'Request process latency histogram',
)
endpoint_process_latency_seconds = Histogram(
    'endpoint_process_latency_seconds',
    'Request process latency histogram of each endpoint',
    labelnames=['endpoint'],
)

# Running mean latency: overall and per endpoint.
average_process_latency_seconds = Gauge(
    'average_process_latency_seconds',
    'Average request process latency',
)
endpoint_average_process_latency_seconds = Gauge(
    'endpoint_average_process_latency_seconds',
    'Average Request process latency of each endpoint',
    labelnames=['endpoint'],
)


@app.get('/')
def check_api():
    """Health-check endpoint that also records request metrics.

    Increments the global and per-endpoint request counters, builds the
    response, then increments the matching success counters.

    Returns:
        A ``(payload, 200)`` tuple whose payload is the JSON-encoded dict
        ``{'status': True, 'msg': 'ok'}``.
    """
    # NOTE(review): FastAPI does not interpret the second tuple element as an
    # HTTP status code; confirm clients expect this tuple-shaped body.
    requests_total.inc()
    endpoint_requests_total.labels('/').inc()

    res = jsonable_encoder({'status': True, 'msg': 'ok'}), 200

    requests_success_total.inc()
    endpoint_requests_success_total.labels('/').inc()

    return res

@app.get('/answer')
def do_answer_api(session_id: str, project: str, question: str):
# Answers `question` by calling chat() with the session and project, records
# request/latency metrics, and returns a (payload, status) tuple: status 200 on
# success, 400 when anything inside the try block raises.
try:
begin = time.time()
requests_total.inc()
endpoint_requests_total.labels('/answer').inc()

new_question, final_answer = chat(session_id=session_id,
project=project, question=question)
assert isinstance(final_answer, str)
# NOTE(review): the next two lines look like the pre-change and post-change
# forms of the same statement as rendered by the diff view; only the
# `res = ...` form lets the metric code further down run — confirm.
return jsonable_encoder({
res = jsonable_encoder({
'status': True,
'msg': final_answer,
'debug': {
# NOTE(review): collapsed diff hunk below; the lines it hides are not visible here.
Expand All @@ -50,12 +86,31 @@ def do_answer_api(session_id: str, project: str, question: str):
'answer': final_answer,
}
}), 200

requests_success_total.inc()
endpoint_requests_success_total.labels('/answer').inc()
end = time.time()
latencies.append(end - begin)
answer_latencies.append(end - begin)
process_latency_seconds.observe(end - begin)
endpoint_process_latency_seconds.labels('/answer').observe(end - begin)
average_process_latency_seconds.set(sum(latencies) / len(latencies))
# NOTE(review): `.label('answer')` differs from the `.labels('/answer')` form
# used for every other metric in this handler. prometheus_client metrics expose
# `labels`, so this call presumably raises AttributeError and sends an otherwise
# successful request into the except branch below — confirm and fix.
endpoint_average_process_latency_seconds.label('answer').set(sum(answer_latencies) / len(answer_latencies))

return res
except Exception as e: # pylint: disable=W0703
# Any failure above (including the isinstance assert) is counted as a failed
# request and reported to the caller with the exception text.
requests_failed_total.inc()
endpoint_requests_failed_total.labels('/answer').inc()

return jsonable_encoder({'status': False, 'msg': f'Failed to answer question:\n{e}', 'code': 400}), 400


@app.post('/project/add')
def do_project_add_api(project: str, url: str = None, file: UploadFile = None):
# Adds a document to `project` from either a URL or an uploaded file, records
# request/latency metrics, and returns a (payload, status) tuple: status 200 on
# success, 400 when anything inside the try block raises.
begin = time.time()
requests_total.inc()
endpoint_requests_total.labels('/project/add').inc()

# NOTE(review): this assert runs before the try block, so a request with neither
# url nor file raises AssertionError out of the handler instead of returning the
# 400 payload; requests_total is already incremented by then but no failure
# counter is. Asserts are also removed under `python -O`.
assert url or file, 'You need to upload file or enter url of document to add data.'
try:
if url:
# NOTE(review): collapsed diff hunk below; the lines it hides are not visible here.
Expand All @@ -66,20 +121,56 @@ def do_project_add_api(project: str, url: str = None, file: UploadFile = None):
content = file.file.read()
f.write(content)
num = insert(data_src=temp_file, project=project, source_type='file')
# NOTE(review): the next two lines look like the pre-change and post-change
# forms of the same statement as rendered by the diff view; only the
# `res = ...` form lets the metric code further down run — confirm.
return jsonable_encoder({'status': True, 'msg': f'Successfully inserted doc chunks: {num}'}), 200
res = jsonable_encoder({'status': True, 'msg': f'Successfully inserted doc chunks: {num}'}), 200

requests_success_total.inc()
endpoint_requests_success_total.labels('/project/add').inc()
end = time.time()
latencies.append(end - begin)
add_latencies.append(end - begin)
process_latency_seconds.observe(end - begin)
endpoint_process_latency_seconds.labels('/project/add').observe(end - begin)
average_process_latency_seconds.set(sum(latencies) / len(latencies))
endpoint_average_process_latency_seconds.labels('/project/add').set(sum(add_latencies) / len(add_latencies))

return res
except Exception as e: # pylint: disable=W0703
# Any failure inside the try block is counted as a failed request and reported
# to the caller with the exception text.
requests_failed_total.inc()
endpoint_requests_failed_total.labels('/project/add').inc()

return jsonable_encoder({'status': False, 'msg': f'Failed to load data:\n{e}'}), 400


@app.post('/project/drop')
def do_project_drop_api(project: str):
    """Drop a project's data and record request metrics for the endpoint.

    Calls ``drop(project=project)``, then updates the success counters, the
    latency histograms and the running-average gauges for ``/project/drop``.

    Args:
        project: Name of the project to drop; forwarded to ``drop``.

    Returns:
        A ``(payload, status)`` tuple. On success the payload is
        ``{'status': True, 'msg': 'Dropped project: <project>'}`` with status
        200. If anything inside the ``try`` raises, the failure counters are
        incremented and the payload is ``{'status': False, 'msg': ...}`` with
        status 400.
    """
    endpoint = '/project/drop'
    try:
        begin = time.time()
        requests_total.inc()
        # Use this endpoint's own label so drop requests are not attributed
        # to '/project/add'.
        endpoint_requests_total.labels(endpoint).inc()

        # Drop data in vector db
        drop(project=project)
        res = jsonable_encoder({'status': True, 'msg': f'Dropped project: {project}'}), 200

        requests_success_total.inc()
        endpoint_requests_success_total.labels(endpoint).inc()

        # Measure once and reuse the value so every metric sees the same sample.
        elapsed = time.time() - begin
        latencies.append(elapsed)
        drop_latencies.append(elapsed)
        process_latency_seconds.observe(elapsed)
        endpoint_process_latency_seconds.labels(endpoint).observe(elapsed)
        average_process_latency_seconds.set(sum(latencies) / len(latencies))
        endpoint_average_process_latency_seconds.labels(endpoint).set(sum(drop_latencies) / len(drop_latencies))

        return res
    except Exception as e:  # pylint: disable=W0703
        # Best-effort boundary: report the failure to the caller instead of
        # letting the exception propagate out of the handler.
        requests_failed_total.inc()
        endpoint_requests_failed_total.labels(endpoint).inc()

        return jsonable_encoder({'status': False, 'msg': f'Failed to drop project:\n{e}'}), 400


if __name__ == '__main__':
    # Serve the Prometheus metrics on their own port (8901), then start the
    # API server on port 8900.
    start_http_server(port=8901)
    uvicorn.run(app, host='0.0.0.0', port=8900)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ uvicorn
towhee>=1.1.0
pymilvus
elasticsearch>=8.0.0
prometheus-client

0 comments on commit 3cd3e9f

Please sign in to comment.