From c9e8c6a81e635cdb4a1d40407f257bb94cc176ed Mon Sep 17 00:00:00 2001 From: Indrajit Bhosale Date: Mon, 14 Oct 2024 00:47:26 -0700 Subject: [PATCH 01/11] TC for Metric P0 nv_load_time per model --- qa/L0_metrics/general_metrics_test.py | 79 +++++++++++++++++++++++++++ qa/L0_metrics/test.sh | 31 ++++++++++- 2 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 qa/L0_metrics/general_metrics_test.py diff --git a/qa/L0_metrics/general_metrics_test.py b/qa/L0_metrics/general_metrics_test.py new file mode 100644 index 0000000000..adbc78197f --- /dev/null +++ b/qa/L0_metrics/general_metrics_test.py @@ -0,0 +1,79 @@ +#!/usr/bin/python +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import re +import unittest +import requests + +_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost") +MODEL_LOAD_TIME = "nv_model_load_time{model=" + +def get_model_load_times(): + r = requests.get(f"http://{_tritonserver_ipaddr}:8002/metrics") + r.raise_for_status() + pattern = re.compile(rf'{MODEL_LOAD_TIME}"(.*?)".*?\ (\d+\.\d+)') + model_load_times = {} + matches = pattern.findall(r.text) + for match in matches: + model_name, load_time = match + model_load_times[model_name] = float(load_time) + return model_load_times + +class TestGeneralMetrics(tu.TestResultCollector): + def setUp(self): + self.model_name = "libtorch_float32_float32_float32" + + def test_metrics_load_time(self): + model_load_times = get_model_load_times() + load_time = model_load_times.get(self.model_name) + + self.assertIsNotNone(load_time, "Model Load time not found") + + dict_size = len(model_load_times) + self.assertEqual(dict_size, 1, "Too many model_load_time entries found") + + def test_metrics_load_time_explicit_load(self): + model_load_times = get_model_load_times() + load_time = model_load_times.get(self.model_name) + + self.assertIsNotNone(load_time, "Model Load time not found") + + dict_size = len(model_load_times) + self.assertEqual(dict_size, 1, "Too many model_load_time entries found") + + def test_metrics_load_time_explicit_unload(self): + r = requests.get(f"http://localhost:8000/v2/repository/models/") + 
r.raise_for_status() + print(r.text) + model_load_times = get_model_load_times() + load_time = model_load_times.get(self.model_name) + + self.assertIsNone(load_time, "Model Load time found even after unload") + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/qa/L0_metrics/test.sh b/qa/L0_metrics/test.sh index 76e99e7c48..f966b5fb23 100755 --- a/qa/L0_metrics/test.sh +++ b/qa/L0_metrics/test.sh @@ -45,7 +45,6 @@ SERVER=${TRITON_DIR}/bin/tritonserver BASE_SERVER_ARGS="--model-repository=${MODELDIR}" SERVER_ARGS="${BASE_SERVER_ARGS}" SERVER_LOG="./inference_server.log" -PYTHON_TEST="metrics_config_test.py" source ../common/util.sh CLIENT_LOG="client.log" @@ -132,12 +131,42 @@ fi kill_server set -e +### General metrics tests + +set +e +CLIENT_PY="./general_metrics_test.py" +CLIENT_LOG="general_metrics_test_client.log" +SERVER_LOG="general_metrics_test_server.log" +SERVER_ARGS="$BASE_SERVER_ARGS --log-verbose=1" +PYTHON_TEST="general_metrics_test.py" +run_and_check_server +# Test 1 for normal mode +python3 ${PYTHON_TEST} TestGeneralMetrics.test_metrics_load_time -v 2>&1 | tee ${CLIENT_LOG} +kill_server + +set +e +CLIENT_PY="./general_metrics_test.py" +CLIENT_LOG="general_metrics_test_client.log" +SERVER_LOG="general_metrics_test_server.log" +SERVER_ARGS="$BASE_SERVER_ARGS --model-control-mode=explicit --log-verbose=1" +run_and_check_server +export MODEL_NAME='libtorch_float32_float32_float32' +code=`curl -s -w %{http_code} -X POST ${TRITONSERVER_IPADDR}:8000/v2/repository/models/${MODEL_NAME}/load` +# Test 2 for explicit mode LOAD +python3 ${PYTHON_TEST} TestGeneralMetrics.test_metrics_load_time_explicit_load -v 2>&1 | tee ${CLIENT_LOG} + +code=`curl -s -w %{http_code} -X POST ${TRITONSERVER_IPADDR}:8000/v2/repository/models/${MODEL_NAME}/unload` +# Test 3 for explicit mode UNLOAD +python3 ${PYTHON_TEST} TestGeneralMetrics.test_metrics_load_time_explicit_unload -v 2>&1 | tee ${CLIENT_LOG} +kill_server + ### Pinned memory metrics 
tests set +e CLIENT_PY="./pinned_memory_metrics_test.py" CLIENT_LOG="pinned_memory_metrics_test_client.log" SERVER_LOG="pinned_memory_metrics_test_server.log" SERVER_ARGS="$BASE_SERVER_ARGS --metrics-interval-ms=1 --model-control-mode=explicit --log-verbose=1" +PYTHON_TEST="metrics_config_test.py" run_and_check_server python3 ${PYTHON_TEST} MetricsConfigTest.test_pinned_memory_metrics_exist -v 2>&1 | tee ${CLIENT_LOG} check_unit_test From 5b1f62f32c8e8b726f1bf73fa64a8170015e29cb Mon Sep 17 00:00:00 2001 From: Indrajit Bhosale Date: Mon, 14 Oct 2024 01:13:51 -0700 Subject: [PATCH 02/11] Fix Pre-Commit --- qa/L0_metrics/general_metrics_test.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/qa/L0_metrics/general_metrics_test.py b/qa/L0_metrics/general_metrics_test.py index adbc78197f..a80b5dafa5 100644 --- a/qa/L0_metrics/general_metrics_test.py +++ b/qa/L0_metrics/general_metrics_test.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +# /usr/bin/python # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -28,11 +28,13 @@ import os import re import unittest + import requests _tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost") MODEL_LOAD_TIME = "nv_model_load_time{model=" + def get_model_load_times(): r = requests.get(f"http://{_tritonserver_ipaddr}:8002/metrics") r.raise_for_status() @@ -44,25 +46,26 @@ def get_model_load_times(): model_load_times[model_name] = float(load_time) return model_load_times + class TestGeneralMetrics(tu.TestResultCollector): def setUp(self): self.model_name = "libtorch_float32_float32_float32" - + def test_metrics_load_time(self): model_load_times = get_model_load_times() load_time = model_load_times.get(self.model_name) - + self.assertIsNotNone(load_time, "Model Load time not found") - + dict_size = len(model_load_times) self.assertEqual(dict_size, 1, "Too many model_load_time entries found") def test_metrics_load_time_explicit_load(self): model_load_times = get_model_load_times() load_time = model_load_times.get(self.model_name) - + self.assertIsNotNone(load_time, "Model Load time not found") - + dict_size = len(model_load_times) self.assertEqual(dict_size, 1, "Too many model_load_time entries found") @@ -72,8 +75,9 @@ def test_metrics_load_time_explicit_unload(self): print(r.text) model_load_times = get_model_load_times() load_time = model_load_times.get(self.model_name) - + self.assertIsNone(load_time, "Model Load time found even after unload") + if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() From d421e497bc2b1126131063874bc50da11e678593 Mon Sep 17 00:00:00 2001 From: Indrajit Bhosale Date: Mon, 14 Oct 2024 23:01:47 -0700 Subject: [PATCH 03/11] Fix review comments --- qa/L0_metrics/general_metrics_test.py | 2 +- qa/L0_metrics/test.sh | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/qa/L0_metrics/general_metrics_test.py b/qa/L0_metrics/general_metrics_test.py index 
a80b5dafa5..1c452515d0 100644 --- a/qa/L0_metrics/general_metrics_test.py +++ b/qa/L0_metrics/general_metrics_test.py @@ -47,7 +47,7 @@ def get_model_load_times(): return model_load_times -class TestGeneralMetrics(tu.TestResultCollector): +class TestGeneralMetrics(unittest.TestCase): def setUp(self): self.model_name = "libtorch_float32_float32_float32" diff --git a/qa/L0_metrics/test.sh b/qa/L0_metrics/test.sh index f966b5fb23..1cb85f4004 100755 --- a/qa/L0_metrics/test.sh +++ b/qa/L0_metrics/test.sh @@ -140,8 +140,8 @@ SERVER_LOG="general_metrics_test_server.log" SERVER_ARGS="$BASE_SERVER_ARGS --log-verbose=1" PYTHON_TEST="general_metrics_test.py" run_and_check_server -# Test 1 for normal mode -python3 ${PYTHON_TEST} TestGeneralMetrics.test_metrics_load_time -v 2>&1 | tee ${CLIENT_LOG} +# Test 1 for default model control mode (all models loaded at startup) +python3 -m pytest --junitxml="general_metrics_test.test_metrics_load_time.report.xml" $CLIENT_PY::TestGeneralMetrics::test_metrics_load_time >> $CLIENT_LOG 2>&1 kill_server set +e @@ -150,14 +150,14 @@ CLIENT_LOG="general_metrics_test_client.log" SERVER_LOG="general_metrics_test_server.log" SERVER_ARGS="$BASE_SERVER_ARGS --model-control-mode=explicit --log-verbose=1" run_and_check_server -export MODEL_NAME='libtorch_float32_float32_float32' +MODEL_NAME='libtorch_float32_float32_float32' code=`curl -s -w %{http_code} -X POST ${TRITONSERVER_IPADDR}:8000/v2/repository/models/${MODEL_NAME}/load` # Test 2 for explicit mode LOAD -python3 ${PYTHON_TEST} TestGeneralMetrics.test_metrics_load_time_explicit_load -v 2>&1 | tee ${CLIENT_LOG} +python3 -m pytest --junitxml="general_metrics_test.test_metrics_load_time_explicit_load.report.xml" $CLIENT_PY::TestGeneralMetrics::test_metrics_load_time_explicit_load >> $CLIENT_LOG 2>&1 code=`curl -s -w %{http_code} -X POST ${TRITONSERVER_IPADDR}:8000/v2/repository/models/${MODEL_NAME}/unload` # Test 3 for explicit mode UNLOAD -python3 ${PYTHON_TEST} 
TestGeneralMetrics.test_metrics_load_time_explicit_unload -v 2>&1 | tee ${CLIENT_LOG} +python3 -m pytest --junitxml="general_metrics_test.test_metrics_load_time_explicit_unload.report.xml" $CLIENT_PY::TestGeneralMetrics::test_metrics_load_time_explicit_unload >> $CLIENT_LOG 2>&1 kill_server ### Pinned memory metrics tests From d47ebe5068c25c2275c22bb86ff45baaa06f258d Mon Sep 17 00:00:00 2001 From: Indrajit Bhosale Date: Thu, 17 Oct 2024 18:02:02 -0700 Subject: [PATCH 04/11] Update Docs for new metric model load time --- docs/user_guide/metrics.md | 9 +++++++++ qa/L0_metrics/general_metrics_test.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/user_guide/metrics.md b/docs/user_guide/metrics.md index b8fc0d8ee0..88f8b49c3a 100644 --- a/docs/user_guide/metrics.md +++ b/docs/user_guide/metrics.md @@ -183,6 +183,15 @@ There are some places where a request would not be considered pending: generally brief, it will not be considered pending from Triton's perspective until Triton core has received the request from the frontend. +#### Load Time Per-Model +The *Model Load Duration* metric reports the time, in seconds, taken to load a model from storage into GPU/CPU memory. 
+``` +# HELP nv_model_load_duration_secs Model load time in seconds +# TYPE nv_model_load_duration_secs gauge +nv_model_load_duration_secs{model="input_all_optional",version="2"} 1.532738387 +nv_model_load_duration_secs{model="input_all_optional",version="1"} 11.68753265 +``` + ### Latencies Starting in 23.04, Triton exposes the ability to choose the types of metrics diff --git a/qa/L0_metrics/general_metrics_test.py b/qa/L0_metrics/general_metrics_test.py index 1c452515d0..9dd1b672eb 100644 --- a/qa/L0_metrics/general_metrics_test.py +++ b/qa/L0_metrics/general_metrics_test.py @@ -32,7 +32,7 @@ import requests _tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost") -MODEL_LOAD_TIME = "nv_model_load_time{model=" +MODEL_LOAD_TIME = "nv_model_load_duration_secs{model=" def get_model_load_times(): From 3fcc6499be0ab452d0d779048efdb3643552dda8 Mon Sep 17 00:00:00 2001 From: Indrajit Bhosale Date: Fri, 18 Oct 2024 15:08:28 -0700 Subject: [PATCH 05/11] Remove logs causing test to fail --- qa/L0_metrics/general_metrics_test.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/qa/L0_metrics/general_metrics_test.py b/qa/L0_metrics/general_metrics_test.py index 9dd1b672eb..f32739963d 100644 --- a/qa/L0_metrics/general_metrics_test.py +++ b/qa/L0_metrics/general_metrics_test.py @@ -70,9 +70,6 @@ def test_metrics_load_time_explicit_load(self): self.assertEqual(dict_size, 1, "Too many model_load_time entries found") def test_metrics_load_time_explicit_unload(self): - r = requests.get(f"http://localhost:8000/v2/repository/models/") - r.raise_for_status() - print(r.text) model_load_times = get_model_load_times() load_time = model_load_times.get(self.model_name) From 748d3c59a1352cba915ace58e56bb8372f468f47 Mon Sep 17 00:00:00 2001 From: Indrajit Bhosale Date: Sat, 19 Oct 2024 01:44:03 -0700 Subject: [PATCH 06/11] Fix review comments add new test for versions --- qa/L0_metrics/general_metrics_test.py | 108 ++++++++++++++++-- qa/L0_metrics/test.sh | 12 ++ 
.../input_all_optional/1/model.py | 49 ++++++++ .../input_all_optional/2/model.py | 47 ++++++++ .../input_all_optional/config.pbtxt | 59 ++++++++++ 5 files changed, 265 insertions(+), 10 deletions(-) create mode 100644 qa/L0_metrics/version_models/input_all_optional/1/model.py create mode 100644 qa/L0_metrics/version_models/input_all_optional/2/model.py create mode 100644 qa/L0_metrics/version_models/input_all_optional/config.pbtxt diff --git a/qa/L0_metrics/general_metrics_test.py b/qa/L0_metrics/general_metrics_test.py index f32739963d..7f135cb344 100644 --- a/qa/L0_metrics/general_metrics_test.py +++ b/qa/L0_metrics/general_metrics_test.py @@ -27,6 +27,7 @@ import os import re +import time import unittest import requests @@ -39,21 +40,60 @@ def get_model_load_times(): r = requests.get(f"http://{_tritonserver_ipaddr}:8002/metrics") r.raise_for_status() pattern = re.compile(rf'{MODEL_LOAD_TIME}"(.*?)".*?\ (\d+\.\d+)') - model_load_times = {} - matches = pattern.findall(r.text) - for match in matches: - model_name, load_time = match - model_load_times[model_name] = float(load_time) - return model_load_times + # Initialize an empty dictionary to store the data + model_data = {} + lines = r.text.strip().split("\n") + for line in lines: + # Use regex to extract model name, version, and load time + match = re.match( + r"nv_model_load_duration_secs\{model=\"(.*?)\",version=\"(.*?)\"\} (.*)", + line, + ) + if match: + model_name = match.group(1) + model_version = match.group(2) + load_time = float(match.group(3)) + # Store in dictionary + if model_name not in model_data: + model_data[model_name] = {} + model_data[model_name][model_version] = load_time + return model_data + + +def load_model_explicit(model_name, server_url="http://localhost:8000"): + endpoint = f"{server_url}/v2/repository/models/{model_name}/load" + response = requests.post(endpoint) + + if response.status_code == 200: + print(f"Model '{model_name}' loaded successfully.") + else: + print( + f"Failed to 
load model '{model_name}'. Status code: {response.status_code}" + ) + print("Response:", response.text) + + +def unload_model_explicit(model_name, server_url="http://localhost:8000"): + endpoint = f"{server_url}/v2/repository/models/{model_name}/unload" + response = requests.post(endpoint) + + if response.status_code == 200: + print(f"Model '{model_name}' unloaded successfully.") + else: + print( + f"Failed to load model '{model_name}'. Status code: {response.status_code}" + ) + print("Response:", response.text) class TestGeneralMetrics(unittest.TestCase): def setUp(self): self.model_name = "libtorch_float32_float32_float32" + self.model_name_multiple_versions = "input_all_optional" def test_metrics_load_time(self): model_load_times = get_model_load_times() - load_time = model_load_times.get(self.model_name) + load_time = model_load_times.get(self.model_name, {}).get("1") self.assertIsNotNone(load_time, "Model Load time not found") @@ -62,7 +102,7 @@ def test_metrics_load_time(self): def test_metrics_load_time_explicit_load(self): model_load_times = get_model_load_times() - load_time = model_load_times.get(self.model_name) + load_time = model_load_times.get(self.model_name, {}).get("1") self.assertIsNotNone(load_time, "Model Load time not found") @@ -71,10 +111,58 @@ def test_metrics_load_time_explicit_load(self): def test_metrics_load_time_explicit_unload(self): model_load_times = get_model_load_times() - load_time = model_load_times.get(self.model_name) - + load_time = model_load_times.get(self.model_name, {}).get("1") self.assertIsNone(load_time, "Model Load time found even after unload") + def test_metrics_load_time_multiple_version_reload(self): + # Part 1 load multiple versions of the same model and check if slow and fast models reflect the metric correctly + load_model_explicit(self.model_name_multiple_versions) + model_load_times = get_model_load_times() + load_time_slow = model_load_times.get( + self.model_name_multiple_versions, {} + ).get("1") + 
load_time_fast = model_load_times.get( + self.model_name_multiple_versions, {} + ).get("2") + # Fail the test if load_time_slow is less than load_time_fast + self.assertGreaterEqual( + load_time_slow, + load_time_fast, + "Slow load time should be greater than or equal to fast load time", + ) + # Fail the test if load_time_slow is less than 10 seconds as manual delay is 10 seconds + self.assertGreaterEqual( + load_time_slow, + 10, + "Slow load time should be greater than or equal to fast load time", + ) + + # Part 2 load multiple versions AGAIN and compare with prev values expect to be the same + # as triton does not actually load the model again. + load_model_explicit(self.model_name_multiple_versions) + model_load_times_new = get_model_load_times() + load_time_slow_new = model_load_times_new.get( + self.model_name_multiple_versions, {} + ).get("1") + load_time_fast_new = model_load_times_new.get( + self.model_name_multiple_versions, {} + ).get("2") + self.assertEqual(load_time_fast_new, load_time_fast) + self.assertEqual(load_time_slow_new, load_time_slow) + + # Part 3 unload the model and expect the metrics to go away as model is not loaded now + unload_model_explicit(self.model_name_multiple_versions) + time.sleep(1) + model_load_times_new = get_model_load_times() + load_time_slow_new = model_load_times_new.get( + self.model_name_multiple_versions, {} + ).get("1") + load_time_fast_new = model_load_times_new.get( + self.model_name_multiple_versions, {} + ).get("2") + self.assertIsNone(load_time_slow_new, "Model Load time found even after unload") + self.assertIsNone(load_time_fast_new, "Model Load time found even after unload") + if __name__ == "__main__": unittest.main() diff --git a/qa/L0_metrics/test.sh b/qa/L0_metrics/test.sh index 1cb85f4004..4ea1971f5c 100755 --- a/qa/L0_metrics/test.sh +++ b/qa/L0_metrics/test.sh @@ -160,6 +160,18 @@ code=`curl -s -w %{http_code} -X POST ${TRITONSERVER_IPADDR}:8000/v2/repository/ python3 -m pytest 
--junitxml="general_metrics_test.test_metrics_load_time_explicit_unload.report.xml" $CLIENT_PY::TestGeneralMetrics::test_metrics_load_time_explicit_unload >> $CLIENT_LOG 2>&1 kill_server +# Test 4 for explicit mode LOAD and UNLOAD with multiple versions +set +e +CLIENT_PY="./general_metrics_test.py" +CLIENT_LOG="general_metrics_test_client.log" +SERVER_LOG="general_metrics_test_server.log" +VERSION_DIR="${PWD}/version_models" +SERVER_ARGS="$BASE_SERVER_ARGS --model-repository=${VERSION_DIR} --model-control-mode=explicit --log-verbose=1" +run_and_check_server +python3 -m pytest --junitxml="general_metrics_test.test_metrics_load_time_multiple_version_reload.report.xml" $CLIENT_PY::TestGeneralMetrics::test_metrics_load_time_multiple_version_reload >> $CLIENT_LOG 2>&1 + +kill_server + ### Pinned memory metrics tests set +e CLIENT_PY="./pinned_memory_metrics_test.py" diff --git a/qa/L0_metrics/version_models/input_all_optional/1/model.py b/qa/L0_metrics/version_models/input_all_optional/1/model.py new file mode 100644 index 0000000000..fecf42b66f --- /dev/null +++ b/qa/L0_metrics/version_models/input_all_optional/1/model.py @@ -0,0 +1,49 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import json + +import time +import numpy as np +import triton_python_backend_utils as pb_utils + + +class TritonPythonModel: + def initialize(self, args): + time.sleep(10) + self.model_config = json.loads(args["model_config"]) + + def execute(self, requests): + """This function is called on inference request.""" + + responses = [] + for _ in requests: + # Every request gets the same constant single-element FP32 output + out_0 = np.array([1], dtype=np.float32) + out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0) + responses.append(pb_utils.InferenceResponse([out_tensor_0])) + + return responses diff --git a/qa/L0_metrics/version_models/input_all_optional/2/model.py b/qa/L0_metrics/version_models/input_all_optional/2/model.py new file mode 100644 index 0000000000..40f8b25579 --- /dev/null +++ b/qa/L0_metrics/version_models/input_all_optional/2/model.py @@ -0,0 +1,47 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import json + +import numpy as np +import triton_python_backend_utils as pb_utils + + +class TritonPythonModel: + def initialize(self, args): + self.model_config = json.loads(args["model_config"]) + + def execute(self, requests): + """This function is called on inference request.""" + + responses = [] + for _ in requests: + # Every request gets the same constant single-element FP32 output + out_0 = np.array([1], dtype=np.float32) + out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0) + responses.append(pb_utils.InferenceResponse([out_tensor_0])) + + return responses diff --git a/qa/L0_metrics/version_models/input_all_optional/config.pbtxt b/qa/L0_metrics/version_models/input_all_optional/config.pbtxt new file mode 100644 index 0000000000..e3653342b4 --- /dev/null +++ b/qa/L0_metrics/version_models/input_all_optional/config.pbtxt @@ -0,0 +1,59 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +name: "input_all_optional" +backend: "python" +input [ + { + name: "INPUT0" + data_type: TYPE_FP32 + dims: [ -1 ] + optional: true + }, + { + name: "INPUT1" + data_type: TYPE_FP32 + dims: [ -1 ] + optional: true + }, + { + name: "INPUT2" + data_type: TYPE_FP32 + dims: [ -1 ] + optional: true + } +] + +output [ + { + name: "OUTPUT0" + data_type: TYPE_FP32 + dims: [ 1 ] + } +] + +instance_group [{ kind: KIND_CPU }] +version_policy: { all { }} From 9f3f57703d1aaba60521c6855135eaae3d1626e7 Mon Sep 17 00:00:00 2001 From: Indrajit Bhosale Date: Sat, 19 Oct 2024 01:48:45 -0700 Subject: [PATCH 07/11] Pre-Commit Fix --- qa/L0_metrics/version_models/input_all_optional/1/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/L0_metrics/version_models/input_all_optional/1/model.py b/qa/L0_metrics/version_models/input_all_optional/1/model.py index fecf42b66f..86cd368fe0 100644 --- a/qa/L0_metrics/version_models/input_all_optional/1/model.py +++ b/qa/L0_metrics/version_models/input_all_optional/1/model.py @@ -25,8 +25,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import json - import time + import numpy as np import triton_python_backend_utils as pb_utils From f74507387775f3824e698d55bbf15ccb9e54baf6 Mon Sep 17 00:00:00 2001 From: Indrajit Bhosale Date: Mon, 21 Oct 2024 17:53:31 -0700 Subject: [PATCH 08/11] Comments fixed --- qa/L0_metrics/general_metrics_test.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/qa/L0_metrics/general_metrics_test.py b/qa/L0_metrics/general_metrics_test.py index 7f135cb344..f0002def65 100644 --- a/qa/L0_metrics/general_metrics_test.py +++ b/qa/L0_metrics/general_metrics_test.py @@ -39,7 +39,6 @@ def get_model_load_times(): r = requests.get(f"http://{_tritonserver_ipaddr}:8002/metrics") r.raise_for_status() - pattern = re.compile(rf'{MODEL_LOAD_TIME}"(.*?)".*?\ (\d+\.\d+)') # Initialize an empty dictionary to store the data model_data = {} lines = r.text.strip().split("\n") @@ -115,6 +114,11 @@ def test_metrics_load_time_explicit_unload(self): self.assertIsNone(load_time, "Model Load time found even after unload") def test_metrics_load_time_multiple_version_reload(self): + # Part 0 check start condition, metric should not be present + model_load_times = get_model_load_times() + load_time = model_load_times.get(self.model_name, {}).get("1") + self.assertIsNone(load_time, "Model Load time found even before model load") + # Part 1 load multiple versions of the same model and check if slow and fast models reflect the metric correctly load_model_explicit(self.model_name_multiple_versions) model_load_times = get_model_load_times() @@ -136,6 +140,12 @@ def test_metrics_load_time_multiple_version_reload(self): 10, "Slow load time should be greater than or equal to fast load time", ) + # Fail the test if load_time_fast is greater than generous 2 seconds + self.assertLess( + load_time_fast, + 2, + "Model taking too much time to load", + ) # Part 2 load multiple versions AGAIN and compare with prev values expect to be the same # as triton does not actually load the model
again. From f07f5efb2084cac624fa30c9b033b9aae83123c5 Mon Sep 17 00:00:00 2001 From: Indrajit Bhosale Date: Mon, 4 Nov 2024 15:47:01 -0800 Subject: [PATCH 09/11] Review Comments Fixed --- qa/L0_metrics/general_metrics_test.py | 19 ++++++++----------- qa/L0_metrics/test.sh | 12 +++++------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/qa/L0_metrics/general_metrics_test.py b/qa/L0_metrics/general_metrics_test.py index f0002def65..e2cbb74f63 100644 --- a/qa/L0_metrics/general_metrics_test.py +++ b/qa/L0_metrics/general_metrics_test.py @@ -62,25 +62,22 @@ def get_model_load_times(): def load_model_explicit(model_name, server_url="http://localhost:8000"): endpoint = f"{server_url}/v2/repository/models/{model_name}/load" response = requests.post(endpoint) - - if response.status_code == 200: + try: + self.assertEqual(response.status_code, 200) print(f"Model '{model_name}' loaded successfully.") - else: - print( - f"Failed to load model '{model_name}'. Status code: {response.status_code}" - ) + except AssertionError: + print(f"Failed to load model '{model_name}'. Status code: {response.status_code}") print("Response:", response.text) - def unload_model_explicit(model_name, server_url="http://localhost:8000"): endpoint = f"{server_url}/v2/repository/models/{model_name}/unload" response = requests.post(endpoint) - - if response.status_code == 200: + try: + self.assertEqual(response.status_code, 200) print(f"Model '{model_name}' unloaded successfully.") - else: + except AssertionError: print( - f"Failed to load model '{model_name}'. Status code: {response.status_code}" + f"Failed to unload model '{model_name}'. 
Status code: {response.status_code}" ) print("Response:", response.text) diff --git a/qa/L0_metrics/test.sh b/qa/L0_metrics/test.sh index 4ea1971f5c..c12814abc7 100755 --- a/qa/L0_metrics/test.sh +++ b/qa/L0_metrics/test.sh @@ -143,34 +143,32 @@ run_and_check_server # Test 1 for default model control mode (all models loaded at startup) python3 -m pytest --junitxml="general_metrics_test.test_metrics_load_time.report.xml" $CLIENT_PY::TestGeneralMetrics::test_metrics_load_time >> $CLIENT_LOG 2>&1 kill_server +set -e set +e -CLIENT_PY="./general_metrics_test.py" -CLIENT_LOG="general_metrics_test_client.log" -SERVER_LOG="general_metrics_test_server.log" SERVER_ARGS="$BASE_SERVER_ARGS --model-control-mode=explicit --log-verbose=1" run_and_check_server MODEL_NAME='libtorch_float32_float32_float32' -code=`curl -s -w %{http_code} -X POST ${TRITONSERVER_IPADDR}:8000/v2/repository/models/${MODEL_NAME}/load` +curl -s -w %{http_code} -X POST ${TRITONSERVER_IPADDR}:8000/v2/repository/models/${MODEL_NAME}/load # Test 2 for explicit mode LOAD python3 -m pytest --junitxml="general_metrics_test.test_metrics_load_time_explicit_load.report.xml" $CLIENT_PY::TestGeneralMetrics::test_metrics_load_time_explicit_load >> $CLIENT_LOG 2>&1 -code=`curl -s -w %{http_code} -X POST ${TRITONSERVER_IPADDR}:8000/v2/repository/models/${MODEL_NAME}/unload` +curl -s -w %{http_code} -X POST ${TRITONSERVER_IPADDR}:8000/v2/repository/models/${MODEL_NAME}/unload # Test 3 for explicit mode UNLOAD python3 -m pytest --junitxml="general_metrics_test.test_metrics_load_time_explicit_unload.report.xml" $CLIENT_PY::TestGeneralMetrics::test_metrics_load_time_explicit_unload >> $CLIENT_LOG 2>&1 kill_server +set -e # Test 4 for explicit mode LOAD and UNLOAD with multiple versions set +e CLIENT_PY="./general_metrics_test.py" -CLIENT_LOG="general_metrics_test_client.log" -SERVER_LOG="general_metrics_test_server.log" VERSION_DIR="${PWD}/version_models" SERVER_ARGS="$BASE_SERVER_ARGS --model-repository=${VERSION_DIR} 
--model-control-mode=explicit --log-verbose=1" run_and_check_server python3 -m pytest --junitxml="general_metrics_test.test_metrics_load_time_multiple_version_reload.report.xml" $CLIENT_PY::TestGeneralMetrics::test_metrics_load_time_multiple_version_reload >> $CLIENT_LOG 2>&1 kill_server +set -e ### Pinned memory metrics tests set +e From b752a5be60f33764d083897bca179f438dacd172 Mon Sep 17 00:00:00 2001 From: Indrajit Bhosale Date: Mon, 4 Nov 2024 15:49:29 -0800 Subject: [PATCH 10/11] Pre-Commit Fix --- qa/L0_metrics/general_metrics_test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qa/L0_metrics/general_metrics_test.py b/qa/L0_metrics/general_metrics_test.py index e2cbb74f63..7877e34332 100644 --- a/qa/L0_metrics/general_metrics_test.py +++ b/qa/L0_metrics/general_metrics_test.py @@ -66,9 +66,12 @@ def load_model_explicit(model_name, server_url="http://localhost:8000"): self.assertEqual(response.status_code, 200) print(f"Model '{model_name}' loaded successfully.") except AssertionError: - print(f"Failed to load model '{model_name}'. Status code: {response.status_code}") + print( + f"Failed to load model '{model_name}'. 
Status code: {response.status_code}" + ) print("Response:", response.text) + def unload_model_explicit(model_name, server_url="http://localhost:8000"): endpoint = f"{server_url}/v2/repository/models/{model_name}/unload" response = requests.post(endpoint) From 9329e5517385cfa156f6b00a11210233b54259f4 Mon Sep 17 00:00:00 2001 From: Indrajit Bhosale Date: Wed, 6 Nov 2024 11:59:24 -0800 Subject: [PATCH 11/11] Extra assignment removed --- qa/L0_metrics/test.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/qa/L0_metrics/test.sh b/qa/L0_metrics/test.sh index c8fffd7a38..92b74036cf 100755 --- a/qa/L0_metrics/test.sh +++ b/qa/L0_metrics/test.sh @@ -161,7 +161,6 @@ set -e # Test 4 for explicit mode LOAD and UNLOAD with multiple versions set +e -CLIENT_PY="./general_metrics_test.py" VERSION_DIR="${PWD}/version_models" SERVER_ARGS="$BASE_SERVER_ARGS --model-repository=${VERSION_DIR} --model-control-mode=explicit --log-verbose=1" run_and_check_server