From 6590b60cee6ca11a00e7477f2272abb296230f20 Mon Sep 17 00:00:00 2001 From: Arjun Date: Sun, 23 Jul 2023 07:58:03 +0000 Subject: [PATCH 01/13] Added performance values for L4/T4 R50 --- .../reproduce-mlperf-inference-nvidia/_cm.yaml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml index dbe001b947..31e0ba0e56 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml @@ -900,9 +900,15 @@ variations: l4,resnet50,offline,run_harness: add_deps_recursive: build-engine: - tags: _batch_size.16 + tags: _batch_size.32 + default_env: + CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 10500 + CM_MLPERF_LOADGEN_SERVER_TARGET_QPS: 9000 + CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY: 0.35 + CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY: 1 + env: - CM_MODEL_BATCH_SIZE: "16" + CM_MODEL_BATCH_SIZE: "32" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "1" CM_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' @@ -956,6 +962,13 @@ variations: add_deps_recursive: build-engine: tags: _batch_size.256 + + default_env: + CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 4900 + CM_MLPERF_LOADGEN_SERVER_TARGET_QPS: 4000 + CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY: 0.6 + CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY: 2 + env: CM_MODEL_BATCH_SIZE: "256" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4" From 8231f8e3b121fab11d5bd18cd6d3761cbc57c666 Mon Sep 17 00:00:00 2001 From: Arjun Date: Sun, 23 Jul 2023 08:00:25 +0000 Subject: [PATCH 02/13] Recalibrate only for retinanet --- cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml index 
31e0ba0e56..90f7caad69 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml @@ -479,9 +479,9 @@ variations: - dlrm-99.9 - tags: reproduce,mlperf,inference,nvidia,harness,_calibrate inherit_variation_tags: true - skip_if_env: + enable_if_env: CM_MODEL: - - rnnt + - retinanet force_cache: true skip_inherit_variation_groups: - run-mode From f3fb81b83a3b1597b9c47199f0c94035d4720ff3 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 21 Jul 2023 14:19:33 +0100 Subject: [PATCH 03/13] Make use of CM_MLPERF_PERFORMANCE_SAMPLE_COUNT consistent --- cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml | 2 +- cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml index 90f7caad69..298c332e78 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml @@ -40,7 +40,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF devices: CM_MLPERF_NVIDIA_HARNESS_DEVICES diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py index 026b9f0734..ef9f3c9fe1 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py @@ -240,7 +240,7 @@ def preprocess(i): if input_format: run_config += f" --input_format={input_format}" - performance_sample_count = env.get('CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT') + 
performance_sample_count = env.get('CM_MLPERF_PERFORMANCE_SAMPLE_COUNT') if performance_sample_count: run_config += f" --performance_sample_count={performance_sample_count}" From 13bc576a7379b2835394d8e9d2857cddc5c47366 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 23 Jul 2023 09:13:56 +0100 Subject: [PATCH 04/13] Improve console output for install-cuda --- cm-mlops/script/install-cuda-prebuilt/customize.py | 6 +++--- cm-mlops/script/install-cuda-prebuilt/run.sh | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cm-mlops/script/install-cuda-prebuilt/customize.py b/cm-mlops/script/install-cuda-prebuilt/customize.py index c5dcbc99ff..b9fd4586a5 100644 --- a/cm-mlops/script/install-cuda-prebuilt/customize.py +++ b/cm-mlops/script/install-cuda-prebuilt/customize.py @@ -17,10 +17,10 @@ def preprocess(i): env['WGET_URL']="https://developer.download.nvidia.com/compute/cuda/"+env['CM_VERSION']+"/local_installers/"+env['CM_CUDA_LINUX_FILENAME'] + extra_options = env.get('CUDA_ADDITIONAL_INSTALL_OPTIONS', '') if env.get('CM_CUDA_INSTALL_DRIVER','') == "yes": - env['CUDA_ADDITIONAL_INSTALL_OPTIONS'] = " --driver" - else: - env['CUDA_ADDITIONAL_INSTALL_OPTIONS'] = "" + extra_options += " --driver" + env['CUDA_ADDITIONAL_INSTALL_OPTIONS'] = extra_options env['CM_CUDA_INSTALLED_PATH'] = os.path.join(os.getcwd(), 'install') env['CM_NVCC_BIN_WITH_PATH'] = os.path.join(os.getcwd(), 'install', 'bin', nvcc_bin) diff --git a/cm-mlops/script/install-cuda-prebuilt/run.sh b/cm-mlops/script/install-cuda-prebuilt/run.sh index 5db1057696..74bf85693a 100644 --- a/cm-mlops/script/install-cuda-prebuilt/run.sh +++ b/cm-mlops/script/install-cuda-prebuilt/run.sh @@ -4,5 +4,7 @@ CUR=${PWD} INSTALL_DIR=${CUR}/install -${CM_SUDO} bash ${CM_CUDA_RUN_FILE_PATH} --toolkitpath=${INSTALL_DIR} --defaultroot=${INSTALL_DIR} --toolkit ${CUDA_ADDITIONAL_INSTALL_OPTIONS} --silent +cmd="${CM_SUDO} bash ${CM_CUDA_RUN_FILE_PATH} --toolkitpath=${INSTALL_DIR} 
--defaultroot=${INSTALL_DIR} --toolkit ${CUDA_ADDITIONAL_INSTALL_OPTIONS} --silent " +echo "${cmd}" +eval "${cmd}" From ebd4d6499b067095035e182cab1c8298d52db375 Mon Sep 17 00:00:00 2001 From: Arjun Date: Sun, 23 Jul 2023 09:43:41 +0000 Subject: [PATCH 05/13] Fix for MS target_latency --- .../_cm.yaml | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml index 298c332e78..7f697fb7d3 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml @@ -897,16 +897,18 @@ variations: env: CM_NVIDIA_CUSTOM_GPU: "yes" - l4,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.32 + l4,resnet50: default_env: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 10500 CM_MLPERF_LOADGEN_SERVER_TARGET_QPS: 9000 CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY: 0.35 CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY: 1 + l4,resnet50,offline,run_harness: + add_deps_recursive: + build-engine: + tags: _batch_size.32 + env: CM_MODEL_BATCH_SIZE: "32" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2" @@ -958,17 +960,18 @@ variations: env: CM_NVIDIA_CUSTOM_GPU: "yes" - t4,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - + t4,resnet50: default_env: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 4900 CM_MLPERF_LOADGEN_SERVER_TARGET_QPS: 4000 CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY: 0.6 CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY: 2 + t4,resnet50,offline,run_harness: + add_deps_recursive: + build-engine: + tags: _batch_size.256 + env: CM_MODEL_BATCH_SIZE: "256" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4" From 8ad5bce38b2407852a276f3b81cdc649bfc9d92d Mon Sep 17 00:00:00 2001 From: Arjun Date: Sun, 23 Jul 2023 09:52:37 +0000 Subject: [PATCH 06/13] Skip accuracy generation for 3d-unet nvidia --- 
cm-mlops/script/app-mlperf-inference/_cm.yaml | 3 +++ .../script/generate-mlperf-inference-user-conf/customize.py | 2 +- cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cm-mlops/script/app-mlperf-inference/_cm.yaml b/cm-mlops/script/app-mlperf-inference/_cm.yaml index c2941e9c85..b9c6872605 100644 --- a/cm-mlops/script/app-mlperf-inference/_cm.yaml +++ b/cm-mlops/script/app-mlperf-inference/_cm.yaml @@ -327,6 +327,9 @@ variations: - all CM_MLPERF_ACCURACY_RESULTS_DIR: - 'on' + skip_if_env: + CM_MLPERF_IMPLEMENTATION: + - nvidia-original names: - mlperf-accuracy-script - 3d-unet-accuracy-script diff --git a/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py b/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py index 919775192e..9a06eef488 100644 --- a/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py +++ b/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py @@ -260,7 +260,7 @@ def preprocess(i): print("Output Dir: '" + OUTPUT_DIR + "'") print(user_conf) - if env.get('CM_MLPERF_POWER','') == "yes" and os.path.exists(env['CM_MLPERF_POWER_LOG_DIR']): + if env.get('CM_MLPERF_POWER','') == "yes" and os.path.exists(env.get('CM_MLPERF_POWER_LOG_DIR', '')): shutil.rmtree(env['CM_MLPERF_POWER_LOG_DIR']) else: print("Run files exist, skipping run...\n") diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml index 7f697fb7d3..3a7ad098ba 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml @@ -30,6 +30,7 @@ default_env: SKIP_POLICIES: '1' CM_SKIP_PREPROCESS_DATASET: 'no' CM_SKIP_MODEL_DOWNLOAD: 'no' + CM_SKIP_MLPERF_RUN: 'yes' CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX: nvidia_original CM_CALL_RUNNER: 'yes' @@ -250,6 +251,7 @@ post_deps: - names: - runner + - mlperf-runner skip_if_env: 
CM_MLPERF_SKIP_RUN: - yes @@ -557,6 +559,7 @@ variations: env: CM_MLPERF_NVIDIA_HARNESS_RUN_MODE: run_harness MLPERF_NVIDIA_RUN_COMMAND: run_harness + CM_SKIP_MLPERF_RUN: 'no' new_env_keys: - CM_MLPERF_* - CM_DATASET_* From 03a9ecc994e5dfe3292e39c0c537f1386cf8bba6 Mon Sep 17 00:00:00 2001 From: Arjun Date: Sun, 23 Jul 2023 10:00:55 +0000 Subject: [PATCH 07/13] No accuracy check for retinanet nvidia --- cm-mlops/script/app-mlperf-inference/_cm.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cm-mlops/script/app-mlperf-inference/_cm.yaml b/cm-mlops/script/app-mlperf-inference/_cm.yaml index b9c6872605..c41044dafd 100644 --- a/cm-mlops/script/app-mlperf-inference/_cm.yaml +++ b/cm-mlops/script/app-mlperf-inference/_cm.yaml @@ -290,6 +290,9 @@ variations: - all CM_MLPERF_ACCURACY_RESULTS_DIR: - 'on' + skip_if_env: + CM_MLPERF_IMPLEMENTATION: + - nvidia-original names: - mlperf-accuracy-script - openimages-accuracy-script From 18d0ec97088a769c5c64a3c77f4468c94331004d Mon Sep 17 00:00:00 2001 From: Arjun Date: Sun, 23 Jul 2023 12:12:35 +0000 Subject: [PATCH 08/13] Fixes to nvidia-harness --- .../script/reproduce-mlperf-inference-nvidia/_cm.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml index 3a7ad098ba..93d9be6b09 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml @@ -295,6 +295,7 @@ variations: group: model env: CM_MODEL: retinanet + CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://zenodo.org/record/6617981/files/resnext50_32x4d_fpn.pth" deps: - tags: get,generic-python-lib,_Pillow - tags: get,generic-python-lib,_torch @@ -312,6 +313,7 @@ variations: - bert_ env: CM_MODEL: bert-99 + CM_NOT_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://zenodo.org/record/3750364/files/bert_large_v1_1_fake_quant.onnx" bert-99.9: group: model @@ -319,10 +321,13 @@ 
variations: - bert_ env: CM_MODEL: bert-99.9 + CM_NOT_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://zenodo.org/record/3733910/files/model.onnx" 3d-unet_: deps: - tags: get,generic-python-lib,_transformers + - tags: get,generic-python-lib,_pandas + version_max: "1.5.3" 3d-unet-99: group: model @@ -330,6 +335,7 @@ variations: - 3d-unet_ env: CM_MODEL: 3d-unet-99 + CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://zenodo.org/record/5597155/files/3dunet_kits19_128x128x128.onnx" 3d-unet-99.9: group: model @@ -337,11 +343,13 @@ variations: - 3d-unet_ env: CM_MODEL: 3d-unet-99.9 + CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://zenodo.org/record/5597155/files/3dunet_kits19_128x128x128.onnx" rnnt: group: model env: CM_MODEL: rnnt + CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt" deps: - tags: get,generic-python-lib,_toml - tags: get,generic-python-lib,_torchvision From e8ed3d30622fa959901eff1604985e4821dad3e1 Mon Sep 17 00:00:00 2001 From: Arjun Date: Sun, 23 Jul 2023 13:14:41 +0000 Subject: [PATCH 09/13] Fix power runs for nvidia-harness --- .../reproduce-mlperf-inference-nvidia/_cm.yaml | 12 ++++++------ .../reproduce-mlperf-inference-nvidia/customize.py | 7 +++---- .../script/reproduce-mlperf-inference-nvidia/run.sh | 8 ++++++++ 3 files changed, 17 insertions(+), 10 deletions(-) create mode 100644 cm-mlops/script/reproduce-mlperf-inference-nvidia/run.sh diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml index 93d9be6b09..67771c5141 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml @@ -30,9 +30,10 @@ default_env: SKIP_POLICIES: '1' CM_SKIP_PREPROCESS_DATASET: 'no' CM_SKIP_MODEL_DOWNLOAD: 'no' - CM_SKIP_MLPERF_RUN: 'yes' CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX: nvidia_original - CM_CALL_RUNNER: 'yes' + 
CM_MLPERF_SKIP_RUN: 'no' +env: + CM_CALL_MLPERF_RUNNER: 'no' # Map script inputs to environment variables input_mapping: @@ -254,12 +255,11 @@ post_deps: - mlperf-runner skip_if_env: CM_MLPERF_SKIP_RUN: + - 'yes' - yes - CM_MLPERF_NVIDIA_HARNESS_RUN_MODE: - - run_harness tags: benchmark-mlperf enable_if_env: - CM_CALL_RUNNER: + CM_CALL_MLPERF_RUNNER: - yes @@ -567,7 +567,7 @@ variations: env: CM_MLPERF_NVIDIA_HARNESS_RUN_MODE: run_harness MLPERF_NVIDIA_RUN_COMMAND: run_harness - CM_SKIP_MLPERF_RUN: 'no' + CM_CALL_MLPERF_RUNNER: 'yes' new_env_keys: - CM_MLPERF_* - CM_DATASET_* diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py index ef9f3c9fe1..e47e72ded7 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py @@ -143,7 +143,6 @@ def preprocess(i): if not os.path.exists(model_path): cmds.append(f"make download_model BENCHMARKS='{model_name}'") else: - env['CM_CALL_RUNNER'] = "no" return {'return':0} elif make_command == "preprocess_data": @@ -205,15 +204,15 @@ def preprocess(i): run_config += f" --use_triton --config_ver=triton" user_conf_path = env.get('CM_MLPERF_USER_CONF') - if user_conf_path: + if user_conf_path and make_command == "run_harness": run_config += f" --user_conf_path={user_conf_path}" mlperf_conf_path = env.get('CM_MLPERF_INFERENCE_CONF_PATH') - if mlperf_conf_path: + if mlperf_conf_path and make_command == "run_harness": run_config += f" --mlperf_conf_path={mlperf_conf_path}" power_setting = env.get('CM_MLPERF_NVIDIA_HARNESS_POWER_SETTING') - if power_setting: + if power_setting and make_command == "run_harness": run_config += f" --power_setting={power_setting}" gpu_copy_streams = env.get('CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS') diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/run.sh b/cm-mlops/script/reproduce-mlperf-inference-nvidia/run.sh new file mode 
100644 index 0000000000..ddcd0b5504 --- /dev/null +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/run.sh @@ -0,0 +1,8 @@ +#!/bin/bash +if [[ ${CM_CALL_MLPERF_RUNNER} == "no" ]]; then + cd ${CM_RUN_DIR} + cmd=${CM_RUN_CMD} + echo "${cmd}" + eval "${cmd}" + test $? -eq 0 || exit $? +fi From 1d314a2437fc68f428f4e2887a27cc611bea1ebf Mon Sep 17 00:00:00 2001 From: Arjun Date: Sun, 23 Jul 2023 14:06:46 +0000 Subject: [PATCH 10/13] Fix compliance runs for nvidia-harness with power --- .../script/reproduce-mlperf-inference-nvidia/customize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py index e47e72ded7..bc55d55dd2 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py @@ -204,15 +204,15 @@ def preprocess(i): run_config += f" --use_triton --config_ver=triton" user_conf_path = env.get('CM_MLPERF_USER_CONF') - if user_conf_path and make_command == "run_harness": + if user_conf_path and env['CM_MLPERF_NVIDIA_HARNESS_RUN_MODE'] == "run_harness": run_config += f" --user_conf_path={user_conf_path}" mlperf_conf_path = env.get('CM_MLPERF_INFERENCE_CONF_PATH') - if mlperf_conf_path and make_command == "run_harness": + if mlperf_conf_path and env['CM_MLPERF_NVIDIA_HARNESS_RUN_MODE'] == "run_harness": run_config += f" --mlperf_conf_path={mlperf_conf_path}" power_setting = env.get('CM_MLPERF_NVIDIA_HARNESS_POWER_SETTING') - if power_setting and make_command == "run_harness": + if power_setting and env['CM_MLPERF_NVIDIA_HARNESS_RUN_MODE'] == "run_harness": run_config += f" --power_setting={power_setting}" gpu_copy_streams = env.get('CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS') From 6c86cdb51508b3b99dba998165d15cf24cb74987 Mon Sep 17 00:00:00 2001 From: Arjun Date: Sun, 23 Jul 2023 15:30:34 +0000 Subject: [PATCH 11/13] Fix compliance
non-determinism rn --- cm-mlops/script/app-mlperf-inference/customize.py | 1 + cm-mlops/script/app-mlperf-inference/run.sh | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 cm-mlops/script/app-mlperf-inference/run.sh diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index 779c78fee4..684476f731 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -29,6 +29,7 @@ def postprocess(i): env = i['env'] inp = i['input'] + env['CMD'] = '' state = i['state'] if env.get('CM_MLPERF_USER_CONF', '') == '': diff --git a/cm-mlops/script/app-mlperf-inference/run.sh b/cm-mlops/script/app-mlperf-inference/run.sh new file mode 100644 index 0000000000..1d0c1244c7 --- /dev/null +++ b/cm-mlops/script/app-mlperf-inference/run.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +cmd="${CMD}" +if [[ -n ${cmd} ]]; then + echo "$cmd" + eval "$cmd" + test $? -eq 0 || exit $? +fi From 417d3625816faaeeac7d338ab9807fae7549c235 Mon Sep 17 00:00:00 2001 From: Arjun Date: Sun, 23 Jul 2023 18:40:53 +0000 Subject: [PATCH 12/13] Handle high accuracy variants in nvidia-harness --- .../reproduce-mlperf-inference-nvidia/customize.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py index bc55d55dd2..1cb2efb4d7 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py @@ -199,9 +199,17 @@ def preprocess(i): multistream_target_latency_ns = int(float(multistream_target_latency) * 1000000) run_config += f" --multi_stream_expected_latency_ns={multistream_target_latency_ns}" + high_accuracy = "99.9" in env['CM_MODEL'] + use_triton = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_TRITON') if use_triton: - run_config += f" --use_triton --config_ver=triton" 
+ if high_accuracy: + run_config += f" --use_triton --config_ver=high_accuracy_triton" + else: + run_config += f" --use_triton --config_ver=triton" + else: + if high_accuracy: + run_config += f" --config_ver=high_accuracy" user_conf_path = env.get('CM_MLPERF_USER_CONF') if user_conf_path and env['CM_MLPERF_NVIDIA_HARNESS_RUN_MODE'] == "run_harness": From 92337ec8e3e4a287133b5bb2b8ba5b929daa9608 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 24 Jul 2023 09:53:22 +0100 Subject: [PATCH 13/13] Added --update option for get,git,repo --- cm-mlops/script/get-git-repo/_cm.json | 1 + 1 file changed, 1 insertion(+) diff --git a/cm-mlops/script/get-git-repo/_cm.json b/cm-mlops/script/get-git-repo/_cm.json index 6ae4258108..c36043307c 100644 --- a/cm-mlops/script/get-git-repo/_cm.json +++ b/cm-mlops/script/get-git-repo/_cm.json @@ -16,6 +16,7 @@ "depth": "CM_GIT_DEPTH", "folder": "CM_GIT_CHECKOUT_FOLDER", "patch": "CM_GIT_PATCH", + "update": "CM_GIT_REPO_PULL", "submodules": "CM_GIT_RECURSE_SUBMODULES" }, "default_variation": "default",