Introduce a THRESHOLD_BYTES variable for the XLA all-reduce combine threshold

Signed-off-by: Terry Kong <terryk@nvidia.com>
NVIDIA · Sep 20, 2024 · b05591a
1 parent 067c484
commit b05591a
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 2 deletions.
diff --git a/rosetta/rosetta/projects/pax/xla_flags/gpt-126m.env b/rosetta/rosetta/projects/pax/xla_flags/gpt-126m.env
@@ -1,12 +1,14 @@
 set -x
+THRESHOLD_BYTES=33554432
 export XLA_FLAGS="\
     --xla_gpu_enable_latency_hiding_scheduler=true \
     --xla_allow_excess_precision \
     --xla_gpu_enable_highest_priority_async_stream=true \
     --xla_gpu_enable_triton_softmax_fusion=false \
-    --xla_gpu_all_reduce_combine_threshold_bytes=33554432 \
+    --xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
     --xla_gpu_graph_level=0 \
     --xla_gpu_enable_cudnn_fmha=false \
     "
 export XLA_PYTHON_CLIENT_MEM_FRACTION=0.8
+unset THRESHOLD_BYTES
 set +x
diff --git a/rosetta/rosetta/projects/pax/xla_flags/gpt-5b.env b/rosetta/rosetta/projects/pax/xla_flags/gpt-5b.env
@@ -1,11 +1,13 @@
 set -x
+THRESHOLD_BYTES=51200
 export XLA_FLAGS="\
     --xla_gpu_enable_latency_hiding_scheduler=true \
     --xla_allow_excess_precision \
     --xla_gpu_enable_highest_priority_async_stream=true \
     --xla_gpu_enable_triton_softmax_fusion=false \
-    --xla_gpu_all_reduce_combine_threshold_bytes=51200 \
+    --xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
     --xla_gpu_graph_level=0 \
     "
 export XLA_PYTHON_CLIENT_MEM_FRACTION=0.8
+unset THRESHOLD_BYTES
 set +x