Skip to content

Commit

Permalink
introduce threshold bytes
Browse files Browse the repository at this point in the history
Signed-off-by: Terry Kong <terryk@nvidia.com>
  • Loading branch information
terrykong committed Sep 20, 2024
1 parent 067c484 commit b05591a
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
4 changes: 3 additions & 1 deletion rosetta/rosetta/projects/pax/xla_flags/gpt-126m.env
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# XLA environment settings for the gpt-126m configuration.
# Exports XLA_FLAGS and XLA_PYTHON_CLIENT_MEM_FRACTION. Command tracing is
# switched on while these lines run so the resolved values appear in the log,
# and switched off again on the last line.
set -x
# All-reduce combine threshold in bytes (33554432 = 32 * 1024 * 1024).
# Defined once here so the flag below has a single source of truth.
THRESHOLD_BYTES=33554432
# One flag per continuation line. Inside double quotes each backslash-newline
# pair is removed, so XLA_FLAGS becomes a single space-separated line.
# The combine threshold is passed exactly once, through THRESHOLD_BYTES; a
# second hard-coded copy of the flag would go stale if the variable changed.
export XLA_FLAGS="\
--xla_gpu_enable_latency_hiding_scheduler=true \
--xla_allow_excess_precision \
--xla_gpu_enable_highest_priority_async_stream=true \
--xla_gpu_enable_triton_softmax_fusion=false \
--xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
--xla_gpu_graph_level=0 \
--xla_gpu_enable_cudnn_fmha=false \
"
export XLA_PYTHON_CLIENT_MEM_FRACTION=0.8
# THRESHOLD_BYTES is only a helper for building XLA_FLAGS; it is not exported
# and is removed so it does not linger in the calling shell.
unset THRESHOLD_BYTES
set +x
4 changes: 3 additions & 1 deletion rosetta/rosetta/projects/pax/xla_flags/gpt-5b.env
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# XLA environment settings for the gpt-5b configuration.
# Exports XLA_FLAGS and XLA_PYTHON_CLIENT_MEM_FRACTION. Command tracing is
# switched on while these lines run so the resolved values appear in the log,
# and switched off again on the last line.
set -x
# All-reduce combine threshold in bytes (51200 = 50 * 1024).
# Defined once here so the flag below has a single source of truth.
THRESHOLD_BYTES=51200
# One flag per continuation line. Inside double quotes each backslash-newline
# pair is removed, so XLA_FLAGS becomes a single space-separated line.
# The combine threshold is passed exactly once, through THRESHOLD_BYTES; a
# second hard-coded copy of the flag would go stale if the variable changed.
export XLA_FLAGS="\
--xla_gpu_enable_latency_hiding_scheduler=true \
--xla_allow_excess_precision \
--xla_gpu_enable_highest_priority_async_stream=true \
--xla_gpu_enable_triton_softmax_fusion=false \
--xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
--xla_gpu_graph_level=0 \
"
export XLA_PYTHON_CLIENT_MEM_FRACTION=0.8
# THRESHOLD_BYTES is only a helper for building XLA_FLAGS; it is not exported
# and is removed so it does not linger in the calling shell.
unset THRESHOLD_BYTES
set +x

0 comments on commit b05591a

Please sign in to comment.