Skip to content

Commit

Permalink
more variables
Browse files Browse the repository at this point in the history
Signed-off-by: Terry Kong <terryk@nvidia.com>
  • Loading branch information
terrykong committed Sep 20, 2024
1 parent b05591a commit a4c6092
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions rosetta/rosetta/projects/pax/xla_flags/grok-proxy.env
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
# Environment setup: exports XLA_FLAGS and XLA_PYTHON_CLIENT_MEM_FRACTION.
# NOTE(review): this text is a rendered diff rather than the file itself. The
# combine-threshold flags with literal values are presumably the removed lines
# and the ${...} forms their replacements; the "Expand All" hunk marker stands
# in for collapsed lines that are not shown here.
# Command tracing is switched on here and switched off by the final `set +x`.
# NOTE(review): if this file is sourced, that `set +x` also turns off tracing
# the caller may have enabled itself -- confirm this is intended.
set -x
# Combine thresholds in bytes (3221225472 = 3 GiB, 402653184 = 384 MiB).
# They are interpolated into XLA_FLAGS below and unset again after the exports
# so they do not linger in the shell.
ALL_REDUCE_THRESHOLD_BYTES=3221225472
ALL_GATHER_THRESHOLD_BYTES=3221225472
REDUCE_SCATTER_THRESHOLD_BYTES=402653184
# XLA flags assembled as one double-quoted string; each backslash-newline is a
# line continuation, so the flags end up separated by the spaces before it.
export XLA_FLAGS="\
--xla_gpu_enable_latency_hiding_scheduler=true \
--xla_allow_excess_precision \
--xla_gpu_enable_highest_priority_async_stream=true \
--xla_gpu_enable_triton_softmax_fusion=false \
--xla_gpu_all_reduce_combine_threshold_bytes=3221225472 \
--xla_gpu_all_reduce_combine_threshold_bytes=${ALL_REDUCE_THRESHOLD_BYTES} \
--xla_gpu_graph_level=0 \
--xla_gpu_all_gather_combine_threshold_bytes=3221225472 \
--xla_gpu_reduce_scatter_combine_threshold_bytes=402653184 \
--xla_gpu_all_gather_combine_threshold_bytes=${ALL_GATHER_THRESHOLD_BYTES} \
--xla_gpu_reduce_scatter_combine_threshold_bytes=${REDUCE_SCATTER_THRESHOLD_BYTES} \
--xla_gpu_enable_pipelined_all_gather=true \
--xla_gpu_enable_pipelined_reduce_scatter=true \
--xla_gpu_enable_pipelined_all_reduce=true \
Expand All @@ -18,4 +21,5 @@ export XLA_FLAGS="\
--xla_gpu_enable_custom_fusions=true
"
export XLA_PYTHON_CLIENT_MEM_FRACTION=0.9
unset ALL_REDUCE_THRESHOLD_BYTES ALL_GATHER_THRESHOLD_BYTES REDUCE_SCATTER_THRESHOLD_BYTES
set +x

0 comments on commit a4c6092

Please sign in to comment.