Skip to content

Commit

Permalink
modified the formatting of train.py args
Browse files Browse the repository at this point in the history
Signed-off-by: Md Fahim Faysal Khan <mdfahimfaysa@nvidia.com>
  • Loading branch information
kocchop committed Oct 16, 2024
1 parent 2faa912 commit 6ed686d
Showing 1 changed file with 50 additions and 16 deletions.
66 changes: 50 additions & 16 deletions .github/container/test-maxtext.sh
Original file line number Diff line number Diff line change
Expand Up @@ -245,24 +245,58 @@ RUN_NAME="logdir" ## the RUN_NAME cannot be changed
# Assemble RUN_SETTINGS: the train.py entry point, the base config file and
# the key=value overrides that follow them. The string is later expanded
# after "python3" (see the echo below), so every override has to end up as
# its own whitespace-separated word.
#
# Formatting rules for the assignments below:
#   * Every continuation line ends in " \" (space, then backslash). Inside
#     double quotes the backslash-newline pair is deleted, so without the
#     space two neighbouring overrides would be fused into one word.
#   * Continuation lines are intentionally flush-left: leading indentation
#     inside the double quotes would become part of the value.
#   * ${ADDITIONAL_ARGS} stays last in both branches.
if [ -z "$DECODER_BLOCK" ]; then
  # DECODER_BLOCK is empty: select the model via model_name.
  # This part could be used to test different models out of the box.
  RUN_SETTINGS="MaxText/train.py \
MaxText/configs/base.yml \
run_name=${RUN_NAME} \
model_name=${MODEL} \
steps=${STEPS} \
per_device_batch_size=${BATCH_PER_GPU} \
remat_policy=${REMAT_POLICY} \
enable_checkpointing=false \
base_output_directory=${OUTPUT} \
dataset_path=local \
dataset_type=synthetic \
hardware=${HARDWARE} \
enable_goodput_recording=false \
monitor_goodput=false \
dcn_fsdp_parallelism=${dcn_FSDP} \
ici_fsdp_parallelism=${ici_FSDP} \
ici_data_parallelism=${ici_DP} \
dcn_data_parallelism=${dcn_DP} \
ici_tensor_parallelism=${ici_TP} \
dcn_tensor_parallelism=1 \
${ADDITIONAL_ARGS}"
else
  # DECODER_BLOCK is set: pass it through together with explicit model
  # dimensions instead of a model_name.
  # This is essentially used for the CI run.
  RUN_SETTINGS="MaxText/train.py \
MaxText/configs/base.yml \
run_name=${RUN_NAME} \
decoder_block=${DECODER_BLOCK} \
steps=${STEPS} \
per_device_batch_size=${BATCH_PER_GPU} \
base_emb_dim=2560 \
base_mlp_dim=8192 \
remat_policy=${REMAT_POLICY} \
attention=${ATTN_TYPE} \
base_num_query_heads=8 \
base_num_kv_heads=8 \
base_num_decoder_layers=8 \
head_dim=128 \
logits_via_embedding=true \
enable_checkpointing=false \
base_output_directory=${OUTPUT} \
dataset_path=local \
dataset_type=synthetic \
hardware=${HARDWARE} \
enable_goodput_recording=false \
monitor_goodput=false \
dcn_fsdp_parallelism=${dcn_FSDP} \
ici_fsdp_parallelism=${ici_FSDP} \
ici_data_parallelism=${ici_DP} \
dcn_data_parallelism=${dcn_DP} \
ici_tensor_parallelism=${ici_TP} \
dcn_tensor_parallelism=1 \
${ADDITIONAL_ARGS}"
fi

# Show the exact command line that RUN_SETTINGS expands to.
echo "Command: python3 $RUN_SETTINGS"
Expand Down

0 comments on commit 6ed686d

Please sign in to comment.