Skip to content

Commit

Permalink
improve logs in run-script
Browse files Browse the repository at this point in the history
  • Loading branch information
gargnitingoogle committed Aug 22, 2024
1 parent 5658c8e commit ffd4c52
Showing 1 changed file with 24 additions and 20 deletions.
44 changes: 24 additions & 20 deletions perfmetrics/scripts/testing_on_gke/examples/run-gke-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ function printRunParameters() {

# Install dependencies.
function installDependencies() {
printf "\nInstalling dependencies ...\n\n"
# Refresh software repositories.
sudo apt-get update
# Get some common software dependencies.
Expand Down Expand Up @@ -402,25 +403,26 @@ function ensureRequiredNodePoolConfiguration() {

# Switches the cluster's managed GcsFuseCsiDriver add-on off when a custom CSI
# driver is requested, and on otherwise.
# Globals (read): use_custom_csi_driver, cluster_name, zone
# Outputs:        one progress banner on stdout, followed by gcloud's output
# Returns:        exit status of the gcloud invocation
function enableManagedCsiDriverIfNeeded() {
  local addon_state action
  # use_custom_csi_driver is executed as a command, so it is deliberately left
  # unquoted; it is expected to hold `true` or `false`.
  if ${use_custom_csi_driver}; then
    addon_state=DISABLED
    action=Disabling
  else
    addon_state=ENABLED
    action=Enabling
  fi

  # Print the banner exactly once (the older echo duplicate is dropped).
  printf '\n%s csi add-on ...\n\n' "${action}"
  gcloud -q container clusters update "${cluster_name}" \
    --update-addons "GcsFuseCsiDriver=${addon_state}" \
    --location="${zone}"
}

# Fetches credentials for the target cluster via gcloud and then prints the
# kubectl context that is now current.
# Globals (read): cluster_name, zone
# Outputs:        one progress banner on stdout, then gcloud/kubectl output
# Returns:        exit status of `kubectl config current-context`
function activateCluster() {
  # Print the banner exactly once (the older echo duplicate is dropped).
  printf '\nConfiguring cluster credentials ...\n\n'
  # Quoted so that an empty or space-containing value cannot shift arguments.
  gcloud container clusters get-credentials "${cluster_name}" --location="${zone}"
  kubectl config current-context
}

function createKubernetesServiceAccountForCluster() {
printf "\nCreating namespace and KSA ...\n\n"
log="$(kubectl create namespace ${appnamespace} 2>&1)" || [[ "$log" == *"already exists"* ]]
log="$(kubectl create serviceaccount ${ksa} --namespace ${appnamespace} 2>&1)" || [[ "$log" == *"already exists"* ]]
kubectl config set-context --current --namespace=${appnamespace}
Expand All @@ -429,6 +431,7 @@ function createKubernetesServiceAccountForCluster() {
}

function addGCSAccessPermissions() {
printf "\nAdding storage.ObjectUser permissions to all the relevant buckets to ksa=${ksa} in namespace=${appnamespace} ...\n\n"
test -f "${workload_config}"
grep -wh '\"bucket\"' "${workload_config}" | cut -d: -f2 | cut -d, -f1 | cut -d \" -f2 | sort | uniq | grep -v ' ' | while read workload_bucket; do
gcloud storage buckets add-iam-policy-binding gs://${workload_bucket} \
Expand All @@ -438,7 +441,7 @@ function addGCSAccessPermissions() {
}

function ensureGcsfuseCode() {
echo "Ensuring we have gcsfuse code ..."
printf "\nEnsuring we have gcsfuse code ...\n\n\n"
# clone gcsfuse code if needed
if ! test -d "${gcsfuse_src_dir}"; then
cd $(dirname "${gcsfuse_src_dir}") && git clone ${gcsfuse_github_path} && cd "${gcsfuse_src_dir}" && git switch ${gcsfuse_branch} && cd - && cd -
Expand All @@ -448,7 +451,7 @@ function ensureGcsfuseCode() {
}

function ensureGcsFuseCsiDriverCode() {
echo "Ensuring we have gcs-fuse-csi-driver code ..."
printf "\nEnsuring we have gcs-fuse-csi-driver code ...\n\n"
# clone csi-driver code if needed
if ! test -d "${csi_src_dir}"; then
cd $(dirname "${csi_src_dir}") && git clone ${csi_driver_github_path} && cd "${csi_src_dir}" && git switch ${csi_driver_branch} && cd - && cd -
Expand All @@ -460,13 +463,14 @@ function createCustomCsiDriverIfNeeded() {
echo "Disabling managed CSI driver ..."
gcloud -q container clusters update ${cluster_name} --update-addons GcsFuseCsiDriver=DISABLED --location=${zone}

echo "Building custom CSI driver ..."
printf "\nCreating a new custom CSI driver ...\n\n"

# Create a bucket for storing custom-csi driver.
test -n "${package_bucket}" || export package_bucket=${USER/google/}-gcsfuse-binary-package
(gcloud storage buckets list | grep -wqo ${package_bucket}) || (region=$(echo ${zone} | rev | cut -d- -f2- | rev) && gcloud storage buckets create gs://${package_bucket} --location=${region})

# Build a new gcsfuse binary
printf "\nBuilding a new GCSFuse binary from ${gcsfuse_src_dir} ...\n\n"
cd "${gcsfuse_src_dir}"
rm -rfv ./bin ./sbin
GOOS=linux GOARCH=amd64 go run tools/build_gcsfuse/main.go . . v3
Expand All @@ -477,12 +481,13 @@ function createCustomCsiDriverIfNeeded() {
rm -rfv "${gcsfuse_src_dir}"/bin "${gcsfuse_src_dir}"/sbin
cd -

echo "Installing custom CSI driver ..."
# Build and install csi driver
ensureGcsFuseCsiDriverCode
cd "${csi_src_dir}"
make uninstall || true
printf "\nBuilding a new custom CSI driver using the above GCSFuse binary ...\n\n"
make build-image-and-push-multi-arch REGISTRY=gcr.io/${project_id}/${USER} GCSFUSE_PATH=gs://${package_bucket}
printf "\nInstalling the new custom CSI driver built above ...\n\n"
make install PROJECT=${project_id} REGISTRY=gcr.io/${project_id}/${USER}
cd -
else
Expand All @@ -493,24 +498,24 @@ function createCustomCsiDriverIfNeeded() {
}

# Uninstalls every helm release that `helm ls` reports for the app namespace.
# Globals (read): appnamespace
# Outputs:        one progress banner on stdout, then helm's output
# Returns:        status of the final pipeline stage (the uninstall loop)
function deleteAllHelmCharts() {
  # Print the banner exactly once (the older echo duplicate is dropped).
  printf '\nDeleting all existing helm charts ...\n\n'

  local helmchart
  # Take the first column of each row and skip the header row.
  helm ls --namespace="${appnamespace}" \
    | tr -s '\t' ' ' \
    | cut -d' ' -f1 \
    | tail -n +2 \
    | while IFS= read -r helmchart; do
        # A blank row would otherwise call `helm uninstall` with no name.
        [[ -n "${helmchart}" ]] || continue
        helm uninstall "${helmchart}" --namespace="${appnamespace}"
      done
}

# Removes all helm releases (via deleteAllHelmCharts) and then force-deletes
# every pod that `kubectl get pods` lists in the app namespace.
# Globals (read): appnamespace
# Outputs:        one progress banner on stdout, then kubectl's output
# Returns:        status of the final pipeline stage; individual delete
#                 failures are tolerated on purpose (`|| true`)
function deleteAllPods() {
  deleteAllHelmCharts

  # Print the banner exactly once (the older echo duplicate is dropped).
  printf '\nDeleting all existing pods ...\n\n'

  local podname
  # Skip the header row and keep only the first (name) column.
  kubectl get pods --namespace="${appnamespace}" \
    | tail -n +2 \
    | cut -d' ' -f1 \
    | while IFS= read -r podname; do
        # A blank row would otherwise produce the invalid target "pods/".
        [[ -n "${podname}" ]] || continue
        kubectl delete "pods/${podname}" --namespace="${appnamespace}" \
          --grace-period=0 --force || true
      done
}

# Runs examples/fio/run_tests.py from inside the fio examples directory.
# Globals (read): gke_testing_dir, workload_config, instance_id, machine_type
# Outputs:        one progress banner, run_tests.py output, and the directory
#                 name printed by `cd -`
# Returns:        non-zero if the directory cannot be entered or left;
#                 otherwise the exit status of run_tests.py
function deployAllFioHelmCharts() {
  # Print the banner exactly once (the older echo duplicate is dropped).
  printf '\nDeploying all fio helm charts ...\n\n'

  cd "${gke_testing_dir}/examples/fio" || return

  # Capture the status instead of short-circuiting, so the caller's working
  # directory is restored even when the python script fails.
  local rc=0
  python3 ./run_tests.py \
    --workload-config "${workload_config}" \
    --instance-id "${instance_id}" \
    --machine-type="${machine_type}" || rc=$?

  cd - || return
  return "${rc}"
}

# Runs examples/dlio/run_tests.py from inside the dlio examples directory.
# Globals (read): gke_testing_dir, workload_config, instance_id, machine_type
# Outputs:        one progress banner, run_tests.py output, and the directory
#                 name printed by `cd -`
# Returns:        non-zero if the directory cannot be entered or left;
#                 otherwise the exit status of run_tests.py
function deployAllDlioHelmCharts() {
  # Print the banner exactly once (the older echo duplicate is dropped).
  printf '\nDeploying all dlio helm charts ...\n\n'

  cd "${gke_testing_dir}/examples/dlio" || return

  # Capture the status instead of short-circuiting, so the caller's working
  # directory is restored even when the python script fails.
  local rc=0
  python3 ./run_tests.py \
    --workload-config "${workload_config}" \
    --instance-id "${instance_id}" \
    --machine-type="${machine_type}" || rc=$?

  cd - || return
  return "${rc}"
}

Expand All @@ -526,7 +531,7 @@ function listAllHelmCharts() {
}

function waitTillAllPodsComplete() {
echo "Scanning and waiting till all pods either complete or fail ..."
printf "\nScanning and waiting till all pods either complete or fail ...\n\n"
while true; do
printf "Checking pods status at "$(date +%s)":\n-----------------------------------\n"
podslist="$(kubectl get pods --namespace=${appnamespace} -o wide)"
Expand All @@ -545,16 +550,15 @@ function waitTillAllPodsComplete() {
break
else
printf "${num_noncompleted_pods} pod(s) is/are still pending or running. Will check again in "${pod_wait_time_in_seconds}" seconds. Sleeping for now.\n"
printf "You can take a break too if you want and kill this command. To refer back to this run for fetching/parsing outputs, use environment variables "
printf " \" only_parse=true output_dir=${output_dir} cluster_name=${cluster_name} workload_config=${workload_config} instance_id=${instance_id} \""
printf " with ${0}\n"
printf "To ssh to any specific pod, use the following command: \n"
printf "\nYou can take a break too if you want and kill this command. To continue this run, for fetching and parsing outputs, run: \n"
printf " only_parse=true instance_id=${instance_id} project_id=${project_id} project_number=${project_number} zone=${zone} machine_type=${machine_type} appnamespace=${appnamespace} gcsfuse_src_dir=\"${gcsfuse_src_dir}\" csi_src_dir=\"${csi_src_dir}\" pod_wait_time_in_seconds=${pod_wait_time_in_seconds} workload_config=\"${workload_config}\" cluster_name=${cluster_name} output_dir=\"${output_dir}\" $0 \n"
printf "\nTo ssh to any specific pod, use the following command: \n"
printf " gcloud container clusters get-credentials ${cluster_name} --location=${zone}\n"
printf " kubectl config set-context --current --namespace=${appnamespace}\n"
printf " kubectl exec -it pods/<podname> [-c {gke-gcsfuse-sidecar|fio-tester|dlio-tester}] --namespace=${appnamespace} -- /bin/bash \n"
printf "To view cpu/memory usage of different pods/containers: \n"
printf "\nTo view cpu/memory usage of different pods/containers: \n"
printf " kubectl top pod [<podname>] --namespace=${appnamespace} [--containers] \n"
printf "To view the latest status of the pods: \n"
printf "\nTo view the latest status of the pods: \n"
printf " kubectl get pods --namespace=${appnamespace} -o wide [--watch] \n"

printf "\n\n\n"
Expand All @@ -565,14 +569,14 @@ function waitTillAllPodsComplete() {
}

# Runs examples/fio/parse_logs.py from inside the fio examples directory,
# asking it to write "${output_dir}/fio/output.csv".
# Globals (read): gke_testing_dir, project_number, workload_config,
#                 instance_id, output_dir, project_id, cluster_name,
#                 appnamespace
# Outputs:        one progress banner, parse_logs.py output, and the directory
#                 name printed by `cd -`
# Returns:        non-zero if the directory cannot be entered or left;
#                 otherwise the exit status of parse_logs.py
function fetchAndParseFioOutputs() {
  # Print the banner exactly once (the older echo duplicate is dropped).
  printf '\nFetching and parsing fio outputs ...\n\n'

  # Never run the parser from the wrong directory if cd fails.
  cd "${gke_testing_dir}/examples/fio" || return

  # Keep the parser's status so it is not masked by the trailing `cd -`.
  # NOTE(review): the rest of the script appears to rely on `set -e`; returning
  # the status keeps a parser failure fatal there as well — confirm.
  local rc=0
  python3 parse_logs.py \
    --project-number="${project_number}" \
    --workload-config "${workload_config}" \
    --instance-id "${instance_id}" \
    --output-file "${output_dir}/fio/output.csv" \
    --project-id="${project_id}" \
    --cluster-name="${cluster_name}" \
    --namespace-name="${appnamespace}" || rc=$?

  cd - || return
  return "${rc}"
}

function fetchAndParseDlioOutputs() {
echo "Fetching and parsing dlio outputs ..."
printf "\nFetching and parsing dlio outputs ...\n\n"
cd "${gke_testing_dir}"/examples/dlio
python3 parse_logs.py --project-number=${project_number} --workload-config "${workload_config}" --instance-id ${instance_id} --output-file "${output_dir}"/dlio/output.csv --project-id=${project_id} --cluster-name=${cluster_name} --namespace-name=${appnamespace}
cd -
Expand Down

0 comments on commit ffd4c52

Please sign in to comment.