# Source PR #6: feat: Enhance CI/CD (Build, E2E, Composite Action) with 1ES Migration and Phi-3 Integration

name: Build and Push Preset Models 1ES

# One in-flight run per PR branch (or per run id when there is no head_ref,
# i.e. push / manual dispatch); a newer run cancels the older one in the group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
# Triggers: PRs targeting main and pushes to main that touch the preset
# inference code or the supported-models list, plus manual dispatch.
on:
  pull_request:
    branches:
      - main
    paths:
      - 'presets/inference/**'
      - 'presets/models/supported_models.yaml'
  push:
    branches:
      - main
    paths:
      - 'presets/inference/**'
      - 'presets/models/supported_models.yaml'
  workflow_dispatch:
    inputs:
      # When true, the workflow sets FORCE_RUN_ALL=true for the
      # model-selection script.
      force-run-all:
        type: boolean
        default: false
        description: "Run all models for build"
# Workflow-wide environment, exported to every step of every job.
env:
  GO_VERSION: "1.22"
  # On pull_request events this is the PR's head branch name, which is chosen
  # by the PR author. Read it in scripts as "$BRANCH_NAME"; do not splice the
  # expression directly into shell text.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  WEIGHTS_DIR: "/mnt/storage"
# GITHUB_TOKEN scopes granted to all jobs: permission to request an OIDC
# ID token and write access to repository contents.
permissions:
  id-token: write
  contents: write
jobs:
  # Job 1: work out which preset models need to be (re)built and publish the
  # result as a JSON matrix plus an "is it empty" flag for the build job.
  determine-models:
    runs-on: ubuntu-latest
    environment: preset-env
    outputs:
      matrix: ${{ steps.affected_models.outputs.matrix }}
      is_matrix_empty: ${{ steps.check_matrix_empty.outputs.is_empty }}
    steps:
      - name: Checkout
        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29  # v4.1.6
        with:
          submodules: true
          fetch-depth: 0

      # The expression evaluates to the literal true/false, so it is safe to
      # interpolate directly.
      - name: Set FORCE_RUN_ALL Flag
        run: |
          echo "FORCE_RUN_ALL=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all == 'true' }}" >> "$GITHUB_ENV"

      # This script should output a JSON array of model names
      - name: Determine Affected Models
        id: affected_models
        env:
          # Passed through the environment instead of being pasted into the
          # script text: the branch name is attacker-controlled on PRs.
          PR_BRANCH: ${{ env.BRANCH_NAME }}
          FORCE_RUN_ALL: ${{ env.FORCE_RUN_ALL }}
        run: python3 .github/workflows/kind-cluster/determine_models.py

      - name: Print Determined Models
        env:
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          echo "Output from affected_models: $MATRIX"

      - name: Check if Matrix is Empty
        id: check_matrix_empty
        env:
          # JSON contains double quotes; reading it from a variable avoids
          # breaking the shell quoting of the comparison below.
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          if [ -z "$MATRIX" ] || [ "$MATRIX" = "[]" ]; then
            echo "is_empty=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_empty=false" >> "$GITHUB_OUTPUT"
          fi

  # Job 2: for every model in the matrix, prepare the self-hosted 1ES agent
  # (tooling, storage, Kind cluster) and launch the image build unless the
  # image tag is already present in the registry.
  build-models:
    needs: determine-models
    if: needs.determine-models.outputs.is_matrix_empty == 'false'
    runs-on: ["self-hosted", "1ES.Pool=1es-aks-kaito-image-build-agent-pool-ubuntu"]
    environment: preset-env
    strategy:
      fail-fast: false
      matrix:
        model: ${{ fromJson(needs.determine-models.outputs.matrix) }}
      max-parallel: 3
    steps:
      - name: Checkout
        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29  # v4.1.6
        with:
          submodules: true
          fetch-depth: 0

      - name: Install Azure CLI latest
        run: |
          if ! command -v az > /dev/null; then
            echo "Azure CLI not found. Installing..."
            curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
          else
            echo "Azure CLI already installed."
          fi

      # Fails fast when the data disk that holds model weights is not mounted.
      - name: List All Disks
        run: |
          lsblk
          if ! mountpoint -q /mnt/storage; then
            echo "Failed to find required storage partition /mnt/storage"
            exit 1
          fi

      - name: Check Available Disk Space
        run: df -h

      - name: Ensure Python is Installed
        run: |
          if ! command -v python3 &> /dev/null; then
            sudo apt-get update
            sudo apt-get install -y python3
          fi

      - name: Ensure git and git LFS is Installed
        run: |
          if ! command -v git &> /dev/null; then
            sudo apt-get update
            sudo apt-get install -y git
          fi
          if ! git lfs --version &> /dev/null; then
            sudo apt-get update
            curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
            sudo apt-get install -y git-lfs
            git lfs install
          fi

      - name: Ensure kubectl is Installed
        run: |
          if ! command -v kubectl &> /dev/null; then
            curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
            sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
            kubectl version --client --output=yaml
          fi

      - name: Ensure Docker is Installed
        run: |
          # Add Docker's official GPG key:
          sudo apt-get update
          sudo apt-get install ca-certificates curl -y
          sudo install -m 0755 -d /etc/apt/keyrings
          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
          sudo chmod a+r /etc/apt/keyrings/docker.asc
          # Add the repository to Apt sources:
          echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo "$VERSION_CODENAME") stable" \
            | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
          sudo apt-get update
          sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -y
          # User Permissions
          sudo usermod -aG docker "$(whoami)"
          sudo systemctl restart docker
          # Group membership from usermod does not apply to the already
          # running session, so the socket is opened up for this job.
          sudo chmod 666 /var/run/docker.sock

      - name: Test Docker Access
        run: |
          ls -l /var/run/docker.sock
          docker run hello-world

      - name: Ensure Kind is Installed
        run: |
          if ! command -v kind &> /dev/null; then
            case "$(uname -m)" in
              x86_64) KIND_ARCH=amd64 ;;
              aarch64) KIND_ARCH=arm64 ;;
              *)
                # Previously this fell through and failed on a missing ./kind.
                echo "Unsupported architecture for kind: $(uname -m)" >&2
                exit 1
                ;;
            esac
            curl -Lo ./kind "https://kind.sigs.k8s.io/dl/v0.23.0/kind-linux-${KIND_ARCH}"
            chmod +x ./kind
            sudo mv ./kind /usr/local/bin/kind
          fi

      - name: Authenticate to ACR
        env:
          ACR_NAME: ${{ secrets.PROD_1ES_ACR_USERNAME }}
        run: |
          az login --identity
          az acr login -n "$ACR_NAME"

      # Exposes the registry name/username to later steps. The password is
      # deliberately NOT copied into a step output; the step that needs it
      # reads it straight from secrets.
      - name: Get ACR Info
        id: acr_info
        env:
          ACR_USERNAME: ${{ secrets.PROD_1ES_ACR_USERNAME }}
        run: |
          echo "ACR_NAME=$ACR_USERNAME" >> "$GITHUB_OUTPUT"
          echo "ACR_USERNAME=$ACR_USERNAME" >> "$GITHUB_OUTPUT"

      # Sets IMAGE_EXISTS=true|false depending on whether the model's tag is
      # already present in the staging repository of the registry.
      - name: Check if Image exists in Test ACR
        id: check_test_image
        env:
          ACR_NAME: ${{ steps.acr_info.outputs.ACR_USERNAME }}
          IMAGE_NAME: staging/aks/kaito/kaito-${{ matrix.model.name }}
          TAG: ${{ matrix.model.tag }}
        run: |
          # Use '|| true' to prevent script from exiting with an error if the repository is not found
          TAGS=$(az acr repository show-tags -n "$ACR_NAME" --repository "$IMAGE_NAME" --output tsv || true)
          if [[ -z "$TAGS" ]]; then
            echo "Image $IMAGE_NAME:$TAG or repository not found in $ACR_NAME."
            echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
          # -x: whole-line match, -F: literal string, so dots in a tag such as
          # 0.0.1 are not treated as regex wildcards.
          elif grep -qxF -- "$TAG" <<< "$TAGS"; then
            echo "IMAGE_EXISTS=true" >> "$GITHUB_OUTPUT"
          else
            echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
            echo "Image $IMAGE_NAME:$TAG not found in $ACR_NAME."
          fi

      - name: Set Permissions for DataDrive /mnt/storage
        run: |
          sudo chown -R "$(whoami)" /mnt/storage
          sudo chmod -R 775 /mnt/storage
          sudo apt-get update
          sudo apt-get install acl -y
          sudo setfacl -dm u::rwx /mnt/storage  # Default user permissions
          sudo setfacl -dm g::rwx /mnt/storage  # Default group permissions
          sudo setfacl -dm o::rx /mnt/storage   # Default others permissions

      - name: Check and Create Kind Cluster
        run: |
          if ! kind get clusters | grep -q kind; then
            echo "Creating directory for etcd storage"
            sudo mkdir -p /mnt/storage/etcd
            echo "Creating Kind cluster using kind-1es.yaml"
            kind create cluster --config .github/workflows/kind-cluster/kind-1es.yaml
          else
            echo "Kind cluster already exists"
          fi

      # All inputs reach the script as environment variables. Nothing derived
      # from the branch name, the matrix or secrets is interpolated into the
      # shell command itself.
      - name: Launch Python Script to Kickoff Build Jobs
        if: steps.check_test_image.outputs.IMAGE_EXISTS == 'false'
        id: launch_script
        env:
          PR_BRANCH: ${{ env.BRANCH_NAME }}
          ACR_NAME: ${{ steps.acr_info.outputs.ACR_NAME }}
          ACR_USERNAME: ${{ steps.acr_info.outputs.ACR_USERNAME }}
          ACR_PASSWORD: ${{ secrets.PROD_1ES_ACR_PASSWORD }}
          MODEL_NAME: ${{ matrix.model.name }}
          MODEL_TYPE: ${{ matrix.model.type }}
          MODEL_VERSION: ${{ matrix.model.version }}
          MODEL_RUNTIME: ${{ matrix.model.runtime }}
          MODEL_TAG: ${{ matrix.model.tag }}
          WEIGHTS_DIR: ${{ env.WEIGHTS_DIR }}
        run: python3 .github/workflows/kind-cluster/main.py

      # Check the exit status of the Python script
      - name: Check Python Script Status
        if: ${{ always() }}
        env:
          IMAGE_EXISTS: ${{ steps.check_test_image.outputs.IMAGE_EXISTS }}
          LAUNCH_OUTCOME: ${{ steps.launch_script.outcome }}
        run: |
          if [[ "$IMAGE_EXISTS" == "true" ]]; then
            echo "Image already exists; skipping the status step."
          elif [[ "$LAUNCH_OUTCOME" != "success" ]]; then
            echo "Python script failed to execute successfully."
            exit 1  # Fail the job due to script failure
          else
            echo "Python script executed successfully."
          fi

      # Cleanup Resources
      - name: Cleanup
        if: ${{ always() }}
        env:
          IMAGE_EXISTS: ${{ steps.check_test_image.outputs.IMAGE_EXISTS }}
          MODEL_NAME: ${{ matrix.model.name }}
        run: |
          # Only delete build jobs when a build was attempted for this model.
          if [[ "$IMAGE_EXISTS" == "false" ]]; then
            kubectl get job --no-headers -o custom-columns=":metadata.name" \
              | grep "^docker-build-job-${MODEL_NAME}-[0-9]" \
              | xargs -r kubectl delete job
          fi