diff --git a/.github/workflows/velox_be.yml b/.github/workflows/velox_be.yml index a9cb2cea6f71..76ae5ffdcbf6 100644 --- a/.github/workflows/velox_be.yml +++ b/.github/workflows/velox_be.yml @@ -48,69 +48,69 @@ jobs: - uses: actions/checkout@v4 - name: Setup docker container run: | - OS_IMAGE=ubuntu:20.04 ./tools/gluten-te/ubuntu/gha/gha-checkout/checkout.sh + OS_IMAGE=ubuntu OS_VERSION=20.04 ./tools/gluten-te/github_action/checkout.sh - name: Build Gluten velox third party run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh ' + ./tools/gluten-te/github_action/exec.sh ' cd /opt/gluten/ep/build-velox/src && \ ./get_velox.sh --velox_home=/opt/velox && \ ./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON --build_tests=ON' - name: Build Gluten CPP library run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh ' + ./tools/gluten-te/github_action/exec.sh ' cd /opt/gluten/cpp && \ ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --build_tests=ON --build_examples=ON --build_benchmarks=ON' - name: Run CPP unit test run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh 'cd /opt/gluten/cpp/build && \ + ./tools/gluten-te/github_action/exec.sh 'cd /opt/gluten/cpp/build && \ ctest -V' - name: Run HBM CPP unit test run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh 'cd /opt/gluten/cpp/build && \ + ./tools/gluten-te/github_action/exec.sh 'cd /opt/gluten/cpp/build && \ cmake -DBUILD_TESTS=ON -DENABLE_HBM=ON .. && \ cmake --build . 
--target hbw_allocator_test -- -j && \ ctest -V -R TestHbw' - name: Build and run unit test for Spark 3.2.2 (other tests) run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh ' + ./tools/gluten-te/github_action/exec.sh ' cd /opt/gluten && \ mvn clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox -Prss -DargLine="-Dspark.test.home=/opt/spark322" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \ mvn test -Pspark-3.2 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest' # Cpp micro benchmarks will use generated files from unit test in backends-velox module. - name: Run micro benchmarks run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh 'cd /opt/gluten/cpp/build/velox/benchmarks && \ + ./tools/gluten-te/github_action/exec.sh 'cd /opt/gluten/cpp/build/velox/benchmarks && \ ./generic_benchmark --with-shuffle --threads 1 --iterations 1' - name: Exit docker container if: ${{ always() }} run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/clean.sh || true + ./tools/gluten-te/github_action/clean.sh || true ubuntu2204-test: runs-on: velox-self-hosted steps: - uses: actions/checkout@v4 - name: Setup docker container run: | - OS_IMAGE=ubuntu:22.04 ./tools/gluten-te/ubuntu/gha/gha-checkout/checkout.sh + OS_IMAGE=ubuntu OS_VERSION=22.04 ./tools/gluten-te/github_action/checkout.sh - name: Build Gluten velox third party run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh ' + ./tools/gluten-te/github_action/exec.sh ' cd /opt/gluten/ep/build-velox/src && \ ./get_velox.sh --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF && \ ./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF' - name: Build Gluten CPP library run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh ' + ./tools/gluten-te/github_action/exec.sh ' cd /opt/gluten/cpp && \ 
./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF' - name: Build for Spark 3.2.2 run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh ' + ./tools/gluten-te/github_action/exec.sh ' cd /opt/gluten && \ mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -DskipTests' - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2 run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ + ./tools/gluten-te/github_action/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.2 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -118,7 +118,7 @@ jobs: --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=40g -s=10.0 --threads=32 --iterations=1' - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2 with Celeborn run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh \ + ./tools/gluten-te/github_action/exec.sh \ 'mv /opt/apache-celeborn-0.3.0-incubating-bin/conf/celeborn-env.sh.template /opt/apache-celeborn-0.3.0-incubating-bin/conf/celeborn-env.sh && \ echo -e "CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g" > /opt/apache-celeborn-0.3.0-incubating-bin/conf/celeborn-env.sh && \ echo -e "celeborn.worker.commitFiles.threads 128\nceleborn.worker.sortPartition.threads 64" > /opt/apache-celeborn-0.3.0-incubating-bin/conf/celeborn-defaults.conf \ @@ -132,12 +132,12 @@ jobs: && bash /opt/apache-celeborn-0.3.0-incubating-bin/sbin/stop-master.sh' - name: Build for Spark 3.3.1 run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh ' + ./tools/gluten-te/github_action/exec.sh ' cd /opt/gluten && \ mvn clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -DskipTests' - name: TPC-H SF1.0 && TPC-DS SF10.0 
Parquet local spark3.3 run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ + ./tools/gluten-te/github_action/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.3 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -145,12 +145,12 @@ jobs: --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=30g -s=10.0 --threads=32 --iterations=1' - name: Build for Spark 3.4.1 run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh ' + ./tools/gluten-te/github_action/exec.sh ' cd /opt/gluten && \ mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -DskipTests' - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.4 run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ + ./tools/gluten-te/github_action/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.4 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -159,17 +159,17 @@ jobs: - name: Exit docker container if: ${{ always() }} run: | - ./tools/gluten-te/ubuntu/gha/gha-checkout/clean.sh || true + ./tools/gluten-te/github_action/clean.sh || true centos8-test: runs-on: velox-self-hosted steps: - uses: actions/checkout@v4 - name: Setup docker container run: | - OS_IMAGE=centos:8 ./tools/gluten-te/centos/gha/gha-checkout/checkout.sh + OS_IMAGE=centos OS_VERSION=8 ./tools/gluten-te/github_action/checkout.sh - name: Build Gluten velox third party run: | - ./tools/gluten-te/centos/gha/gha-checkout/exec.sh ' + ./tools/gluten-te/github_action/exec.sh ' source /env.sh && \ sudo yum -y install patch && \ cd /opt/gluten/ep/build-velox/src && \ @@ -177,18 +177,18 @@ jobs: 
./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF' - name: Build Gluten CPP library run: | - ./tools/gluten-te/centos/gha/gha-checkout/exec.sh ' + ./tools/gluten-te/github_action/exec.sh ' source /env.sh && \ cd /opt/gluten/cpp && \ ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF' - name: Build for Spark 3.2.2 run: | - ./tools/gluten-te/centos/gha/gha-checkout/exec.sh ' + ./tools/gluten-te/github_action/exec.sh ' cd /opt/gluten && \ mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -DskipTests' - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 run: | - ./tools/gluten-te/centos/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ + ./tools/gluten-te/github_action/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.2 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -196,7 +196,7 @@ jobs: --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --threads=32 --iterations=1' - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 random kill tasks run: | - ./tools/gluten-te/centos/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ + ./tools/gluten-te/github_action/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.2 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 --skip-data-gen --random-kill-tasks \ @@ -205,4 +205,143 @@ jobs: - name: Exit docker container if: ${{ always() }} run: | - ./tools/gluten-te/centos/gha/gha-checkout/clean.sh || true + ./tools/gluten-te/github_action/clean.sh || true + 
centos7-test: + runs-on: velox-self-hosted + steps: + - uses: actions/checkout@v4 + - name: Setup docker container + run: | + OS_IMAGE=centos OS_VERSION=7 ./tools/gluten-te/github_action/checkout.sh + - name: Build Gluten velox third party + run: | + ./tools/gluten-te/github_action/exec.sh ' + yum -y install epel-release centos-release-scl patch sudo && \ + cd /opt/gluten/ep/build-velox/src && \ + ./get_velox.sh --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF && \ + ./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF --enable_hdfs=ON' + - name: Build Gluten CPP library + run: | + ./tools/gluten-te/github_action/exec.sh ' + cd /opt/gluten/cpp && \ + source /opt/rh/devtoolset-9/enable && \ + ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF' + - name: Build for Spark 3.2.2 + run: | + ./tools/gluten-te/github_action/exec.sh ' + cd /opt/gluten && \ + mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -DskipTests' + - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 + run: | + ./tools/gluten-te/github_action/exec.sh 'cd /opt/gluten/tools/gluten-it && \ + mvn clean install -Pspark-3.2 \ + && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ + && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --threads=32 --iterations=1' + - name: TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation off + run: | + ./tools/gluten-te/github_action/exec.sh 'cd /opt/gluten/tools/gluten-it && \ + mvn clean install -Pspark-3.2 \ + && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ + --local --preset=velox 
--benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ + --skip-data-gen -m=OffHeapExecutionMemory \ + -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ + -d=OFFHEAP_SIZE:5g,spark.memory.offHeap.size=5g \ + -d=OFFHEAP_SIZE:3g,spark.memory.offHeap.size=3g \ + -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ + -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5' + - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation on + run: | + ./tools/gluten-te/github_action/exec.sh 'cd /opt/gluten/tools/gluten-it && \ + mvn clean install -Pspark-3.2 \ + && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ + --skip-data-gen -m=OffHeapExecutionMemory \ + -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ + -d=OFFHEAP_SIZE:5g,spark.memory.offHeap.size=5g \ + -d=OFFHEAP_SIZE:3g,spark.memory.offHeap.size=3g \ + -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ + -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5' || true + - name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory + run: | + ./tools/gluten-te/github_action/exec.sh 'cd /opt/gluten/tools/gluten-it && \ + GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ + --skip-data-gen -m=OffHeapExecutionMemory \ + -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ + -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ + -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ + 
-d=PARTIAL_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ + -d=PARTIAL_MODE:CACHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ + -d=PARTIAL_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0' + - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory # The case currently causes crash with "free: invalid size". 
+ run: | + ./tools/gluten-te/github_action/exec.sh 'cd /opt/gluten/tools/gluten-it && \ + GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q97 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ + --skip-data-gen -m=OffHeapExecutionMemory \ + -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ + -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ + -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ + -d=OFFHEAP_SIZE:1g,spark.memory.offHeap.size=1g' || true + - name: Exit docker container + if: ${{ always() }} + run: | + ./tools/gluten-te/github_action/clean.sh || true + static-build-test: + runs-on: velox-self-hosted + steps: + - uses: actions/checkout@v4 + - name: Setup docker container + run: | + docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ + -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID -e NUM_THREADS=30 --detach 10.0.2.4:5000/gluten-dev/centos:7 \ + bash -c 'cd /opt/gluten && sleep 14400' + - name: Build Gluten CPP library + run: | + docker exec -i static-build-test-$GITHUB_RUN_ID bash -c ' + source /env.sh && \ + sudo yum -y install patch && \ + cd /opt/gluten && \ + sudo -E ./dev/vcpkg/setup-build-depends.sh && \ + source ./dev/vcpkg/env.sh && \ + ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=ON --build_benchmarks=ON --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=OFF' + - name: Build for Spark 3.2.2 + run: | + docker exec static-build-test-$GITHUB_RUN_ID bash -c ' + cd /opt/gluten && \ + mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -DskipTests && \ + cd /opt/gluten/tools/gluten-it && \ + mvn clean install -Pspark-3.2' + - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 (centos 8) + run: | + docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ + -v 
$PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 10.0.2.4:5000/gluten-dev/centos:8 \ + bash -c 'cd /opt/gluten/tools/gluten-it \ + && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ + && GLUTEN_IT_JVM_ARGS=-Xmx10G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=20g -s=1.0 --threads=32 --iterations=1' + - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 (ubuntu 20.04) + run: | + docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ + -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 10.0.2.4:5000/gluten-dev/ubuntu:20.04 \ + bash -c 'cd /opt/gluten/tools/gluten-it \ + && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ + && GLUTEN_IT_JVM_ARGS=-Xmx10G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=20g -s=1.0 --threads=32 --iterations=1' + - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 (ubuntu 22.04) + run: | + docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ + -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 10.0.2.4:5000/gluten-dev/ubuntu:22.04 \ + bash -c 'cd /opt/gluten/tools/gluten-it \ + && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ + && GLUTEN_IT_JVM_ARGS=-Xmx10G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=ds --error-on-memleak 
--off-heap-size=20g -s=1.0 --threads=32 --iterations=1' + - name: Exit docker container + if: ${{ always() }} + run: | + docker stop static-build-test-$GITHUB_RUN_ID || true \ No newline at end of file diff --git a/tools/gluten-te/centos/buildenv.sh b/tools/gluten-te/centos/buildenv.sh index bd51192cf1f1..ab9c087e98e7 100755 --- a/tools/gluten-te/centos/buildenv.sh +++ b/tools/gluten-te/centos/buildenv.sh @@ -43,9 +43,6 @@ TIMEZONE=${TIMEZONE:-$DEFAULT_TIMEZONE} # Set operating system OS_IMAGE=${OS_IMAGE:-$DEFAULT_OS_IMAGE} -# Set os version -OS_VERSION=${OS_VERSION:-$DEFAULT_OS_VERSION} - # Build will result in this image DOCKER_TARGET_IMAGE_BUILDENV=${DOCKER_TARGET_IMAGE_BUILDENV:-$DEFAULT_DOCKER_TARGET_IMAGE_BUILDENV} @@ -65,7 +62,7 @@ BUILDENV_DOCKER_BUILD_ARGS= BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --ulimit nofile=8192:8192" BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg BUILDKIT_INLINE_CACHE=1" BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg TIMEZONE=$TIMEZONE" -BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg OS_IMAGE=$OS_IMAGE --build-arg OS_VERSION=$OS_VERSION" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg OS_IMAGE=$OS_IMAGE" BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg MAVEN_MIRROR_URL=$MAVEN_MIRROR_URL" BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg HTTP_PROXY_HOST=$HTTP_PROXY_HOST" BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg HTTP_PROXY_PORT=$HTTP_PROXY_PORT" diff --git a/tools/gluten-te/centos/defaults.conf b/tools/gluten-te/centos/defaults.conf index 147ec4a0b75c..e26a5928c7d6 100755 --- a/tools/gluten-te/centos/defaults.conf +++ b/tools/gluten-te/centos/defaults.conf @@ -38,10 +38,10 @@ DEFAULT_HTTP_PROXY_PORT=913 DEFAULT_USE_ALI_MAVEN_MIRROR=ON # Base operator system image used in build scripts. 
-DEFAULT_OS_IMAGE=centos:8 +DEFAULT_OS_IMAGE= # Version ID of os image -DEFAULT_OS_VERSION=8 +DEFAULT_OS_VERSION= # Set timezone name DEFAULT_TIMEZONE=Asia/Shanghai diff --git a/tools/gluten-te/github_action/build.sh b/tools/gluten-te/github_action/build.sh new file mode 100644 index 000000000000..e70da87cf70e --- /dev/null +++ b/tools/gluten-te/github_action/build.sh @@ -0,0 +1,90 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +BASEDIR=$(dirname $0) + +source "$BASEDIR/buildenv.sh" + +## Debug build flags + +# Create debug build +DEBUG_BUILD=${DEBUG_BUILD:-$DEFAULT_DEBUG_BUILD} + +if [ -n "$JDK_DEBUG_BUILD" ] +then + echo "Do not set JDK_DEBUG_BUILD manually!" +fi + +if [ -n "$GLUTEN_DEBUG_BUILD" ] +then + echo "Do not set GLUTEN_DEBUG_BUILD manually!" 
+fi + +if [ "$DEBUG_BUILD" == "ON" ] +then + JDK_DEBUG_BUILD=OFF + GLUTEN_DEBUG_BUILD=ON +else + JDK_DEBUG_BUILD=OFF + GLUTEN_DEBUG_BUILD=OFF +fi + +# The branches used to prepare dependencies +CACHE_GLUTEN_REPO=${CACHE_GLUTEN_REPO:-$DEFAULT_GLUTEN_REPO} +CACHE_GLUTEN_BRANCH=${CACHE_GLUTEN_BRANCH:-$DEFAULT_GLUTEN_BRANCH} + +# Backend type +BUILD_BACKEND_TYPE=${BUILD_BACKEND_TYPE:-$DEFAULT_BUILD_BACKEND_TYPE} + +# Build will result in this image +DOCKER_TARGET_IMAGE_BUILD=${DOCKER_TARGET_IMAGE_BUILD:-$DEFAULT_DOCKER_TARGET_IMAGE_BUILD} + +DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILD-$OS_IMAGE:$OS_VERSION" + +## + +BUILD_DOCKER_BUILD_ARGS= + +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --ulimit nofile=8192:8192" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg BUILDKIT_INLINE_CACHE=1" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE=$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg JDK_DEBUG_BUILD=$JDK_DEBUG_BUILD" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg GLUTEN_DEBUG_BUILD=$GLUTEN_DEBUG_BUILD" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg CACHE_GLUTEN_REPO=$CACHE_GLUTEN_REPO" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg CACHE_GLUTEN_BRANCH=$CACHE_GLUTEN_BRANCH" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg BUILD_BACKEND_TYPE=$BUILD_BACKEND_TYPE" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS -f $BASEDIR/dockerfile-build" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --target gluten-build" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS -t $DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE" + +if [ -n "$DOCKER_CACHE_REGISTRY" ] +then + BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --cache-from $DOCKER_CACHE_REGISTRY/$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE" +fi + +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS $BASEDIR" + +docker 
build $BUILD_DOCKER_BUILD_ARGS + +if [ -n "$DOCKER_PUSH_REGISTRY" ] +then + docker tag "$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE" "$DOCKER_PUSH_REGISTRY/$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE" + docker push "$DOCKER_PUSH_REGISTRY/$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE" +fi + +# EOF diff --git a/tools/gluten-te/github_action/buildenv.sh b/tools/gluten-te/github_action/buildenv.sh new file mode 100644 index 000000000000..b0c5b823bf60 --- /dev/null +++ b/tools/gluten-te/github_action/buildenv.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -ex + +BASEDIR=$(dirname $0) + +source "$BASEDIR/defaults.conf" + +# Enable buildkit +export DOCKER_BUILDKIT=1 +export BUILDKIT_PROGRESS=plain + +# Docker registry used to pull pre-built images to speed-up builds +DOCKER_CACHE_REGISTRY=${DOCKER_CACHE_REGISTRY:-$DEFAULT_DOCKER_CACHE_REGISTRY} + +# Docker registry to push pre-built images +DOCKER_PUSH_REGISTRY=${DOCKER_PUSH_REGISTRY:-$DEFAULT_DOCKER_PUSH_REGISTRY} + +# HTTP proxy +HTTP_PROXY_HOST=${HTTP_PROXY_HOST:-$DEFAULT_HTTP_PROXY_HOST} +HTTP_PROXY_PORT=${HTTP_PROXY_PORT:-$DEFAULT_HTTP_PROXY_PORT} + +# If on, use maven mirror settings for PRC's network environment +USE_ALI_MAVEN_MIRROR=${USE_ALI_MAVEN_MIRROR:-$DEFAULT_USE_ALI_MAVEN_MIRROR} + +# Set timezone name +TIMEZONE=${TIMEZONE:-$DEFAULT_TIMEZONE} + +# Set operating system +OS_IMAGE=${OS_IMAGE:-$DEFAULT_OS_IMAGE} +OS_VERSION=${OS_VERSION:-$DEFAULT_OS_VERSION} + +# Build will result in this image +DOCKER_TARGET_IMAGE_BUILDENV=${DOCKER_TARGET_IMAGE_BUILDENV:-$DEFAULT_DOCKER_TARGET_IMAGE_BUILDENV} + +DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILDENV-$OS_IMAGE:$OS_VERSION" + +if [ "$USE_ALI_MAVEN_MIRROR" == "ON" ] +then + MAVEN_MIRROR_URL='https://maven.aliyun.com/repository/public' +else + MAVEN_MIRROR_URL= +fi + +## + +BUILDENV_DOCKER_BUILD_ARGS= + +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --ulimit nofile=8192:8192" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg BUILDKIT_INLINE_CACHE=1" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg TIMEZONE=$TIMEZONE" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg OS_IMAGE=$OS_IMAGE --build-arg OS_VERSION=$OS_VERSION" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg MAVEN_MIRROR_URL=$MAVEN_MIRROR_URL" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg HTTP_PROXY_HOST=$HTTP_PROXY_HOST" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg 
HTTP_PROXY_PORT=$HTTP_PROXY_PORT" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -f $BASEDIR/dockerfile-buildenv-$OS_IMAGE" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --target gluten-buildenv" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -t $DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE" + +if [ -n "$DOCKER_CACHE_REGISTRY" ] +then + BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --cache-from $DOCKER_CACHE_REGISTRY/$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE" +fi + +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS $BASEDIR" + +docker build $BUILDENV_DOCKER_BUILD_ARGS + +if [ -n "$DOCKER_PUSH_REGISTRY" ] +then + docker tag "$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE" "$DOCKER_PUSH_REGISTRY/$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE" + docker push "$DOCKER_PUSH_REGISTRY/$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE" +fi + +# EOF diff --git a/tools/gluten-te/github_action/cbash-build.sh b/tools/gluten-te/github_action/cbash-build.sh new file mode 100644 index 000000000000..b47f6635c839 --- /dev/null +++ b/tools/gluten-te/github_action/cbash-build.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -ex + +BASEDIR=$(dirname $0) + +source "$BASEDIR/build.sh" + +# Non-interactive during docker run +NON_INTERACTIVE=${NON_INTERACTIVE:-$DEFAULT_NON_INTERACTIVE} + +# Do not remove stopped docker container +PRESERVE_CONTAINER=${PRESERVE_CONTAINER:-$DEFAULT_PRESERVE_CONTAINER} + +# Docker options +EXTRA_DOCKER_OPTIONS=${EXTRA_DOCKER_OPTIONS:-$DEFAULT_EXTRA_DOCKER_OPTIONS} + +# Whether to mount Maven cache +MOUNT_MAVEN_CACHE=${MOUNT_MAVEN_CACHE:-$DEFAULT_MOUNT_MAVEN_CACHE} + +CBASH_DOCKER_RUN_ARGS= +if [ "$NON_INTERACTIVE" != "ON" ] +then + CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS -it" +fi +if [ "$PRESERVE_CONTAINER" != "ON" ] +then + CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --rm" +fi +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --init" +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --privileged" +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --ulimit nofile=65536:65536" +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --ulimit core=-1" +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --security-opt seccomp=unconfined" +if [ "$MOUNT_MAVEN_CACHE" == "ON" ] +then + CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS -v $HOME/.m2/repository:/root/.m2/repository" +fi +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS -v $HOME/.ccache:/root/.ccache" +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS $EXTRA_DOCKER_OPTIONS" + +CBASH_BASH_ARGS="$*" +BASH_ARGS="$CBASH_BASH_ARGS" + +docker run $CBASH_DOCKER_RUN_ARGS $DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE bash -c "cd /opt/gluten && $BASH_ARGS" diff --git a/tools/gluten-te/github_action/checkout.sh b/tools/gluten-te/github_action/checkout.sh new file mode 100644 index 000000000000..62788ec3fa86 --- /dev/null +++ b/tools/gluten-te/github_action/checkout.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +BASEDIR=$(readlink -f $(dirname $0)) + +source "$BASEDIR/defaults.conf" + +if [ -z "$GITHUB_RUN_ID" ] +then + echo "Unable to parse GITHUB_RUN_ID." + exit 1 +fi + +export EXTRA_DOCKER_OPTIONS="$EXTRA_DOCKER_OPTIONS --name gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID --detach -v $BASEDIR/scripts:/opt/scripts" +export NON_INTERACTIVE=ON + +$BASEDIR/cbash-build.sh 'sleep 14400' + +# The target branches +TARGET_GLUTEN_REPO=${TARGET_GLUTEN_REPO:-$DEFAULT_GLUTEN_REPO} +FALLBACK_GLUTEN_BRANCH=${FALLBACK_GLUTEN_BRANCH:-$DEFAULT_GLUTEN_BRANCH} +FALLBACK_GLUTEN_COMMIT="$(git ls-remote $TARGET_GLUTEN_REPO $FALLBACK_GLUTEN_BRANCH | awk '{print $1;}')" + +TARGET_GLUTEN_COMMIT="${GITHUB_SHA:-$FALLBACK_GLUTEN_COMMIT}" + +$BASEDIR/exec.sh "/opt/scripts/init.sh $TARGET_GLUTEN_REPO $TARGET_GLUTEN_COMMIT" diff --git a/tools/gluten-te/github_action/clean.sh b/tools/gluten-te/github_action/clean.sh new file mode 100644 index 000000000000..29a97351f517 --- /dev/null +++ b/tools/gluten-te/github_action/clean.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +if [ -z "$GITHUB_RUN_ID" ] +then + echo "Unable to parse GITHUB_RUN_ID." + exit 1 +fi + +docker stop "gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID" || true diff --git a/tools/gluten-te/github_action/defaults.conf b/tools/gluten-te/github_action/defaults.conf new file mode 100644 index 000000000000..9c225402be8f --- /dev/null +++ b/tools/gluten-te/github_action/defaults.conf @@ -0,0 +1,99 @@ +#!/bin/bash + +set -ex + +## For basic scripts + +# Non-interactive during docker run +DEFAULT_NON_INTERACTIVE=OFF + +# Do not remove stopped docker container +DEFAULT_PRESERVE_CONTAINER=OFF + +# The Gluten repo and branch used in the build +# for 10.0.0.25 test at PHILO-HE/gluten +DEFAULT_GLUTEN_REPO=https://github.com/PHILO-HE/gluten.git +DEFAULT_GLUTEN_BRANCH=main + +# Create debug build +DEFAULT_DEBUG_BUILD=OFF + +# Backend type (velox) +DEFAULT_BUILD_BACKEND_TYPE=velox + +# HTTP proxy +# If http proxy is http://child-prc.intel.com:913, write it as +# DEFAULT_HTTP_PROXY_HOST=child-prc.intel.com +# DEFAULT_HTTP_PROXY_PORT=913 +# do not write http:// +# DEFAULT_HTTP_PROXY_HOST= +# DEFAULT_HTTP_PROXY_PORT= +# for 10.0.0.25 test +DEFAULT_HTTP_PROXY_HOST=child-prc.intel.com +DEFAULT_HTTP_PROXY_PORT=913 + +# If on, use maven mirror settings for PRC's network environment +# DEFAULT_USE_ALI_MAVEN_MIRROR=OFF +# for 10.0.0.25 test +DEFAULT_USE_ALI_MAVEN_MIRROR=ON + +# Base operating system image used in build scripts.
+# DEFAULT_OS_IMAGE=ubuntu:20.04 +DEFAULT_OS_IMAGE= + +# Version ID of OS image +DEFAULT_OS_VERSION= + +# Set timezone name +DEFAULT_TIMEZONE=Asia/Shanghai + +# Build will result in this image +DEFAULT_DOCKER_TARGET_IMAGE_BUILDENV=gluten-te/gluten-buildenv + +# Build will result in this image +DEFAULT_DOCKER_TARGET_IMAGE_BUILD=gluten-te/gluten-build + +# Docker registry used to pull pre-built images to speed-up builds +# DEFAULT_DOCKER_CACHE_REGISTRY= +# for 10.0.0.25 test +DEFAULT_DOCKER_CACHE_REGISTRY=10.0.0.25:5000 + +# Docker registry to push pre-built images +# DEFAULT_DOCKER_PUSH_REGISTRY= +# for 10.0.0.25 test +DEFAULT_DOCKER_PUSH_REGISTRY=10.0.0.25:5000 + +## For tpc.sh + +# Java options +DEFAULT_EXTRA_JAVA_OPTIONS="-Xmx2G" + +# Run GDB. +DEFAULT_RUN_GDB=OFF + +# Run GDB server. +DEFAULT_RUN_GDB_SERVER=OFF + +# GDB server bind port +DEFAULT_GDB_SERVER_PORT=2345 + +# Run JVM jdwp server. +DEFAULT_RUN_JDWP_SERVER=OFF + +# JVM jdwp bind port +DEFAULT_JDWP_SERVER_PORT=5005 + +# Docker options +DEFAULT_EXTRA_DOCKER_OPTIONS="--network bridge" + +# Build will result in this image +DEFAULT_DOCKER_TARGET_IMAGE_TPC=gluten-te/gluten-tpc +DEFAULT_DOCKER_TARGET_IMAGE_TPC_GDB=gluten-te/gluten-tpc-gdb +DEFAULT_DOCKER_TARGET_IMAGE_TPC_GDB_SERVER=gluten-te/gluten-tpc-gdb-server + +## For cbash.sh + +# Whether to mount Maven cache +DEFAULT_MOUNT_MAVEN_CACHE=OFF + +# EOF diff --git a/tools/gluten-te/github_action/dockerfile-build b/tools/gluten-te/github_action/dockerfile-build new file mode 100644 index 000000000000..4bcfbb90f656 --- /dev/null +++ b/tools/gluten-te/github_action/dockerfile-build @@ -0,0 +1,83 @@ +ARG DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE + +FROM $DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE AS gluten-build +LABEL maintainer="Hongze Zhang" + +# Whether debug build is enabled +ARG JDK_DEBUG_BUILD +ARG GLUTEN_DEBUG_BUILD +RUN echo "JDK debug build is [$JDK_DEBUG_BUILD]!" +RUN echo "Gluten debug build is [$GLUTEN_DEBUG_BUILD]!"
+ +# If JDK debug is on, replace the distro JDK/Maven with a fastdebug JDK 8 and Maven 3.6.3 +RUN if [ "$JDK_DEBUG_BUILD" == "ON" ]; \ + then \ + apt-get update; \ + DEBIAN_FRONTEND=noninteractive apt-get remove -y openjdk-8-jdk; \ + DEBIAN_FRONTEND=noninteractive apt-get remove -y maven; \ + mkdir -p /opt/jdk/ \ + && mkdir -p /opt/maven/ \ + && cd /opt/jdk/ \ + && wget https://builds.shipilev.net/openjdk-jdk8/openjdk-jdk8-linux-x86_64-server-fastdebug-gcc8-glibc2.28.tar.xz \ + && tar -xvf openjdk-jdk8-linux-x86_64-server-fastdebug-gcc8-glibc2.28.tar.xz \ + && rm -f openjdk-jdk8-linux-x86_64-server-fastdebug-gcc8-glibc2.28.tar.xz \ + && cd /opt/maven/ \ + && wget https://archive.apache.org/dist/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz \ + && tar -xvf apache-maven-3.6.3-bin.tar.gz \ + && rm -f apache-maven-3.6.3-bin.tar.gz \ + && cp -rs /opt/jdk/j2sdk-image/bin/* /usr/local/bin/ \ + && cp -rs /opt/maven/apache-maven-3.6.3/bin/mvn /usr/local/bin/ \ + && echo "JAVA_HOME=/opt/jdk/j2sdk-image" > ~/.mavenrc; \ + fi + +# These branches are mainly for pre-downloading dependencies to speed-up builds. +# Thus it should not be required to change these values every time when the build branch +# is changed.
+ARG CACHE_GLUTEN_REPO +ARG CACHE_GLUTEN_BRANCH + +RUN test -n "$CACHE_GLUTEN_REPO" || (echo "CACHE_GLUTEN_REPO not set" && false) +RUN test -n "$CACHE_GLUTEN_BRANCH" || (echo "CACHE_GLUTEN_BRANCH not set" && false) + +RUN cd /opt/ \ + && git clone "$CACHE_GLUTEN_REPO" -b "$CACHE_GLUTEN_BRANCH" gluten + +# Set ccache size +RUN ccache -M 128G +RUN ccache -s + +# Default Gluten Maven build options (empty as of now) +ENV GLUTEN_MAVEN_OPTIONS= +#RUN set-login-env "GLUTEN_MAVEN_OPTIONS=" + +ARG BUILD_BACKEND_TYPE + +RUN test -n "$BUILD_BACKEND_TYPE" || (echo "BUILD_BACKEND_TYPE not set" && false) + +RUN if [ "$BUILD_BACKEND_TYPE" == "velox" ]; \ + then \ + if [ "$GLUTEN_DEBUG_BUILD" == "ON" ]; then GLUTEN_BUILD_TYPE="Debug"; else GLUTEN_BUILD_TYPE="Release"; fi; \ + DEPS_INSTALL_SCRIPT="source /env.sh && bash /opt/gluten/dev/builddeps-veloxbe.sh \ + --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON \ + --build_type=$GLUTEN_BUILD_TYPE --enable_ep_cache=ON"; \ + EXTRA_MAVEN_OPTIONS="-Pspark-3.2 \ + -Pbackends-velox \ + -Prss \ + -DskipTests \ + -Dscalastyle.skip=true \ + -Dcheckstyle.skip=true"; \ + else \ + echo "Unrecognizable backend type: $BUILD_BACKEND_TYPE"; \ + exit 1; \ + fi \ + && echo $EXTRA_MAVEN_OPTIONS > ~/.gluten-mvn-options \ + && echo $DEPS_INSTALL_SCRIPT > ~/.gluten-deps-install-script + +# Prebuild Gluten using the options and deps script saved by the previous step +RUN EXTRA_MAVEN_OPTIONS=$(cat ~/.gluten-mvn-options) \ + DEPS_INSTALL_SCRIPT=$(cat ~/.gluten-deps-install-script) \ + && cd /opt/gluten \ + && bash -c "$DEPS_INSTALL_SCRIPT" \ + && bash -c "mvn clean install $GLUTEN_MAVEN_OPTIONS $EXTRA_MAVEN_OPTIONS" + +# EOF diff --git a/tools/gluten-te/github_action/dockerfile-buildenv-centos b/tools/gluten-te/github_action/dockerfile-buildenv-centos new file mode 100644 index 000000000000..b0428dbf13ce --- /dev/null +++ b/tools/gluten-te/github_action/dockerfile-buildenv-centos @@ -0,0 +1,75 @@ +ARG OS_IMAGE + +FROM $OS_IMAGE AS gluten-buildenv +LABEL maintainer="Hongze Zhang" + +SHELL ["/bin/bash", "-c"] + +# REQUIRED PROXIES:
WGET, GIT, MAVEN (also Maven mirror) +ARG HTTP_PROXY_HOST +ARG HTTP_PROXY_PORT + +ENV http_proxy=${HTTP_PROXY_HOST:+"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT"} +ENV https_proxy=${HTTP_PROXY_HOST:+"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT"} + +ARG MAVEN_MIRROR_URL + +RUN if [ -n "$MAVEN_MIRROR_URL" ]; \ + then \ + MAVEN_SETTINGS_TEMPLATE="mavenmirrorcentralMavenMirror{{MAVEN_MIRROR_URL}}httpproxy{{MAVEN_PROXY_ENABLE}}http{{MAVEN_PROXY_HOST}}{{MAVEN_PROXY_PORT}}httpsproxy{{MAVEN_PROXY_ENABLE}}https{{MAVEN_PROXY_HOST}}{{MAVEN_PROXY_PORT}}"; \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s@{{MAVEN_MIRROR_URL}}@$MAVEN_MIRROR_URL@g"); \ + else \ + MAVEN_SETTINGS_TEMPLATE="httpproxy{{MAVEN_PROXY_ENABLE}}http{{MAVEN_PROXY_HOST}}{{MAVEN_PROXY_PORT}}httpsproxy{{MAVEN_PROXY_ENABLE}}https{{MAVEN_PROXY_HOST}}{{MAVEN_PROXY_PORT}}"; \ + fi \ + && if [ -n "$HTTP_PROXY_HOST" ]; \ + then \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_ENABLE}}/true/g"); \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_HOST}}/$HTTP_PROXY_HOST/g"); \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_PORT}}/$HTTP_PROXY_PORT/g"); \ + else \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_ENABLE}}/false/g"); \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_HOST}}/localhost/g"); \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_PORT}}/8888/g"); \ + fi \ + && MAVEN_SETTINGS=$MAVEN_SETTINGS_TEMPLATE \ + && mkdir -p /root/.m2/ \ + && echo $MAVEN_SETTINGS > /root/.m2/settings.xml + +# Print environment information (ulimits, env vars, generated Maven settings) +RUN ulimit -a +RUN env +RUN cat /root/.m2/settings.xml + +# Install deps from repo (runs the version-specific deps script, then cleans yum/dnf caches) +ARG OS_VERSION +COPY centos-$OS_VERSION-deps.sh /tmp/deps.sh +RUN /tmp/deps.sh \ + && rm /tmp/deps.sh \ + && yum clean all \ + && dnf clean all \ + && rm -rf /var/cache/yum + +# Install deps from
url +ENV PATH="$PATH:/usr/lib/jvm/java-1.8.0-openjdk/bin" +RUN wget https://archive.apache.org/dist/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz && \ + tar -xvf apache-maven-3.8.8-bin.tar.gz && \ + rm apache-maven-3.8.8-bin.tar.gz && \ + mv apache-maven-3.8.8 /usr/lib/maven + +# # Build & install Spark 3.2.2 +# RUN cd /opt && git clone --depth 1 --branch v3.2.2 https://github.com/apache/spark.git spark322 +# RUN cd /opt/spark322 && ./build/mvn -Pyarn -DskipTests clean install + +# # Build & install Spark 3.3.1 +# RUN cd /opt && git clone --depth 1 --branch v3.3.1 https://github.com/apache/spark.git spark331 +# RUN cd /opt/spark331 && ./build/mvn -Pyarn -DskipTests clean install + +ENV PATH="$PATH:/usr/lib/maven/bin" +ENV LD_LIBRARY_PATH=/usr/local/lib64:/usr/local/lib:/usr/lib64:/usr/lib:/lib64:/lib + +# Velox setup scripts require sudo +RUN yum -y install sudo \ + && yum clean all +RUN echo '%wheel ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers + +COPY scripts/env.sh /env.sh diff --git a/tools/gluten-te/github_action/dockerfile-buildenv-ubuntu b/tools/gluten-te/github_action/dockerfile-buildenv-ubuntu new file mode 100644 index 000000000000..fb88d170a8e5 --- /dev/null +++ b/tools/gluten-te/github_action/dockerfile-buildenv-ubuntu @@ -0,0 +1,119 @@ +ARG OS_IMAGE + +FROM $OS_IMAGE AS gluten-buildenv +LABEL maintainer="Hongze Zhang" + +SHELL ["/bin/bash", "-l", "-c"] +ENTRYPOINT ["/bin/bash", "-l", "-c"] +CMD ["/bin/bash"] + +# Add script for adding environment variables for login-shell (e.g.
a shell via ssh) +COPY scripts/set-login-env.sh /usr/local/sbin/set-login-env + +# REQUIRED PROXIES: APT, WGET, GIT, MAVEN (also Maven mirror) +ARG HTTP_PROXY_HOST +ARG HTTP_PROXY_PORT + +# Sometimes ENV a=b won't work when the shell is not docker-default, so we +# set the variables both ways (ENV and set-login-env) +ENV http_proxy=${HTTP_PROXY_HOST:+"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT"} +ENV https_proxy=${HTTP_PROXY_HOST:+"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT"} +ENV no_proxy=localhost,127.0.0.1,127.0.0.0/8,172.16.0.0/12,192.168.0.0/16 +ENV HTTP_PROXY=${HTTP_PROXY_HOST:+"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT"} +ENV HTTPS_PROXY=${HTTP_PROXY_HOST:+"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT"} +ENV NO_PROXY=localhost,127.0.0.1,127.0.0.0/8,172.16.0.0/12,192.168.0.0/16 +RUN set-login-env "http_proxy=${HTTP_PROXY_HOST:+http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT}" +RUN set-login-env "https_proxy=${HTTP_PROXY_HOST:+http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT}" +RUN set-login-env "no_proxy=localhost,127.0.0.1,127.0.0.0/8,172.16.0.0/12,192.168.0.0/16" +RUN set-login-env "HTTP_PROXY=${HTTP_PROXY_HOST:+http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT}" +RUN set-login-env "HTTPS_PROXY=${HTTP_PROXY_HOST:+http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT}" +RUN set-login-env "NO_PROXY=localhost,127.0.0.1,127.0.0.0/8,172.16.0.0/12,192.168.0.0/16" + +RUN if [ -n "$HTTP_PROXY_HOST" ]; then echo "Acquire::http::Proxy \"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT\";" >> /etc/apt/apt.conf; fi +RUN if [ -n "$HTTP_PROXY_HOST" ]; then echo "Acquire::https::Proxy \"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT\";" >> /etc/apt/apt.conf; fi + +ARG MAVEN_MIRROR_URL + +RUN if [ -n "$MAVEN_MIRROR_URL" ]; \ + then \ + MAVEN_SETTINGS_TEMPLATE="mavenmirrorcentralMavenMirror{{MAVEN_MIRROR_URL}}httpproxy{{MAVEN_PROXY_ENABLE}}http{{MAVEN_PROXY_HOST}}{{MAVEN_PROXY_PORT}}httpsproxy{{MAVEN_PROXY_ENABLE}}https{{MAVEN_PROXY_HOST}}{{MAVEN_PROXY_PORT}}"; \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed
"s@{{MAVEN_MIRROR_URL}}@$MAVEN_MIRROR_URL@g"); \ + else \ + MAVEN_SETTINGS_TEMPLATE="httpproxy{{MAVEN_PROXY_ENABLE}}http{{MAVEN_PROXY_HOST}}{{MAVEN_PROXY_PORT}}httpsproxy{{MAVEN_PROXY_ENABLE}}https{{MAVEN_PROXY_HOST}}{{MAVEN_PROXY_PORT}}"; \ + fi \ + && if [ -n "$HTTP_PROXY_HOST" ]; \ + then \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_ENABLE}}/true/g"); \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_HOST}}/$HTTP_PROXY_HOST/g"); \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_PORT}}/$HTTP_PROXY_PORT/g"); \ + else \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_ENABLE}}/false/g"); \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_HOST}}/localhost/g"); \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_PORT}}/8888/g"); \ + fi \ + && MAVEN_SETTINGS=$MAVEN_SETTINGS_TEMPLATE \ + && mkdir -p /root/.m2/ \ + && echo $MAVEN_SETTINGS > /root/.m2/settings.xml + +# Print environment information (ulimits, env vars, apt proxy config, generated Maven settings) +RUN ulimit -a +RUN env +RUN cat /etc/apt/apt.conf || (echo "Apt proxy not set" && true) +RUN cat /root/.m2/settings.xml + +## APT dependencies + +# Update, then install essentials +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y sudo locales wget tar tzdata git ccache cmake ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev + +# Install HBM dependencies +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y autoconf automake g++ libnuma-dev libtool numactl unzip libdaxctl-dev + +# Install OpenJDK 8 and Maven +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-8-jdk +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y maven + +# Setup SSH server. NOTE(review): root login is enabled with a hard-coded password below; only acceptable for throwaway dev/CI containers +RUN apt-get update && DEBIAN_FRONTEND=noninteractive
apt-get install -y openssh-server +RUN systemctl disable ssh +RUN ssh-keygen -A +RUN mkdir -p /run/sshd +RUN echo 'PermitRootLogin yes' >> /etc/ssh/sshd_config.d/override.conf +RUN echo 'X11Forwarding yes' >> /etc/ssh/sshd_config.d/override.conf +RUN echo 'X11UseLocalhost no' >> /etc/ssh/sshd_config.d/override.conf +RUN echo -e "123\n123" | passwd + +ARG TIMEZONE +RUN test -n "$TIMEZONE" || (echo "TIMEZONE not set" && false) + +RUN TZ=$TIMEZONE \ + && ln -snf /usr/share/zoneinfo/$TZ /etc/localtime \ + && echo $TZ > /etc/timezone \ + && dpkg-reconfigure -f noninteractive tzdata + +# Configure locale (enable and generate en_US.UTF-8) +RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen \ + && locale-gen + +ENV LANG=en_US.UTF-8 +ENV LANGUAGE=en_US:en +ENV LC_ALL=en_US.UTF-8 +RUN set-login-env "LANG=en_US.UTF-8" +RUN set-login-env "LANGUAGE=en_US:en" +RUN set-login-env "LC_ALL=en_US.UTF-8" + +# Build & install Spark 3.2.2 +RUN cd /opt && git clone --depth 1 --branch v3.2.2 https://github.com/apache/spark.git spark322 +RUN cd /opt/spark322 && ./build/mvn -Pyarn -DskipTests clean install + +# Build & install Spark 3.3.1 +RUN cd /opt && git clone --depth 1 --branch v3.3.1 https://github.com/apache/spark.git spark331 +RUN cd /opt/spark331 && ./build/mvn -Pyarn -DskipTests clean install + +# Build & install Spark 3.4.1 +RUN cd /opt && git clone --depth 1 --branch v3.4.1 https://github.com/apache/spark.git spark341 +RUN cd /opt/spark341 && ./build/mvn -Pyarn -DskipTests clean install + +# Prepare entry command (replaces the default CMD declared near the top of this file) +COPY scripts/cmd.sh /root/.cmd.sh +CMD ["/root/.cmd.sh"] diff --git a/tools/gluten-te/github_action/exec.sh b/tools/gluten-te/github_action/exec.sh new file mode 100644 index 000000000000..3d0b9fe5f79c --- /dev/null +++ b/tools/gluten-te/github_action/exec.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +CBASH_BASH_ARGS="$*" +BASH_ARGS="$CBASH_BASH_ARGS" + +if [ -z "$GITHUB_RUN_ID" ] +then + echo "Unable to parse GITHUB_RUN_ID." + exit 1 +fi + +docker exec "gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID" bash -c "cd /opt/gluten && $BASH_ARGS" diff --git a/tools/gluten-te/github_action/scripts/init.sh b/tools/gluten-te/github_action/scripts/init.sh new file mode 100644 index 000000000000..7c70ccf0d406 --- /dev/null +++ b/tools/gluten-te/github_action/scripts/init.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +set -ex + +TARGET_GLUTEN_REPO=${1:?usage: init.sh <repo> <commit>} +TARGET_GLUTEN_COMMIT=${2:?usage: init.sh <repo> <commit>} + +cd /opt/gluten + +git fetch "$TARGET_GLUTEN_REPO" "$TARGET_GLUTEN_COMMIT:build_$TARGET_GLUTEN_COMMIT" +git checkout "build_$TARGET_GLUTEN_COMMIT" diff --git a/tools/gluten-te/ubuntu/defaults.conf b/tools/gluten-te/ubuntu/defaults.conf index 1c9602cc9f9a..cfdf959b586e 100644 --- a/tools/gluten-te/ubuntu/defaults.conf +++ b/tools/gluten-te/ubuntu/defaults.conf @@ -38,7 +38,7 @@ DEFAULT_HTTP_PROXY_PORT=913 DEFAULT_USE_ALI_MAVEN_MIRROR=ON # Base operator system image used in build scripts. -DEFAULT_OS_IMAGE=ubuntu:20.04 +DEFAULT_OS_IMAGE= # Set timezone name DEFAULT_TIMEZONE=Asia/Shanghai