Skip to content

Commit

Permalink
Implement cached build for external projects based on commit hash (oa…
Browse files Browse the repository at this point in the history
  • Loading branch information
zhztheplayer authored Sep 21, 2022
1 parent 7ad8978 commit 789756d
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ jobs:
run: |
git clone -b main https://github.com/zhztheplayer/gluten-it.git gluten-it
cd gluten-it
mvn clean package -Dhttps.proxyHost=child-prc.intel.com -Dhttps.proxyPort=913 -Darrow.version=10.0.0-SNAPSHOT
mvn clean package -Dhttps.proxyHost=child-prc.intel.com -Dhttps.proxyPort=913 -Pgluten-velox -Darrow.version=10.0.0-SNAPSHOT
java -Xmx5G -XX:ErrorFile=/var/log/java/hs_err_pid%p.log -cp target/gluten-it-1.0-SNAPSHOT-jar-with-dependencies.jar io.glutenproject.integration.tpc.Tpc \
--backend-type=velox --benchmark-type=h --fixed-width-as-double --disable-aqe --off-heap-size=10g -s=1.0 --cpus=4 --iterations=1 \
--queries=q1,q2,q3,q4,q5,q6,q7,q8,q9,q10,q11,q12,q13,q14,q15,q16,q17,q18,q19,q20,q21,q22
Expand Down
4 changes: 4 additions & 0 deletions backends-velox/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
<velox.build_velox>${build_velox}</velox.build_velox>
<velox.build_velox_from_source>${build_velox_from_source}</velox.build_velox_from_source>
<velox.compile_velox>${compile_velox}</velox.compile_velox>
<velox.enable_ep_cache>${enable_ep_cache}</velox.enable_ep_cache>
<velox.velox_home>${velox_home}</velox.velox_home>
<velox.velox_build_type>${velox_build_type}</velox.velox_build_type>
<velox.debug_build>${debug_build}</velox.debug_build>
Expand Down Expand Up @@ -274,6 +275,9 @@
<argument>
--compile_velox=${velox.compile_velox}
</argument>
<argument>
--enable_ep_cache=${velox.enable_ep_cache}
</argument>
<argument>${velox.velox_home}</argument>
</arguments>
</configuration>
Expand Down
2 changes: 2 additions & 0 deletions jvm/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
<parquet.deps.scope>provided</parquet.deps.scope>
<jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars
</jars.target.dir>
<jvm.enable_ep_cache>${enable_ep_cache}</jvm.enable_ep_cache>
<jvm.build_arrow>${build_arrow}</jvm.build_arrow>
<jvm.cpp_tests>${cpp_tests}</jvm.cpp_tests>
<jvm.static_arrow>${static_arrow}</jvm.static_arrow>
Expand Down Expand Up @@ -394,6 +395,7 @@
<argument>--static_arrow=${jvm.static_arrow}</argument>
<argument>--arrow_root=${jvm.arrow_root}</argument>
<argument>--backend_type=${jvm.backend_type}</argument>
<argument>--enable_ep_cache=${jvm.enable_ep_cache}</argument>
</arguments>
</configuration>
</execution>
Expand Down
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
<build_arrow>OFF</build_arrow>
<arrow_root>${project.basedir}/../tools/build/arrow_install</arrow_root>
<arrow.bfs.dir>${project.basedir}/../tools/build/arrow_install</arrow.bfs.dir>
<enable_ep_cache>ON</enable_ep_cache>
<static_arrow>OFF</static_arrow>
<build_protobuf>ON</build_protobuf>
<build_jemalloc>OFF</build_jemalloc>
Expand Down
53 changes: 50 additions & 3 deletions tools/build_arrow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ STATIC_ARROW=OFF
ARROW_ROOT=/usr/local
# option gazelle_cpp
BACKEND_TYPE=velox
ENABLE_EP_CACHE=OFF

for arg in "$@"
do
Expand All @@ -33,6 +34,10 @@ do
BACKEND_TYPE=("${arg#*=}")
shift # Remove argument name from processing
;;
--enable_ep_cache=*)
ENABLE_EP_CACHE=("${arg#*=}")
shift # Remove argument name from processing
;;
*)
OTHER_ARGUMENTS+=("$1")
shift # Remove generic argument from processing
Expand All @@ -42,7 +47,7 @@ done

function compile_velox_arrow {
echo "Compile velox arrow branch"
git clone https://github.com/oap-project/arrow.git -b backend_velox_main $ARROW_SOURCE_DIR
git clone $ARROW_REPO -b $ARROW_BRANCH $ARROW_SOURCE_DIR
pushd $ARROW_SOURCE_DIR

mkdir -p java/build
Expand Down Expand Up @@ -103,7 +108,7 @@ function compile_velox_arrow {

function compile_gazelle_arrow {
echo "Compile gazelle arrow branch"
git clone https://github.com/oap-project/arrow.git -b arrow-8.0.0-gluten-20220427a $ARROW_SOURCE_DIR
git clone $ARROW_REPO -b $ARROW_BRANCH $ARROW_SOURCE_DIR
pushd $ARROW_SOURCE_DIR

mkdir -p java/c/build
Expand Down Expand Up @@ -165,14 +170,52 @@ echo $CURRENT_DIR

cd ${CURRENT_DIR}

# Arrow fork that is cloned and built; the branch to use depends on the
# backend selected via --backend_type.
ARROW_REPO=https://github.com/oap-project/arrow.git

# The selector is quoted so that an empty or whitespace-containing value falls
# through to the error arm instead of producing a `[` syntax error.
case "$BACKEND_TYPE" in
  velox)
    ARROW_BRANCH=backend_velox_main
    ;;
  gazelle_cpp)
    ARROW_BRANCH=arrow-8.0.0-gluten-20220427a
    ;;
  *)
    echo "Unrecognizable backend type: $BACKEND_TYPE."
    exit 1
    ;;
esac

if [ $BUILD_ARROW == "ON" ]; then

# Resolve the commit the Arrow branch currently points at on the remote. It is
# compared below with the commit stored in arrow-commit.cache.
TARGET_BUILD_COMMIT="$(git ls-remote "$ARROW_REPO" "$ARROW_BRANCH" | awk '{print $1;}')"

# Cache check: only when caching is enabled and a cache file exists.
if [ "$ENABLE_EP_CACHE" == "ON" ] && [ -e "${CURRENT_DIR}/arrow-commit.cache" ]; then
  LAST_BUILT_COMMIT="$(cat "${CURRENT_DIR}/arrow-commit.cache")"
  # The expansion must be quoted: unquoted, an empty value collapses the test
  # to `[ -n ]`, which is always true, so an empty cache file would be treated
  # as a recorded commit.
  if [ -n "$LAST_BUILT_COMMIT" ]; then
    # Without a resolvable remote commit the cache cannot be validated.
    if [ -z "$TARGET_BUILD_COMMIT" ]; then
      echo "Unable to parse Arrow commit: $TARGET_BUILD_COMMIT."
      exit 1
    fi

    if [ "$TARGET_BUILD_COMMIT" = "$LAST_BUILT_COMMIT" ]; then
      # Recorded commit matches the remote commit: nothing to rebuild.
      echo "Arrow build of commit $TARGET_BUILD_COMMIT was cached, skipping build..."
      exit 0
    else
      echo "Found cached commit $LAST_BUILT_COMMIT for Arrow which is different with target commit $TARGET_BUILD_COMMIT, creating brand-new build..."
    fi
  fi
fi

# Remove the Arrow source checkout and install tree left by an earlier build.
if [ -d build/arrow_ep ]; then
  rm -r build/arrow_ep
fi

if [ -d build/arrow_install ]; then
  rm -r build/arrow_install
fi

# Drop the recorded commit as well. `rm -f` is silent when the file is absent,
# and the path is quoted so a CURRENT_DIR containing spaces is handled.
rm -f -- "${CURRENT_DIR}/arrow-commit.cache"

echo "Building Arrow from Source ..."
mkdir -p build
cd build
Expand All @@ -187,11 +230,15 @@ if [ $BUILD_ARROW == "ON" ]; then

if [ $BACKEND_TYPE == "velox" ]; then
compile_velox_arrow
else # gazelle
elif [ $BACKEND_TYPE == "gazelle_cpp" ]; then
compile_gazelle_arrow
else
echo "Unrecognizable backend type: $BACKEND_TYPE."
exit 1
fi

echo "Finish to build Arrow from Source !!!"
echo $TARGET_BUILD_COMMIT > "${CURRENT_DIR}/arrow-commit.cache"
else
echo "Use ARROW_ROOT as Arrow Library Path"
echo "ARROW_ROOT=${ARROW_ROOT}"
Expand Down
43 changes: 40 additions & 3 deletions tools/build_velox.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,26 @@ NPROC=$(nproc)
BUILD_VELOX_FROM_SOURCE=OFF
COMPILE_VELOX=OFF
VELOX_HOME=${3:-/root/velox}
ENABLE_EP_CACHE=OFF

VELOX_REPO=https://github.com/oap-project/velox.git
VELOX_BRANCH=main

for arg in "$@"
do
case $arg in
-v=*|--build_velox_from_source=*)
--build_velox_from_source=*)
BUILD_VELOX_FROM_SOURCE=("${arg#*=}")
shift # Remove argument name from processing
;;
-v=*|--compile_velox=*)
--compile_velox=*)
COMPILE_VELOX=("${arg#*=}")
shift # Remove argument name from processing
;;
--enable_ep_cache=*)
ENABLE_EP_CACHE=("${arg#*=}")
shift # Remove argument name from processing
;;
*)
OTHER_ARGUMENTS+=("$1")
shift # Remove generic argument from processing
Expand Down Expand Up @@ -52,13 +61,40 @@ cd ${CURRENT_DIR}


if [ $BUILD_VELOX_FROM_SOURCE == "ON" ]; then

# Resolve the commit the Velox branch currently points at on the remote. It is
# compared below with the commit stored in velox-commit.cache.
TARGET_BUILD_COMMIT="$(git ls-remote "$VELOX_REPO" "$VELOX_BRANCH" | awk '{print $1;}')"

# Cache check: only when caching is enabled and a cache file exists.
if [ "$ENABLE_EP_CACHE" == "ON" ] && [ -e "${CURRENT_DIR}/velox-commit.cache" ]; then
  LAST_BUILT_COMMIT="$(cat "${CURRENT_DIR}/velox-commit.cache")"
  # The expansion must be quoted: unquoted, an empty value collapses the test
  # to `[ -n ]`, which is always true, so an empty cache file would be treated
  # as a recorded commit.
  if [ -n "$LAST_BUILT_COMMIT" ]; then
    # Without a resolvable remote commit the cache cannot be validated.
    if [ -z "$TARGET_BUILD_COMMIT" ]; then
      echo "Unable to parse Velox commit: $TARGET_BUILD_COMMIT."
      exit 1
    fi

    if [ "$TARGET_BUILD_COMMIT" = "$LAST_BUILT_COMMIT" ]; then
      # Recorded commit matches the remote commit: nothing to rebuild.
      echo "Velox build of commit $TARGET_BUILD_COMMIT was cached, skipping build..."
      exit 0
    else
      echo "Found cached commit $LAST_BUILT_COMMIT for Velox which is different with target commit $TARGET_BUILD_COMMIT, creating brand-new build..."
    fi
  fi
fi

# Remove the Velox source checkout and install tree left by an earlier build.
if [ -d build/velox_ep ]; then
  rm -r build/velox_ep
fi

if [ -d build/velox_install ]; then
  rm -r build/velox_install
fi

# Drop the recorded commit as well. `rm -f` is silent when the file is absent,
# and the path is quoted so a CURRENT_DIR containing spaces is handled.
rm -f -- "${CURRENT_DIR}/velox-commit.cache"

echo "Building Velox from Source ..."
mkdir -p build
cd build
Expand All @@ -71,7 +107,7 @@ if [ $BUILD_VELOX_FROM_SOURCE == "ON" ]; then
echo "VELOX_INSTALL_DIR=${VELOX_INSTALL_DIR}"
mkdir -p $VELOX_SOURCE_DIR
mkdir -p $VELOX_INSTALL_DIR
git clone https://github.com/oap-project/velox.git -b main $VELOX_SOURCE_DIR
git clone $VELOX_REPO -b $VELOX_BRANCH $VELOX_SOURCE_DIR
pushd $VELOX_SOURCE_DIR
#sync submodules
git submodule sync --recursive
Expand All @@ -80,6 +116,7 @@ if [ $BUILD_VELOX_FROM_SOURCE == "ON" ]; then
process_script
compile
echo "Finish to build Velox from Source !!!"
echo $TARGET_BUILD_COMMIT > "${CURRENT_DIR}/velox-commit.cache"
else
VELOX_SOURCE_DIR=${VELOX_HOME}
if [ $COMPILE_VELOX == "ON" ]; then
Expand Down

0 comments on commit 789756d

Please sign in to comment.