diff --git a/.github/workflows/sparkucx-ci.yml b/.github/workflows/sparkucx-ci.yml
index 01339492..bc060fd1 100755
--- a/.github/workflows/sparkucx-ci.yml
+++ b/.github/workflows/sparkucx-ci.yml
@@ -1,18 +1,15 @@
name: SparkUCX CI
on:
- push:
- branches:
- - master
pull_request:
branches:
- master
-jobs:
+jobs:
build-sparkucx:
strategy:
matrix:
- spark_version: [2.4, 3.0]
+ spark_version: ["2.4", "3.0"]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
@@ -21,7 +18,7 @@ jobs:
with:
java-version: 1.8
- name: Build with Maven
- run: mvn -B package -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn
+ run: mvn -B package -Pspark-${{ matrix.spark_version }} -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn
--file pom.xml
- name: Run Sonar code analysis
run: mvn -B sonar:sonar -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -Dsonar.projectKey=openucx:spark-ucx -Dsonar.organization=openucx -Dsonar.host.url=https://sonarcloud.io -Dsonar.login=97f4df88ff4fa04e2d5b061acf07315717f1f08b -Pspark-${{ matrix.spark_version }}
diff --git a/.github/workflows/sparkucx-release.yml b/.github/workflows/sparkucx-release.yml
new file mode 100644
index 00000000..09766096
--- /dev/null
+++ b/.github/workflows/sparkucx-release.yml
@@ -0,0 +1,48 @@
+# Builds the SparkUCX jar for each Spark version in the matrix and uploads it
+# as an asset of the release for the pushed tag.
+on:
+  push:
+    # Sequence of patterns matched against refs/tags
+    tags:
+      - 'v*'  # Push events to matching v*, i.e. v1.0, v20.15.10
+
+name: Upload Release Asset
+
+env:
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+jobs:
+  release:
+    strategy:
+      matrix:
+        # Quoted so YAML keeps "3.0" a string; it is spliced into -Pspark-<version>.
+        spark_version: ["2.4", "3.0"]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Set up JDK 1.8
+        uses: actions/setup-java@v1
+        with:
+          java-version: '1.8'
+
+      - name: Build with Maven
+        id: maven_package
+        run: |
+          mvn -B -Pspark-${{ matrix.spark_version }} clean package \
+            -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn \
+            --file pom.xml
+          cd target
+          # Publish the built jar's file name as the step output `jar_name`.
+          # Written to $GITHUB_OUTPUT because the `::set-output` workflow
+          # command is deprecated.
+          echo "jar_name=$(echo spark-ucx-*-jar-with-dependencies.jar)" >> "$GITHUB_OUTPUT"
+
+      - name: Upload Release Jars
+        uses: svenstaro/upload-release-action@v1-release
+        with:
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          file: ./target/${{ steps.maven_package.outputs.jar_name }}
+          asset_name: ${{ steps.maven_package.outputs.jar_name }}
+          tag: ${{ github.ref }}
diff --git a/README.md b/README.md
index a3531ef4..e7a2b9a7 100755
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ for your spark version (e.g. spark-ucx-1.0-for-spark-2.4.0-jar-with-dependencies
Put SparkUCX jar file in $SPARK_UCX_HOME on all the nodes in your cluster.
If you would like to build the project yourself, please refer to the ["Build"](https://github.com/openucx/sparkucx#build) section below.
-Ucx binaries **must** be in `java.library.path` on every Spark Master and Worker.
+UCX binaries **must** be on the Spark classpath on every Spark Master and Worker.
It can be obtained by installing latest version of [Mellanox OFED](http://www.mellanox.com/page/products_dyn?product_family=26)
or following [ucx build instruction](https://github.com/openucx/ucx#using-ucx). E.g.:
@@ -37,19 +37,16 @@ Provide Spark the location of the SparkUCX plugin jars and ucx shared binaries b
spark.driver.extraClassPath $SPARK_UCX_HOME/spark-ucx-1.0-for-spark-2.4.0-jar-with-dependencies.jar:$UCX_PREFIX/lib
spark.executor.extraClassPath $SPARK_UCX_HOME/spark-ucx-1.0-for-spark-2.4.0-jar-with-dependencies.jar:$UCX_PREFIX/lib
```
-
-Add UCX shared binaries to `java.library.path` for Spark driver and executors:
-```
-spark.driver.extraJavaOptions -Djava.library.path=$UCX_PREFIX/lib
-spark.executor.extraJavaOptions -Djava.library.path=$UCX_PREFIX/lib
-```
-
To enable the SparkUCX Shuffle Manager plugin, add the following configuration property
to spark (e.g. in $SPARK_HOME/conf/spark-defaults.conf):
```
spark.shuffle.manager org.apache.spark.shuffle.UcxShuffleManager
```
+For Spark 3.0, also add the SparkUCX ShuffleIO plugin:
+```
+spark.shuffle.sort.io.plugin.class org.apache.spark.shuffle.compat.spark_3_0.UcxLocalDiskShuffleDataIO
+```
### Build
@@ -60,6 +57,12 @@ Build instructions:
```
% git clone https://github.com/openucx/sparkucx
% cd sparkucx
-% mvn -DskipTests clean package -Pspark-2.3
+% mvn -DskipTests clean package -Pspark-2.4
```
+### Performance
+
+The SparkUCX plugin is built to provide the best performance out-of-the-box, and provides multiple configuration options to further tune SparkUCX per-job. For more information on how to set up the [HiBench](https://github.com/Intel-bigdata/HiBench) benchmark and reproduce results, please refer to [Accelerated Apache SparkUCX 2.4/3.0 cluster deployment](https://docs.mellanox.com/pages/releaseview.action?pageId=19819236).
+
+![Performance results](https://docs.mellanox.com/download/attachments/19819236/image2020-1-23_15-39-14.png)
+
diff --git a/pom.xml b/pom.xml
index f251d82c..42336715 100755
--- a/pom.xml
+++ b/pom.xml
@@ -70,7 +70,7 @@ See file LICENSE for terms.
org.openucx
jucx
- 1.9.0-SNAPSHOT
+ 1.8.0