diff --git a/.travis.yml b/.travis.yml index 2be358fb0..dae191963 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,9 @@ +dist: trusty sudo: required language: java jdk: + - openjdk11 + - openjdk8 - openjdk7 before_install: - cat /etc/hosts # optionally check the content *before* @@ -10,32 +13,53 @@ before_install: - cat /proc/cpuinfo | grep cores | wc -l - free -h install: - - hibench=$(pwd) - - cd /opt/ - - wget http://d3kbcqa49mib13.cloudfront.net/spark-1.6.0-bin-hadoop2.6.tgz - - tar -xzf spark-1.6.0-bin-hadoop2.6.tgz - - wget https://archive.apache.org/dist/hadoop/core/hadoop-2.6.5/hadoop-2.6.5.tar.gz - - tar -xzf hadoop-2.6.5.tar.gz - - cd ${hibench} - - cp ./travis/spark-env.sh /opt/spark-1.6.0-bin-hadoop2.6/conf/ - - cp ./travis/core-site.xml /opt/hadoop-2.6.5/etc/hadoop/ - - cp ./travis/hdfs-site.xml /opt/hadoop-2.6.5/etc/hadoop/ - - cp ./travis/mapred-site.xml /opt/hadoop-2.6.5/etc/hadoop/ - - cp ./travis/yarn-site.xml /opt/hadoop-2.6.5/etc/hadoop/ - - cp ./travis/hibench.conf ./conf/ - - cp ./travis/benchmarks.lst ./conf/ + - | + export java_ver=$(./travis/jdk_ver.sh) + if [[ "$java_ver" == 11 ]]; then + export HADOOP_VER=3.2.0 + export SPARK_VER=2.4.3 + export SPARK_PACKAGE_TYPE=without-hadoop-scala-2.12 + elif [[ "$java_ver" == 8 ]]; then + export HADOOP_VER=3.2.0 + export SPARK_VER=2.4.3 + export SPARK_PACKAGE_TYPE=without-hadoop + elif [[ "$java_ver" == 7 ]]; then + export HADOOP_VER=2.6.5 + export SPARK_VER=1.6.0 + export SPARK_PACKAGE_TYPE=hadoop2.6 + else + exit 1 + fi + + # Folders where are stored Spark and Hadoop depending on version required + export SPARK_BINARIES_FOLDER=spark-$SPARK_VER-bin-$SPARK_PACKAGE_TYPE + export HADOOP_BINARIES_FOLDER=hadoop-$HADOOP_VER + export HADOOP_CONF_DIR=/opt/$HADOOP_BINARIES_FOLDER/etc/hadoop/ + export HADOOP_HOME=/opt/$HADOOP_BINARIES_FOLDER + + sudo -E ./travis/install_hadoop_spark.sh + sudo -E ./travis/config_hadoop_spark.sh before_script: - "export JAVA_OPTS=-Xmx512m" cache: directories: - $HOME/.m2 
script: - - mvn clean package -q -Dmaven.javadoc.skip=true -Dspark=2.2 -Dscala=2.11 - - mvn clean package -q -Dmaven.javadoc.skip=true -Dspark=2.0 -Dscala=2.11 - - mvn clean package -q -Dmaven.javadoc.skip=true -Dspark=1.6 -Dscala=2.10 - - sudo -E ./travis/configssh.sh - - sudo -E ./travis/restart_hadoop_spark.sh - - cp ./travis/hadoop.conf ./conf/ - - cp ./travis/spark.conf ./conf/ - - /opt/hadoop-2.6.5/bin/yarn node -list 2 - - sudo -E ./bin/run_all.sh + - | + if [[ "$java_ver" == 11 ]]; then + mvn clean package -Psparkbench -Phadoopbench -Dhadoop=3.2 -Dspark=2.4 -Dscala=2.12 -Dmaven-compiler-plugin.version=3.8.0 -Dexclude-streaming + elif [[ "$java_ver" == 8 ]]; then + mvn clean package -q -Dmaven.javadoc.skip=true -Dhadoop=3.2 -Dspark=2.4 -Dscala=2.11 + sudo -E ./travis/configssh.sh + sudo -E ./travis/restart_hadoop_spark.sh + sudo -E ./bin/run_all.sh + elif [[ "$java_ver" == 7 ]]; then + mvn clean package -q -Dmaven.javadoc.skip=true -Dspark=2.2 -Dscala=2.11 + mvn clean package -q -Dmaven.javadoc.skip=true -Dspark=2.0 -Dscala=2.11 + mvn clean package -q -Dmaven.javadoc.skip=true -Dspark=1.6 -Dscala=2.10 + sudo -E ./travis/configssh.sh + sudo -E ./travis/restart_hadoop_spark.sh + sudo -E ./bin/run_all.sh + else + exit 1 + fi diff --git a/README.md b/README.md index e3c433eb0..7dc09a259 100644 --- a/README.md +++ b/README.md @@ -135,12 +135,12 @@ There are totally 19 workloads in HiBench. The workloads are divided into 6 cate 4. Fixwindow (fixwindow) The workloads performs a window based aggregation. It tests the performance of window operation in the streaming frameworks. 
- - -### Supported Hadoop/Spark/Flink/Storm/Gearpump releases: ### - - Hadoop: Apache Hadoop 2.x, CDH5, HDP - - Spark: Spark 1.6.x, Spark 2.0.x, Spark 2.1.x, Spark 2.2.x +### Supported Hadoop/Spark releases: ### + - Hadoop: Apache Hadoop 2.x, 3.2, CDH5, HDP + - Spark: Spark 1.6.x, Spark 2.0.x, Spark 2.1.x, Spark 2.2.x, Spark 2.4.x + +### Supported Flink/Storm/Gearpump releases: ### - Flink: 1.0.3 - Storm: 1.0.1 - Gearpump: 0.8.1 diff --git a/autogen/pom.xml b/autogen/pom.xml index fb1d9b46a..701aad835 100644 --- a/autogen/pom.xml +++ b/autogen/pom.xml @@ -57,7 +57,20 @@ ${hadoop.mr2.version} - + + + hadoop3.2 + + 3.2.0 + + + + hadoop + 3.2 + + + + diff --git a/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIO.java b/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIO.java index 6b25e3e24..02b40ee1b 100644 --- a/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIO.java +++ b/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIO.java @@ -23,7 +23,6 @@ import java.util.Date; import java.util.StringTokenizer; -import org.apache.commons.logging.*; import org.apache.hadoop.fs.*; import org.apache.hadoop.mapred.*; @@ -33,6 +32,8 @@ import org.apache.hadoop.conf.*; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Distributed i/o benchmark. 
@@ -69,8 +70,7 @@ public class TestDFSIO extends Configured implements Tool { private static final int DEFAULT_BUFFER_SIZE = 1000000; private static final String BASE_FILE_NAME = "test_io_"; private static final String DEFAULT_RES_FILE_NAME = "TestDFSIO_results.log"; - - private static final Log LOG = FileInputFormat.LOG; + private static final Logger LOG = LoggerFactory.getLogger(FileInputFormat.class); private static Configuration fsConfig = new Configuration(); private static final long MEGA = 0x100000; private static String TEST_ROOT_DIR = System.getProperty("test.build.data","/benchmarks/TestDFSIO"); diff --git a/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java b/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java index b5a0e52c4..1f44108ab 100644 --- a/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java +++ b/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java @@ -22,11 +22,13 @@ import java.util.Date; import java.util.StringTokenizer; +import java.util.Arrays; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; -import org.apache.commons.logging.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.mapred.*; import org.apache.hadoop.mapreduce.Job; @@ -85,7 +87,7 @@ public class TestDFSIOEnh extends Configured implements Tool { - private static final Log LOG = LogFactory.getLog(TestDFSIOEnh.class); + private static final Logger LOG = LoggerFactory.getLogger(TestDFSIOEnh.class); private static final int TEST_TYPE_READ = 0; private static final int TEST_TYPE_WRITE = 1; private static final int TEST_TYPE_CLEANUP = 2; @@ -952,7 +954,7 @@ protected static void runAnalyse(FileSystem fs, Configuration fsConfig, e.printStackTrace(); } finally { fs.delete(DfsioeConfig.getInstance().getReportTmp(fsConfig), true); - FileUtil.copyMerge(fs, DfsioeConfig.getInstance().getReportDir(fsConfig), fs, 
DfsioeConfig.getInstance().getReportTmp(fsConfig), false, fsConfig, null); + copyMerge(fs, DfsioeConfig.getInstance().getReportDir(fsConfig), fs, DfsioeConfig.getInstance().getReportTmp(fsConfig), false, fsConfig, null); LOG.info("remote report file " + DfsioeConfig.getInstance().getReportTmp(fsConfig) + " merged."); BufferedReader lines = new BufferedReader(new InputStreamReader(new DataInputStream(fs.open(DfsioeConfig.getInstance().getReportTmp(fsConfig))))); String line = null; @@ -1001,8 +1003,60 @@ else if (sampleUnit == GIGA) } res.println("\n-- Result Analyse -- : " + ((System.currentTimeMillis() - t1)/1000) + "s"); res.close(); - } - + } + + /** Copy all files in a directory to one output file (merge). */ + @Deprecated + public static boolean copyMerge(FileSystem srcFS, Path srcDir, FileSystem dstFS, Path dstFile, boolean deleteSource, + Configuration conf, String addString) throws IOException { + dstFile = checkDest(srcDir.getName(), dstFS, dstFile, false); + + if (!srcFS.getFileStatus(srcDir).isDirectory()) + return false; + + OutputStream out = dstFS.create(dstFile); + + try { + FileStatus contents[] = srcFS.listStatus(srcDir); + Arrays.sort(contents); + for (int i = 0; i < contents.length; i++) { + if (contents[i].isFile()) { + InputStream in = srcFS.open(contents[i].getPath()); + try { + IOUtils.copyBytes(in, out, conf, false); + if (addString != null) + out.write(addString.getBytes("UTF-8")); + + } finally { + in.close(); + } + } + } + } finally { + out.close(); + } + + if (deleteSource) { + return srcFS.delete(srcDir, true); + } else { + return true; + } + } + + private static Path checkDest(String srcName, FileSystem dstFS, Path dst, boolean overwrite) throws IOException { + if (dstFS.exists(dst)) { + FileStatus sdst = dstFS.getFileStatus(dst); + if (sdst.isDirectory()) { + if (null == srcName) { + throw new IOException("Target " + dst + " is a directory"); + } + return checkDest(null, dstFS, new Path(dst, srcName), overwrite); + } else if 
(!overwrite) { + throw new IOException("Target " + dst + " already exists"); + } + } + return dst; + } @Deprecated protected static void analyzeResult( FileSystem fs, int testType, diff --git a/common/pom.xml b/common/pom.xml index 3bb0c8f12..25f89b76f 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -60,7 +60,7 @@ process-resources add-source - compile + ${maven.assembly.plugin.goal} @@ -99,6 +99,7 @@ 2.11.8 2.11 + compile @@ -112,6 +113,7 @@ 2.10.4 2.10 + compile @@ -126,6 +128,7 @@ 2.11.8 2.11 + compile @@ -134,5 +137,18 @@ + + + exclude-streaming + + doc + + + + exclude-streaming + + + + diff --git a/docs/build-hibench.md b/docs/build-hibench.md index 4709f90cf..51353419e 100644 --- a/docs/build-hibench.md +++ b/docs/build-hibench.md @@ -28,7 +28,7 @@ Because some Maven plugins cannot support Scala version perfectly, there are som ### Specify Spark Version ### -To specify the spark version, use -Dspark=xxx(1.6, 2.0, 2.1 or 2.2). By default, it builds for spark 2.0 +To specify the spark version, use -Dspark=xxx(1.6, 2.0, 2.1, 2.2 or 2.4). By default, it builds for spark 2.0 mvn -Psparkbench -Dspark=1.6 -Dscala=2.11 clean package tips: @@ -37,6 +37,11 @@ default . For example , if we want use spark2.0 and scala2.11 to build hibench. package` , but for spark2.0 and scala2.10 , we need use the command `mvn -Dspark=2.0 -Dscala=2.10 clean package` . Similarly , the spark1.6 is associated with the scala2.10 by default. +### Specify Hadoop Version ### +To specify the hadoop version, use -Dhadoop=xxx(3.2). By default, it builds for hadoop 2.4 + + mvn -Psparkbench -Dhadoop=3.2 -Dspark=2.4 clean package + ### Build a single module ### If you are only interested in a single workload in HiBench. You can build a single module. For example, the below command only builds the SQL workloads for Spark. 
@@ -48,3 +53,13 @@ Supported modules includes: micro, ml(machine learning), sql, websearch, graph, For Spark 2.0 and Spark 2.1, we add the benchmark support for Structured Streaming. This is a new module which cannot be compiled in Spark 1.6. And it won't get compiled by default even if you specify the spark version as 2.0 or 2.1. You must explicitly specify it like this: mvn -Psparkbench -Dmodules -PstructuredStreaming clean package + +### Build using JDK 11 +**With Java 11, HiBench can only be built for Spark 2.4 _(compiled with Scala 2.12)_ and/or Hadoop 3.2** + +To build with Java 11, exclude the streaming benchmarks from compilation and specify the Scala, Spark, Hadoop and Maven compiler plugin versions as below + + mvn clean package -Psparkbench -Phadoopbench -Dhadoop=3.2 -Dspark=2.4 -Dscala=2.12 -Dexclude-streaming -Dmaven-compiler-plugin.version=3.8.0 + +Supported frameworks: hadoopbench and sparkbench only (flinkbench, stormbench and gearpumpbench are not supported) +Supported modules include: micro, ml (machine learning), websearch and graph (streaming, structuredStreaming and sql are not supported) diff --git a/sparkbench/assembly/pom.xml b/sparkbench/assembly/pom.xml index 785b432b1..bff5072af 100644 --- a/sparkbench/assembly/pom.xml +++ b/sparkbench/assembly/pom.xml @@ -159,7 +159,43 @@ - !modules + !exclude-streaming + + + + + + exclude-streaming + + + com.intel.hibench.sparkbench + sparkbench-micro + ${project.version} + + + com.intel.hibench.sparkbench + sparkbench-ml + ${project.version} + + + com.intel.hibench.sparkbench + sparkbench-websearch + ${project.version} + + + com.intel.hibench.sparkbench + sparkbench-graph + ${project.version} + + + com.intel.hibench.sparkbench + sparkbench-sql + ${project.version} + + + + + exclude-streaming diff --git a/sparkbench/pom.xml b/sparkbench/pom.xml index 41c230b37..6f5154281 100644 --- a/sparkbench/pom.xml +++ b/sparkbench/pom.xml @@ -117,7 +117,23 @@ - !modules + !exclude-streaming + + + + + 
+ no-streaming + + micro + ml + websearch + graph + sql + + + + exclude-streaming @@ -191,6 +207,20 @@ + + spark2.4 + + 2.4.3 + 2.4 + + + + spark + 2.4 + + + + defaultScalaVersion @@ -232,5 +262,31 @@ + + scala2.12 + + 2.12.9 + 2.12 + + + + scala + 2.12 + + + + + + hadoop3.2 + + 3.2.0 + + + + hadoop + 3.2 + + + diff --git a/sparkbench/streaming/pom.xml b/sparkbench/streaming/pom.xml index 513b9bad8..88a5d2e12 100644 --- a/sparkbench/streaming/pom.xml +++ b/sparkbench/streaming/pom.xml @@ -119,5 +119,22 @@ + + spark2.4 + + + org.apache.spark + spark-streaming-kafka-0-8_2.11 + 2.4.0 + + + + + spark + 2.4 + + + + diff --git a/travis/core-site.xml b/travis/artifacts/core-site.xml old mode 100644 new mode 100755 similarity index 100% rename from travis/core-site.xml rename to travis/artifacts/core-site.xml diff --git a/travis/mapred-site.xml b/travis/artifacts/hadoop26/mapred-site.xml old mode 100644 new mode 100755 similarity index 100% rename from travis/mapred-site.xml rename to travis/artifacts/hadoop26/mapred-site.xml diff --git a/travis/spark-env.sh b/travis/artifacts/hadoop26/spark-env.sh similarity index 99% rename from travis/spark-env.sh rename to travis/artifacts/hadoop26/spark-env.sh index 8706c9070..2eccddecb 100755 --- a/travis/spark-env.sh +++ b/travis/artifacts/hadoop26/spark-env.sh @@ -46,7 +46,7 @@ # - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job. # Options for the daemons used in the standalone deploy mode -export SPARK_MASTER_IP= localhost +export SPARK_MASTER_IP= localhost #i, to bind the master to a different IP address or hostname # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master # - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. 
"-Dx=y") diff --git a/travis/yarn-site.xml b/travis/artifacts/hadoop26/yarn-site.xml old mode 100644 new mode 100755 similarity index 100% rename from travis/yarn-site.xml rename to travis/artifacts/hadoop26/yarn-site.xml diff --git a/travis/artifacts/hadoop32/hadoop-env.sh b/travis/artifacts/hadoop32/hadoop-env.sh new file mode 100644 index 000000000..5c7acefd8 --- /dev/null +++ b/travis/artifacts/hadoop32/hadoop-env.sh @@ -0,0 +1,436 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Set Hadoop-specific environment variables here. + +## +## THIS FILE ACTS AS THE MASTER FILE FOR ALL HADOOP PROJECTS. +## SETTINGS HERE WILL BE READ BY ALL HADOOP COMMANDS. THEREFORE, +## ONE CAN USE THIS FILE TO SET YARN, HDFS, AND MAPREDUCE +## CONFIGURATION OPTIONS INSTEAD OF xxx-env.sh. +## +## Precedence rules: +## +## {yarn-env.sh|hdfs-env.sh} > hadoop-env.sh > hard-coded defaults +## +## {YARN_xyz|HDFS_xyz} > HADOOP_xyz > hard-coded defaults +## + +# Many of the options here are built from the perspective that users +# may want to provide OVERWRITING values on the command line. +# For example: +# +# JAVA_HOME=/usr/java/testing hdfs dfs -ls +# +# Therefore, the vast majority (BUT NOT ALL!) 
of these defaults +# are configured for substitution and not append. If append +# is preferable, modify this file accordingly. + +### +# Generic settings for HADOOP +### + +# Technically, the only required environment variable is JAVA_HOME. +# All others are optional. However, the defaults are probably not +# preferred. Many sites configure these options outside of Hadoop, +# such as in /etc/profile.d + +# The java implementation to use. By default, this environment +# variable is REQUIRED on ALL platforms except OS X! +# export JAVA_HOME= + +# Location of Hadoop. By default, Hadoop will attempt to determine +# this location based upon its execution path. +# export HADOOP_HOME= + +# Location of Hadoop's configuration information. i.e., where this +# file is living. If this is not defined, Hadoop will attempt to +# locate it based upon its execution path. +# +# NOTE: It is recommended that this variable not be set here but in +# /etc/profile.d or equivalent. Some options (such as +# --config) may react strangely otherwise. +# +# export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop + +# The maximum amount of heap to use (Java -Xmx). If no unit +# is provided, it will be converted to MB. Daemons will +# prefer any Xmx setting in their respective _OPT variable. +# There is no default; the JVM will autoscale based upon machine +# memory size. +# export HADOOP_HEAPSIZE_MAX= + +# The minimum amount of heap to use (Java -Xms). If no unit +# is provided, it will be converted to MB. Daemons will +# prefer any Xms setting in their respective _OPT variable. +# There is no default; the JVM will autoscale based upon machine +# memory size. +# export HADOOP_HEAPSIZE_MIN= + +# Enable extra debugging of Hadoop's JAAS binding, used to set up +# Kerberos security. +# export HADOOP_JAAS_DEBUG=true + +# Extra Java runtime options for all Hadoop commands. We don't support +# IPv6 yet/still, so by default the preference is set to IPv4. 
+# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true" +# For Kerberos debugging, an extended option set logs more information +# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -Dsun.security.krb5.debug=true -Dsun.security.spnego.debug" + +# Some parts of the shell code may do special things dependent upon +# the operating system. We have to set this here. See the next +# section as to why.... +export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)} + + +# Under certain conditions, Java on OS X will throw SCDynamicStore errors +# in the system logs. +# See HADOOP-8719 for more information. If one needs Kerberos +# support on OS X, one will want to change/remove this extra bit. +case ${HADOOP_OS_TYPE} in + Darwin*) + export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.realm= " + export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.kdc= " + export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.conf= " + ;; +esac + +# Extra Java runtime options for some Hadoop commands +# and clients (i.e., hdfs dfs -blah). These get appended to HADOOP_OPTS for +# such commands. In most cases, this should be left empty and +# let users supply it on the command line. +# export HADOOP_CLIENT_OPTS="" + +# +# A note about classpaths. +# +# By default, Apache Hadoop overrides Java's CLASSPATH +# environment variable. It is configured such +# that it starts out blank with new entries added after passing +# a series of checks (file/dir exists, not already listed aka +# de-duplication). During de-duplication, wildcards and/or +# directories are *NOT* expanded to keep it simple. Therefore, +# if the computed classpath has two specific mentions of +# awesome-methods-1.0.jar, only the first one added will be seen. +# If two directories are in the classpath that both contain +# awesome-methods-1.0.jar, then Java will pick up both versions. + +# An additional, custom CLASSPATH. 
Site-wide configs should be +# handled via the shellprofile functionality, utilizing the +# hadoop_add_classpath function for greater control and much +# harder for apps/end-users to accidentally override. +# Similarly, end users should utilize ${HOME}/.hadooprc . +# This variable should ideally only be used as a short-cut, +# interactive way for temporary additions on the command line. +# export HADOOP_CLASSPATH="/some/cool/path/on/your/machine" + +# Should HADOOP_CLASSPATH be first in the official CLASSPATH? +# export HADOOP_USER_CLASSPATH_FIRST="yes" + +# If HADOOP_USE_CLIENT_CLASSLOADER is set, the classpath along +# with the main jar are handled by a separate isolated +# client classloader when 'hadoop jar', 'yarn jar', or 'mapred job' +# is utilized. If it is set, HADOOP_CLASSPATH and +# HADOOP_USER_CLASSPATH_FIRST are ignored. +# export HADOOP_USE_CLIENT_CLASSLOADER=true + +# HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES overrides the default definition of +# system classes for the client classloader when HADOOP_USE_CLIENT_CLASSLOADER +# is enabled. Names ending in '.' (period) are treated as package names, and +# names starting with a '-' are treated as negative matches. For example, +# export HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES="-org.apache.hadoop.UserClass,java.,javax.,org.apache.hadoop." + +# Enable optional, bundled Hadoop features +# This is a comma delimited list. It may NOT be overridden via .hadooprc +# Entries may be added/removed as needed. +# export HADOOP_OPTIONAL_TOOLS="hadoop-openstack,hadoop-kafka,hadoop-azure,hadoop-azure-datalake,hadoop-aws,hadoop-aliyun" + +### +# Options for remote shell connectivity +### + +# There are some optional components of hadoop that allow for +# command and control of remote hosts. For example, +# start-dfs.sh will attempt to bring up all NNs, DNS, etc. 
+ +# Options to pass to SSH when one of the "log into a host and +# start/stop daemons" scripts is executed +# export HADOOP_SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s" + +# The built-in ssh handler will limit itself to 10 simultaneous connections. +# For pdsh users, this sets the fanout size ( -f ) +# Change this to increase/decrease as necessary. +# export HADOOP_SSH_PARALLEL=10 + +# Filename which contains all of the hosts for any remote execution +# helper scripts such as workers.sh, start-dfs.sh, etc. +# export HADOOP_WORKERS="${HADOOP_CONF_DIR}/workers" + +### +# Options for all daemons +### +# + +# +# Many options may also be specified as Java properties. It is +# very common, and in many cases, desirable, to hard-set these +# in daemon _OPTS variables. Where applicable, the appropriate +# Java property is also identified. Note that many are re-used +# or set differently in certain contexts (e.g., secure vs +# non-secure) +# + +# Where (primarily) daemon log files are stored. +# ${HADOOP_HOME}/logs by default. +# Java property: hadoop.log.dir +# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs + +# A string representing this instance of hadoop. $USER by default. +# This is used in writing log and pid files, so keep that in mind! +# Java property: hadoop.id.str +# export HADOOP_IDENT_STRING=$USER + +# How many seconds to pause after stopping a daemon +# export HADOOP_STOP_TIMEOUT=5 + +# Where pid files are stored. /tmp by default. +# export HADOOP_PID_DIR=/tmp + +# Default log4j setting for interactive commands +# Java property: hadoop.root.logger +# export HADOOP_ROOT_LOGGER=INFO,console + +# Default log4j setting for daemons spawned explicitly by +# --daemon option of hadoop, hdfs, mapred and yarn command. +# Java property: hadoop.root.logger +# export HADOOP_DAEMON_ROOT_LOGGER=INFO,RFA + +# Default log level and output location for security-related messages. 
+# You will almost certainly want to change this on a per-daemon basis via +# the Java property (i.e., -Dhadoop.security.logger=foo). (Note that the +# defaults for the NN and 2NN override this by default.) +# Java property: hadoop.security.logger +# export HADOOP_SECURITY_LOGGER=INFO,NullAppender + +# Default process priority level +# Note that sub-processes will also run at this level! +# export HADOOP_NICENESS=0 + +# Default name for the service level authorization file +# Java property: hadoop.policy.file +# export HADOOP_POLICYFILE="hadoop-policy.xml" + +# +# NOTE: this is not used by default! <----- +# You can define variables right here and then re-use them later on. +# For example, it is common to use the same garbage collection settings +# for all the daemons. So one could define: +# +# export HADOOP_GC_SETTINGS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps" +# +# .. and then use it as per the b option under the namenode. + +### +# Secure/privileged execution +### + +# +# Out of the box, Hadoop uses jsvc from Apache Commons to launch daemons +# on privileged ports. This functionality can be replaced by providing +# custom functions. See hadoop-functions.sh for more information. +# + +# The jsvc implementation to use. Jsvc is required to run secure datanodes +# that bind to privileged ports to provide authentication of data transfer +# protocol. Jsvc is not required if SASL is configured for authentication of +# data transfer protocol using non-privileged ports. +# export JSVC_HOME=/usr/bin + +# +# This directory contains pids for secure and privileged processes. +#export HADOOP_SECURE_PID_DIR=${HADOOP_PID_DIR} + +# +# This directory contains the logs for secure and privileged processes. +# Java property: hadoop.log.dir +# export HADOOP_SECURE_LOG=${HADOOP_LOG_DIR} + +# +# When running a secure daemon, the default value of HADOOP_IDENT_STRING +# ends up being a bit bogus. 
Therefore, by default, the code will +# replace HADOOP_IDENT_STRING with HADOOP_xx_SECURE_USER. If one wants +# to keep HADOOP_IDENT_STRING untouched, then uncomment this line. +# export HADOOP_SECURE_IDENT_PRESERVE="true" + +### +# NameNode specific parameters +### + +# Default log level and output location for file system related change +# messages. For non-namenode daemons, the Java property must be set in +# the appropriate _OPTS if one wants something other than INFO,NullAppender +# Java property: hdfs.audit.logger +# export HDFS_AUDIT_LOGGER=INFO,NullAppender + +# Specify the JVM options to be used when starting the NameNode. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# a) Set JMX options +# export HDFS_NAMENODE_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026" +# +# b) Set garbage collection logs +# export HDFS_NAMENODE_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')" +# +# c) ... or set them directly +# export HDFS_NAMENODE_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')" + +# this is the default: +# export HDFS_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS" + +### +# SecondaryNameNode specific parameters +### +# Specify the JVM options to be used when starting the SecondaryNameNode. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# This is the default: +# export HDFS_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS" + +### +# DataNode specific parameters +### +# Specify the JVM options to be used when starting the DataNode. 
+# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# This is the default: +# export HDFS_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS" + +# On secure datanodes, user to run the datanode as after dropping privileges. +# This **MUST** be uncommented to enable secure HDFS if using privileged ports +# to provide authentication of data transfer protocol. This **MUST NOT** be +# defined if SASL is configured for authentication of data transfer protocol +# using non-privileged ports. +# This will replace the hadoop.id.str Java property in secure mode. +# export HDFS_DATANODE_SECURE_USER=hdfs + +# Supplemental options for secure datanodes +# By default, Hadoop uses jsvc which needs to know to launch a +# server jvm. +# export HDFS_DATANODE_SECURE_EXTRA_OPTS="-jvm server" + +### +# NFS3 Gateway specific parameters +### +# Specify the JVM options to be used when starting the NFS3 Gateway. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_NFS3_OPTS="" + +# Specify the JVM options to be used when starting the Hadoop portmapper. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_PORTMAP_OPTS="-Xmx512m" + +# Supplemental options for privileged gateways +# By default, Hadoop uses jsvc which needs to know to launch a +# server jvm. +# export HDFS_NFS3_SECURE_EXTRA_OPTS="-jvm server" + +# On privileged gateways, user to run the gateway as after dropping privileges +# This will replace the hadoop.id.str Java property in secure mode. +# export HDFS_NFS3_SECURE_USER=nfsserver + +### +# ZKFailoverController specific parameters +### +# Specify the JVM options to be used when starting the ZKFailoverController. 
+# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_ZKFC_OPTS="" + +### +# QuorumJournalNode specific parameters +### +# Specify the JVM options to be used when starting the QuorumJournalNode. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_JOURNALNODE_OPTS="" + +### +# HDFS Balancer specific parameters +### +# Specify the JVM options to be used when starting the HDFS Balancer. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_BALANCER_OPTS="" + +### +# HDFS Mover specific parameters +### +# Specify the JVM options to be used when starting the HDFS Mover. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_MOVER_OPTS="" + +### +# Router-based HDFS Federation specific parameters +# Specify the JVM options to be used when starting the RBF Routers. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_DFSROUTER_OPTS="" + +### +# HDFS StorageContainerManager specific parameters +### +# Specify the JVM options to be used when starting the HDFS Storage Container Manager. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HDFS_STORAGECONTAINERMANAGER_OPTS="" + +### +# Advanced Users Only! 
+### + +# +# When building Hadoop, one can add the class paths to the commands +# via this special env var: +# export HADOOP_ENABLE_BUILD_PATHS="true" + +# +# To prevent accidents, shell commands can be (superficially) locked +# to only allow certain users to execute certain subcommands. +# It uses the format of (command)_(subcommand)_USER. +# +# For example, to limit who can execute the namenode command, +# export HDFS_NAMENODE_USER=hdfs +export JAVA_HOME=$JAVA_HOME +export HDFS_NAMENODE_USER=$USER +export HDFS_DATANODE_USER=$USER +export HDFS_SECONDARYNAMENODE_USER=$USER +export YARN_RESOURCEMANAGER_USER=$USER +export YARN_NODEMANAGER_USER=$USER diff --git a/travis/artifacts/hadoop32/mapred-site.xml b/travis/artifacts/hadoop32/mapred-site.xml new file mode 100755 index 000000000..2a40c21ea --- /dev/null +++ b/travis/artifacts/hadoop32/mapred-site.xml @@ -0,0 +1,53 @@ + + + + + + + + + mapreduce.framework.name + yarn + + + + mapreduce.application.classpath + $HADOOP_HOME/share/hadoop/common/*, + $HADOOP_HOME/share/hadoop/common/lib/*, + $HADOOP_HOME/share/hadoop/hdfs/*, + $HADOOP_HOME/share/hadoop/hdfs/lib/*, + $HADOOP_HOME/share/hadoop/yarn/*, + $HADOOP_HOME/share/hadoop/yarn/lib/*, + $HADOOP_HOME/share/hadoop/mapreduce/*, + $HADOOP_HOME/share/hadoop/mapreduce/lib/* + + + + + yarn.app.mapreduce.am.env + HADOOP_MAPRED_HOME=$HADOOP_HOME + + + + mapreduce.map.env + HADOOP_MAPRED_HOME=$HADOOP_HOME + + + + mapreduce.reduce.env + HADOOP_MAPRED_HOME=$HADOOP_HOME + + + \ No newline at end of file diff --git a/travis/artifacts/hadoop32/spark-env.sh b/travis/artifacts/hadoop32/spark-env.sh new file mode 100755 index 000000000..ce69d7193 --- /dev/null +++ b/travis/artifacts/hadoop32/spark-env.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This file is sourced when running various Spark programs. +# Copy it as spark-env.sh and edit that to configure Spark for your site. + +# Options read when launching programs locally with +# ./bin/run-example or ./bin/spark-submit +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program +# - SPARK_CLASSPATH, default classpath entries to append + +# Options read by executors and drivers running inside the cluster +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program +# - SPARK_CLASSPATH, default classpath entries to append +# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data +# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos + +# Options read in YARN client mode +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - SPARK_EXECUTOR_INSTANCES, Number of executors to start (Default: 2) +# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1). +# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G) +# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 
1000M, 2G) (Default: 1G) +# - SPARK_YARN_APP_NAME, The name of your application (Default: Spark) +# - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests (Default: ‘default’) +# - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job. +# - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job. + +# Options for the daemons used in the standalone deploy mode +export SPARK_MASTER_IP=localhost +export SPARK_DIST_CLASSPATH=$(/opt/hadoop-3.2.0/bin/hadoop classpath) +# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname +# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master +# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") +# - SPARK_WORKER_CORES, to set the number of cores to use on this machine +# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) +# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker +# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node +# - SPARK_WORKER_DIR, to set the working directory of worker processes +# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") +# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g). +# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") +# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") +# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") +# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers + +# Generic options for the daemons used in the standalone deploy mode +# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) +# - SPARK_LOG_DIR Where log files are stored.
(Default: ${SPARK_HOME}/logs) +# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) +# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) +# - SPARK_NICENESS The scheduling priority for daemons. (Default: 0) diff --git a/travis/artifacts/hadoop32/yarn-site.xml b/travis/artifacts/hadoop32/yarn-site.xml new file mode 100755 index 000000000..08255aaa8 --- /dev/null +++ b/travis/artifacts/hadoop32/yarn-site.xml @@ -0,0 +1,75 @@ + + + + + + + yarn.nodemanager.aux-services + mapreduce_shuffle + + + yarn.nodemanager.aux-services.mapreduce.shuffle.class + org.apache.hadoop.mapred.ShuffleHandler + + + yarn.resourcemanager.hostname + localhost + + + yarn.resourcemanager.address + localhost:8032 + + + + yarn.nodemanager.resource.memory-mb + 7168 + + + yarn.nodemanager.resource.cpu-vcores + 2 + + + yarn.nodemanager.vmem-check-enabled + false + + + yarn.nodemanager.vmem-pmem-ratio + 2.1 + + + yarn.scheduler.minimum-allocation-mb + 1024 + + + yarn.scheduler.maximum-allocation-mb + 7168 + + + yarn.scheduler.minimum-allocation-vcores + 1 + + + yarn.scheduler.maximum-allocation-vcores + 2 + + + yarn.application.classpath + $JAVA_HOME, + $HADOOP_CONF_DIR, + $HADOOP_MAPRED_HOME + + + + diff --git a/travis/hdfs-site.xml b/travis/artifacts/hdfs-site.xml old mode 100644 new mode 100755 similarity index 100% rename from travis/hdfs-site.xml rename to travis/artifacts/hdfs-site.xml diff --git a/travis/config_hadoop_spark.sh b/travis/config_hadoop_spark.sh new file mode 100755 index 000000000..48146ef40 --- /dev/null +++ b/travis/config_hadoop_spark.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copy configuration depending on the Java, Spark and Hadoop versions. +if [[ "$java_ver" == 11 ]]; then + cp ./travis/artifacts/hadoop32/spark-env.sh /opt/$SPARK_BINARIES_FOLDER/conf/ + cp ./travis/artifacts/hadoop32/hadoop-env.sh $HADOOP_CONF_DIR + # Java 11 removed java.activation module completely. [JEP 320](http://openjdk.java.net/jeps/320) + # Including it in Hadoop in order to enable Hadoop 3.2.0 using Java 11. + URL=https://jcenter.bintray.com/javax/activation/javax.activation-api/1.2.0/javax.activation-api-1.2.0.jar + wget $URL -P $HADOOP_HOME/share/hadoop/common + echo 'export HADOOP_CLASSPATH+=" $HADOOP_HOME/share/hadoop/common/*.jar"' >> $HADOOP_CONF_DIR/hadoop-env.sh + cp ./travis/artifacts/hadoop32/mapred-site.xml $HADOOP_CONF_DIR + cp ./travis/artifacts/hadoop32/yarn-site.xml $HADOOP_CONF_DIR + sed -i "s|1.6|11|g" pom.xml + sed -i "s|1.6|11|g" pom.xml + sed -i "s|sql.scan|#sql.scan|g" ./travis/benchmarks.lst +elif [[ "$java_ver" == 8 ]]; then + cp ./travis/artifacts/hadoop32/spark-env.sh /opt/$SPARK_BINARIES_FOLDER/conf/ + cp ./travis/artifacts/hadoop32/hadoop-env.sh $HADOOP_CONF_DIR + cp ./travis/artifacts/hadoop32/mapred-site.xml $HADOOP_CONF_DIR + cp ./travis/artifacts/hadoop32/yarn-site.xml $HADOOP_CONF_DIR + sed -i "s|1.6|1.8|g" pom.xml + sed -i "s|1.6|1.8|g" pom.xml + sed -i "s|sql.scan|#sql.scan|g" ./travis/benchmarks.lst +elif [[ "$java_ver" == 7 ]]; then + cp 
./travis/artifacts/hadoop26/spark-env.sh /opt/$SPARK_BINARIES_FOLDER/conf/ + cp ./travis/artifacts/hadoop26/mapred-site.xml $HADOOP_CONF_DIR + cp ./travis/artifacts/hadoop26/yarn-site.xml $HADOOP_CONF_DIR + sed -i "s|1.6|1.7|g" pom.xml + sed -i "s|1.6|1.7|g" pom.xml +else + echo "No configuration setting for this Java version" +fi + +# Copy common configuration files to hadoop conf dir +cp ./travis/artifacts/core-site.xml $HADOOP_CONF_DIR +cp ./travis/artifacts/hdfs-site.xml $HADOOP_CONF_DIR + +# Copy Hi-bench configuration +cp ./travis/hibench.conf ./conf/ +cp ./travis/benchmarks.lst ./conf/ +cp ./travis/hadoop.conf ./conf/ +cp ./travis/spark.conf ./conf/ + +# Set the actual version of hadoop and spark in Hi-bench config files +sed -i "s|/opt/|/opt/$HADOOP_BINARIES_FOLDER|g" ./conf/hadoop.conf +sed -i "s|/opt/|/opt/$SPARK_BINARIES_FOLDER/|g" ./conf/spark.conf +sed -i "s|sparkx.x|spark$(echo $SPARK_VER | cut -d. -f-2)|g" ./conf/spark.conf diff --git a/travis/hadoop.conf b/travis/hadoop.conf index 33bf6ba57..4e93d4608 100644 --- a/travis/hadoop.conf +++ b/travis/hadoop.conf @@ -1,5 +1,5 @@ # Hadoop home -hibench.hadoop.home /opt/hadoop-2.6.5 +hibench.hadoop.home /opt/ # The path of hadoop executable hibench.hadoop.executable ${hibench.hadoop.home}/bin/hadoop diff --git a/travis/install_hadoop_spark.sh b/travis/install_hadoop_spark.sh new file mode 100755 index 000000000..5156368b4 --- /dev/null +++ b/travis/install_hadoop_spark.sh @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Download the Spark and Hadoop binaries selected by SPARK_VER, SPARK_PACKAGE_TYPE and HADOOP_VER into /opt +SPARK_URL=https://archive.apache.org/dist/spark/spark-$SPARK_VER/spark-$SPARK_VER-bin-$SPARK_PACKAGE_TYPE.tgz +HADOOP_URL=https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VER/hadoop-$HADOOP_VER.tar.gz +echo "$SPARK_URL" +echo "$HADOOP_URL" +cd /opt || exit 1 +wget "$SPARK_URL" +wget "$HADOOP_URL" +# Uncompress tarballs; wget saved each one under its URL basename, so address it directly instead of grepping ls output +tar -xzf "/opt/${SPARK_URL##*/}" +tar -xzf "/opt/${HADOOP_URL##*/}" diff --git a/travis/jdk_ver.sh b/travis/jdk_ver.sh new file mode 100755 index 000000000..eb0216f18 --- /dev/null +++ b/travis/jdk_ver.sh @@ -0,0 +1,18 @@ +#!/bin/bash +java_cmd="$JAVA_HOME/bin/java" +IFS=$'\n' +lines=$("$java_cmd" -Xms32M -Xmx32M -version 2>&1 | tr '\r' '\n') +for line in $lines; do + if [[ (-z $result) && ($line = *"version \""*) ]] + then + ver=$(echo $line | sed -e 's/.*version "\(.*\)"\(.*\)/\1/; 1q') + if [[ $ver = "1."* ]] + then + result=$(echo $ver | sed -e 's/1\.\([0-9]*\)\(.*\)/\1/; 1q') + else + result=$(echo $ver | sed -e 's/\([0-9]*\)\(.*\)/\1/; 1q') + fi + fi +done + +echo "$result" diff --git a/travis/restart_hadoop_spark.sh b/travis/restart_hadoop_spark.sh index ff7433238..848361b3c 100755 --- a/travis/restart_hadoop_spark.sh +++ b/travis/restart_hadoop_spark.sh @@ -14,9 +14,9 @@ # limitations under the License.
# stop process -/opt/hadoop-2.6.5/sbin/stop-dfs.sh -/opt/hadoop-2.6.5/sbin/stop-yarn.sh -/opt/hadoop-2.6.5/sbin/stop-all.sh +/opt/$HADOOP_BINARIES_FOLDER/sbin/stop-dfs.sh +/opt/$HADOOP_BINARIES_FOLDER/sbin/stop-yarn.sh +/opt/$HADOOP_BINARIES_FOLDER/sbin/stop-all.sh # clear data directories mkdir -p /usr/local/hdfs/namenode/ @@ -25,16 +25,19 @@ rm -fr /usr/local/hdfs/namenode/* rm -fr /usr/local/hdfs/datanode/* # remove related logs -rm -fr /opt/hadoop-2.6.5/logs/* +rm -fr /opt/$HADOOP_BINARIES_FOLDER/logs/* # hdfs format -/opt/hadoop-2.6.5/bin/hdfs namenode -format +/opt/$HADOOP_BINARIES_FOLDER/bin/hdfs namenode -format # restart hdfs -/opt/hadoop-2.6.5/sbin/start-dfs.sh +/opt/$HADOOP_BINARIES_FOLDER/sbin/start-dfs.sh # restart yarn -/opt/hadoop-2.6.5/sbin/start-yarn.sh +/opt/$HADOOP_BINARIES_FOLDER/sbin/start-yarn.sh # restart spark -/opt/spark-1.6.0-bin-hadoop2.6/sbin/start-all.sh +/opt/$SPARK_BINARIES_FOLDER/sbin/start-all.sh + +# list nodes +/opt/$HADOOP_BINARIES_FOLDER/bin/yarn node -list 2 diff --git a/travis/spark.conf b/travis/spark.conf index db2e51b0e..1ad6d25c7 100644 --- a/travis/spark.conf +++ b/travis/spark.conf @@ -1,8 +1,8 @@ # Spark home -hibench.spark.home /opt/spark-1.6.0-bin-hadoop2.6/ +hibench.spark.home /opt/ -# Spark version. Supported value: spark1.6, spark2.0, spark2.1, spark2.2 -hibench.spark.version spark1.6 +# Spark version. Supported value: spark1.6, spark2.0, spark2.1, spark2.2, spark2.4 +hibench.spark.version sparkx.x # Spark master # standalone mode: `spark://xxx:7077`