[WIP] Scala 2.13 compatible build #860

Open · wants to merge 2 commits into base: master
2 changes: 1 addition & 1 deletion .travis.yml
@@ -22,7 +22,7 @@ services:

language: scala
scala:
- - 2.12.18
+ - 2.13.11
Contributor:

We'll need to have both a Scala 2.12 and a Scala 2.13 distribution installed in Travis (so the cross builds can run). Refer to the Travis docs for how to do this.
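A minimal sketch of what that could look like in .travis.yml, assuming Travis's standard `scala:` build-matrix key (each entry spawns one job and is exposed as `TRAVIS_SCALA_VERSION`):

```yaml
# Sketch only: build both Scala versions so the cross builds can run
scala:
  - 2.12.18
  - 2.13.11
script:
  - sbt ++$TRAVIS_SCALA_VERSION test
```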

jdk:
- openjdk11

@@ -30,7 +30,7 @@ object HadoopBundleFileSystem {
}

def createSchemes(config: Config): Seq[String] = if (config.hasPath("schemes")) {
- config.getStringList("schemes").asScala
+ config.getStringList("schemes").asScala.toSeq
} else { Seq("hdfs") }
}
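Note on the `.toSeq` additions throughout this PR: in 2.13 the `Seq` alias means `scala.collection.immutable.Seq`, while `asScala` on a `java.util.List` yields a mutable `Buffer`, so an explicit conversion is required. A minimal, self-contained sketch (hypothetical values, 2.13 semantics):

```scala
import scala.jdk.CollectionConverters._

object SchemesDemo extends App {
  val javaList = java.util.Arrays.asList("hdfs", "file")
  val buffer: scala.collection.mutable.Buffer[String] = javaList.asScala
  // 2.13: Seq is immutable.Seq, so the mutable Buffer must be converted
  val schemes: Seq[String] = buffer.toSeq
  println(schemes.mkString(", ")) // hdfs, file
}
```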

@@ -133,7 +133,7 @@ trait JsonSupport {
obj.ds.foreach(ds => fb += ("data_shape" -> ds.toJson))
obj.m.foreach(m => fb += ("model" -> m.toJson))

- JsObject(fb.result(): _*)
+ JsObject(fb.result().toSeq: _*)
}

override def read(json: JsValue): Scalar = json match {
@@ -220,7 +220,7 @@ trait JsonSupport {
if(obj.ds.nonEmpty) { fb += ("data_shape" -> obj.ds.toJson) }
if(obj.m.nonEmpty) { fb += ("model" -> obj.m.toJson) }

- JsObject(fb.result(): _*)
+ JsObject(fb.result().toSeq: _*)
}
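The `: _*` sites need the same treatment because in 2.13 a repeated parameter `T*` corresponds to `scala.collection.immutable.Seq[T]`, so spreading a mutable collection or a builder's raw result no longer typechecks. A sketch of the rule with a hypothetical `sum`:

```scala
import scala.collection.mutable

object VarargsDemo extends App {
  def sum(xs: Int*): Int = xs.sum // xs is immutable.Seq[Int] in 2.13

  val buf = mutable.ListBuffer(1, 2, 3)
  // sum(buf: _*) is a type error in 2.13; convert explicitly first:
  println(sum(buf.toSeq: _*)) // 6
}
```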

override def read(json: JsValue): List = json match {
@@ -43,7 +43,7 @@ object FormatModelSerializer {

/** Object for serializing/deserializing model definitions with JSON.
*/
- case class JsonFormatModelSerializer(implicit hr: HasBundleRegistry) extends FormatModelSerializer {
+ case class JsonFormatModelSerializer()(implicit hr: HasBundleRegistry) extends FormatModelSerializer {
override def write(path: Path, model: Model): Unit = {
Files.write(path, model.asBundle.toJson.prettyPrint.getBytes("UTF-8"))
}
@@ -55,7 +55,7 @@ case class JsonFormatModelSerializer(implicit hr: HasBundleRegistry) extends For

/** Object for serializing/deserializing model definitions with Protobuf.
*/
- case class ProtoFormatModelSerializer(implicit hr: HasBundleRegistry) extends FormatModelSerializer {
+ case class ProtoFormatModelSerializer()(implicit hr: HasBundleRegistry) extends FormatModelSerializer {
override def write(path: Path, model: Model): Unit = {
Files.write(path, model.asBundle.toByteArray)
}
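The extra `()` works around 2.13's deprecation of case classes whose only parameter list is implicit; a leading empty, non-implicit list keeps the generated `apply`/`copy` well-formed. A self-contained sketch with a hypothetical `Registry`:

```scala
trait Registry

// 2.12 tolerated `case class Wrapped(implicit r: Registry)`;
// 2.13 wants a non-implicit first parameter list:
case class Wrapped()(implicit r: Registry)

object WrappedDemo extends App {
  implicit val registry: Registry = new Registry {}
  println(Wrapped()) // implicit argument list filled from scope
}
```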
@@ -6,7 +6,7 @@ import java.nio.file.{FileVisitResult, Files, FileSystems, Path, SimpleFileVisit
import java.util.Comparator
import java.util.stream.Collectors
import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream}
- import scala.jdk.CollectionConverters.iterableAsScalaIterableConverter
+ import scala.jdk.CollectionConverters._
import scala.util.{Failure, Success, Try, Using}
/**
* Created by hollinwilkins on 12/24/16.
@@ -46,7 +46,7 @@ object FileUtil {
.sorted(Comparator.reverseOrder())
.collect(Collectors.toList())
.asScala
- .map(removeElement)
+ .map(removeElement(_))
.toArray
}.getOrElse(Array.empty)
} else {
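The switch to `.map(removeElement(_))` forces an explicit lambda where 2.13's stricter eta-expansion can otherwise trip over overloads or inference. One plausible failure mode, sketched with a hypothetical overloaded `remove` (the real `removeElement` may fail for a different reason):

```scala
object EtaDemo extends App {
  def remove(p: String): String = p
  def remove(p: String, force: Boolean): String = p

  // Seq(...).map(remove) is ambiguous here; the placeholder
  // selects the one-argument overload explicitly:
  println(Seq("a", "b").map(remove(_)))
}
```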
@@ -29,11 +29,11 @@ class DefaultFrameReader extends FrameReader {
var rows = mutable.Seq[Row]()
while(Try(reader.hasNext).getOrElse(false)) {
record = reader.next(record)
- val row = ArrayRow(new Array[Any](schema.fields.length))
+ val row = ArrayRow((new Array[Any](schema.fields.length)).toSeq)
for(i <- schema.fields.indices) { row.set(i, readers(i)(record.get(i))) }
rows :+= row
}

- DefaultLeapFrame(schema, rows)
+ DefaultLeapFrame(schema, rows.toSeq)
}
}
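Both conversions in this reader follow from 2.13's collection changes: `Array#toSeq` is now an explicit, copying conversion into an immutable `ArraySeq` (the implicit `WrappedArray` wrapping is deprecated), and a `mutable.Seq` no longer satisfies a parameter typed as plain `Seq`. A minimal sketch:

```scala
object ToSeqDemo extends App {
  val arr = new Array[Any](3)
  val asSeq: Seq[Any] = arr.toSeq // 2.13: copies into an immutable ArraySeq
  println(asSeq.length) // 3

  val rows = scala.collection.mutable.Seq(1, 2, 3)
  val frozen: Seq[Int] = rows.toSeq // mutable.Seq is not immutable.Seq in 2.13
  println(frozen.sum) // 6
}
```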
@@ -26,7 +26,7 @@ class DefaultRowReader(override val schema: StructType) extends RowReader {
override def fromBytes(bytes: Array[Byte], charset: Charset = BuiltinFormats.charset): Try[Row] = Try {
decoder = DecoderFactory.get().binaryDecoder(bytes, decoder)
record = datumReader.read(record, decoder)
- val row = ArrayRow(new Array[Any](schema.fields.length))
+ val row = ArrayRow((new Array[Any](schema.fields.length)).toSeq)
for(i <- schema.fields.indices) { row.set(i, readers(i)(record.get(i))) }
row
}
@@ -121,7 +121,7 @@ object SchemaConverter {
(implicit context: MleapContext): StructType = schema.getType match {
case Schema.Type.RECORD =>
val fields = schema.getFields.asScala.map(avroToMleapField)
- StructType(fields).get
+ StructType(fields.toSeq).get
case _ => throw new IllegalArgumentException("invalid avro record type")
}

@@ -100,21 +100,21 @@ case class ValueConverter() {

tt.base match {
case BasicType.Boolean =>
- Tensor.create(values.asInstanceOf[java.util.List[Boolean]].asScala.toArray, dimensions, indices)
+ Tensor.create(values.asInstanceOf[java.util.List[Boolean]].asScala.toArray, dimensions.toSeq, indices.map(_.toSeq.map(_.toSeq)))
case BasicType.Byte =>
- Tensor.create(values.asInstanceOf[ByteBuffer].array(), dimensions, indices)
+ Tensor.create(values.asInstanceOf[ByteBuffer].array(), dimensions.toSeq, indices.map(_.toSeq.map(_.toSeq)))
case BasicType.Short =>
- Tensor.create(values.asInstanceOf[java.util.List[Int]].asScala.map(_.toShort).toArray, dimensions, indices)
+ Tensor.create(values.asInstanceOf[java.util.List[Int]].asScala.map(_.toShort).toArray, dimensions.toSeq, indices.map(_.toSeq.map(_.toSeq)))
case BasicType.Int =>
- Tensor.create(values.asInstanceOf[java.util.List[Int]].asScala.toArray, dimensions, indices)
+ Tensor.create(values.asInstanceOf[java.util.List[Int]].asScala.toArray, dimensions.toSeq, indices.map(_.toSeq.map(_.toSeq)))
case BasicType.Long =>
- Tensor.create(values.asInstanceOf[java.util.List[Long]].asScala.toArray, dimensions, indices)
+ Tensor.create(values.asInstanceOf[java.util.List[Long]].asScala.toArray, dimensions.toSeq, indices.map(_.toSeq.map(_.toSeq)))
case BasicType.Float =>
- Tensor.create(values.asInstanceOf[java.util.List[Float]].asScala.toArray, dimensions, indices)
+ Tensor.create(values.asInstanceOf[java.util.List[Float]].asScala.toArray, dimensions.toSeq, indices.map(_.toSeq.map(_.toSeq)))
case BasicType.Double =>
- Tensor.create(values.asInstanceOf[java.util.List[Double]].asScala.toArray, dimensions, indices)
+ Tensor.create(values.asInstanceOf[java.util.List[Double]].asScala.toArray, dimensions.toSeq, indices.map(_.toSeq.map(_.toSeq)))
case BasicType.String =>
- Tensor.create(values.asInstanceOf[java.util.List[String]].asScala.toArray, dimensions, indices)
+ Tensor.create(values.asInstanceOf[java.util.List[String]].asScala.toArray, dimensions.toSeq, indices.map(_.toSeq.map(_.toSeq)))
case tpe => throw new IllegalArgumentException(s"invalid base type for tensor $tpe")
}
}
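Since every branch now repeats `dimensions.toSeq` and `indices.map(_.toSeq.map(_.toSeq))`, hoisting the conversions above the match would keep the branches short. A sketch with hypothetical stand-in types (the real `dimensions`/`indices` shapes may differ):

```scala
import scala.collection.mutable

object HoistDemo extends App {
  val dimensions: mutable.Buffer[Int] = mutable.Buffer(2, 2)
  val indices: Option[mutable.Buffer[mutable.Buffer[Int]]] =
    Some(mutable.Buffer(mutable.Buffer(0, 1)))

  // Convert once, reuse in every case branch:
  val dims: Seq[Int] = dimensions.toSeq
  val idx: Option[Seq[Seq[Int]]] = indices.map(_.toSeq.map(_.toSeq))
  println((dims, idx))
}
```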
@@ -13,7 +13,7 @@ import org.apache.spark.ml.linalg.mleap.VectorUtil._
case class VectorSlicerModel(indices: Array[Int],
namedIndices: Array[(String, Int)] = Array(),
inputSize: Int) extends Model {
- val allIndices: Array[Int] = indices.union(namedIndices.map(_._2))
+ val allIndices: Array[Int] = indices.union(namedIndices.map(_._2)).toArray

def apply(features: Vector): Vector = features match {
case features: DenseVector => Vectors.dense(allIndices.map(features.apply))
@@ -68,7 +68,7 @@ case class WordToVectorModel(wordIndex: Map[String, Int],
wordIndex.map { case (word, ind) =>
(word, wordVectors.slice(vectorSize * ind, vectorSize * ind + vectorSize))
}
- }.mapValues(Vectors.dense).map(identity)
+ }.mapValues(Vectors.dense).map(identity).toMap
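Related 2.13 note: `Map#mapValues` is deprecated there in favor of `.view.mapValues(...)`, which returns a lazy `MapView`; the trailing `.toMap` materializes a strict, serializable `Map`, which is what the old `.map(identity)` trick accomplished in 2.12. A minimal sketch:

```scala
object MapValuesDemo extends App {
  val m = Map("a" -> 1, "b" -> 2)
  // 2.13 style: view + mapValues is lazy; toMap materializes it
  val doubled: Map[String, Int] = m.view.mapValues(_ * 2).toMap
  println(doubled) // Map(a -> 2, b -> 4)
}
```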

def apply(sentence: Seq[String]): Vector = {
if (sentence.isEmpty) {
@@ -57,6 +57,6 @@ object MultiRepositoryProvider extends RepositoryProvider {
Repository.fromConfig(rConfig)
}

- new MultiRepository(repositories)
+ new MultiRepository(repositories.toSeq)
}
}
@@ -35,7 +35,7 @@ class LeapFrameBuilderSupport {
def createByteString(): BasicType = BasicType.ByteString

def createTensorDimensions(dims : java.util.List[Integer]): Option[Seq[Int]] = {
- Some(dims.asScala.map(_.intValue()))
+ Some(dims.asScala.toSeq.map(_.intValue()))
}

def createSchema(json: String): StructType = json.parseJson.convertTo[StructType]
@@ -12,11 +12,11 @@ class LeapFrameSupport {
}

def select(frame: DefaultLeapFrame, fieldNames: java.util.List[String]): DefaultLeapFrame = {
- frame.select(fieldNames.asScala: _*).get
+ frame.select(fieldNames.asScala.toSeq: _*).get
}

def drop(frame: DefaultLeapFrame, names: java.util.List[String]): DefaultLeapFrame = {
- frame.drop(names.asScala: _*).get
+ frame.drop(names.asScala.toSeq: _*).get
}

def getFields(schema: StructType): java.util.List[StructField] = {
@@ -41,7 +41,7 @@ class DefaultFrameReader extends FrameReader {
rows(i) = row
}

- DefaultLeapFrame(schema, rows)
+ DefaultLeapFrame(schema, rows.toSeq)
}
}
}
@@ -19,7 +19,7 @@ class DefaultRowReader(override val schema: StructType) extends RowReader {
override def fromBytes(bytes: Array[Byte], charset: Charset = BuiltinFormats.charset): Try[Row] = {
Using(new ByteArrayInputStream(bytes)) { in =>
val din = new DataInputStream(in)
- val row = ArrayRow(new Array[Any](schema.fields.length))
+ val row = ArrayRow((new Array[Any](schema.fields.length)).toSeq)
var i = 0
for(s <- serializers) {
row.set(i, s.read(din))
@@ -24,7 +24,7 @@ case class ArrayRow(values: mutable.WrappedArray[Any]) extends Row {
override def withValue(value: Any): ArrayRow = ArrayRow(values :+ value)
override def withValues(values: Seq[Any]): ArrayRow = ArrayRow(this.values ++ values)

- override def selectIndices(indices: Int *): ArrayRow = ArrayRow(indices.toArray.map(values))
+ override def selectIndices(indices: Int *): ArrayRow = ArrayRow(indices.toArray.map(values).toSeq)
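A caveat for this file: `mutable.WrappedArray` survives in 2.13 only as a deprecated alias of `mutable.ArraySeq`, and `.toSeq` on an array produces an immutable sequence, so presumably the `values` parameter type is relaxed elsewhere in this PR. A sketch of the 2.13 replacements (an assumption, not taken from this diff):

```scala
import scala.collection.mutable

object ArrayWrapDemo extends App {
  val backing = Array[Any](1, "two", 3.0)
  val wrapped: mutable.ArraySeq[Any] = mutable.ArraySeq.make(backing) // wraps, no copy
  val frozen: Seq[Any] = backing.toSeq                                // copies, immutable
  println((wrapped, frozen))
}
```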

override def dropIndices(indices: Int *): ArrayRow = {
val drops = Set(indices: _*)
@@ -16,7 +16,7 @@ object Row {
* @param values values in the row
* @return default row implementation with values
*/
- def apply(values: Any *): Row = ArrayRow(values.toArray)
+ def apply(values: Any *): Row = ArrayRow(values.toArray.toSeq)
}

/** Base trait for row data.
@@ -169,7 +169,7 @@ case class RowTransformer private (inputSchema: StructType,
def transformOption(row: Row): Option[ArrayRow] = {
val arr = new Array[Any](maxSize)
row.toArray.copyToArray(arr)
- val arrRow = ArrayRow(arr)
+ val arrRow = ArrayRow(arr.toSeq)

val r = transforms.foldLeft(Option(arrRow)) {
(r, transform) => r.flatMap(transform)
@@ -23,6 +23,7 @@ import java.util.UUID
import ml.combust.mleap.core.annotation.SparkCode

import scala.collection.JavaConverters._
+ import scala.collection.parallel.CollectionConverters._

Review comment:

@db-scnakandala There is an error while importing, as this has been moved in Scala 2.13 to a separate module (scala/scala-parallel-collections). The library dependency needs updating for Scala 2.13 onwards:

libraryDependencies += "org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.0"

This is where one build is failing:

[error] /home/travis/build/combust/mleap/mleap-spark-extension/src/main/scala/org/apache/spark/ml/mleap/classification/OneVsRest.scala:26:34: object CollectionConverters is not a member of package scala.collection.parallel
[error] import scala.collection.parallel.CollectionConverters._
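A hedged sketch of that fix for project/Dependencies.scala, adding the module only on 2.13+ since `.par` is built into the 2.12 standard library (the version number here is an assumption):

```scala
// Sketch: scala-parallel-collections exists only for 2.13+
libraryDependencies ++= {
  CrossVersion.partialVersion(scalaVersion.value) match {
    case Some((2, n)) if n >= 13 =>
      Seq("org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.4")
    case _ => Seq.empty
  }
}
```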

import scala.language.existentials
import org.apache.hadoop.fs.Path
import org.json4s.{DefaultFormats, JObject, _}
@@ -355,7 +356,7 @@ final class OneVsRest @Since("1.4.0") (
}

// create k columns, one for each binary classifier.
- val models = Range(0, numClasses).par.map { index =>
+ val models = (0 until numClasses).par.map { index =>
// generate new label metadata for the binary problem.
val newLabelMeta = BinaryAttribute.defaultAttr.withName("label").toMetadata()
val labelColName = "mc2b$" + index
@@ -13,6 +13,7 @@ import java.nio.file.Files
import java.util.zip.ZipInputStream
import scala.jdk.CollectionConverters._
import scala.collection.mutable
+ import scala.reflect.ClassTag
import scala.util.Try

/**
@@ -28,7 +29,7 @@ case class TensorflowModel( @transient var graph: Option[tensorflow.Graph] = Non
) extends Model with AutoCloseable {

def apply(values: Tensor[_] *): Seq[Any] = {
- val garbage: mutable.ArrayBuilder[tensorflow.Tensor] = mutable.ArrayBuilder.make[tensorflow.Tensor]()
+ val garbage: mutable.ArrayBuilder[tensorflow.Tensor] = mutable.ArrayBuilder.make[tensorflow.Tensor]
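This change (and the new `ClassTag` import above) follows from 2.13's `ArrayBuilder.make`, which is parameterless and `ClassTag`-bound. A minimal sketch:

```scala
import scala.collection.mutable
import scala.reflect.ClassTag

object BuilderDemo extends App {
  // 2.13: make takes no argument list and needs a ClassTag for the element type
  def collect[T: ClassTag](xs: T*): Array[T] = {
    val builder = mutable.ArrayBuilder.make[T]
    xs.foreach(builder += _)
    builder.result()
  }
  println(collect(1, 2, 3).mkString(",")) // 1,2,3
}
```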

val result = Try {
val tensors = values.zip(inputs).map {
6 changes: 4 additions & 2 deletions project/Common.scala
@@ -17,7 +17,8 @@ object Common {
lazy val defaultSettings = buildSettings ++ sonatypeSettings

lazy val buildSettings: Seq[Def.Setting[_]] = Seq(
scalaVersion := "2.12.18",
scalaVersion := "2.13.11",
crossScalaVersions := Seq("2.12.13", "2.13.11"),
scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature"),
ThisBuild / libraryDependencySchemes +=
"org.scala-lang.modules" %% "scala-collection-compat" % VersionScheme.Always,
@@ -31,7 +32,8 @@
} else {
Seq()
}
- }
+ },
+ resolvers += "XGBoost4J Snapshot Repo" at "https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/snapshot/"
)

lazy val mleapSettings: Seq[Def.Setting[_]] = Seq(organization := "ml.combust.mleap")
12 changes: 7 additions & 5 deletions project/Dependencies.scala
@@ -19,7 +19,7 @@ object Dependencies {
lazy val awsSdkVersion = "1.12.470"
lazy val scalaCollectionCompat = "2.8.1"
val tensorflowJavaVersion = "0.5.0" // Match Tensorflow 2.10.1 https://github.com/tensorflow/java/#tensorflow-version-support
val xgboostVersion = "1.7.6"
val xgboostVersion = "2.0.0-SNAPSHOT"
val breezeVersion = "2.1.0"
val hadoopVersion = "3.3.4" // matches spark version
val platforms = "windows-x86_64,linux-x86_64,macosx-x86_64"
@@ -37,7 +37,9 @@
)
val avroDep = "org.apache.avro" % "avro" % "1.11.1"
val sprayJson = "io.spray" %% "spray-json" % "1.3.6"
val config = "com.typesafe" % "config" % "1.4.2"
// https://github.com/jsuereth/scala-arm/issues/79
val arm = "com.michaelpollmeier" %% "scala-arm" % "2.1"
val config = "com.typesafe" % "config" % "1.3.0"
val scalaReflect = ScalaVersionDependentModuleID.versioned("org.scala-lang" % "scala-reflect" % _)
val scalaTest = "org.scalatest" %% "scalatest" % scalaTestVersion
val jTransform = "com.github.rwl" % "jtransforms" % "2.4.0" exclude("junit", "junit")
@@ -80,8 +82,8 @@

val breeze = "org.scalanlp" %% "breeze" % breezeVersion

- val xgboostDep = "ml.dmlc" %% "xgboost4j" % xgboostVersion
- val xgboostSparkDep = "ml.dmlc" %% "xgboost4j-spark" % xgboostVersion
+ val xgboostDep = "ml.dmlc" %% "xgboost4j" % xgboostVersion exclude("org.scala-lang.modules", "scala-collection-compat_2.12")
+ val xgboostSparkDep = "ml.dmlc" %% "xgboost4j-spark" % xgboostVersion exclude("org.scala-lang.modules", "scala-collection-compat_2.12") exclude("ml.dmlc", "xgboost4j_2.12")
val xgboostPredictorDep = "ai.h2o" % "xgboost-predictor" % "0.3.18" exclude("com.esotericsoftware.kryo", "kryo")

val hadoop = "org.apache.hadoop" % "hadoop-client" % hadoopVersion
@@ -136,7 +138,7 @@

val xgboostRuntime = l ++= Seq(xgboostDep) ++ Seq(xgboostPredictorDep) ++ Test.spark ++ Test.sparkTest ++ Seq(Test.scalaTest)

- val xgboostSpark = l ++= Seq(xgboostSparkDep) ++ Provided.spark ++ Test.spark ++ Test.sparkTest
+ val xgboostSpark = l ++= Seq(xgboostDep) ++ Seq(xgboostSparkDep) ++ Provided.spark ++ Test.spark ++ Test.sparkTest

val serving = l ++= Seq(akkaHttp, akkaHttpSprayJson, config, Test.scalaTest, Test.akkaHttpTestkit)
