incubator-toree git commit: Updated branch information in readme
Repository: incubator-toree

Updated Branches:
  refs/heads/master 01936c1da -> 87a9eb8ad


Updated branch information in readme


Project: http://git-wip-us.apache.org/repos/asf/incubator-toree/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-toree/commit/87a9eb8a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-toree/tree/87a9eb8a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-toree/diff/87a9eb8a

Branch: refs/heads/master
Commit: 87a9eb8ad08406ce0747e92f7714d4eb54153293
Parents: 01936c1
Author: Gino Bustelo
Authored: Thu Sep 8 09:28:30 2016 -0500
Committer: Gino Bustelo
Committed: Thu Sep 8 09:28:30 2016 -0500

--
 README.md | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/87a9eb8a/README.md
--
diff --git a/README.md b/README.md
index 8024ff1..148f2ed 100644
--- a/README.md
+++ b/README.md
@@ -94,9 +94,8 @@ As it stands, we maintain several branches for legacy versions of Spark. The tab
 
 Branch | Apache Spark Version |
-[master][master] | 1.5.1+
-[branch-0.1.4][branch-0.1.4] | 1.4.1
-[branch-0.1.3][branch-0.1.3] | 1.3.1
+[master][master] | 2.0
+[0.1.x][0.1.x] | 1.6+
 
 Please note that for the most part, new features will mainly be added to the `master` branch.
 
@@ -124,5 +123,4 @@ We are working on porting our documentation into Apache. For the time being, you
 [mail-list]: mailto:d...@toree.incubator.apache.org
 [master]: https://github.com/apache/incubator-toree
-[branch-0.1.4]: https://github.com/apache/incubator-toree/tree/branch-0.1.4
-[branch-0.1.3]: https://github.com/apache/incubator-toree/tree/branch-0.1.3
+[0.1.x]: https://github.com/apache/incubator-toree/tree/0.1.x
[08/10] incubator-toree git commit: Squashed work for Apache Spark 2.0 and Scala 2.11
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/01936c1d/scala-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/scala/ScalaInterpreter.scala
--
diff --git a/scala-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/scala/ScalaInterpreter.scala b/scala-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/scala/ScalaInterpreter.scala
index b7d40f1..33fa861 100644
--- a/scala-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/scala/ScalaInterpreter.scala
+++ b/scala-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/scala/ScalaInterpreter.scala
@@ -17,18 +17,17 @@
 package org.apache.toree.kernel.interpreter.scala
 
-import java.io.{BufferedReader, ByteArrayOutputStream, InputStreamReader, PrintStream}
+import java.io.ByteArrayOutputStream
 import java.net.{URL, URLClassLoader}
 import java.nio.charset.Charset
 import java.util.concurrent.ExecutionException
 
-import com.typesafe.config.{ConfigFactory, Config}
+import com.typesafe.config.{Config, ConfigFactory}
 import org.apache.spark.SparkContext
-import org.apache.spark.repl.{SparkCommandLine, SparkIMain, SparkJLineCompletion}
-import org.apache.spark.sql.SQLContext
-import org.apache.toree.global.StreamState
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.repl.Main
+
 import org.apache.toree.interpreter._
-import org.apache.toree.interpreter.imports.printers.{WrapperConsole, WrapperSystem}
 import org.apache.toree.kernel.api.{KernelLike, KernelOptions}
 import org.apache.toree.utils.{MultiOutputStream, TaskManager}
 import org.slf4j.LoggerFactory
@@ -36,571 +35,306 @@ import org.slf4j.LoggerFactory
 import scala.annotation.tailrec
 import scala.concurrent.{Await, Future}
 import scala.language.reflectiveCalls
-import scala.tools.nsc.backend.JavaPlatform
-import scala.tools.nsc.interpreter.{IR, InputStream, JPrintWriter, OutputStream}
-import scala.tools.nsc.io.AbstractFile
-import scala.tools.nsc.util.{ClassPath, MergedClassPath}
-import scala.tools.nsc.{Global, Settings, io}
+import scala.tools.nsc.Settings
+import scala.tools.nsc.interpreter.{IR, OutputStream}
+import scala.tools.nsc.util.ClassPath
 import scala.util.{Try => UtilTry}
 
-class ScalaInterpreter(private val config:Config = ConfigFactory.load) extends Interpreter {
-
-  protected val logger = LoggerFactory.getLogger(this.getClass.getName)
-
-  private val ExecutionExceptionName = "lastException"
-  protected val _thisClassloader = this.getClass.getClassLoader
-
-  protected val _runtimeClassloader =
-    new URLClassLoader(Array(), _thisClassloader) {
-      def addJar(url: URL) = this.addURL(url)
-    }
-  protected val lastResultOut = new ByteArrayOutputStream()
+class ScalaInterpreter(private val config:Config = ConfigFactory.load) extends Interpreter with ScalaInterpreterSpecific {
+  protected val logger = LoggerFactory.getLogger(this.getClass.getName)
+  protected val _thisClassloader = this.getClass.getClassLoader
 
-  protected val multiOutputStream = MultiOutputStream(List(Console.out, lastResultOut))
-  private var taskManager: TaskManager = _
-  var sparkIMain: SparkIMain = _
-  protected var jLineCompleter: SparkJLineCompletion = _
+  protected val lastResultOut = new ByteArrayOutputStream()
 
-  protected var settings: Settings = newSettings(interpreterArgs())
+  protected val multiOutputStream = MultiOutputStream(List(Console.out, lastResultOut))
+  private[scala] var taskManager: TaskManager = _
 
-  settings.classpath.value = buildClasspath(_thisClassloader)
-  settings.embeddedDefaults(_runtimeClassloader)
-
-  private val maxInterpreterThreads: Int = {
-    if(config.hasPath("max_interpreter_threads"))
-      config.getInt("max_interpreter_threads")
-    else
-      TaskManager.DefaultMaximumWorkers
+  /** Since the ScalaInterpreter can be started without a kernel, we need to ensure that we can compile things.
+      Adding in the default classpaths as needed.
+    */
+  def appendClassPath(settings: Settings): Settings = {
+    settings.classpath.value = buildClasspath(_thisClassloader)
+    settings.embeddedDefaults(_runtimeClassloader)
+    settings
   }
 
-  protected def newSparkIMain(
-    settings: Settings, out: JPrintWriter
-  ): SparkIMain = {
-    val s = new SparkIMain(settings, out)
-    s.initializeSynchronous()
+  protected var settings: Settings = newSettings(List())
+  settings = appendClassPath(settings)
 
-    s
-  }
-  protected def newTaskManager(): TaskManager =
-    new TaskManager(maximumWorkers = maxInterpreterThreads)
+  private val maxInterpreterThreads: Int = {
+    if(config.hasPath("max_interpreter_threads"))
+      config.getInt("max_interpreter_threads")
+    else
+      TaskManager.DefaultMaximumWorkers
+  }
 
-  protected def newSettings(args: List[String]): Settings =
-    new SparkCommandLine(args).settings
+  protected def newTaskManager():
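A note for readers following the migration: the import changes above swap org.apache.spark.sql.SQLContext and the SparkIMain-based REPL classes for org.apache.spark.sql.SparkSession and the stock Scala 2.11 interpreter. The snippet below is not part of this commit; it is a minimal, self-contained sketch of how a Spark 2.0 entry point is typically obtained once SQLContext is replaced by SparkSession (the object name SparkSessionSketch is invented for illustration).

import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession

object SparkSessionSketch {
  def main(args: Array[String]): Unit = {
    // Spark 2.0 entry point; replaces the separate SparkContext + SQLContext setup.
    val spark: SparkSession = SparkSession.builder()
      .appName("toree-style-session")
      .master("local[*]")
      .getOrCreate()

    // The SparkContext is still reachable for RDD-based code paths.
    val sc: SparkContext = spark.sparkContext
    println(sc.parallelize(1 to 10).sum())

    spark.stop()
  }
}

Because SparkSession exposes the same DataFrame/SQL surface that SQLContext did, older call sites generally keep working, which is what lets the interpreter drop the separate SQLContext import.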
[03/10] incubator-toree git commit: Squashed work for Apache Spark 2.0 and Scala 2.11
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/01936c1d/sparkr-interpreter/src/main/resources/R/pkg/R/sparkR.R
--
diff --git a/sparkr-interpreter/src/main/resources/R/pkg/R/sparkR.R b/sparkr-interpreter/src/main/resources/R/pkg/R/sparkR.R
deleted file mode 100644
index c445d1b..000
--- a/sparkr-interpreter/src/main/resources/R/pkg/R/sparkR.R
+++ /dev/null
@@ -1,360 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-.sparkREnv <- new.env()
-
-# Utility function that returns TRUE if we have an active connection to the
-# backend and FALSE otherwise
-connExists <- function(env) {
-  tryCatch({
-    exists(".sparkRCon", envir = env) && isOpen(env[[".sparkRCon"]])
-  },
-  error = function(err) {
-    return(FALSE)
-  })
-}
-
-#' Stop the Spark context.
-#'
-#' Also terminates the backend this R session is connected to
-sparkR.stop <- function() {
-  env <- .sparkREnv
-  if (exists(".sparkRCon", envir = env)) {
-    # cat("Stopping SparkR\n")
-    if (exists(".sparkRjsc", envir = env)) {
-      sc <- get(".sparkRjsc", envir = env)
-      callJMethod(sc, "stop")
-      rm(".sparkRjsc", envir = env)
-    }
-
-    if (exists(".backendLaunched", envir = env)) {
-      callJStatic("SparkRHandler", "stopBackend")
-    }
-
-    # Also close the connection and remove it from our env
-    conn <- get(".sparkRCon", envir = env)
-    close(conn)
-
-    rm(".sparkRCon", envir = env)
-    rm(".scStartTime", envir = env)
-  }
-
-  if (exists(".monitorConn", envir = env)) {
-    conn <- get(".monitorConn", envir = env)
-    close(conn)
-    rm(".monitorConn", envir = env)
-  }
-
-  # Clear all broadcast variables we have
-  # as the jobj will not be valid if we restart the JVM
-  clearBroadcastVariables()
-
-  # Clear jobj maps
-  clearJobjs()
-}
-
-#' Initialize a new Spark Context.
-#'
-#' This function initializes a new SparkContext.
-#'
-#' @param master The Spark master URL.
-#' @param appName Application name to register with cluster manager
-#' @param sparkHome Spark Home directory
-#' @param sparkEnvir Named list of environment variables to set on worker nodes.
-#' @param sparkExecutorEnv Named list of environment variables to be used when launching executors.
-#' @param sparkJars Character string vector of jar files to pass to the worker nodes.
-#' @param sparkPackages Character string vector of packages from spark-packages.org
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark")
-#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark",
-#'                  list(spark.executor.memory="1g"))
-#' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark",
-#'                  list(spark.executor.memory="1g"),
-#'                  list(LD_LIBRARY_PATH="/directory of JVM libraries (libjvm.so) on workers/"),
-#'                  c("jarfile1.jar","jarfile2.jar"))
-#'}
-
-sparkR.init <- function(
-  master = "",
-  appName = "SparkR",
-  sparkHome = Sys.getenv("SPARK_HOME"),
-  sparkEnvir = list(),
-  sparkExecutorEnv = list(),
-  sparkJars = "",
-  sparkPackages = "") {
-
-  if (exists(".sparkRjsc", envir = .sparkREnv)) {
-    cat(paste("Re-using existing Spark Context.",
-              "Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n"))
-    return(get(".sparkRjsc", envir = .sparkREnv))
-  }
-
-  jars <- suppressWarnings(normalizePath(as.character(sparkJars)))
-
-  # Classpath separator is ";" on Windows
-  # URI needs four /// as from http://stackoverflow.com/a/18522792
-  if (.Platform$OS.type == "unix") {
-    uriSep <- "//"
-  } else {
-    uriSep <- ""
-  }
-
-  existingPort <- Sys.getenv("EXISTING_SPARKR_BACKEND_PORT", "")
-  if (existingPort != "") {
-    backendPort <- existingPort
-  } else {
-    path <- tempfile(pattern = "backend_port")
-    launchBackend(
-        args = path,
-        sparkHome = sparkHome,
-        jars = jars,
-        sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"),
-        packages = sparkPackages)
-    # wait atmost 100 seconds for JVM to launch
-    wait <- 0.1
-    for (i in 1:25) {
-      Sys.sleep(wait)
-      if (file.exists(path)) {
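The deleted sparkR.init above launches a JVM backend, hands it a temporary file path, and then sleeps in a loop until the backend writes its port number into that file. The sketch below illustrates only that handshake pattern; it is written in Scala for consistency with the rest of the project, and the names BackendPortSketch, waitForBackendPort, and BACKEND_PORT_FILE are hypothetical, not part of Toree or SparkR.

import java.nio.file.{Files, Path, Paths}

object BackendPortSketch {
  /** Poll `portFile` until the launched backend writes its port, or give up after `maxWaitSeconds`. */
  def waitForBackendPort(portFile: Path, maxWaitSeconds: Double = 100.0): Option[Int] = {
    var waited = 0.0
    var wait = 0.1                      // start with short sleeps, back off gradually
    while (waited < maxWaitSeconds) {
      if (Files.exists(portFile)) {
        val text = new String(Files.readAllBytes(portFile)).trim
        return scala.util.Try(text.toInt).toOption
      }
      Thread.sleep((wait * 1000).toLong)
      waited += wait
      wait = math.min(wait * 2, 5.0)    // cap the backoff step
    }
    None                                // backend never reported a port
  }

  def main(args: Array[String]): Unit = {
    val path = Paths.get(sys.env.getOrElse("BACKEND_PORT_FILE", "/tmp/backend_port"))
    println(waitForBackendPort(path))
  }
}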
[02/10] incubator-toree git commit: Squashed work for Apache Spark 2.0 and Scala 2.11
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/01936c1d/sparkr-interpreter/src/main/resources/R/pkg/inst/tests/test_rdd.R
--
diff --git a/sparkr-interpreter/src/main/resources/R/pkg/inst/tests/test_rdd.R b/sparkr-interpreter/src/main/resources/R/pkg/inst/tests/test_rdd.R
deleted file mode 100644
index 71aed2b..000
--- a/sparkr-interpreter/src/main/resources/R/pkg/inst/tests/test_rdd.R
+++ /dev/null
@@ -1,793 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-context("basic RDD functions")
-
-# JavaSparkContext handle
-sc <- sparkR.init()
-
-# Data
-nums <- 1:10
-rdd <- parallelize(sc, nums, 2L)
-
-intPairs <- list(list(1L, -1), list(2L, 100), list(2L, 1), list(1L, 200))
-intRdd <- parallelize(sc, intPairs, 2L)
-
-test_that("get number of partitions in RDD", {
-  expect_equal(numPartitions(rdd), 2)
-  expect_equal(numPartitions(intRdd), 2)
-})
-
-test_that("first on RDD", {
-  expect_equal(first(rdd), 1)
-  newrdd <- lapply(rdd, function(x) x + 1)
-  expect_equal(first(newrdd), 2)
-})
-
-test_that("count and length on RDD", {
-  expect_equal(count(rdd), 10)
-  expect_equal(length(rdd), 10)
-})
-
-test_that("count by values and keys", {
-  mods <- lapply(rdd, function(x) { x %% 3 })
-  actual <- countByValue(mods)
-  expected <- list(list(0, 3L), list(1, 4L), list(2, 3L))
-  expect_equal(sortKeyValueList(actual), sortKeyValueList(expected))
-
-  actual <- countByKey(intRdd)
-  expected <- list(list(2L, 2L), list(1L, 2L))
-  expect_equal(sortKeyValueList(actual), sortKeyValueList(expected))
-})
-
-test_that("lapply on RDD", {
-  multiples <- lapply(rdd, function(x) { 2 * x })
-  actual <- collect(multiples)
-  expect_equal(actual, as.list(nums * 2))
-})
-
-test_that("lapplyPartition on RDD", {
-  sums <- lapplyPartition(rdd, function(part) { sum(unlist(part)) })
-  actual <- collect(sums)
-  expect_equal(actual, list(15, 40))
-})
-
-test_that("mapPartitions on RDD", {
-  sums <- mapPartitions(rdd, function(part) { sum(unlist(part)) })
-  actual <- collect(sums)
-  expect_equal(actual, list(15, 40))
-})
-
-test_that("flatMap() on RDDs", {
-  flat <- flatMap(intRdd, function(x) { list(x, x) })
-  actual <- collect(flat)
-  expect_equal(actual, rep(intPairs, each=2))
-})
-
-test_that("filterRDD on RDD", {
-  filtered.rdd <- filterRDD(rdd, function(x) { x %% 2 == 0 })
-  actual <- collect(filtered.rdd)
-  expect_equal(actual, list(2, 4, 6, 8, 10))
-
-  filtered.rdd <- Filter(function(x) { x[[2]] < 0 }, intRdd)
-  actual <- collect(filtered.rdd)
-  expect_equal(actual, list(list(1L, -1)))
-
-  # Filter out all elements.
-  filtered.rdd <- filterRDD(rdd, function(x) { x > 10 })
-  actual <- collect(filtered.rdd)
-  expect_equal(actual, list())
-})
-
-test_that("lookup on RDD", {
-  vals <- lookup(intRdd, 1L)
-  expect_equal(vals, list(-1, 200))
-
-  vals <- lookup(intRdd, 3L)
-  expect_equal(vals, list())
-})
-
-test_that("several transformations on RDD (a benchmark on PipelinedRDD)", {
-  rdd2 <- rdd
-  for (i in 1:12)
-    rdd2 <- lapplyPartitionsWithIndex(
-              rdd2, function(partIndex, part) {
-                part <- as.list(unlist(part) * partIndex + i)
-              })
-  rdd2 <- lapply(rdd2, function(x) x + x)
-  actual <- collect(rdd2)
-  expected <- list(24, 24, 24, 24, 24,
-                   168, 170, 172, 174, 176)
-  expect_equal(actual, expected)
-})
-
-test_that("PipelinedRDD support actions: cache(), persist(), unpersist(), checkpoint()", {
-  # RDD
-  rdd2 <- rdd
-  # PipelinedRDD
-  rdd2 <- lapplyPartitionsWithIndex(
-            rdd2,
-            function(partIndex, part) {
-              part <- as.list(unlist(part) * partIndex)
-            })
-
-  cache(rdd2)
-  expect_true(rdd2@env$isCached)
-  rdd2 <- lapply(rdd2, function(x) x)
-  expect_false(rdd2@env$isCached)
-
-  unpersist(rdd2)
-  expect_false(rdd2@env$isCached)
-
-  persist(rdd2, "MEMORY_AND_DISK")
-  expect_true(rdd2@env$isCached)
-  rdd2 <- lapply(rdd2, function(x) x)
-  expect_false(rdd2@env$isCached)
-
-  unpersist(rdd2)
-  expect_false(rdd2@env$isCached)
-
-  tempDir <- tempfile(pattern = "checkpoint")
-  setCheckpointDir(sc, tempDir)
-  checkpoint(rdd2)
-
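For orientation, the deleted test_rdd.R exercises the SparkR RDD API (parallelize, lapply, collect, filterRDD, count, and so on) against a two-partition RDD of 1:10. The Scala sketch below is not from this commit; it shows the equivalent operations on Spark's native RDD API, which is roughly what the removed R tests assert (the object name RddOpsSketch is invented for illustration).

import org.apache.spark.{SparkConf, SparkContext}

object RddOpsSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("rdd-ops").setMaster("local[2]"))

    val nums = 1 to 10
    val rdd = sc.parallelize(nums, 2)                 // two partitions, as in the deleted tests

    println(rdd.getNumPartitions)                     // 2
    println(rdd.first())                              // 1
    println(rdd.count())                              // 10
    println(rdd.map(_ * 2).collect().toList)          // List(2, 4, ..., 20)
    println(rdd.filter(_ % 2 == 0).collect().toList)  // List(2, 4, 6, 8, 10)

    sc.stop()
  }
}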
[04/10] incubator-toree git commit: Squashed work for Apache Spark 2.0 and Scala 2.11
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/01936c1d/sparkr-interpreter/src/main/resources/R/pkg/R/generics.R
--
diff --git a/sparkr-interpreter/src/main/resources/R/pkg/R/generics.R b/sparkr-interpreter/src/main/resources/R/pkg/R/generics.R
deleted file mode 100644
index 43dd8d2..000
--- a/sparkr-interpreter/src/main/resources/R/pkg/R/generics.R
+++ /dev/null
@@ -1,985 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-############ RDD Actions and Transformations ############
-
-# @rdname aggregateRDD
-# @seealso reduce
-# @export
-setGeneric("aggregateRDD",
-           function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })
-
-# @rdname cache-methods
-# @export
-setGeneric("cache", function(x) { standardGeneric("cache") })
-
-# @rdname coalesce
-# @seealso repartition
-# @export
-setGeneric("coalesce", function(x, numPartitions, ...) { standardGeneric("coalesce") })
-
-# @rdname checkpoint-methods
-# @export
-setGeneric("checkpoint", function(x) { standardGeneric("checkpoint") })
-
-# @rdname collect-methods
-# @export
-setGeneric("collect", function(x, ...) { standardGeneric("collect") })
-
-# @rdname collect-methods
-# @export
-setGeneric("collectAsMap", function(x) { standardGeneric("collectAsMap") })
-
-# @rdname collect-methods
-# @export
-setGeneric("collectPartition",
-           function(x, partitionId) {
-             standardGeneric("collectPartition")
-           })
-
-# @rdname count
-# @export
-setGeneric("count", function(x) { standardGeneric("count") })
-
-# @rdname countByValue
-# @export
-setGeneric("countByValue", function(x) { standardGeneric("countByValue") })
-
-# @rdname statfunctions
-# @export
-setGeneric("crosstab", function(x, col1, col2) { standardGeneric("crosstab") })
-
-# @rdname distinct
-# @export
-setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") })
-
-# @rdname filterRDD
-# @export
-setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") })
-
-# @rdname first
-# @export
-setGeneric("first", function(x) { standardGeneric("first") })
-
-# @rdname flatMap
-# @export
-setGeneric("flatMap", function(X, FUN) { standardGeneric("flatMap") })
-
-# @rdname fold
-# @seealso reduce
-# @export
-setGeneric("fold", function(x, zeroValue, op) { standardGeneric("fold") })
-
-# @rdname foreach
-# @export
-setGeneric("foreach", function(x, func) { standardGeneric("foreach") })
-
-# @rdname foreach
-# @export
-setGeneric("foreachPartition", function(x, func) { standardGeneric("foreachPartition") })
-
-# The jrdd accessor function.
-setGeneric("getJRDD", function(rdd, ...) { standardGeneric("getJRDD") })
-
-# @rdname glom
-# @export
-setGeneric("glom", function(x) { standardGeneric("glom") })
-
-# @rdname keyBy
-# @export
-setGeneric("keyBy", function(x, func) { standardGeneric("keyBy") })
-
-# @rdname lapplyPartition
-# @export
-setGeneric("lapplyPartition", function(X, FUN) { standardGeneric("lapplyPartition") })
-
-# @rdname lapplyPartitionsWithIndex
-# @export
-setGeneric("lapplyPartitionsWithIndex",
-           function(X, FUN) {
-             standardGeneric("lapplyPartitionsWithIndex")
-           })
-
-# @rdname lapply
-# @export
-setGeneric("map", function(X, FUN) { standardGeneric("map") })
-
-# @rdname lapplyPartition
-# @export
-setGeneric("mapPartitions", function(X, FUN) { standardGeneric("mapPartitions") })
-
-# @rdname lapplyPartitionsWithIndex
-# @export
-setGeneric("mapPartitionsWithIndex",
-           function(X, FUN) { standardGeneric("mapPartitionsWithIndex") })
-
-# @rdname maximum
-# @export
-setGeneric("maximum", function(x) { standardGeneric("maximum") })
-
-# @rdname minimum
-# @export
-setGeneric("minimum", function(x) { standardGeneric("minimum") })
-
-# @rdname sumRDD
-# @export
-setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") })
-
-# @rdname name
-# @export
-setGeneric("name", function(x) { standardGeneric("name") })
-
-# @rdname numPartitions
-# @export
-setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") })
-
-# @rdname persist
-# @export
-setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })
-
-#