[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user zsxwing commented on a diff in the pull request: https://github.com/apache/spark/pull/9922#discussion_r45803191 --- Diff: streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala --- @@ -59,18 +74,32 @@ private[python] class TransformFunction(@transient var pfunc: PythonTransformFun extends function.Function2[JList[JavaRDD[_]], Time, JavaRDD[Array[Byte]]] { def apply(rdd: Option[RDD[_]], time: Time): Option[RDD[Array[Byte]]] = { -Option(pfunc.call(time.milliseconds, List(rdd.map(JavaRDD.fromRDD(_)).orNull).asJava)) - .map(_.rdd) +val resultRDD = pfunc.call(time.milliseconds, List(rdd.map(JavaRDD.fromRDD(_)).orNull).asJava) +val failure = pfunc.getLastFailure +if (failure != null) { + throw new SparkException("An exception was raised by Python:\n" + failure) +} +Option(resultRDD).map(_.rdd) } def apply(rdd: Option[RDD[_]], rdd2: Option[RDD[_]], time: Time): Option[RDD[Array[Byte]]] = { val rdds = List(rdd.map(JavaRDD.fromRDD(_)).orNull, rdd2.map(JavaRDD.fromRDD(_)).orNull).asJava -Option(pfunc.call(time.milliseconds, rdds)).map(_.rdd) +val resultRDD = pfunc.call(time.milliseconds, rdds) +val failure = pfunc.getLastFailure +if (failure != null) { + throw new SparkException("An exception was raised by Python:\n" + failure) +} +Option(resultRDD).map(_.rdd) } // for function.Function2 def call(rdds: JList[JavaRDD[_]], time: Time): JavaRDD[Array[Byte]] = { --- End diff -- This one is not used in any place. So I cannot write a test for it. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11866] [network] [core] Make sure timed...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9917#discussion_r45803244 --- Diff: core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala --- @@ -208,46 +193,52 @@ private[netty] class NettyRpcEnv( clientFactory.createClient(address.host, address.port) } - private[netty] def ask(message: RequestMessage): Future[Any] = { + private[netty] def ask[T: ClassTag](message: RequestMessage, timeout: RpcTimeout): Future[T] = { val promise = Promise[Any]() val remoteAddr = message.receiver.address + +def onFailure(e: Throwable): Unit = { + if (!promise.tryFailure(e)) { +logWarning(s"Ignored failure: $e") + } +} + +def onSuccess(reply: Any): Unit = reply match { + case RpcFailure(e) => onFailure(e) + case rpcReply => +if (!promise.trySuccess(rpcReply)) { + logWarning(s"Ignored message: $reply") +} +} + if (remoteAddr == address) { val p = Promise[Any]() - dispatcher.postLocalMessage(message, p) p.future.onComplete { -case Success(response) => - val reply = response.asInstanceOf[AskResponse] - if (reply.reply.isInstanceOf[RpcFailure]) { -if (!promise.tryFailure(reply.reply.asInstanceOf[RpcFailure].e)) { - logWarning(s"Ignore failure: ${reply.reply}") -} - } else if (!promise.trySuccess(reply.reply)) { -logWarning(s"Ignore message: ${reply}") - } -case Failure(e) => - if (!promise.tryFailure(e)) { -logWarning("Ignore Exception", e) - } +case Success(response) => onSuccess(response) +case Failure(e) => onFailure(e) }(ThreadUtils.sameThread) + dispatcher.postLocalMessage(message, p) } else { - postToOutbox(message.receiver, OutboxMessage(serialize(message), -(e) => { - if (!promise.tryFailure(e)) { -logWarning("Ignore Exception", e) - } -}, -(client, response) => { - val reply = deserialize[AskResponse](client, response) - if (reply.reply.isInstanceOf[RpcFailure]) { -if (!promise.tryFailure(reply.reply.asInstanceOf[RpcFailure].e)) { - logWarning(s"Ignore failure: ${reply.reply}") -} - } else if 
(!promise.trySuccess(reply.reply)) { -logWarning(s"Ignore message: ${reply}") - } -})) + val rpcMessage = RpcOutboxMessage(serialize(message), +onFailure, +(client, response) => onSuccess(deserialize[Any](client, response))) + postToOutbox(message.receiver, rpcMessage) + promise.future.onFailure { +case _: TimeoutException => rpcMessage.onTimeout() --- End diff -- I see, it's because you don't have the `requestId` here anymore now that you moved it into the message class. I also found the pattern `message.send(client)` kind of strange, but it does simplify the code a little so it's probably OK to keep. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11140][CORE] Transfer files using netwo...
Github user vanzin commented on the pull request: https://github.com/apache/spark/pull/9947#issuecomment-159427177 I'll revert all the doc changes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11866] [network] [core] Make sure timed...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9917#discussion_r45806519 --- Diff: network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java --- @@ -48,10 +54,40 @@ public abstract void receive( public abstract StreamManager getStreamManager(); /** + * Receives an RPC message that does not expect a reply. The default implementation will + * call "{@link receive(TransportClient, byte[], RpcResponseCallback}" and log a warning if + * any of the callback methods are called. + * + * @param client A channel client which enables the handler to make requests back to the sender + * of this RPC. This will always be the exact same object for a particular channel. + * @param message The serialized bytes of the RPC. + */ + public void receive(TransportClient client, byte[] message) { +receive(client, message, ONE_WAY_CALLBACK); + } + + /** * Invoked when the connection associated with the given client has been invalidated. * No further requests will come from this client. */ public void connectionTerminated(TransportClient client) { } public void exceptionCaught(Throwable cause, TransportClient client) { } + + private static class OneWayRpcCallback implements RpcResponseCallback { + +private final Logger logger = LoggerFactory.getLogger(OneWayRpcCallback.class); + +@Override +public void onSuccess(byte[] response) { + logger.warn("Response provided for one-way RPC."); --- End diff -- isn't this going to be really noisy for non-netty `RpcHandler`s? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11783][SQL] Fixes execution Hive client...
Github user yhuai commented on the pull request: https://github.com/apache/spark/pull/9895#issuecomment-159432435 I am going to merge it to master and branch 1.6. I will modify the comment to point to hive's code and doc. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11969] [SQL] [PYSPARK] visualization of...
GitHub user davies opened a pull request: https://github.com/apache/spark/pull/9949 [SPARK-11969] [SQL] [PYSPARK] visualization of SQL query for pyspark Currently, we do not have visualization for SQL queries from Python; this PR fixes that. cc @zsxwing You can merge this pull request into a Git repository by running: $ git pull https://github.com/davies/spark pyspark_sql_ui Alternatively you can review and apply these changes as the patch at: https://github.com/apache/spark/pull/9949.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #9949 commit e3fe7c25ec418248d155208de7d2febd6ce284ba Author: Davies Liu Date: 2015-11-24T23:47:21Z visualization of SQL query for pyspark --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user zsxwing commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159423701 Just fixed other overloads of `TransformFunction` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11866] [network] [core] Make sure timed...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9917#discussion_r45802966 --- Diff: core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala --- @@ -208,46 +193,52 @@ private[netty] class NettyRpcEnv( clientFactory.createClient(address.host, address.port) } - private[netty] def ask(message: RequestMessage): Future[Any] = { + private[netty] def ask[T: ClassTag](message: RequestMessage, timeout: RpcTimeout): Future[T] = { val promise = Promise[Any]() val remoteAddr = message.receiver.address + +def onFailure(e: Throwable): Unit = { + if (!promise.tryFailure(e)) { +logWarning(s"Ignored failure: $e") + } +} + +def onSuccess(reply: Any): Unit = reply match { + case RpcFailure(e) => onFailure(e) + case rpcReply => +if (!promise.trySuccess(rpcReply)) { + logWarning(s"Ignored message: $reply") +} +} + if (remoteAddr == address) { val p = Promise[Any]() - dispatcher.postLocalMessage(message, p) p.future.onComplete { -case Success(response) => - val reply = response.asInstanceOf[AskResponse] - if (reply.reply.isInstanceOf[RpcFailure]) { -if (!promise.tryFailure(reply.reply.asInstanceOf[RpcFailure].e)) { - logWarning(s"Ignore failure: ${reply.reply}") -} - } else if (!promise.trySuccess(reply.reply)) { -logWarning(s"Ignore message: ${reply}") - } -case Failure(e) => - if (!promise.tryFailure(e)) { -logWarning("Ignore Exception", e) - } +case Success(response) => onSuccess(response) +case Failure(e) => onFailure(e) }(ThreadUtils.sameThread) + dispatcher.postLocalMessage(message, p) } else { - postToOutbox(message.receiver, OutboxMessage(serialize(message), -(e) => { - if (!promise.tryFailure(e)) { -logWarning("Ignore Exception", e) - } -}, -(client, response) => { - val reply = deserialize[AskResponse](client, response) - if (reply.reply.isInstanceOf[RpcFailure]) { -if (!promise.tryFailure(reply.reply.asInstanceOf[RpcFailure].e)) { - logWarning(s"Ignore failure: ${reply.reply}") -} - } else if 
(!promise.trySuccess(reply.reply)) { -logWarning(s"Ignore message: ${reply}") - } -})) + val rpcMessage = RpcOutboxMessage(serialize(message), +onFailure, +(client, response) => onSuccess(deserialize[Any](client, response))) + postToOutbox(message.receiver, rpcMessage) + promise.future.onFailure { +case _: TimeoutException => rpcMessage.onTimeout() --- End diff -- this doesn't seem like a method that belongs to a message. Isn't it clearer if you just do `client.removeRpc(rpcMessage)` here? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10621][SQL] Consistent naming for funct...
Github user yhuai commented on the pull request: https://github.com/apache/spark/pull/9948#issuecomment-159423533 LGTM --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159427209 **[Test build #46624 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46624/consoleFull)** for PR 9922 at commit [`6f56c36`](https://github.com/apache/spark/commit/6f56c362146c9a144d6a80b4531ee186c30c5acc). * This patch passes all tests. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159427419 Merged build finished. Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159427420 Test PASSed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/46624/ Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11805] free the array in UnsafeExternal...
Github user JoshRosen commented on a diff in the pull request: https://github.com/apache/spark/pull/9793#discussion_r45804928 --- Diff: core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java --- @@ -489,10 +495,6 @@ public void loadNext() throws IOException { } upstream = nextUpstream; nextUpstream = null; - - assert(inMemSorter != null); --- End diff -- Ah, I see that `getSortedIterator()`'s contract specifies that the caller should call `cleanupResources()` after consuming the iterator: ``` /** * Returns a sorted iterator. It is the caller's responsibility to call `cleanupResources()` * after consuming this iterator. */ ``` Even if we're merging an in-memory iterator with a bunch of on-disk spills, there isn't an advantage to trying to free the in-memory iterator's array as soon as we hit the end of that in-memory iterator, since in expectation I think that we would hit the end of that iterator at about the same time that we hit the end of the other iterators / the merged iterator as a whole. Therefore, LGTM. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11805] free the array in UnsafeExternal...
Github user JoshRosen commented on the pull request: https://github.com/apache/spark/pull/9793#issuecomment-159427506 Thanks for clarifying my question upthread. This looks good to me, so I'm going to merge to master and branch-1.6. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10266][Documentation, ML] Fixed @Since ...
Github user noel-smith commented on the pull request: https://github.com/apache/spark/pull/9338#issuecomment-159428812 Hi @jkbradley - the version numbers look OK - but I've been unable to get the Scala API docs building in my environment (not just this branch - I get the same problems on master) - so I can't fully verify it. If you can review it locally - that would probably be quicker in this case. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11140][CORE] Transfer files using netwo...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9947#issuecomment-159433435 **[Test build #46634 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46634/consoleFull)** for PR 9947 at commit [`bea3fda`](https://github.com/apache/spark/commit/bea3fdaa6d801c6c8955a0896c8c27995528f247). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11871] Add save/load for MLPC
Github user avulanov commented on the pull request: https://github.com/apache/spark/pull/9854#issuecomment-159433478 LGTM --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159436709 **[Test build #46639 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46639/consoleFull)** for PR 9922 at commit [`d9935d5`](https://github.com/apache/spark/commit/d9935d5d2cb6a8a95893e9ade23e8de0c3e386ce). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159445339 **[Test build #46645 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46645/consoleFull)** for PR 9922 at commit [`d9935d5`](https://github.com/apache/spark/commit/d9935d5d2cb6a8a95893e9ade23e8de0c3e386ce). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11821] Propagate Kerberos keytab for al...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9859#issuecomment-159447241 **[Test build #46625 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46625/consoleFull)** for PR 9859 at commit [`7802c0c`](https://github.com/apache/spark/commit/7802c0ca3cf4fb6d19180dbda48291d5a9f50bf5). * This patch passes all tests. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11934] [SQL] Adding joinType into joinW...
Github user gatorsmile commented on the pull request: https://github.com/apache/spark/pull/9921#issuecomment-159447230 Sure, will do it! Thank you very much! --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11866] [network] [core] Make sure timed...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9917#discussion_r45802883 --- Diff: core/src/main/scala/org/apache/spark/rpc/netty/Outbox.scala --- @@ -22,13 +22,51 @@ import javax.annotation.concurrent.GuardedBy import scala.util.control.NonFatal -import org.apache.spark.SparkException +import org.apache.spark.{Logging, SparkException} import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} import org.apache.spark.rpc.RpcAddress -private[netty] case class OutboxMessage(content: Array[Byte], +private[netty] sealed trait OutboxMessage { + + def send(client: TransportClient): Unit + + def onFailure(e: Throwable): Unit + +} + +private[netty] case class OneWayOutboxMessage(content: Array[Byte]) extends OutboxMessage + with Logging { + + override def send(client: TransportClient): Unit = { +client.send(content) + } + + override def onFailure(e: Throwable): Unit = { +logWarning(s"Failed to send one-way RPC.", e) + } + +} + +private[netty] case class RpcOutboxMessage(content: Array[Byte], _onFailure: (Throwable) => Unit, - _onSuccess: (TransportClient, Array[Byte]) => Unit) { + _onSuccess: (TransportClient, Array[Byte]) => Unit) extends OutboxMessage { + + private var client: TransportClient = _ + private var requestId: Long = _ + + override def send(client: TransportClient): Unit = { +this.client = client +this.requestId = client.sendRpc(content, createCallback(client)) + } + + override def onFailure(e: Throwable): Unit = { +_onFailure(e) + } + + def onTimeout(): Unit = { +require(client != null, "TransportClient has not yet been set.") +client.removeRpcRequest(requestId) + } def createCallback(client: TransportClient): RpcResponseCallback = new RpcResponseCallback() { --- End diff -- this can be private now that you only use it in here --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11866] [network] [core] Make sure timed...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9917#discussion_r45803744 --- Diff: core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala --- @@ -92,7 +92,11 @@ private[deploy] class ExecutorRunner( process.destroy() exitCode = Some(process.waitFor()) } -worker.send(ExecutorStateChanged(appId, execId, state, message, exitCode)) +try { + worker.send(ExecutorStateChanged(appId, execId, state, message, exitCode)) +} catch { + case e: IllegalStateException => logWarning(e.getMessage(), e) --- End diff -- any particular reason why we add this guard here? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11805] free the array in UnsafeExternal...
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/9793 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159429461 Merged build finished. Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10387][ML][WIP] Add code gen for gbt
Github user dbtsai commented on a diff in the pull request: https://github.com/apache/spark/pull/9524#discussion_r45806031 --- Diff: mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala --- @@ -85,7 +85,7 @@ final class CategoricalSplit private[ml] ( /** * If true, then "categories" is the set of categories for splitting to the left, and vice versa. */ - private val isLeft: Boolean = _leftCategories.length <= numCategories / 2 + private[ml] val isLeft: Boolean = _leftCategories.length <= numCategories / 2 /** Set of categories determining the splitting rule, along with [[isLeft]]. */ private val categories: Set[Double] = { --- End diff -- We use `BitSet` in our system. I know `BitSet` lookups take a small constant time, while `HashSet` lookups take an effectively longer constant time, depending on the length of the elements. Also, `HashSet` boxes its elements, whereas `BitSet` stores them as primitives. But I haven't benchmarked this myself. Agreed, this can be a separate PR. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159429463 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/46631/ Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11783][SQL] Fixes execution Hive client...
Github user yhuai commented on the pull request: https://github.com/apache/spark/pull/9895#issuecomment-159430968 I tested it with remote metastore setup. It indeed fixes the issue. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11866] [network] [core] Make sure timed...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9917#issuecomment-159436295 **[Test build #46636 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46636/consoleFull)** for PR 9917 at commit [`cb64959`](https://github.com/apache/spark/commit/cb64959d332d2184d63a13000a9da45acab3348c). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159437601 **[Test build #46630 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46630/consoleFull)** for PR 9922 at commit [`199dc6f`](https://github.com/apache/spark/commit/199dc6fd9604cba67ff169fa67e8aff5251dcf6d). * This patch passes all tests. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159437786 Test PASSed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/46630/ Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11967][SQL] Consistent use of varargs f...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9945#issuecomment-159440852 **[Test build #46641 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46641/consoleFull)** for PR 9945 at commit [`f97193c`](https://github.com/apache/spark/commit/f97193cadfcd6602a3237c7e3dc8afcddce948ca). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11967][SQL] Consistent use of varargs f...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9945#issuecomment-159442061 **[Test build #46642 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46642/consoleFull)** for PR 9945 at commit [`c824f24`](https://github.com/apache/spark/commit/c824f24681de9d52dfb415fa6556d06846d04b7b). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11969] [SQL] [PYSPARK] visualization of...
Github user zsxwing commented on the pull request: https://github.com/apache/spark/pull/9949#issuecomment-159441993 Did you test `saveXXX` methods? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10911] Executors should System.exit on ...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9946#issuecomment-159443182 **[Test build #46627 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46627/consoleFull)** for PR 9946 at commit [`2869d21`](https://github.com/apache/spark/commit/2869d2122d14169587b8753910dc0ebd07d43674). * This patch passes all tests. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10911] Executors should System.exit on ...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9946#issuecomment-159443343 Merged build finished. Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11140][CORE] Transfer files using netwo...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9947#issuecomment-159428993 **[Test build #46633 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46633/consoleFull)** for PR 9947 at commit [`8849d5d`](https://github.com/apache/spark/commit/8849d5d76453f8d1af40c8c528fdd6e996d0cbf3). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11866] [network] [core] Make sure timed...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/9917#discussion_r45807174 --- Diff: network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java --- @@ -48,10 +54,40 @@ public abstract void receive( public abstract StreamManager getStreamManager(); /** + * Receives an RPC message that does not expect a reply. The default implementation will + * call "{@link receive(TransportClient, byte[], RpcResponseCallback}" and log a warning if + * any of the callback methods are called. + * + * @param client A channel client which enables the handler to make requests back to the sender + * of this RPC. This will always be the exact same object for a particular channel. + * @param message The serialized bytes of the RPC. + */ + public void receive(TransportClient client, byte[] message) { +receive(client, message, ONE_WAY_CALLBACK); + } + + /** * Invoked when the connection associated with the given client has been invalidated. * No further requests will come from this client. */ public void connectionTerminated(TransportClient client) { } public void exceptionCaught(Throwable cause, TransportClient client) { } + + private static class OneWayRpcCallback implements RpcResponseCallback { + +private final Logger logger = LoggerFactory.getLogger(OneWayRpcCallback.class); + +@Override +public void onSuccess(byte[] response) { + logger.warn("Response provided for one-way RPC."); --- End diff -- `NettyRpcEnv` is the only user of `TransportClient.send(byte[])`. You'll only get these logs if: - You do not override `receive(TransportClient client, byte[] message)` - Your implementation of the other `receive()` method sets a response for one-way messages. I could just as well just make `receive(TransportClient client, byte[] message)` throw an exception instead of having this default handler. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11140][CORE] Transfer files using netwo...
Github user zsxwing commented on the pull request: https://github.com/apache/spark/pull/9947#issuecomment-159431775 LGTM --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11783][SQL] Fixes execution Hive client...
Github user yhuai commented on the pull request: https://github.com/apache/spark/pull/9895#issuecomment-159432314 https://cwiki.apache.org/confluence/display/Hive/AdminManual+MetastoreAdmin does mention that `If hive.metastore.uris is empty local mode is assumed, remote otherwise`. Also https://github.com/apache/hive/blob/release-1.2.1/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java#L2694-L2699 shows that if metastore uri is set, the mode will not be local. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-7889] [CORE] WiP HistoryServer cache of...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/6935#issuecomment-159433880 **[Test build #46635 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46635/consoleFull)** for PR 6935 at commit [`8ba44a2`](https://github.com/apache/spark/commit/8ba44a252d9be78b1c7175c39dee6f66a9af4b39). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11866] [network] [core] Make sure timed...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/9917#discussion_r45808289 --- Diff: core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala --- @@ -92,7 +92,11 @@ private[deploy] class ExecutorRunner( process.destroy() exitCode = Some(process.waitFor()) } -worker.send(ExecutorStateChanged(appId, execId, state, message, exitCode)) +try { + worker.send(ExecutorStateChanged(appId, execId, state, message, exitCode)) +} catch { + case e: IllegalStateException => logWarning(e.getMessage(), e) --- End diff -- Without this, the exception will bubble up and cause errors to show up in the output when running unit tests; this happens mostly when running in `local-cluster` mode, which is used a lot in tests. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-7889] [Core] WiP History Caching: xu ti...
Github user steveloughran closed the pull request at: https://github.com/apache/spark/pull/9913 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159437784 Merged build finished. Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10911] Executors should System.exit on ...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9946#issuecomment-159443345 Test PASSed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/46627/ Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11947][SQL] Mark deprecated methods wit...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9930#issuecomment-159446018 **[Test build #2106 has started](https://amplab.cs.berkeley.edu/jenkins/job/NewSparkPullRequestBuilder/2106/consoleFull)** for PR 9930 at commit [`a2b4f33`](https://github.com/apache/spark/commit/a2b4f3390acf6224e29c815d10815b9911f0897f). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11821] Propagate Kerberos keytab for al...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9859#issuecomment-159447342 Merged build finished. Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11896] [SQL] Support Sample in Dataset ...
Github user gatorsmile closed the pull request at: https://github.com/apache/spark/pull/9877 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11896] [SQL] Support Sample in Dataset ...
Github user gatorsmile commented on the pull request: https://github.com/apache/spark/pull/9877#issuecomment-159447494 I am closing this one and will merge it into https://github.com/apache/spark/pull/9921 Thank you! --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11821] Propagate Kerberos keytab for al...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9859#issuecomment-159447346 Test PASSed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/46625/ Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10621][SQL] Consistent naming for funct...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9948#issuecomment-159426040 Merged build finished. Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10621][SQL] Consistent naming for funct...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9948#issuecomment-159426043 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/46629/ Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10621][SQL] Consistent naming for funct...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9948#issuecomment-159426027 **[Test build #46629 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46629/consoleFull)** for PR 9948 at commit [`1cca462`](https://github.com/apache/spark/commit/1cca4628321cbde60a32959fde5004bff26f6cf0). * This patch **fails Python style tests**. * This patch merges cleanly. * This patch adds the following public classes _(experimental)_:\n * `case class GetStructField(child: Expression, ordinal: Int, name: Option[String] = None)`\n * `case class PrettyAttribute(name: String, dataType: DataType = NullType)`\n --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11954][SQL][WIP] Encoder for JavaBeans ...
Github user rxin commented on the pull request: https://github.com/apache/spark/pull/9937#issuecomment-159425958 I think Java beans are different from POJOs. Beans only care about fields that have getters and setters, while a POJO should care about all the fields. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11946][SQL] Audit pivot API for 1.6.
Github user aray commented on a diff in the pull request: https://github.com/apache/spark/pull/9929#discussion_r45806267 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala --- @@ -282,74 +282,96 @@ class GroupedData protected[sql]( } /** -* (Scala-specific) Pivots a column of the current [[DataFrame]] and preform the specified -* aggregation. -* {{{ -* // Compute the sum of earnings for each year by course with each course as a separate column -* df.groupBy($"year").pivot($"course", "dotNET", "Java").agg(sum($"earnings")) -* // Or without specifying column values -* df.groupBy($"year").pivot($"course").agg(sum($"earnings")) -* }}} -* @param pivotColumn Column to pivot -* @param values Optional list of values of pivotColumn that will be translated to columns in the -* output data frame. If values are not provided the method with do an immediate -* call to .distinct() on the pivot column. -* @since 1.6.0 -*/ - @scala.annotation.varargs - def pivot(pivotColumn: Column, values: Column*): GroupedData = groupType match { -case _: GroupedData.PivotType => - throw new UnsupportedOperationException("repeated pivots are not supported") -case GroupedData.GroupByType => - val pivotValues = if (values.nonEmpty) { -values.map { - case Column(literal: Literal) => literal - case other => -throw new UnsupportedOperationException( - s"The values of a pivot must be literals, found $other") -} - } else { -// This is to prevent unintended OOM errors when the number of distinct values is large -val maxValues = df.sqlContext.conf.getConf(SQLConf.DATAFRAME_PIVOT_MAX_VALUES) -// Get the distinct values of the column and sort them so its consistent -val values = df.select(pivotColumn) - .distinct() - .sort(pivotColumn) - .map(_.get(0)) - .take(maxValues + 1) - .map(Literal(_)).toSeq -if (values.length > maxValues) { - throw new RuntimeException( -s"The pivot column $pivotColumn has more than $maxValues distinct values, " + - "this could indicate an error. 
" + - "If this was intended, set \"" + SQLConf.DATAFRAME_PIVOT_MAX_VALUES.key + "\" " + - s"to at least the number of distinct values of the pivot column.") -} -values - } - new GroupedData(df, groupingExprs, GroupedData.PivotType(pivotColumn.expr, pivotValues)) -case _ => - throw new UnsupportedOperationException("pivot is only supported after a groupBy") + * Pivots a column of the current [[DataFrame]] and preform the specified aggregation. + * There are two versions of pivot function: one that requires the caller to specify the list + * of distinct values to pivot on, and one that does not. The latter is more concise but less + * efficient, because Spark needs to first compute the list of distinct values internally. + * + * {{{ + * // Compute the sum of earnings for each year by course with each course as a separate column + * df.groupBy("year").pivot("course", Seq("dotNET", "Java")).sum("earnings") + * + * // Or without specifying column values (less efficient) + * df.groupBy("year").pivot("course").sum("earnings") + * }}} + * + * @param pivotColumn Name of the column to pivot. + * @since 1.6.0 + */ + def pivot(pivotColumn: String): GroupedData = { +// This is to prevent unintended OOM errors when the number of distinct values is large +val maxValues = df.sqlContext.conf.getConf(SQLConf.DATAFRAME_PIVOT_MAX_VALUES) +// Get the distinct values of the column and sort them so its consistent +val values = df.select(pivotColumn) + .distinct() + .sort(pivotColumn) --- End diff -- The sort is there to ensure that the output columns are in a consistent logical order. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. 
--- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11783][SQL] Fixes execution Hive client...
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/9895 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11967][SQL] Consistent use of varargs f...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9945#issuecomment-159435308 Build finished. Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11783][SQL] Fixes execution Hive client...
Github user yhuai commented on the pull request: https://github.com/apache/spark/pull/9895#issuecomment-159435129 Merged. @liancheng You can find my updated comments in https://github.com/apache/spark/commit/c7f95df5c6d8eb2e6f11cf58b704fea34326a5f2. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11967][SQL] Consistent use of varargs f...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9945#issuecomment-159435227 **[Test build #46623 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46623/consoleFull)** for PR 9945 at commit [`01897a8`](https://github.com/apache/spark/commit/01897a8c549e47362c40e5f918c67437c0a9c6ee). * This patch **fails PySpark unit tests**. * This patch **does not merge cleanly**. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11967][SQL] Consistent use of varargs f...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9945#issuecomment-159435314 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/46623/ Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11967][SQL] Consistent use of varargs f...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9945#issuecomment-159438072 **[Test build #46638 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46638/consoleFull)** for PR 9945 at commit [`bd9a538`](https://github.com/apache/spark/commit/bd9a53827b5b31cfe32ab7999f4629df4ae4a548). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11914] [SQL] Support coalesce and repar...
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/9899 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159444025 Merged build finished. Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159444027 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/46639/ Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11955][SQL] Mark one side fields in mer...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9940#issuecomment-159443882 **[Test build #46626 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46626/consoleFull)** for PR 9940 at commit [`e24529d`](https://github.com/apache/spark/commit/e24529d0e1fc61aff5dbd331444b20dfc4d09c09). * This patch **fails Spark unit tests**. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11955][SQL] Mark one side fields in mer...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9940#issuecomment-159443945 Merged build finished. Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159443980 **[Test build #46639 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46639/consoleFull)** for PR 9922 at commit [`d9935d5`](https://github.com/apache/spark/commit/d9935d5d2cb6a8a95893e9ade23e8de0c3e386ce). * This patch **fails Spark unit tests**. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11955][SQL] Mark one side fields in mer...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9940#issuecomment-159443948 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/46626/ Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11140][CORE] Transfer files using netwo...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9947#issuecomment-159445741 Merged build finished. Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11934] [SQL] Adding joinType into joinW...
Github user rxin commented on a diff in the pull request: https://github.com/apache/spark/pull/9921#discussion_r45814672 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala --- @@ -520,6 +523,25 @@ class Dataset[T] private[sql]( } } + /** + * Using inner equi-join to join this [[Dataset]] returning a [[Tuple2]] for each pair + * where `condition` evaluates to true + * + * @since 1.6.0 + */ + def joinWith[U](other: Dataset[U], condition: Column): Dataset[(T, U)] = { +joinWith(other, condition, "inner") + } + + /** + * Joins this [[Dataset]] returning a [[Tuple2]] for each pair using cartesian join + * + * Note that cartesian joins are very expensive without an extra filter that can be pushed down. + * + * @since 1.6.0 + */ + def joinWith[U](other: Dataset[U]): Dataset[(T, U)] = joinWith (other, lit(true), "inner") --- End diff -- Actually I'd maybe just remove this for now -- since cartesian joins are too expensive. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10621][SQL] Consistent naming for funct...
Github user rxin commented on the pull request: https://github.com/apache/spark/pull/9948#issuecomment-159421906 cc @davies --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10621][SQL] Consistent naming for funct...
GitHub user rxin opened a pull request: https://github.com/apache/spark/pull/9948 [SPARK-10621][SQL] Consistent naming for functions in SQL, Python, Scala You can merge this pull request into a Git repository by running: $ git pull https://github.com/rxin/spark SPARK-10621 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/spark/pull/9948.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #9948 commit 5adbde98c2cf8d1c17707f772f44758a29b7765c Author: Reynold Xin Date: 2015-11-24T22:09:09Z [SPARK-10621][SQL] Consistent naming for functions in SQL, Python, Scala --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11866] [network] [core] Make sure timed...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9917#discussion_r45803809 --- Diff: core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala --- @@ -106,44 +106,39 @@ private[netty] class Dispatcher(nettyEnv: NettyRpcEnv) extends Logging { val iter = endpoints.keySet().iterator() while (iter.hasNext) { val name = iter.next - postMessage( -name, -_ => message, -() => { logWarning(s"Drop $message because $name has been stopped") }) + postMessage(name, message, +(e) => logWarning(s"Message $message dropped.", e)) } } /** Posts a message sent by a remote endpoint. */ def postRemoteMessage(message: RequestMessage, callback: RpcResponseCallback): Unit = { -def createMessage(sender: NettyRpcEndpointRef): InboxMessage = { - val rpcCallContext = -new RemoteNettyRpcCallContext( - nettyEnv, sender, callback, message.senderAddress, message.needReply) - ContentMessage(message.senderAddress, message.content, message.needReply, rpcCallContext) -} - -def onEndpointStopped(): Unit = { - callback.onFailure( -new SparkException(s"Could not find ${message.receiver.name} or it has been stopped")) -} +val rpcCallContext = + new RemoteNettyRpcCallContext(nettyEnv, callback, message.senderAddress) +val rpcMessage = RpcMessage(message.senderAddress, message.content, rpcCallContext) +postMessage(message.receiver.name, rpcMessage, + (e) => callback.onFailure(e)) --- End diff -- this probably fits on previous line --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159425286 **[Test build #46630 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46630/consoleFull)** for PR 9922 at commit [`199dc6f`](https://github.com/apache/spark/commit/199dc6fd9604cba67ff169fa67e8aff5251dcf6d). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11866] [network] [core] Make sure timed...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9917#discussion_r45805486 --- Diff: network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java --- @@ -48,10 +54,40 @@ public abstract void receive( public abstract StreamManager getStreamManager(); /** + * Receives an RPC message that does not expect a reply. The default implementation will + * call "{@link receive(TransportClient, byte[], RpcResponseCallback}" and log a warning if + * any of the callback methods are called. + * + * @param client A channel client which enables the handler to make requests back to the sender + * of this RPC. This will always be the exact same object for a particular channel. + * @param message The serialized bytes of the RPC. + */ + public void receive(TransportClient client, byte[] message) { +receive(client, message, ONE_WAY_CALLBACK); + } + + /** * Invoked when the connection associated with the given client has been invalidated. * No further requests will come from this client. */ public void connectionTerminated(TransportClient client) { } public void exceptionCaught(Throwable cause, TransportClient client) { } + + private static class OneWayRpcCallback implements RpcResponseCallback { + +private final Logger logger = LoggerFactory.getLogger(OneWayRpcCallback.class); + +@Override +public void onSuccess(byte[] response) { + logger.warn("Response provided for one-way RPC."); --- End diff -- what does this mean? Do we need to do anything `onSuccess`? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11946][SQL] Audit pivot API for 1.6.
Github user rxin commented on a diff in the pull request: https://github.com/apache/spark/pull/9929#discussion_r45806695 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala --- @@ -282,74 +282,96 @@ class GroupedData protected[sql]( } /** -* (Scala-specific) Pivots a column of the current [[DataFrame]] and preform the specified -* aggregation. -* {{{ -* // Compute the sum of earnings for each year by course with each course as a separate column -* df.groupBy($"year").pivot($"course", "dotNET", "Java").agg(sum($"earnings")) -* // Or without specifying column values -* df.groupBy($"year").pivot($"course").agg(sum($"earnings")) -* }}} -* @param pivotColumn Column to pivot -* @param values Optional list of values of pivotColumn that will be translated to columns in the -* output data frame. If values are not provided the method with do an immediate -* call to .distinct() on the pivot column. -* @since 1.6.0 -*/ - @scala.annotation.varargs - def pivot(pivotColumn: Column, values: Column*): GroupedData = groupType match { -case _: GroupedData.PivotType => - throw new UnsupportedOperationException("repeated pivots are not supported") -case GroupedData.GroupByType => - val pivotValues = if (values.nonEmpty) { -values.map { - case Column(literal: Literal) => literal - case other => -throw new UnsupportedOperationException( - s"The values of a pivot must be literals, found $other") -} - } else { -// This is to prevent unintended OOM errors when the number of distinct values is large -val maxValues = df.sqlContext.conf.getConf(SQLConf.DATAFRAME_PIVOT_MAX_VALUES) -// Get the distinct values of the column and sort them so its consistent -val values = df.select(pivotColumn) - .distinct() - .sort(pivotColumn) - .map(_.get(0)) - .take(maxValues + 1) - .map(Literal(_)).toSeq -if (values.length > maxValues) { - throw new RuntimeException( -s"The pivot column $pivotColumn has more than $maxValues distinct values, " + - "this could indicate an error. 
" + - "If this was intended, set \"" + SQLConf.DATAFRAME_PIVOT_MAX_VALUES.key + "\" " + - s"to at least the number of distinct values of the pivot column.") -} -values - } - new GroupedData(df, groupingExprs, GroupedData.PivotType(pivotColumn.expr, pivotValues)) -case _ => - throw new UnsupportedOperationException("pivot is only supported after a groupBy") + * Pivots a column of the current [[DataFrame]] and preform the specified aggregation. + * There are two versions of pivot function: one that requires the caller to specify the list + * of distinct values to pivot on, and one that does not. The latter is more concise but less + * efficient, because Spark needs to first compute the list of distinct values internally. + * + * {{{ + * // Compute the sum of earnings for each year by course with each course as a separate column + * df.groupBy("year").pivot("course", Seq("dotNET", "Java")).sum("earnings") + * + * // Or without specifying column values (less efficient) + * df.groupBy("year").pivot("course").sum("earnings") + * }}} + * + * @param pivotColumn Name of the column to pivot. + * @since 1.6.0 + */ + def pivot(pivotColumn: String): GroupedData = { +// This is to prevent unintended OOM errors when the number of distinct values is large +val maxValues = df.sqlContext.conf.getConf(SQLConf.DATAFRAME_PIVOT_MAX_VALUES) +// Get the distinct values of the column and sort them so its consistent +val values = df.select(pivotColumn) + .distinct() + .sort(pivotColumn) --- End diff -- ok thanks - i'm going to add a comment there to explain. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. 
--- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-7889] [CORE] WiP HistoryServer cache of...
Github user steveloughran commented on the pull request: https://github.com/apache/spark/pull/6935#issuecomment-159434408 This is a converged patch which 1. Splits out all caching into `ApplicationCache`, for isolation and testing. 1. Now asks the history server (which then asks the HistoryProvider) to see if an app has changed. If so, it triggers a refresh. 1. Includes the metrics counting requests and timing probe/getAppUI calls, so when a descendant of #9571 goes in, the cache is instrumented. Those counters are used in the tests to check internal cache state BTW, not without immediate use. 1. The test there is a test at the cache level, with a stub implementation of the operations the history server has to do. It's incomplete (need one to verify that on cache eviction the UIs are detached). There's nothing for spark UI tests (there's the template above there), or the REST API --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-7889] [CORE] WiP HistoryServer cache of...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/6935#issuecomment-159434711 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/46635/ Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-7889] [CORE] WiP HistoryServer cache of...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/6935#issuecomment-159434703 **[Test build #46635 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46635/consoleFull)** for PR 6935 at commit [`8ba44a2`](https://github.com/apache/spark/commit/8ba44a252d9be78b1c7175c39dee6f66a9af4b39). * This patch **fails Scala style tests**. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-7889] [CORE] WiP HistoryServer cache of...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/6935#issuecomment-159434709 Merged build finished. Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user zsxwing commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159435771 retest this please --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user zsxwing commented on the pull request: https://github.com/apache/spark/pull/9922#issuecomment-159444967 retest this please --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11905] [SQL] Support Persist/Cache and ...
Github user rxin commented on the pull request: https://github.com/apache/spark/pull/9889#issuecomment-159444815 cc @marmbrus I will let you merge this one. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11934] [SQL] Adding joinType into joinW...
Github user rxin commented on a diff in the pull request: https://github.com/apache/spark/pull/9921#discussion_r45814178 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala --- @@ -17,6 +17,8 @@ package org.apache.spark.sql +import org.apache.spark.sql.functions._ --- End diff -- sort the imports properly --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11934] [SQL] Adding joinType into joinW...
Github user rxin commented on the pull request: https://github.com/apache/spark/pull/9921#issuecomment-159444860 OK filed: https://issues.apache.org/jira/browse/SPARK-11970 Can you merge the sample change into this one, and also add "show" support? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11866] [network] [core] Make sure timed...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9917#discussion_r45803882 --- Diff: core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala --- @@ -106,44 +106,39 @@ private[netty] class Dispatcher(nettyEnv: NettyRpcEnv) extends Logging { val iter = endpoints.keySet().iterator() while (iter.hasNext) { val name = iter.next - postMessage( -name, -_ => message, -() => { logWarning(s"Drop $message because $name has been stopped") }) + postMessage(name, message, +(e) => logWarning(s"Message $message dropped.", e)) } } /** Posts a message sent by a remote endpoint. */ def postRemoteMessage(message: RequestMessage, callback: RpcResponseCallback): Unit = { -def createMessage(sender: NettyRpcEndpointRef): InboxMessage = { - val rpcCallContext = -new RemoteNettyRpcCallContext( - nettyEnv, sender, callback, message.senderAddress, message.needReply) - ContentMessage(message.senderAddress, message.content, message.needReply, rpcCallContext) -} - -def onEndpointStopped(): Unit = { - callback.onFailure( -new SparkException(s"Could not find ${message.receiver.name} or it has been stopped")) -} +val rpcCallContext = + new RemoteNettyRpcCallContext(nettyEnv, callback, message.senderAddress) +val rpcMessage = RpcMessage(message.senderAddress, message.content, rpcCallContext) +postMessage(message.receiver.name, rpcMessage, + (e) => callback.onFailure(e)) + } -postMessage(message.receiver.name, createMessage, onEndpointStopped) + /** Posts a one-way message sent by a remote endpoint. */ + def postRemoteMessage(message: RequestMessage): Unit = { +postMessage(message.receiver.name, OneWayMessage(message.senderAddress, message.content), + (e) => throw e) } /** Posts a message sent by a local endpoint. 
*/ def postLocalMessage(message: RequestMessage, p: Promise[Any]): Unit = { -def createMessage(sender: NettyRpcEndpointRef): InboxMessage = { - val rpcCallContext = -new LocalNettyRpcCallContext(sender, message.senderAddress, message.needReply, p) - ContentMessage(message.senderAddress, message.content, message.needReply, rpcCallContext) -} - -def onEndpointStopped(): Unit = { - p.tryFailure( -new SparkException(s"Could not find ${message.receiver.name} or it has been stopped")) -} +val rpcCallContext = + new LocalNettyRpcCallContext(message.senderAddress, p) +val rpcMessage = RpcMessage(message.senderAddress, message.content, rpcCallContext) +postMessage(message.receiver.name, rpcMessage, + (e) => p.tryFailure(e)) + } -postMessage(message.receiver.name, createMessage, onEndpointStopped) + /** Posts a one-way message sent by a local endpoint. */ + def postLocalMessage(message: RequestMessage): Unit = { +postMessage(message.receiver.name, OneWayMessage(message.senderAddress, message.content), + (e) => throw e) --- End diff -- the body of this method is identical to `postRemoteMessage`. Should we just have a common `postMessage` method? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10621][SQL] Consistent naming for funct...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9948#issuecomment-159425493 **[Test build #46629 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46629/consoleFull)** for PR 9948 at commit [`1cca462`](https://github.com/apache/spark/commit/1cca4628321cbde60a32959fde5004bff26f6cf0). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11206] Support SQL UI on the history se...
Github user vanzin commented on the pull request: https://github.com/apache/spark/pull/9297#issuecomment-159436370 retest this please --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11826][MLlib] Reimplement add() and sub...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9916#issuecomment-159442668 **[Test build #46644 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46644/consoleFull)** for PR 9916 at commit [`5e9f012`](https://github.com/apache/spark/commit/5e9f01231f7295cba880c288b958d9b12be12fc7). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11140][CORE] Transfer files using netwo...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9947#issuecomment-159445745 Test PASSed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/46628/ Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11934] [SQL] Adding joinType into joinW...
Github user rxin commented on a diff in the pull request: https://github.com/apache/spark/pull/9921#discussion_r45814648 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala --- @@ -520,6 +523,25 @@ class Dataset[T] private[sql]( } } + /** + * Using inner equi-join to join this [[Dataset]] returning a [[Tuple2]] for each pair + * where `condition` evaluates to true + * + * @since 1.6.0 + */ + def joinWith[U](other: Dataset[U], condition: Column): Dataset[(T, U)] = { +joinWith(other, condition, "inner") + } + + /** + * Joins this [[Dataset]] returning a [[Tuple2]] for each pair using cartesian join + * + * Note that cartesian joins are very expensive without an extra filter that can be pushed down. + * + * @since 1.6.0 + */ + def joinWith[U](other: Dataset[U]): Dataset[(T, U)] = joinWith (other, lit(true), "inner") --- End diff -- remove the extra space --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11934] [SQL] Adding joinType into joinW...
Github user rxin commented on a diff in the pull request: https://github.com/apache/spark/pull/9921#discussion_r45814607 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala --- @@ -493,11 +495,12 @@ class Dataset[T] private[sql]( * * @since 1.6.0 */ - def joinWith[U](other: Dataset[U], condition: Column): Dataset[(T, U)] = { + def joinWith[U](other: Dataset[U], condition: Column, joinType: String): Dataset[(T, U)] = { --- End diff -- you need to update the documentation to explain what options are available for joinType. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11140][CORE] Transfer files using netwo...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9947#issuecomment-159445631 **[Test build #46628 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46628/consoleFull)** for PR 9947 at commit [`08eb202`](https://github.com/apache/spark/commit/08eb2020c6e7e4049dd70310dfff2d38c9c1df5f). * This patch passes all tests. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [WIP][SPARK-11602] [MLlib] Refine visibility f...
Github user yu-iskw commented on a diff in the pull request: https://github.com/apache/spark/pull/9939#discussion_r45815661 --- Diff: mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala --- @@ -33,7 +33,7 @@ import org.apache.spark.rdd.RDD @Since("1.6.0") @Experimental class BisectingKMeansModel @Since("1.6.0") ( -@Since("1.6.0") val root: ClusteringTreeNode +@Since("1.6.0")private[clustering] val root: ClusteringTreeNode --- End diff -- Sorry. It's not a public parameter. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [Spark-8426] [scheduler] enhance blacklist mec...
Github user squito commented on a diff in the pull request: https://github.com/apache/spark/pull/8760#discussion_r45793155 --- Diff: core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala --- @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.scheduler + +import org.apache.spark.SparkFunSuite +import org.scalatest.BeforeAndAfter +import org.apache.spark.SparkConf +import org.apache.spark.TaskEndReason +import org.apache.spark.Success +import org.apache.spark.ExceptionFailure +import org.apache.spark.LocalSparkContext +import org.apache.spark.SparkContext +import org.mockito.Mockito.{when, spy} +import org.apache.spark.util.ManualClock + +class BlacklistTrackerSuite extends SparkFunSuite with BeforeAndAfter with LocalSparkContext { + + val FAILURE: TaskEndReason = new ExceptionFailure( + "Fake", + "fake failure", + Array.empty[StackTraceElement], + "fake stack trace", + None, + None) + + val stage1 = 1 + val stage2 = 2 + // Variable name can indicate basic information of taskInfo + // The format is "taskInfo_executorId_taskIndex_hostName" + val taskInfo_1_1_hostA = new TaskInfo(1L, 1, 1, 0L, "1", "hostA", TaskLocality.ANY, false) + val taskInfo_1_2_hostA = new TaskInfo(2L, 2, 1, 0L, "1", "hostA", TaskLocality.ANY, false) + val taskInfo_2_1_hostA = new TaskInfo(3L, 1, 1, 0L, "2", "hostA", TaskLocality.ANY, false) + val taskInfo_2_2_hostA = new TaskInfo(4L, 2, 1, 0L, "2", "hostA", TaskLocality.ANY, false) + val taskInfo_3_3_hostB = new TaskInfo(5L, 3, 1, 0L, "3", "hostB", TaskLocality.ANY, false) + + val clock = new ManualClock(0) + + test ("expireExecutorsInBlacklist works") { +// expire time is set to 6s +val conf = new SparkConf().setAppName("test").setMaster("local") + .set("spark.ui.enabled", "false") + .set("spark.scheduler.executorTaskBlacklistTime", "6000") + +sc = new SparkContext(conf) +val scheduler = new TaskSchedulerImpl(sc, 1) + +val tracker = new BlacklistTracker(conf, clock) +// Executor 1 into blacklist at Time 00:00:00 +tracker.updateFailureExecutors(stage1, taskInfo_1_1_hostA, FAILURE) +assert(tracker.executorBlacklist(scheduler, stage1, 1) === Set("1")) + +clock.setTime(2000) +tracker.expireExecutorsInBlackList() 
+assert(tracker.executorBlacklist(scheduler, stage1, 1) === Set("1")) +// Executor 1 failed again at Time 00::00:02 +tracker.updateFailureExecutors(stage1, taskInfo_1_1_hostA, FAILURE) + +clock.setTime(3000) +// Executor 2 failed at Time 00:00:03 +tracker.updateFailureExecutors(stage1, taskInfo_2_1_hostA, FAILURE) +assert(tracker.executorBlacklist(scheduler, stage1, 1) === Set("1", "2")) + +clock.setTime(6000) +tracker.expireExecutorsInBlackList() +assert(tracker.executorBlacklist(scheduler, stage1, 1) === Set("1", "2")) + +clock.setTime(8000) +tracker.expireExecutorsInBlackList() +assert(tracker.executorBlacklist(scheduler, stage1, 1) === Set("2")) + +clock.setTime(1) +tracker.expireExecutorsInBlackList() +assert(tracker.executorBlacklist(scheduler, stage1, 1) === Set()) + } + + test("blacklist feature is off by default") { +val conf = new SparkConf().setAppName("test").setMaster("local") + .set("spark.ui.enabled", "false") +sc = new SparkContext(conf) + +val scheduler = new TaskSchedulerImpl(sc, 1) + +val tracker = new BlacklistTracker(conf, clock) +tracker.updateFailureExecutors(stage1, taskInfo_1_1_hostA, FAILURE) +tracker.updateFailureExecutors(stage1, taskInfo_2_1_hostA, FAILURE) +assert(tracker.executorBlacklist(scheduler, stage1, 1) === Set()) +assert(tracker.executorBlacklist(scheduler, stage1, 1) === Set()) + +
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user tdas commented on a diff in the pull request: https://github.com/apache/spark/pull/9922#discussion_r45793158 --- Diff: python/pyspark/streaming/tests.py --- @@ -408,13 +408,17 @@ def test_failed_func(self): input_stream = self.ssc.queueStream(input) def failed_func(i): -raise ValueError("failed") +raise ValueError("This is a special error") input_stream.map(failed_func).pprint() self.ssc.start() try: self.ssc.awaitTerminationOrTimeout(10) except: +import traceback +traceback.print_exc() --- End diff -- do you need the `print_exc()` in the test? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11967][SQL] Use varargs for multiple pa...
GitHub user rxin opened a pull request: https://github.com/apache/spark/pull/9945 [SPARK-11967][SQL] Use varargs for multiple paths in DataFrameReader This patch makes it consistent to use varargs in all DataFrameReader methods, including Parquet, JSON, text, and the generic load function. Also added a few more API tests for the Java API. You can merge this pull request into a Git repository by running: $ git pull https://github.com/rxin/spark SPARK-11967 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/spark/pull/9945.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #9945 commit 01897a8c549e47362c40e5f918c67437c0a9c6ee Author: Reynold Xin Date: 2015-11-24T20:52:09Z [SPARK-11967][SQL] Use varargs for multiple paths in DataFrameReader. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11821] Propagate Kerberos keytab for al...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9859#issuecomment-159412787 **[Test build #46625 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/46625/consoleFull)** for PR 9859 at commit [`7802c0c`](https://github.com/apache/spark/commit/7802c0ca3cf4fb6d19180dbda48291d5a9f50bf5). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11935][PySpark]Send the Python exceptio...
Github user tdas commented on a diff in the pull request: https://github.com/apache/spark/pull/9922#discussion_r45798494 --- Diff: streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala --- @@ -48,6 +59,16 @@ private[python] trait PythonTransformFunction { private[python] trait PythonTransformFunctionSerializer { def dumps(id: String): Array[Byte] def loads(bytes: Array[Byte]): PythonTransformFunction + + /** + * After invoking `dumps` or `loads`, the user should use `getLastFailure` to check if there is + * any failure in Python. If so, the user should handle properly, e.g., throw an exception with --- End diff -- nit: Very verbose. Just simply say "Get the failure, if any, in the last call to `dumps` or `loads`. Returns the failure message if there was a failure, or `null` if there was no failure." --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11401] [MLLIB] PMML export for Logistic...
Github user dbtsai commented on the pull request: https://github.com/apache/spark/pull/9397#issuecomment-159416401 Can you rebase the master? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org