spark git commit: [SPARK-7887][SQL] Remove EvaluatedType from SQL Expression.
Repository: spark Updated Branches: refs/heads/master 4f98d7a7f - 9f48bf6b3 [SPARK-7887][SQL] Remove EvaluatedType from SQL Expression. This type is not really used. Might as well remove it. Author: Reynold Xin r...@databricks.com Closes #6427 from rxin/evalutedType and squashes the following commits: 51a319a [Reynold Xin] [SPARK-7887][SQL] Remove EvaluatedType from SQL Expression. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9f48bf6b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9f48bf6b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9f48bf6b Branch: refs/heads/master Commit: 9f48bf6b3761d66c7dc50f076ed92aff21b7eea0 Parents: 4f98d7a Author: Reynold Xin r...@databricks.com Authored: Wed May 27 01:12:59 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Wed May 27 01:12:59 2015 -0700 -- .../spark/sql/catalyst/analysis/unresolved.scala| 10 +- .../sql/catalyst/expressions/BoundAttribute.scala | 2 -- .../spark/sql/catalyst/expressions/Cast.scala | 2 -- .../spark/sql/catalyst/expressions/Expression.scala | 8 ++-- .../sql/catalyst/expressions/ExtractValue.scala | 2 -- .../spark/sql/catalyst/expressions/ScalaUdf.scala | 2 -- .../spark/sql/catalyst/expressions/SortOrder.scala | 2 +- .../spark/sql/catalyst/expressions/aggregates.scala | 4 +--- .../spark/sql/catalyst/expressions/arithmetic.scala | 16 .../sql/catalyst/expressions/complexTypes.scala | 6 ++ .../sql/catalyst/expressions/decimalFunctions.scala | 2 -- .../spark/sql/catalyst/expressions/generators.scala | 2 -- .../spark/sql/catalyst/expressions/literals.scala | 2 -- .../sql/catalyst/expressions/mathfuncs/binary.scala | 3 ++- .../sql/catalyst/expressions/mathfuncs/unary.scala | 1 - .../sql/catalyst/expressions/namedExpressions.scala | 6 ++ .../sql/catalyst/expressions/nullFunctions.scala| 1 - .../spark/sql/catalyst/expressions/predicates.scala | 6 -- .../spark/sql/catalyst/expressions/random.scala | 2 -- .../spark/sql/catalyst/expressions/sets.scala | 4 .../sql/catalyst/expressions/stringOperations.scala | 8 .../catalyst/expressions/windowExpressions.scala| 12 +--- .../sql/catalyst/plans/physical/partitioning.scala | 4 ++-- .../spark/sql/catalyst/trees/TreeNodeSuite.scala| 9 - .../expressions/MonotonicallyIncreasingID.scala | 2 -- .../execution/expressions/SparkPartitionID.scala| 2 -- .../org/apache/spark/sql/execution/pythonUdfs.scala | 2 +- .../scala/org/apache/spark/sql/hive/hiveUdfs.scala | 5 + 28 files changed, 32 insertions(+), 95 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9f48bf6b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 2999c2e..bbb150c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -67,7 +67,7 @@ case class UnresolvedAttribute(nameParts: Seq[String]) override def withName(newName: String): UnresolvedAttribute = UnresolvedAttribute.quoted(newName) // Unresolved attributes are transient at compile time and don't get evaluated during execution. - override def eval(input: Row = null): EvaluatedType = + override def eval(input: Row = null): Any = throw new TreeNodeException(this, sNo function to evaluate expression. 
type: ${this.nodeName}) override def toString: String = s'$name @@ -85,7 +85,7 @@ case class UnresolvedFunction(name: String, children: Seq[Expression]) extends E override lazy val resolved = false // Unresolved functions are transient at compile time and don't get evaluated during execution. - override def eval(input: Row = null): EvaluatedType = + override def eval(input: Row = null): Any = throw new TreeNodeException(this, sNo function to evaluate expression. type: ${this.nodeName}) override def toString: String = s'$name(${children.mkString(,)}) @@ -107,7 +107,7 @@ trait Star extends NamedExpression with trees.LeafNode[Expression] { override lazy val resolved = false // Star gets expanded at runtime so we never evaluate a Star. - override def eval(input: Row = null): EvaluatedType = + override def eval(input: Row = null): Any = throw new TreeNodeException(this, sNo function to evaluate expression. type: ${this.nodeName}) def expand(input: Seq[Attribute], resolver:
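The gist of the change above, as a minimal sketch (the object name, the Literal example and the Seq-backed Row stand-in are invented for illustration; only the eval signature mirrors the commit): every Expression used to declare an associated EvaluatedType that eval() returned, and since nothing relied on it, eval() now simply returns Any.

    object EvalSketch {
      type Row = Seq[Any]  // stand-in for org.apache.spark.sql.Row in this sketch

      abstract class Expression {
        // Before SPARK-7887 (simplified): `type EvaluatedType <: Any` and
        // `def eval(input: Row = null): EvaluatedType`.
        // After the commit the associated type is gone and eval returns Any:
        def eval(input: Row = null): Any
      }

      // A trivial expression showing the new signature in use.
      case class Literal(value: Any) extends Expression {
        override def eval(input: Row = null): Any = value
      }
    }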
spark git commit: [SQL] Rename MathematicalExpression UnaryMathExpression, and specify BinaryMathExpression's output data type as DoubleType.
Repository: spark Updated Branches: refs/heads/master 9f48bf6b3 - 3e7d7d6b3 [SQL] Rename MathematicalExpression UnaryMathExpression, and specify BinaryMathExpression's output data type as DoubleType. Two minor changes. cc brkyvz Author: Reynold Xin r...@databricks.com Closes #6428 from rxin/math-func-cleanup and squashes the following commits: 5910df5 [Reynold Xin] [SQL] Rename MathematicalExpression UnaryMathExpression, and specify BinaryMathExpression's output data type as DoubleType. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e7d7d6b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e7d7d6b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e7d7d6b Branch: refs/heads/master Commit: 3e7d7d6b3d6e07b52b1a138f7aa2ef628597fe05 Parents: 9f48bf6 Author: Reynold Xin r...@databricks.com Authored: Wed May 27 01:13:57 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Wed May 27 01:13:57 2015 -0700 -- .../catalyst/expressions/mathfuncs/binary.scala | 9 +--- .../catalyst/expressions/mathfuncs/unary.scala | 46 ++-- 2 files changed, 23 insertions(+), 32 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3e7d7d6b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/binary.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/binary.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/binary.scala index d5be446..890efc9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/binary.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/binary.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions.mathfuncs -import org.apache.spark.sql.catalyst.analysis.UnresolvedException import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, BinaryExpression, Expression, Row} import org.apache.spark.sql.types._ @@ -41,13 +40,7 @@ abstract class BinaryMathExpression(f: (Double, Double) = Double, name: String) left.dataType == right.dataType !DecimalType.isFixed(left.dataType) - override def dataType: DataType = { -if (!resolved) { - throw new UnresolvedException(this, -sdatatype. Can not resolve due to differing types ${left.dataType}, ${right.dataType}) -} -left.dataType - } + override def dataType: DataType = DoubleType override def eval(input: Row): Any = { val evalE1 = left.eval(input) http://git-wip-us.apache.org/repos/asf/spark/blob/3e7d7d6b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/unary.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/unary.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/unary.scala index cdcb8e2..41b4223 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/unary.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/unary.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.types._ * input format, therefore these functions extend `ExpectsInputTypes`. 
* @param name The short name of the function */ -abstract class MathematicalExpression(f: Double = Double, name: String) +abstract class UnaryMathExpression(f: Double = Double, name: String) extends UnaryExpression with Serializable with ExpectsInputTypes { self: Product = @@ -46,46 +46,44 @@ abstract class MathematicalExpression(f: Double = Double, name: String) } } -case class Acos(child: Expression) extends MathematicalExpression(math.acos, ACOS) +case class Acos(child: Expression) extends UnaryMathExpression(math.acos, ACOS) -case class Asin(child: Expression) extends MathematicalExpression(math.asin, ASIN) +case class Asin(child: Expression) extends UnaryMathExpression(math.asin, ASIN) -case class Atan(child: Expression) extends MathematicalExpression(math.atan, ATAN) +case class Atan(child: Expression) extends UnaryMathExpression(math.atan, ATAN) -case class Cbrt(child: Expression) extends MathematicalExpression(math.cbrt, CBRT) +case class Cbrt(child: Expression) extends UnaryMathExpression(math.cbrt, CBRT) -case class Ceil(child: Expression) extends MathematicalExpression(math.ceil, CEIL) +case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, CEIL) -case class Cos(child: Expression) extends MathematicalExpression(math.cos, COS) +case class Cos(child: Expression) extends
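Both changes restated as a simplified sketch (MathFuncSketch, the Expr alias and the Double-valued children are invented; Acos, Pow and DoubleType mirror the patch): the unary base class only gets a clearer name, while BinaryMathExpression stops deriving its result type from its children -- every two-argument math.* function returns Double, so the type can be fixed.

    object MathFuncSketch {
      sealed trait DataType
      case object DoubleType extends DataType

      type Expr = Double  // the real children are Expressions; Double keeps the sketch tiny

      // Unary case: only the base-class name changes (was MathematicalExpression).
      abstract class UnaryMathExpression(f: Double => Double, name: String) {
        def child: Expr
        def eval(): Double = f(child)
      }
      case class Acos(child: Expr) extends UnaryMathExpression(math.acos, "ACOS")

      // Binary case: dataType is now always DoubleType; previously it was derived
      // from the children and threw UnresolvedException when their types differed.
      abstract class BinaryMathExpression(f: (Double, Double) => Double, name: String) {
        def left: Expr
        def right: Expr
        def dataType: DataType = DoubleType
        def eval(): Double = f(left, right)
      }
      case class Pow(left: Expr, right: Expr) extends BinaryMathExpression(math.pow, "POWER")
    }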
spark git commit: [SPARK-7864] [UI] Fix the logic grabbing the link from table in AllJobPage
Repository: spark Updated Branches: refs/heads/branch-1.4 a25ce91f9 - 13044b046 [SPARK-7864] [UI] Fix the logic grabbing the link from table in AllJobPage This issue is related to #6419 . Now AllJobPage doesn't have a kill link but I think fix the issue mentioned in #6419 just in case to avoid accidents in the future. So, it's minor issue for now and I don't file this issue in JIRA. Author: Kousuke Saruta saru...@oss.nttdata.co.jp Closes #6432 from sarutak/remove-ambiguity-of-link and squashes the following commits: cd1a503 [Kousuke Saruta] Fixed ambiguity link issue in AllJobPage (cherry picked from commit 0db76c90ad5f84d7a5640c41de74876b906ddc90) Signed-off-by: Andrew Or and...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/13044b04 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/13044b04 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/13044b04 Branch: refs/heads/branch-1.4 Commit: 13044b0460e866804e6e3f058ebe38c0d005c1ff Parents: a25ce91 Author: Kousuke Saruta saru...@oss.nttdata.co.jp Authored: Wed May 27 11:41:35 2015 -0700 Committer: Andrew Or and...@databricks.com Committed: Wed May 27 11:42:10 2015 -0700 -- .../src/main/resources/org/apache/spark/ui/static/timeline-view.js | 2 +- core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/13044b04/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js index 28ac998..ca74ef9 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js @@ -46,7 +46,7 @@ function drawApplicationTimeline(groupArray, eventObjArray, startTime) { }; $(this).click(function() { -var jobPagePath = $(getSelectorForJobEntry(this)).find(a).attr(href) +var jobPagePath = $(getSelectorForJobEntry(this)).find(a.name-link).attr(href) window.location.href = jobPagePath }); http://git-wip-us.apache.org/repos/asf/spark/blob/13044b04/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index e010ebe..2ce670a 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -231,7 +231,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage() { /td td span class=description-input title={lastStageDescription}{lastStageDescription}/span - a href={detailUrl}{lastStageName}/a + a href={detailUrl} class=name-link{lastStageName}/a /td td sorttable_customkey={job.submissionTime.getOrElse(-1).toString} {formattedSubmissionTime} - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-7864] [UI] Fix the logic grabbing the link from table in AllJobPage
Repository: spark Updated Branches: refs/heads/master 15459db4f - 0db76c90a [SPARK-7864] [UI] Fix the logic grabbing the link from table in AllJobPage This issue is related to #6419 . Now AllJobPage doesn't have a kill link but I think fix the issue mentioned in #6419 just in case to avoid accidents in the future. So, it's minor issue for now and I don't file this issue in JIRA. Author: Kousuke Saruta saru...@oss.nttdata.co.jp Closes #6432 from sarutak/remove-ambiguity-of-link and squashes the following commits: cd1a503 [Kousuke Saruta] Fixed ambiguity link issue in AllJobPage Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0db76c90 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0db76c90 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0db76c90 Branch: refs/heads/master Commit: 0db76c90ad5f84d7a5640c41de74876b906ddc90 Parents: 15459db Author: Kousuke Saruta saru...@oss.nttdata.co.jp Authored: Wed May 27 11:41:35 2015 -0700 Committer: Andrew Or and...@databricks.com Committed: Wed May 27 11:41:37 2015 -0700 -- .../src/main/resources/org/apache/spark/ui/static/timeline-view.js | 2 +- core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0db76c90/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js index 28ac998..ca74ef9 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js @@ -46,7 +46,7 @@ function drawApplicationTimeline(groupArray, eventObjArray, startTime) { }; $(this).click(function() { -var jobPagePath = $(getSelectorForJobEntry(this)).find(a).attr(href) +var jobPagePath = $(getSelectorForJobEntry(this)).find(a.name-link).attr(href) window.location.href = jobPagePath }); http://git-wip-us.apache.org/repos/asf/spark/blob/0db76c90/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index e010ebe..2ce670a 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -231,7 +231,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage() { /td td span class=description-input title={lastStageDescription}{lastStageDescription}/span - a href={detailUrl}{lastStageName}/a + a href={detailUrl} class=name-link{lastStageName}/a /td td sorttable_customkey={job.submissionTime.getOrElse(-1).toString} {formattedSubmissionTime} - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
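The ambiguity being removed: the timeline's click handler used to follow the first anchor in the job row, which breaks once the row contains more than one link (for example a kill link). The fix tags the job-name anchor with a name-link class on the server side and narrows the jQuery selector to a.name-link. A rough sketch of the server-side half, using the Scala XML literals Spark's UI code is written in (JobRowSketch and nameCell are hypothetical, not the actual AllJobsPage markup):

    object JobRowSketch {
      // Tag the name anchor so client-side code can locate it unambiguously,
      // even if the same cell later gains additional links.
      def nameCell(detailUrl: String, lastStageName: String): scala.xml.Elem =
        <td>
          <a href={detailUrl} class="name-link">{lastStageName}</a>
        </td>
    }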
spark git commit: Removed Guava dependency from JavaTypeInference's type signature.
Repository: spark Updated Branches: refs/heads/master 0db76c90a - 6fec1a940 Removed Guava dependency from JavaTypeInference's type signature. This should also close #6243. Author: Reynold Xin r...@databricks.com Closes #6431 from rxin/JavaTypeInference-guava and squashes the following commits: e58df3c [Reynold Xin] Removed Gauva dependency from JavaTypeInference's type signature. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6fec1a94 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6fec1a94 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6fec1a94 Branch: refs/heads/master Commit: 6fec1a9409b34d8ce58ea1c330b52cc7ef3e7e7e Parents: 0db76c9 Author: Reynold Xin r...@databricks.com Authored: Wed May 27 11:54:35 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Wed May 27 11:54:35 2015 -0700 -- .../apache/spark/sql/catalyst/JavaTypeInference.scala| 11 ++- .../src/main/scala/org/apache/spark/sql/SQLContext.scala | 4 +--- 2 files changed, 11 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6fec1a94/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index 625c8d3..9a3f969 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -39,11 +39,20 @@ private [sql] object JavaTypeInference { private val valuesReturnType = classOf[JMap[_, _]].getMethod(values).getGenericReturnType /** + * Infers the corresponding SQL data type of a JavaClean class. + * @param beanClass Java type + * @return (SQL data type, nullable) + */ + def inferDataType(beanClass: Class[_]): (DataType, Boolean) = { +inferDataType(TypeToken.of(beanClass)) + } + + /** * Infers the corresponding SQL data type of a Java type. * @param typeToken Java type * @return (SQL data type, nullable) */ - private [sql] def inferDataType(typeToken: TypeToken[_]): (DataType, Boolean) = { + private def inferDataType(typeToken: TypeToken[_]): (DataType, Boolean) = { // TODO: All of this could probably be moved to Catalyst as it is mostly not Spark specific. typeToken.getRawType match { case c: Class[_] if c.isAnnotationPresent(classOf[SQLUserDefinedType]) = http://git-wip-us.apache.org/repos/asf/spark/blob/6fec1a94/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 3935f7b..15c3035 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -27,8 +27,6 @@ import scala.language.implicitConversions import scala.reflect.runtime.universe.TypeTag import scala.util.control.NonFatal -import com.google.common.reflect.TypeToken - import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} @@ -1011,7 +1009,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns a Catalyst Schema for the given java bean class. 
*/ protected def getSchema(beanClass: Class[_]): Seq[AttributeReference] = { -val (dataType, _) = JavaTypeInference.inferDataType(TypeToken.of(beanClass)) +val (dataType, _) = JavaTypeInference.inferDataType(beanClass) dataType.asInstanceOf[StructType].fields.map { f => AttributeReference(f.name, f.dataType, f.nullable)() }
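The pattern in play, sketched under the assumption of Guava on the classpath (TypeInferenceSketch and the (String, Boolean) result are illustration only; the inferDataType names mirror the commit): keep the TypeToken machinery private and expose a Class[_]-only entry point, so callers such as SQLContext.getSchema never mention a Guava type in their own code.

    import com.google.common.reflect.TypeToken

    object TypeInferenceSketch {
      // Public entry point: no Guava types in the signature.
      def inferDataType(beanClass: Class[_]): (String, Boolean) =
        inferDataType(TypeToken.of(beanClass))

      // Guava stays an implementation detail behind this private overload.
      private def inferDataType(typeToken: TypeToken[_]): (String, Boolean) =
        (typeToken.getRawType.getSimpleName, true)  // stand-in for the real mapping to a DataType
    }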
spark git commit: Removed Guava dependency from JavaTypeInference's type signature.
Repository: spark Updated Branches: refs/heads/branch-1.4 13044b046 - 0468d57a6 Removed Guava dependency from JavaTypeInference's type signature. This should also close #6243. Author: Reynold Xin r...@databricks.com Closes #6431 from rxin/JavaTypeInference-guava and squashes the following commits: e58df3c [Reynold Xin] Removed Gauva dependency from JavaTypeInference's type signature. (cherry picked from commit 6fec1a9409b34d8ce58ea1c330b52cc7ef3e7e7e) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0468d57a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0468d57a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0468d57a Branch: refs/heads/branch-1.4 Commit: 0468d57a6fe42a7f06ccd4ac1faad59c4dcc4c68 Parents: 13044b0 Author: Reynold Xin r...@databricks.com Authored: Wed May 27 11:54:35 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Wed May 27 11:54:42 2015 -0700 -- .../apache/spark/sql/catalyst/JavaTypeInference.scala| 11 ++- .../src/main/scala/org/apache/spark/sql/SQLContext.scala | 4 +--- 2 files changed, 11 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0468d57a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index 625c8d3..9a3f969 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -39,11 +39,20 @@ private [sql] object JavaTypeInference { private val valuesReturnType = classOf[JMap[_, _]].getMethod(values).getGenericReturnType /** + * Infers the corresponding SQL data type of a JavaClean class. + * @param beanClass Java type + * @return (SQL data type, nullable) + */ + def inferDataType(beanClass: Class[_]): (DataType, Boolean) = { +inferDataType(TypeToken.of(beanClass)) + } + + /** * Infers the corresponding SQL data type of a Java type. * @param typeToken Java type * @return (SQL data type, nullable) */ - private [sql] def inferDataType(typeToken: TypeToken[_]): (DataType, Boolean) = { + private def inferDataType(typeToken: TypeToken[_]): (DataType, Boolean) = { // TODO: All of this could probably be moved to Catalyst as it is mostly not Spark specific. typeToken.getRawType match { case c: Class[_] if c.isAnnotationPresent(classOf[SQLUserDefinedType]) = http://git-wip-us.apache.org/repos/asf/spark/blob/0468d57a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 3935f7b..15c3035 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -27,8 +27,6 @@ import scala.language.implicitConversions import scala.reflect.runtime.universe.TypeTag import scala.util.control.NonFatal -import com.google.common.reflect.TypeToken - import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} @@ -1011,7 +1009,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns a Catalyst Schema for the given java bean class. 
*/ protected def getSchema(beanClass: Class[_]): Seq[AttributeReference] = { -val (dataType, _) = JavaTypeInference.inferDataType(TypeToken.of(beanClass)) +val (dataType, _) = JavaTypeInference.inferDataType(beanClass) dataType.asInstanceOf[StructType].fields.map { f => AttributeReference(f.name, f.dataType, f.nullable)() }
spark git commit: [CORE] [TEST] HistoryServerSuite failed due to timezone issue
Repository: spark Updated Branches: refs/heads/master 3e7d7d6b3 - 4615081d7 [CORE] [TEST] HistoryServerSuite failed due to timezone issue follow up for #6377 Change time to the equivalent in GMT /cc squito Author: scwf wangf...@huawei.com Closes #6425 from scwf/fix-HistoryServerSuite and squashes the following commits: 4d37935 [scwf] fix HistoryServerSuite Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4615081d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4615081d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4615081d Branch: refs/heads/master Commit: 4615081d7a10b023491e25478d19b8161e030974 Parents: 3e7d7d6 Author: scwf wangf...@huawei.com Authored: Wed May 27 09:12:18 2015 -0500 Committer: Imran Rashid iras...@cloudera.com Committed: Wed May 27 09:12:18 2015 -0500 -- .../scala/org/apache/spark/deploy/history/HistoryServerSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4615081d/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 4adb512..e10dd4c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -82,7 +82,7 @@ class HistoryServerSuite extends FunSuite with BeforeAndAfter with Matchers with running app list json - applications?status=running, minDate app list json - applications?minDate=2015-02-10, maxDate app list json - applications?maxDate=2015-02-10, -maxDate2 app list json - applications?maxDate=2015-02-03T10:42:40.000CST, +maxDate2 app list json - applications?maxDate=2015-02-03T16:42:40.000GMT, one app json - applications/local-1422981780767, one app multi-attempt json - applications/local-1426533911241, job list json - applications/local-1422981780767/jobs, - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [CORE] [TEST] HistoryServerSuite failed due to timezone issue
Repository: spark Updated Branches: refs/heads/branch-1.4 e5357132b - 90525c9ba [CORE] [TEST] HistoryServerSuite failed due to timezone issue follow up for #6377 Change time to the equivalent in GMT /cc squito Author: scwf wangf...@huawei.com Closes #6425 from scwf/fix-HistoryServerSuite and squashes the following commits: 4d37935 [scwf] fix HistoryServerSuite (cherry picked from commit 4615081d7a10b023491e25478d19b8161e030974) Signed-off-by: Imran Rashid iras...@cloudera.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/90525c9b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/90525c9b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/90525c9b Branch: refs/heads/branch-1.4 Commit: 90525c9ba1ca8528567ea30e611511251d55f685 Parents: e535713 Author: scwf wangf...@huawei.com Authored: Wed May 27 09:12:18 2015 -0500 Committer: Imran Rashid iras...@cloudera.com Committed: Wed May 27 09:12:31 2015 -0500 -- .../scala/org/apache/spark/deploy/history/HistoryServerSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/90525c9b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 4adb512..e10dd4c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -82,7 +82,7 @@ class HistoryServerSuite extends FunSuite with BeforeAndAfter with Matchers with running app list json - applications?status=running, minDate app list json - applications?minDate=2015-02-10, maxDate app list json - applications?maxDate=2015-02-10, -maxDate2 app list json - applications?maxDate=2015-02-03T10:42:40.000CST, +maxDate2 app list json - applications?maxDate=2015-02-03T16:42:40.000GMT, one app json - applications/local-1422981780767, one app multi-attempt json - applications/local-1426533911241, job list json - applications/local-1422981780767/jobs, - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
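Why the substitution is safe: 2015-02-03T10:42:40.000 in US Central Standard Time and 2015-02-03T16:42:40.000 GMT name the same instant, so expressing the expected URL in GMT only removes the dependence on the timezone of the machine running the suite. An illustrative check, not part of the test (TimezoneSketch is invented):

    import java.text.SimpleDateFormat

    object TimezoneSketch extends App {
      val fmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSzzz")
      val cst = fmt.parse("2015-02-03T10:42:40.000CST").getTime
      val gmt = fmt.parse("2015-02-03T16:42:40.000GMT").getTime
      assert(cst == gmt)  // identical epoch millis; the 6-hour offset is folded into the zone
    }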
spark git commit: [SPARK-7878] Rename Stage.jobId to firstJobId
Repository: spark Updated Branches: refs/heads/master 4615081d7 - ff0ddff46 [SPARK-7878] Rename Stage.jobId to firstJobId The previous name was confusing, because each stage can be associated with many jobs, and jobId is just the ID of the first job that was associated with the Stage. This commit also renames some of the method parameters in DAGScheduler.scala to clarify when the jobId refers to the first job ID associated with the stage (as opposed to the jobId associated with a job that's currently being scheduled). cc markhamstra JoshRosen (hopefully this will help prevent future bugs like SPARK-6880) Author: Kay Ousterhout kayousterh...@gmail.com Closes #6418 from kayousterhout/SPARK-7878 and squashes the following commits: b71a9b8 [Kay Ousterhout] [SPARK-7878] Rename Stage.jobId to firstJobId Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ff0ddff4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ff0ddff4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ff0ddff4 Branch: refs/heads/master Commit: ff0ddff46935ae3d036b7dbc437fff8a6c19d6a4 Parents: 4615081 Author: Kay Ousterhout kayousterh...@gmail.com Authored: Wed May 27 09:32:29 2015 -0700 Committer: Kay Ousterhout kayousterh...@gmail.com Committed: Wed May 27 09:32:29 2015 -0700 -- .../apache/spark/scheduler/DAGScheduler.scala | 58 +--- .../apache/spark/scheduler/ResultStage.scala| 4 +- .../spark/scheduler/ShuffleMapStage.scala | 4 +- .../org/apache/spark/scheduler/Stage.scala | 4 +- 4 files changed, 33 insertions(+), 37 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ff0ddff4/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 5d81291..a083be2 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -208,19 +208,17 @@ class DAGScheduler( /** * Get or create a shuffle map stage for the given shuffle dependency's map side. - * The jobId value passed in will be used if the stage doesn't already exist with - * a lower jobId (jobId always increases across jobs.) */ private def getShuffleMapStage( shuffleDep: ShuffleDependency[_, _, _], - jobId: Int): ShuffleMapStage = { + firstJobId: Int): ShuffleMapStage = { shuffleToMapStage.get(shuffleDep.shuffleId) match { case Some(stage) = stage case None = // We are going to register ancestor shuffle dependencies -registerShuffleDependencies(shuffleDep, jobId) +registerShuffleDependencies(shuffleDep, firstJobId) // Then register current shuffleDep -val stage = newOrUsedShuffleStage(shuffleDep, jobId) +val stage = newOrUsedShuffleStage(shuffleDep, firstJobId) shuffleToMapStage(shuffleDep.shuffleId) = stage stage @@ -230,15 +228,15 @@ class DAGScheduler( /** * Helper function to eliminate some code re-use when creating new stages. */ - private def getParentStagesAndId(rdd: RDD[_], jobId: Int): (List[Stage], Int) = { -val parentStages = getParentStages(rdd, jobId) + private def getParentStagesAndId(rdd: RDD[_], firstJobId: Int): (List[Stage], Int) = { +val parentStages = getParentStages(rdd, firstJobId) val id = nextStageId.getAndIncrement() (parentStages, id) } /** * Create a ShuffleMapStage as part of the (re)-creation of a shuffle map stage in - * newOrUsedShuffleStage. The stage will be associated with the provided jobId. 
+ * newOrUsedShuffleStage. The stage will be associated with the provided firstJobId. * Production of shuffle map stages should always use newOrUsedShuffleStage, not * newShuffleMapStage directly. */ @@ -246,21 +244,19 @@ class DAGScheduler( rdd: RDD[_], numTasks: Int, shuffleDep: ShuffleDependency[_, _, _], - jobId: Int, + firstJobId: Int, callSite: CallSite): ShuffleMapStage = { -val (parentStages: List[Stage], id: Int) = getParentStagesAndId(rdd, jobId) +val (parentStages: List[Stage], id: Int) = getParentStagesAndId(rdd, firstJobId) val stage: ShuffleMapStage = new ShuffleMapStage(id, rdd, numTasks, parentStages, - jobId, callSite, shuffleDep) + firstJobId, callSite, shuffleDep) stageIdToStage(id) = stage -updateJobIdStageIdMaps(jobId, stage) +updateJobIdStageIdMaps(firstJobId, stage) stage } /** - * Create a ResultStage -- either directly for use as a result stage, or as part of the -
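The motivation in code form, as a hypothetical simplification of Stage (StageSketch and the jobIds initialisation are illustrative): a stage created for one job can later be shared by other jobs, so the single ID the class stores identifies only the first such job -- hence the rename.

    object StageSketch {
      import scala.collection.mutable

      class Stage(val id: Int, val firstJobId: Int) {
        // Every job that currently depends on this stage; firstJobId is merely
        // the one that caused the stage to be created, not "the" job of the stage.
        val jobIds: mutable.HashSet[Int] = mutable.HashSet(firstJobId)
      }
    }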
spark git commit: [SPARK-7847] [SQL] Fixes dynamic partition directory escaping
Repository: spark Updated Branches: refs/heads/branch-1.4 90525c9ba - a25ce91f9 [SPARK-7847] [SQL] Fixes dynamic partition directory escaping Please refer to [SPARK-7847] [1] for details. [1]: https://issues.apache.org/jira/browse/SPARK-7847 Author: Cheng Lian l...@databricks.com Closes #6389 from liancheng/spark-7847 and squashes the following commits: 935c652 [Cheng Lian] Adds test case for writing various data types as dynamic partition value f4fc398 [Cheng Lian] Converts partition columns to Scala type when writing dynamic partitions d0aeca0 [Cheng Lian] Fixes dynamic partition directory escaping (cherry picked from commit 15459db4f6867e95076cf53fade2fca833c4cf4e) Signed-off-by: Yin Huai yh...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a25ce91f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a25ce91f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a25ce91f Branch: refs/heads/branch-1.4 Commit: a25ce91f9685604cfb567a6860182ba467ceed8d Parents: 90525c9 Author: Cheng Lian l...@databricks.com Authored: Wed May 27 10:09:12 2015 -0700 Committer: Yin Huai yh...@databricks.com Committed: Wed May 27 10:09:20 2015 -0700 -- .../apache/spark/sql/parquet/newParquet.scala | 22 -- .../spark/sql/sources/PartitioningUtils.scala | 76 +++- .../org/apache/spark/sql/sources/commands.scala | 57 ++- .../ParquetPartitionDiscoverySuite.scala| 57 ++- 4 files changed, 152 insertions(+), 60 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a25ce91f/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index cb1e608..8b3e1b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.parquet +import java.net.URI import java.util.{List = JList} import scala.collection.JavaConversions._ @@ -282,21 +283,28 @@ private[sql] class ParquetRelation2( val cacheMetadata = useMetadataCache @transient val cachedStatuses = inputFiles.map { f = - // In order to encode the authority of a Path containing special characters such as /, - // we need to use the string returned by the URI of the path to create a new Path. - val pathWithAuthority = new Path(f.getPath.toUri.toString) - + // In order to encode the authority of a Path containing special characters such as '/' + // (which does happen in some S3N credentials), we need to use the string returned by the + // URI of the path to create a new Path. + val pathWithEscapedAuthority = escapePathUserInfo(f.getPath) new FileStatus( f.getLen, f.isDir, f.getReplication, f.getBlockSize, f.getModificationTime, -f.getAccessTime, f.getPermission, f.getOwner, f.getGroup, pathWithAuthority) +f.getAccessTime, f.getPermission, f.getOwner, f.getGroup, pathWithEscapedAuthority) }.toSeq @transient val cachedFooters = footers.map { f = // In order to encode the authority of a Path containing special characters such as /, // we need to use the string returned by the URI of the path to create a new Path. 
- new Footer(new Path(f.getFile.toUri.toString), f.getParquetMetadata) + new Footer(escapePathUserInfo(f.getFile), f.getParquetMetadata) }.toSeq +private def escapePathUserInfo(path: Path): Path = { + val uri = path.toUri + new Path(new URI( +uri.getScheme, uri.getRawUserInfo, uri.getHost, uri.getPort, uri.getPath, +uri.getQuery, uri.getFragment)) +} + // Overridden so we can inject our own cached files statuses. override def getPartitions: Array[SparkPartition] = { val inputFormat = if (cacheMetadata) { @@ -377,7 +385,7 @@ private[sql] class ParquetRelation2( .orElse(readSchema()) .orElse(maybeMetastoreSchema) .getOrElse(sys.error(Failed to get the schema.)) - + // If this Parquet relation is converted from a Hive Metastore table, must reconcile case // case insensitivity issue and possible schema mismatch (probably caused by schema // evolution). http://git-wip-us.apache.org/repos/asf/spark/blob/a25ce91f/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala
spark git commit: [SPARK-7847] [SQL] Fixes dynamic partition directory escaping
Repository: spark Updated Branches: refs/heads/master ff0ddff46 - 15459db4f [SPARK-7847] [SQL] Fixes dynamic partition directory escaping Please refer to [SPARK-7847] [1] for details. [1]: https://issues.apache.org/jira/browse/SPARK-7847 Author: Cheng Lian l...@databricks.com Closes #6389 from liancheng/spark-7847 and squashes the following commits: 935c652 [Cheng Lian] Adds test case for writing various data types as dynamic partition value f4fc398 [Cheng Lian] Converts partition columns to Scala type when writing dynamic partitions d0aeca0 [Cheng Lian] Fixes dynamic partition directory escaping Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/15459db4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/15459db4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/15459db4 Branch: refs/heads/master Commit: 15459db4f6867e95076cf53fade2fca833c4cf4e Parents: ff0ddff Author: Cheng Lian l...@databricks.com Authored: Wed May 27 10:09:12 2015 -0700 Committer: Yin Huai yh...@databricks.com Committed: Wed May 27 10:09:12 2015 -0700 -- .../apache/spark/sql/parquet/newParquet.scala | 22 -- .../spark/sql/sources/PartitioningUtils.scala | 76 +++- .../org/apache/spark/sql/sources/commands.scala | 57 ++- .../ParquetPartitionDiscoverySuite.scala| 57 ++- 4 files changed, 152 insertions(+), 60 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/15459db4/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index cb1e608..8b3e1b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.parquet +import java.net.URI import java.util.{List = JList} import scala.collection.JavaConversions._ @@ -282,21 +283,28 @@ private[sql] class ParquetRelation2( val cacheMetadata = useMetadataCache @transient val cachedStatuses = inputFiles.map { f = - // In order to encode the authority of a Path containing special characters such as /, - // we need to use the string returned by the URI of the path to create a new Path. - val pathWithAuthority = new Path(f.getPath.toUri.toString) - + // In order to encode the authority of a Path containing special characters such as '/' + // (which does happen in some S3N credentials), we need to use the string returned by the + // URI of the path to create a new Path. + val pathWithEscapedAuthority = escapePathUserInfo(f.getPath) new FileStatus( f.getLen, f.isDir, f.getReplication, f.getBlockSize, f.getModificationTime, -f.getAccessTime, f.getPermission, f.getOwner, f.getGroup, pathWithAuthority) +f.getAccessTime, f.getPermission, f.getOwner, f.getGroup, pathWithEscapedAuthority) }.toSeq @transient val cachedFooters = footers.map { f = // In order to encode the authority of a Path containing special characters such as /, // we need to use the string returned by the URI of the path to create a new Path. 
- new Footer(new Path(f.getFile.toUri.toString), f.getParquetMetadata) + new Footer(escapePathUserInfo(f.getFile), f.getParquetMetadata) }.toSeq +private def escapePathUserInfo(path: Path): Path = { + val uri = path.toUri + new Path(new URI( +uri.getScheme, uri.getRawUserInfo, uri.getHost, uri.getPort, uri.getPath, +uri.getQuery, uri.getFragment)) +} + // Overridden so we can inject our own cached files statuses. override def getPartitions: Array[SparkPartition] = { val inputFormat = if (cacheMetadata) { @@ -377,7 +385,7 @@ private[sql] class ParquetRelation2( .orElse(readSchema()) .orElse(maybeMetastoreSchema) .getOrElse(sys.error(Failed to get the schema.)) - + // If this Parquet relation is converted from a Hive Metastore table, must reconcile case // case insensitivity issue and possible schema mismatch (probably caused by schema // evolution). http://git-wip-us.apache.org/repos/asf/spark/blob/15459db4/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala -- diff --git
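One helper from the ParquetRelation2 part of the patch, restated cleanly because the line-wrapping above garbles it (only the EscapeSketch wrapper object is invented): each cached Path is rebuilt from its URI components using the raw user info, so percent-escaped characters in embedded credentials -- for example a '/' inside an S3N secret key -- are not re-interpreted as path separators.

    import java.net.URI
    import org.apache.hadoop.fs.Path

    object EscapeSketch {
      def escapePathUserInfo(path: Path): Path = {
        val uri = path.toUri
        new Path(new URI(
          uri.getScheme, uri.getRawUserInfo, uri.getHost, uri.getPort, uri.getPath,
          uri.getQuery, uri.getFragment))
      }
    }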
spark git commit: [SPARK-7790] [SQL] date and decimal conversion for dynamic partition key
Repository: spark Updated Branches: refs/heads/master 6fec1a940 - 8161562ea [SPARK-7790] [SQL] date and decimal conversion for dynamic partition key Author: Daoyuan Wang daoyuan.w...@intel.com Closes #6318 from adrian-wang/dynpart and squashes the following commits: ad73b61 [Daoyuan Wang] not use sqlTestUtils for try catch because dont have sqlcontext here 6c33b51 [Daoyuan Wang] fix according to liancheng f0f8074 [Daoyuan Wang] some specific types as dynamic partition Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8161562e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8161562e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8161562e Branch: refs/heads/master Commit: 8161562eabc1eff430cfd9d8eaf413a8c4ef2cfb Parents: 6fec1a9 Author: Daoyuan Wang daoyuan.w...@intel.com Authored: Wed May 27 12:42:13 2015 -0700 Committer: Yin Huai yh...@databricks.com Committed: Wed May 27 12:42:13 2015 -0700 -- .../hive/execution/InsertIntoHiveTable.scala| 2 +- .../spark/sql/hive/hiveWriterContainers.scala | 17 -- .../sql/hive/execution/SQLQuerySuite.scala | 33 3 files changed, 48 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8161562e/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index c0b0b10..7a6ca48 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -106,7 +106,7 @@ case class InsertIntoHiveTable( } writerContainer - .getLocalFileWriter(row) + .getLocalFileWriter(row, table.schema) .write(serializer.serialize(outputData, standardOI)) } http://git-wip-us.apache.org/repos/asf/spark/blob/8161562e/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala index cbc381c..50b209f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala @@ -34,8 +34,10 @@ import org.apache.hadoop.hive.common.FileUtils import org.apache.spark.mapred.SparkHadoopMapRedUtil import org.apache.spark.sql.Row import org.apache.spark.{Logging, SerializableWritable, SparkHadoopWriter} +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.hive.{ShimFileSinkDesc = FileSinkDesc} import org.apache.spark.sql.hive.HiveShim._ +import org.apache.spark.sql.types._ /** * Internal helper class that saves an RDD using a Hive OutputFormat. @@ -92,7 +94,7 @@ private[hive] class SparkHiveWriterContainer( part- + numberFormat.format(splitID) + extension } - def getLocalFileWriter(row: Row): FileSinkOperator.RecordWriter = writer + def getLocalFileWriter(row: Row, schema: StructType): FileSinkOperator.RecordWriter = writer def close() { // Seems the boolean value passed into close does not matter. 
@@ -195,11 +197,20 @@ private[spark] class SparkHiveDynamicPartitionWriterContainer( jobConf.setBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, oldMarker) } - override def getLocalFileWriter(row: Row): FileSinkOperator.RecordWriter = { + override def getLocalFileWriter(row: Row, schema: StructType): FileSinkOperator.RecordWriter = { +def convertToHiveRawString(col: String, value: Any): String = { + val raw = String.valueOf(value) + schema(col).dataType match { +case DateType = DateUtils.toString(raw.toInt) +case _: DecimalType = BigDecimal(raw).toString() +case _ = raw + } +} + val dynamicPartPath = dynamicPartColNames .zip(row.toSeq.takeRight(dynamicPartColNames.length)) .map { case (col, rawVal) = -val string = if (rawVal == null) null else String.valueOf(rawVal) +val string = if (rawVal == null) null else convertToHiveRawString(col, rawVal) val colString = if (string == null || string.isEmpty) { defaultPartName http://git-wip-us.apache.org/repos/asf/spark/blob/8161562e/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
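The conversion at the heart of the change, as a self-contained sketch (PartitionValueSketch and toHivePartitionString are invented, and plain JVM types stand in for the Catalyst DataType dispatch of the real convertToHiveRawString): date and decimal partition values need an explicit, Hive-friendly string form rather than String.valueOf on the raw internal value.

    object PartitionValueSketch {
      def toHivePartitionString(value: Any): String = value match {
        case d: java.sql.Date        => d.toString       // "yyyy-MM-dd", not the internal day count
        case d: java.math.BigDecimal => d.toPlainString  // avoids scientific notation
        case other                   => String.valueOf(other)
      }
    }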
spark git commit: [SPARK-7684] [SQL] Refactoring MetastoreDataSourcesSuite to workaround SPARK-7684
Repository: spark Updated Branches: refs/heads/branch-1.4 d33142fd8 - 89fe93fc3 [SPARK-7684] [SQL] Refactoring MetastoreDataSourcesSuite to workaround SPARK-7684 As stated in SPARK-7684, currently `TestHive.reset` has some execution order specific bug, which makes running specific test suites locally pretty frustrating. This PR refactors `MetastoreDataSourcesSuite` (which relies on `TestHive.reset` heavily) using various `withXxx` utility methods in `SQLTestUtils` to ask each test case to cleanup their own mess so that we can avoid calling `TestHive.reset`. Author: Cheng Lian l...@databricks.com Author: Yin Huai yh...@databricks.com Closes #6353 from liancheng/workaround-spark-7684 and squashes the following commits: 26939aa [Yin Huai] Move the initialization of jsonFilePath to beforeAll. a423d48 [Cheng Lian] Fixes Scala style issue dfe45d0 [Cheng Lian] Refactors MetastoreDataSourcesSuite to workaround SPARK-7684 92a116d [Cheng Lian] Fixes minor styling issues (cherry picked from commit b97ddff000b99adca3dd8fe13d01054fd5014fa0) Signed-off-by: Yin Huai yh...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/89fe93fc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/89fe93fc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/89fe93fc Branch: refs/heads/branch-1.4 Commit: 89fe93fc3b93009f1741b59dda6a4a9005128d1e Parents: d33142f Author: Cheng Lian l...@databricks.com Authored: Wed May 27 13:09:33 2015 -0700 Committer: Yin Huai yh...@databricks.com Committed: Wed May 27 13:09:42 2015 -0700 -- .../scala/org/apache/spark/sql/QueryTest.scala |4 + .../apache/spark/sql/test/SQLTestUtils.scala| 12 +- .../sql/hive/MetastoreDataSourcesSuite.scala| 1372 +- 3 files changed, 722 insertions(+), 666 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/89fe93fc/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala index bbf9ab1..98ba3c9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala @@ -67,6 +67,10 @@ class QueryTest extends PlanTest { checkAnswer(df, Seq(expectedAnswer)) } + protected def checkAnswer(df: DataFrame, expectedAnswer: DataFrame): Unit = { +checkAnswer(df, expectedAnswer.collect()) + } + def sqlTest(sqlString: String, expectedAnswer: Seq[Row])(implicit sqlContext: SQLContext) { test(sqlString) { checkAnswer(sqlContext.sql(sqlString), expectedAnswer) http://git-wip-us.apache.org/repos/asf/spark/blob/89fe93fc/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala index ca66cdc..17a8b0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -75,14 +75,18 @@ trait SQLTestUtils { /** * Drops temporary table `tableName` after calling `f`. */ - protected def withTempTable(tableName: String)(f: = Unit): Unit = { -try f finally sqlContext.dropTempTable(tableName) + protected def withTempTable(tableNames: String*)(f: = Unit): Unit = { +try f finally tableNames.foreach(sqlContext.dropTempTable) } /** * Drops table `tableName` after calling `f`. 
*/ - protected def withTable(tableName: String)(f: = Unit): Unit = { -try f finally sqlContext.sql(sDROP TABLE IF EXISTS $tableName) + protected def withTable(tableNames: String*)(f: = Unit): Unit = { +try f finally { + tableNames.foreach { name = +sqlContext.sql(sDROP TABLE IF EXISTS $name) + } +} } } http://git-wip-us.apache.org/repos/asf/spark/blob/89fe93fc/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala -- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 9623ef0..58e2d1f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -21,770 +21,818 @@ import java.io.File import scala.collection.mutable.ArrayBuffer +import org.scalatest.BeforeAndAfterAll + import
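A hypothetical use of the widened helpers, assuming a suite that mixes in SQLTestUtils and has a sqlContext in scope (t1 and t2 are made-up table names): each test drops exactly the tables it created, even when the body throws, which is what lets the suite stop relying on TestHive.reset.

    withTable("t1", "t2") {
      sqlContext.sql("CREATE TABLE t1 (key INT, value STRING)")
      sqlContext.sql("CREATE TABLE t2 AS SELECT * FROM t1")
      // ... assertions against t1 and t2 ...
    }  // DROP TABLE IF EXISTS t1 / t2 runs here, even on failure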
spark git commit: [SPARK-7853] [SQL] Fixes a class loader issue in Spark SQL
Repository: spark Updated Branches: refs/heads/branch-1.4 89fe93fc3 - e07b71560 [SPARK-7853] [SQL] Fixes a class loader issue in Spark SQL This PR is based on PR #6396 authored by chenghao-intel. Essentially, Spark SQL should use context classloader to load SerDe classes. yhuai helped updating the test case, and I fixed a bug in the original `CliSuite`: while testing the CLI tool with `runCliWithin`, we don't append `\n` to the last query, thus the last query is never executed. Original PR description is pasted below. ``` bin/spark-sql --jars ./sql/hive/src/test/resources/hive-hcatalog-core-0.13.1.jar CREATE TABLE t1(a string, b string) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'; ``` Throws exception like ``` 15/05/26 00:16:33 ERROR SparkSQLDriver: Failed in [CREATE TABLE t1(a string, b string) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'] org.apache.spark.sql.execution.QueryExecutionException: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Cannot validate serde: org.apache.hive.hcatalog.data.JsonSerDe at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$runHive$1.apply(ClientWrapper.scala:333) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$runHive$1.apply(ClientWrapper.scala:310) at org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:139) at org.apache.spark.sql.hive.client.ClientWrapper.runHive(ClientWrapper.scala:310) at org.apache.spark.sql.hive.client.ClientWrapper.runSqlHive(ClientWrapper.scala:300) at org.apache.spark.sql.hive.HiveContext.runSqlHive(HiveContext.scala:457) at org.apache.spark.sql.hive.execution.HiveNativeCommand.run(HiveNativeCommand.scala:33) at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:57) at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:57) at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:68) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:148) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:87) at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:922) at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:922) at org.apache.spark.sql.DataFrame.init(DataFrame.scala:147) at org.apache.spark.sql.DataFrame.init(DataFrame.scala:131) at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51) at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:727) at org.apache.spark.sql.hive.thriftserver.AbstractSparkSQLDriver.run(AbstractSparkSQLDriver.scala:57) ``` Author: Cheng Hao hao.ch...@intel.com Author: Cheng Lian l...@databricks.com Author: Yin Huai yh...@databricks.com Closes #6435 from liancheng/classLoader and squashes the following commits: d4c4845 [Cheng Lian] Fixes CliSuite 75e80e2 [Yin Huai] Update the fix. 
fd26533 [Cheng Hao] scalastyle dd78775 [Cheng Hao] workaround for classloader of IsolatedClientLoader (cherry picked from commit db3fd054f240c7e38aba0732e471df65cd14011a) Signed-off-by: Yin Huai yh...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e07b7156 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e07b7156 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e07b7156 Branch: refs/heads/branch-1.4 Commit: e07b71560cb791c701ad28adff02f5db6b490136 Parents: 89fe93f Author: Cheng Hao hao.ch...@intel.com Authored: Wed May 27 14:21:00 2015 -0700 Committer: Yin Huai yh...@databricks.com Committed: Wed May 27 14:31:18 2015 -0700 -- .../spark/sql/hive/thriftserver/CliSuite.scala | 41 ++-- .../org/apache/spark/sql/hive/HiveContext.scala | 18 +++-- .../org/apache/spark/sql/hive/TableReader.scala | 2 +- 3 files changed, 53 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e07b7156/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala -- diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index b070fa8..cc07db8 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++
spark git commit: [SPARK-7853] [SQL] Fixes a class loader issue in Spark SQL
Repository: spark Updated Branches: refs/heads/master b97ddff00 - db3fd054f [SPARK-7853] [SQL] Fixes a class loader issue in Spark SQL This PR is based on PR #6396 authored by chenghao-intel. Essentially, Spark SQL should use context classloader to load SerDe classes. yhuai helped updating the test case, and I fixed a bug in the original `CliSuite`: while testing the CLI tool with `runCliWithin`, we don't append `\n` to the last query, thus the last query is never executed. Original PR description is pasted below. ``` bin/spark-sql --jars ./sql/hive/src/test/resources/hive-hcatalog-core-0.13.1.jar CREATE TABLE t1(a string, b string) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'; ``` Throws exception like ``` 15/05/26 00:16:33 ERROR SparkSQLDriver: Failed in [CREATE TABLE t1(a string, b string) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'] org.apache.spark.sql.execution.QueryExecutionException: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Cannot validate serde: org.apache.hive.hcatalog.data.JsonSerDe at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$runHive$1.apply(ClientWrapper.scala:333) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$runHive$1.apply(ClientWrapper.scala:310) at org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:139) at org.apache.spark.sql.hive.client.ClientWrapper.runHive(ClientWrapper.scala:310) at org.apache.spark.sql.hive.client.ClientWrapper.runSqlHive(ClientWrapper.scala:300) at org.apache.spark.sql.hive.HiveContext.runSqlHive(HiveContext.scala:457) at org.apache.spark.sql.hive.execution.HiveNativeCommand.run(HiveNativeCommand.scala:33) at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:57) at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:57) at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:68) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:148) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:87) at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:922) at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:922) at org.apache.spark.sql.DataFrame.init(DataFrame.scala:147) at org.apache.spark.sql.DataFrame.init(DataFrame.scala:131) at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51) at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:727) at org.apache.spark.sql.hive.thriftserver.AbstractSparkSQLDriver.run(AbstractSparkSQLDriver.scala:57) ``` Author: Cheng Hao hao.ch...@intel.com Author: Cheng Lian l...@databricks.com Author: Yin Huai yh...@databricks.com Closes #6435 from liancheng/classLoader and squashes the following commits: d4c4845 [Cheng Lian] Fixes CliSuite 75e80e2 [Yin Huai] Update the fix. 
fd26533 [Cheng Hao] scalastyle dd78775 [Cheng Hao] workaround for classloader of IsolatedClientLoader Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/db3fd054 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/db3fd054 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/db3fd054 Branch: refs/heads/master Commit: db3fd054f240c7e38aba0732e471df65cd14011a Parents: b97ddff Author: Cheng Hao hao.ch...@intel.com Authored: Wed May 27 14:21:00 2015 -0700 Committer: Yin Huai yh...@databricks.com Committed: Wed May 27 14:21:00 2015 -0700 -- .../spark/sql/hive/thriftserver/CliSuite.scala | 41 ++-- .../org/apache/spark/sql/hive/HiveContext.scala | 18 +++-- .../org/apache/spark/sql/hive/TableReader.scala | 2 +- 3 files changed, 53 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/db3fd054/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala -- diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index b070fa8..cc07db8 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -25,11 +25,15 @@ import scala.concurrent.{Await,
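The gist of the fix above is that SerDe classes shipped with `--jars` are visible to the thread context classloader, not necessarily to the classloader that loaded Spark's own Hive client classes. A minimal sketch of that lookup pattern, using a hypothetical helper name (not the actual Spark code):

```scala
// Illustrative only: resolve a SerDe class through the context classloader,
// which sees user jars added via --jars, falling back to this class's own loader.
object SerdeClassResolver {
  def loadSerdeClass(className: String): Class[_] = {
    val loader = Option(Thread.currentThread().getContextClassLoader)
      .getOrElse(getClass.getClassLoader)
    Class.forName(className, true, loader)
  }
}

// e.g. SerdeClassResolver.loadSerdeClass("org.apache.hive.hcatalog.data.JsonSerDe")
```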
spark git commit: [SPARK-7535] [.1] [MLLIB] minor changes to the pipeline API
Repository: spark Updated Branches: refs/heads/branch-1.4 d0bd68ff8 - 34e233f9c [SPARK-7535] [.1] [MLLIB] minor changes to the pipeline API 1. removed `Params.validateParams(extra)` 2. added `Evaluate.evaluate(dataset, paramPairs*)` 3. updated `RegressionEvaluator` doc jkbradley Author: Xiangrui Meng m...@databricks.com Closes #6392 from mengxr/SPARK-7535.1 and squashes the following commits: 5ff5af8 [Xiangrui Meng] add unit test for CV.validateParams f1f8369 [Xiangrui Meng] update CV.validateParams() to test estimatorParamMaps 607445d [Xiangrui Meng] merge master 8716f5f [Xiangrui Meng] specify default metric name in RegressionEvaluator e4e5631 [Xiangrui Meng] update RegressionEvaluator doc 801e864 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into SPARK-7535.1 fcbd3e2 [Xiangrui Meng] Merge branch 'master' into SPARK-7535.1 2192316 [Xiangrui Meng] remove validateParams(extra); add evaluate(dataset, extra*) (cherry picked from commit a9f1c0c57b9be586dbada09dab91dcfce31141d9) Signed-off-by: Xiangrui Meng m...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/34e233f9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/34e233f9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/34e233f9 Branch: refs/heads/branch-1.4 Commit: 34e233f9ce8d5fa616ce981e0e842b4026fb9824 Parents: d0bd68f Author: Xiangrui Meng m...@databricks.com Authored: Tue May 26 23:51:32 2015 -0700 Committer: Xiangrui Meng m...@databricks.com Committed: Tue May 26 23:51:39 2015 -0700 -- .../scala/org/apache/spark/ml/Pipeline.scala| 9 ++-- .../ml/evaluation/RegressionEvaluator.scala | 4 +- .../org/apache/spark/ml/param/params.scala | 13 - .../apache/spark/ml/tuning/CrossValidator.scala | 23 ++--- .../org/apache/spark/ml/param/ParamsSuite.scala | 2 +- .../spark/ml/tuning/CrossValidatorSuite.scala | 52 +++- 6 files changed, 71 insertions(+), 32 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/34e233f9/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index 9da3ff6..11a4722 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -97,12 +97,9 @@ class Pipeline(override val uid: String) extends Estimator[PipelineModel] { /** @group getParam */ def getStages: Array[PipelineStage] = $(stages).clone() - override def validateParams(paramMap: ParamMap): Unit = { -val map = extractParamMap(paramMap) -getStages.foreach { - case pStage: Params = pStage.validateParams(map) - case _ = -} + override def validateParams(): Unit = { +super.validateParams() +$(stages).foreach(_.validateParams()) } /** http://git-wip-us.apache.org/repos/asf/spark/blob/34e233f9/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala index 1771177..abb1b35 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala @@ -36,8 +36,8 @@ final class RegressionEvaluator(override val uid: String) def this() = this(Identifiable.randomUID(regEval)) /** - * param for metric name in evaluation - * @group param supports mse, rmse, r2, mae as 
valid metric names. + * param for metric name in evaluation (supports `rmse` (default), `mse`, `r2`, and `mae`) + * @group param */ val metricName: Param[String] = { val allowedParams = ParamValidators.inArray(Array(mse, rmse, r2, mae)) http://git-wip-us.apache.org/repos/asf/spark/blob/34e233f9/mllib/src/main/scala/org/apache/spark/ml/param/params.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index 1afa59c..473488d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -334,19 +334,6 @@ trait Params extends Identifiable with Serializable { } /** - * Validates parameter values stored internally plus the input parameter map. - * Raises an exception if any parameter is invalid. - * - * This only needs to check for interactions between
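As a rough usage sketch of the evaluator side of this change (the metric choice and column names below are illustrative assumptions, not taken from the patch):

```scala
import org.apache.spark.ml.evaluation.RegressionEvaluator

// metricName accepts "rmse" (default), "mse", "r2", or "mae"
val evaluator = new RegressionEvaluator()
  .setMetricName("mae")
  .setLabelCol("label")
  .setPredictionCol("prediction")

// predictions is assumed to be a DataFrame holding both columns:
// val mae = evaluator.evaluate(predictions)
```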
spark git commit: [SPARK-7896] Allow ChainedBuffer to store more than 2 GB
Repository: spark Updated Branches: refs/heads/branch-1.4 9da4b6bcb - d83c2ee84 [SPARK-7896] Allow ChainedBuffer to store more than 2 GB Author: Sandy Ryza sa...@cloudera.com Closes #6440 from sryza/sandy-spark-7896 and squashes the following commits: 49d8a0d [Sandy Ryza] Fix bug introduced when reading over record boundaries 6006856 [Sandy Ryza] Fix overflow issues 006b4b2 [Sandy Ryza] Fix scalastyle by removing non ascii characters 8b000ca [Sandy Ryza] Add ascii art to describe layout of data in metaBuffer f2053c0 [Sandy Ryza] Fix negative overflow issue 0368c78 [Sandy Ryza] Initialize size as 0 a5a4820 [Sandy Ryza] Use explicit types for all numbers in ChainedBuffer b7e0213 [Sandy Ryza] SPARK-7896. Allow ChainedBuffer to store more than 2 GB (cherry picked from commit bd11b01ebaf62df8b0d8c0b63b51b66e58f50960) Signed-off-by: Patrick Wendell patr...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d83c2ee8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d83c2ee8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d83c2ee8 Branch: refs/heads/branch-1.4 Commit: d83c2ee84894b554aab0d88bf99ea2902f482176 Parents: 9da4b6b Author: Sandy Ryza sa...@cloudera.com Authored: Wed May 27 22:23:22 2015 -0700 Committer: Patrick Wendell patr...@databricks.com Committed: Wed May 27 22:29:10 2015 -0700 -- .../spark/util/collection/ChainedBuffer.scala | 46 +- .../PartitionedSerializedPairBuffer.scala | 51 2 files changed, 55 insertions(+), 42 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d83c2ee8/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala b/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala index a60bffe..516aaa4 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala @@ -28,11 +28,13 @@ import scala.collection.mutable.ArrayBuffer * occupy a contiguous segment of memory. */ private[spark] class ChainedBuffer(chunkSize: Int) { - private val chunkSizeLog2 = (math.log(chunkSize) / math.log(2)).toInt - assert(math.pow(2, chunkSizeLog2).toInt == chunkSize, + + private val chunkSizeLog2: Int = java.lang.Long.numberOfTrailingZeros( +java.lang.Long.highestOneBit(chunkSize)) + assert((1 chunkSizeLog2) == chunkSize, sChainedBuffer chunk size $chunkSize must be a power of two) private val chunks: ArrayBuffer[Array[Byte]] = new ArrayBuffer[Array[Byte]]() - private var _size: Int = _ + private var _size: Long = 0 /** * Feed bytes from this buffer into a BlockObjectWriter. @@ -41,16 +43,16 @@ private[spark] class ChainedBuffer(chunkSize: Int) { * @param os OutputStream to read into. * @param len Number of bytes to read. 
*/ - def read(pos: Int, os: OutputStream, len: Int): Unit = { + def read(pos: Long, os: OutputStream, len: Int): Unit = { if (pos + len _size) { throw new IndexOutOfBoundsException( sRead of $len bytes at position $pos would go past size ${_size} of buffer) } -var chunkIndex = pos chunkSizeLog2 -var posInChunk = pos - (chunkIndex chunkSizeLog2) -var written = 0 +var chunkIndex: Int = (pos chunkSizeLog2).toInt +var posInChunk: Int = (pos - (chunkIndex.toLong chunkSizeLog2)).toInt +var written: Int = 0 while (written len) { - val toRead = math.min(len - written, chunkSize - posInChunk) + val toRead: Int = math.min(len - written, chunkSize - posInChunk) os.write(chunks(chunkIndex), posInChunk, toRead) written += toRead chunkIndex += 1 @@ -66,16 +68,16 @@ private[spark] class ChainedBuffer(chunkSize: Int) { * @param offs Offset in the byte array to read to. * @param len Number of bytes to read. */ - def read(pos: Int, bytes: Array[Byte], offs: Int, len: Int): Unit = { + def read(pos: Long, bytes: Array[Byte], offs: Int, len: Int): Unit = { if (pos + len _size) { throw new IndexOutOfBoundsException( sRead of $len bytes at position $pos would go past size of buffer) } -var chunkIndex = pos chunkSizeLog2 -var posInChunk = pos - (chunkIndex chunkSizeLog2) -var written = 0 +var chunkIndex: Int = (pos chunkSizeLog2).toInt +var posInChunk: Int = (pos - (chunkIndex.toLong chunkSizeLog2)).toInt +var written: Int = 0 while (written len) { - val toRead = math.min(len - written, chunkSize - posInChunk) + val toRead: Int = math.min(len - written, chunkSize - posInChunk)
spark git commit: [SPARK-7896] Allow ChainedBuffer to store more than 2 GB
Repository: spark Updated Branches: refs/heads/master 852f4de2d - bd11b01eb [SPARK-7896] Allow ChainedBuffer to store more than 2 GB Author: Sandy Ryza sa...@cloudera.com Closes #6440 from sryza/sandy-spark-7896 and squashes the following commits: 49d8a0d [Sandy Ryza] Fix bug introduced when reading over record boundaries 6006856 [Sandy Ryza] Fix overflow issues 006b4b2 [Sandy Ryza] Fix scalastyle by removing non ascii characters 8b000ca [Sandy Ryza] Add ascii art to describe layout of data in metaBuffer f2053c0 [Sandy Ryza] Fix negative overflow issue 0368c78 [Sandy Ryza] Initialize size as 0 a5a4820 [Sandy Ryza] Use explicit types for all numbers in ChainedBuffer b7e0213 [Sandy Ryza] SPARK-7896. Allow ChainedBuffer to store more than 2 GB Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bd11b01e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bd11b01e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bd11b01e Branch: refs/heads/master Commit: bd11b01ebaf62df8b0d8c0b63b51b66e58f50960 Parents: 852f4de Author: Sandy Ryza sa...@cloudera.com Authored: Wed May 27 22:23:22 2015 -0700 Committer: Patrick Wendell patr...@databricks.com Committed: Wed May 27 22:28:43 2015 -0700 -- .../spark/util/collection/ChainedBuffer.scala | 46 +- .../PartitionedSerializedPairBuffer.scala | 51 2 files changed, 55 insertions(+), 42 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bd11b01e/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala b/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala index a60bffe..516aaa4 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala @@ -28,11 +28,13 @@ import scala.collection.mutable.ArrayBuffer * occupy a contiguous segment of memory. */ private[spark] class ChainedBuffer(chunkSize: Int) { - private val chunkSizeLog2 = (math.log(chunkSize) / math.log(2)).toInt - assert(math.pow(2, chunkSizeLog2).toInt == chunkSize, + + private val chunkSizeLog2: Int = java.lang.Long.numberOfTrailingZeros( +java.lang.Long.highestOneBit(chunkSize)) + assert((1 chunkSizeLog2) == chunkSize, sChainedBuffer chunk size $chunkSize must be a power of two) private val chunks: ArrayBuffer[Array[Byte]] = new ArrayBuffer[Array[Byte]]() - private var _size: Int = _ + private var _size: Long = 0 /** * Feed bytes from this buffer into a BlockObjectWriter. @@ -41,16 +43,16 @@ private[spark] class ChainedBuffer(chunkSize: Int) { * @param os OutputStream to read into. * @param len Number of bytes to read. 
*/ - def read(pos: Int, os: OutputStream, len: Int): Unit = { + def read(pos: Long, os: OutputStream, len: Int): Unit = { if (pos + len _size) { throw new IndexOutOfBoundsException( sRead of $len bytes at position $pos would go past size ${_size} of buffer) } -var chunkIndex = pos chunkSizeLog2 -var posInChunk = pos - (chunkIndex chunkSizeLog2) -var written = 0 +var chunkIndex: Int = (pos chunkSizeLog2).toInt +var posInChunk: Int = (pos - (chunkIndex.toLong chunkSizeLog2)).toInt +var written: Int = 0 while (written len) { - val toRead = math.min(len - written, chunkSize - posInChunk) + val toRead: Int = math.min(len - written, chunkSize - posInChunk) os.write(chunks(chunkIndex), posInChunk, toRead) written += toRead chunkIndex += 1 @@ -66,16 +68,16 @@ private[spark] class ChainedBuffer(chunkSize: Int) { * @param offs Offset in the byte array to read to. * @param len Number of bytes to read. */ - def read(pos: Int, bytes: Array[Byte], offs: Int, len: Int): Unit = { + def read(pos: Long, bytes: Array[Byte], offs: Int, len: Int): Unit = { if (pos + len _size) { throw new IndexOutOfBoundsException( sRead of $len bytes at position $pos would go past size of buffer) } -var chunkIndex = pos chunkSizeLog2 -var posInChunk = pos - (chunkIndex chunkSizeLog2) -var written = 0 +var chunkIndex: Int = (pos chunkSizeLog2).toInt +var posInChunk: Int = (pos - (chunkIndex.toLong chunkSizeLog2)).toInt +var written: Int = 0 while (written len) { - val toRead = math.min(len - written, chunkSize - posInChunk) + val toRead: Int = math.min(len - written, chunkSize - posInChunk) System.arraycopy(chunks(chunkIndex), posInChunk, bytes, offs + written, toRead) written += toRead chunkIndex += 1 @@ -91,22 +93,22 @@
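The core of the change is the index arithmetic: a `Long` byte position is split into a chunk index and an offset inside that chunk, with the chunk size required to be a power of two. A small standalone sketch of that arithmetic (the concrete numbers are illustrative):

```scala
val chunkSize: Int = 4 * 1024 * 1024     // 4 MB chunks
val chunkSizeLog2: Int =
  java.lang.Long.numberOfTrailingZeros(java.lang.Long.highestOneBit(chunkSize))
require((1 << chunkSizeLog2) == chunkSize, "chunk size must be a power of two")

val pos: Long = 5L * 1024 * 1024 * 1024  // a position beyond 2 GB
val chunkIndex: Int = (pos >> chunkSizeLog2).toInt                        // 1280
val posInChunk: Int = (pos - (chunkIndex.toLong << chunkSizeLog2)).toInt  // 0
```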
spark git commit: [SPARK-7873] Allow KryoSerializerInstance to create multiple streams at the same time
Repository: spark Updated Branches: refs/heads/master 3c1f1baaf - 852f4de2d [SPARK-7873] Allow KryoSerializerInstance to create multiple streams at the same time This is a somewhat obscure bug, but I think that it will seriously impact KryoSerializer users who use custom registrators which disabled auto-reset. When auto-reset is disabled, then this breaks things in some of our shuffle paths which actually end up creating multiple OutputStreams from the same shared SerializerInstance (which is unsafe). This was introduced by a patch (SPARK-3386) which enables serializer re-use in some of the shuffle paths, since constructing new serializer instances is actually pretty costly for KryoSerializer. We had already fixed another corner-case (SPARK-7766) bug related to this, but missed this one. I think that the root problem here is that KryoSerializerInstance can be used in a way which is unsafe even within a single thread, e.g. by creating multiple open OutputStreams from the same instance or by interleaving deserialize and deserializeStream calls. I considered a smaller patch which adds assertions to guard against this type of misuse but abandoned that approach after I realized how convoluted the Scaladoc became. This patch fixes this bug by making it legal to create multiple streams from the same KryoSerializerInstance. Internally, KryoSerializerInstance now implements a `borrowKryo()` / `releaseKryo()` API that's backed by a pool of capacity 1. Each call to a KryoSerializerInstance method will borrow the Kryo, do its work, then release the serializer instance back to the pool. If the pool is empty and we need an instance, it will allocate a new Kryo on-demand. This makes it safe for multiple OutputStreams to be opened from the same serializer. If we try to release a Kryo back to the pool but the pool already contains a Kryo, then we'll just discard the new Kryo. I don't think there's a clear benefit to having a larger pool since our usages tend to fall into two cases, a) where we only create a single OutputStream and b) where we create a huge number of OutputStreams with the same lifecycle, then destroy the KryoSerializerInstance (this is what's happening in the bypassMergeSort code pat h that my regression test hits). Author: Josh Rosen joshro...@databricks.com Closes #6415 from JoshRosen/SPARK-7873 and squashes the following commits: 00b402e [Josh Rosen] Initialize eagerly to fix a failing test ba55d20 [Josh Rosen] Add explanatory comments 3f1da96 [Josh Rosen] Guard against duplicate close() ab457ca [Josh Rosen] Sketch a loan/release based solution. 9816e8f [Josh Rosen] Add a failing test showing how deserialize() and deserializeStream() can interfere. 
7350886 [Josh Rosen] Add failing regression test for SPARK-7873 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/852f4de2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/852f4de2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/852f4de2 Branch: refs/heads/master Commit: 852f4de2d3d0c5fff2fa66000a7a3088bb3dbe74 Parents: 3c1f1ba Author: Josh Rosen joshro...@databricks.com Authored: Wed May 27 20:19:53 2015 -0700 Committer: Patrick Wendell patr...@databricks.com Committed: Wed May 27 20:19:53 2015 -0700 -- .../spark/serializer/KryoSerializer.scala | 129 +++ .../apache/spark/serializer/Serializer.scala| 5 + .../spark/serializer/KryoSerializerSuite.scala | 37 +- 3 files changed, 147 insertions(+), 24 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/852f4de2/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala -- diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index 2179579..3f90988 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -17,8 +17,9 @@ package org.apache.spark.serializer -import java.io.{EOFException, InputStream, OutputStream} +import java.io.{EOFException, IOException, InputStream, OutputStream} import java.nio.ByteBuffer +import javax.annotation.Nullable import scala.reflect.ClassTag @@ -136,21 +137,45 @@ class KryoSerializer(conf: SparkConf) } private[spark] -class KryoSerializationStream(kryo: Kryo, outStream: OutputStream) extends SerializationStream { - val output = new KryoOutput(outStream) +class KryoSerializationStream( +serInstance: KryoSerializerInstance, +outStream: OutputStream) extends SerializationStream { + + private[this] var output: KryoOutput = new KryoOutput(outStream) + private[this] var kryo:
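A minimal sketch of the borrow/release pattern described above, with hypothetical names rather than the actual KryoSerializerInstance internals: a capacity-one pool that allocates on demand when empty and discards extra instances on release.

```scala
class CapacityOnePool[T <: AnyRef](newInstance: () => T) {
  // Eagerly initialized so the common single-stream case never allocates twice.
  private[this] var cached: T = newInstance()

  def borrow(): T = {
    if (cached != null) { val r = cached; cached = null.asInstanceOf[T]; r }
    else newInstance()                      // pool is empty: allocate on demand
  }

  def release(instance: T): Unit = {
    if (cached == null) cached = instance   // put it back
    // else: pool already holds one; the extra instance is simply discarded
  }
}
```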
spark git commit: [SPARK-7873] Allow KryoSerializerInstance to create multiple streams at the same time
Repository: spark Updated Branches: refs/heads/branch-1.4 bd9173c14 - 9da4b6bcb [SPARK-7873] Allow KryoSerializerInstance to create multiple streams at the same time This is a somewhat obscure bug, but I think that it will seriously impact KryoSerializer users who use custom registrators which disabled auto-reset. When auto-reset is disabled, then this breaks things in some of our shuffle paths which actually end up creating multiple OutputStreams from the same shared SerializerInstance (which is unsafe). This was introduced by a patch (SPARK-3386) which enables serializer re-use in some of the shuffle paths, since constructing new serializer instances is actually pretty costly for KryoSerializer. We had already fixed another corner-case (SPARK-7766) bug related to this, but missed this one. I think that the root problem here is that KryoSerializerInstance can be used in a way which is unsafe even within a single thread, e.g. by creating multiple open OutputStreams from the same instance or by interleaving deserialize and deserializeStream calls. I considered a smaller patch which adds assertions to guard against this type of misuse but abandoned that approach after I realized how convoluted the Scaladoc became. This patch fixes this bug by making it legal to create multiple streams from the same KryoSerializerInstance. Internally, KryoSerializerInstance now implements a `borrowKryo()` / `releaseKryo()` API that's backed by a pool of capacity 1. Each call to a KryoSerializerInstance method will borrow the Kryo, do its work, then release the serializer instance back to the pool. If the pool is empty and we need an instance, it will allocate a new Kryo on-demand. This makes it safe for multiple OutputStreams to be opened from the same serializer. If we try to release a Kryo back to the pool but the pool already contains a Kryo, then we'll just discard the new Kryo. I don't think there's a clear benefit to having a larger pool since our usages tend to fall into two cases, a) where we only create a single OutputStream and b) where we create a huge number of OutputStreams with the same lifecycle, then destroy the KryoSerializerInstance (this is what's happening in the bypassMergeSort code pat h that my regression test hits). Author: Josh Rosen joshro...@databricks.com Closes #6415 from JoshRosen/SPARK-7873 and squashes the following commits: 00b402e [Josh Rosen] Initialize eagerly to fix a failing test ba55d20 [Josh Rosen] Add explanatory comments 3f1da96 [Josh Rosen] Guard against duplicate close() ab457ca [Josh Rosen] Sketch a loan/release based solution. 9816e8f [Josh Rosen] Add a failing test showing how deserialize() and deserializeStream() can interfere. 
7350886 [Josh Rosen] Add failing regression test for SPARK-7873 (cherry picked from commit 852f4de2d3d0c5fff2fa66000a7a3088bb3dbe74) Signed-off-by: Patrick Wendell patr...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9da4b6bc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9da4b6bc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9da4b6bc Branch: refs/heads/branch-1.4 Commit: 9da4b6bcbb0340fe6f81698451348feb2d87f0ba Parents: bd9173c Author: Josh Rosen joshro...@databricks.com Authored: Wed May 27 20:19:53 2015 -0700 Committer: Patrick Wendell patr...@databricks.com Committed: Wed May 27 20:20:01 2015 -0700 -- .../spark/serializer/KryoSerializer.scala | 129 +++ .../apache/spark/serializer/Serializer.scala| 5 + .../spark/serializer/KryoSerializerSuite.scala | 37 +- 3 files changed, 147 insertions(+), 24 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9da4b6bc/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala -- diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index 2179579..3f90988 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -17,8 +17,9 @@ package org.apache.spark.serializer -import java.io.{EOFException, InputStream, OutputStream} +import java.io.{EOFException, IOException, InputStream, OutputStream} import java.nio.ByteBuffer +import javax.annotation.Nullable import scala.reflect.ClassTag @@ -136,21 +137,45 @@ class KryoSerializer(conf: SparkConf) } private[spark] -class KryoSerializationStream(kryo: Kryo, outStream: OutputStream) extends SerializationStream { - val output = new KryoOutput(outStream) +class KryoSerializationStream( +serInstance: KryoSerializerInstance, +outStream:
spark git commit: [SPARK-7897][SQL] Use DecimalType to represent unsigned bigint in JDBCRDD
Repository: spark Updated Branches: refs/heads/branch-1.4 e07b71560 - b4ecbce65 [SPARK-7897][SQL] Use DecimalType to represent unsigned bigint in JDBCRDD JIRA: https://issues.apache.org/jira/browse/SPARK-7897 Author: Liang-Chi Hsieh vii...@gmail.com Closes #6438 from viirya/jdbc_unsigned_bigint and squashes the following commits: ccb3c3f [Liang-Chi Hsieh] Use DecimalType to represent unsigned bigint. (cherry picked from commit a1e092eae57172909ff2af06d8b461742595734c) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4ecbce6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4ecbce6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4ecbce6 Branch: refs/heads/branch-1.4 Commit: b4ecbce65c9329e2ed549b04752358a903ad983a Parents: e07b715 Author: Liang-Chi Hsieh vii...@gmail.com Authored: Wed May 27 18:51:36 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Wed May 27 18:51:42 2015 -0700 -- sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b4ecbce6/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index 244bd3e..88f1b02 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -53,7 +53,7 @@ private[sql] object JDBCRDD extends Logging { signed: Boolean): DataType = { val answer = sqlType match { case java.sql.Types.ARRAY = null - case java.sql.Types.BIGINT= LongType + case java.sql.Types.BIGINT= if (signed) { LongType } else { DecimalType.Unlimited } case java.sql.Types.BINARY= BinaryType case java.sql.Types.BIT = BooleanType // @see JdbcDialect for quirks case java.sql.Types.BLOB = BinaryType - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
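For context on why `DecimalType.Unlimited` is used here: an unsigned BIGINT column can hold values up to 2^64 - 1, which does not fit in a signed `LongType`. A quick illustration (values only, not Spark code):

```scala
val maxSignedLong     = BigDecimal(Long.MaxValue)   //  9223372036854775807
val maxUnsignedBigint = BigDecimal(2).pow(64) - 1   // 18446744073709551615
assert(maxUnsignedBigint > maxSignedLong)           // overflows LongType, so a decimal is needed
```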
spark git commit: [SPARK-7907] [SQL] [UI] Rename tab ThriftServer to SQL.
Repository: spark Updated Branches: refs/heads/branch-1.4 b4ecbce65 - bd9173c14 [SPARK-7907] [SQL] [UI] Rename tab ThriftServer to SQL. This PR has three changes: 1. Renaming the table of `ThriftServer` to `SQL`; 2. Renaming the title of the tab from `ThriftServer` to `JDBC/ODBC Server`; and 3. Renaming the title of the session page from `ThriftServer` to `JDBC/ODBC Session`. https://issues.apache.org/jira/browse/SPARK-7907 Author: Yin Huai yh...@databricks.com Closes #6448 from yhuai/JDBCServer and squashes the following commits: eadcc3d [Yin Huai] Update test. 9168005 [Yin Huai] Use SQL as the tab name. 221831e [Yin Huai] Rename ThriftServer to JDBCServer. (cherry picked from commit 3c1f1baaf003d50786d3eee1e288f4bac69096f2) Signed-off-by: Yin Huai yh...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bd9173c1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bd9173c1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bd9173c1 Branch: refs/heads/branch-1.4 Commit: bd9173c14c4a25b6f87797eae348634e7aa7f7ac Parents: b4ecbce Author: Yin Huai yh...@databricks.com Authored: Wed May 27 20:04:29 2015 -0700 Committer: Yin Huai yh...@databricks.com Committed: Wed May 27 20:04:57 2015 -0700 -- .../apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala | 4 ++-- .../spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala | 2 +- .../apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala | 4 +++- .../org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bd9173c1/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala index 6a2be4a..7c48ff4 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala @@ -47,7 +47,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage( /h4 ++ generateSessionStatsTable() ++ generateSQLStatsTable() -UIUtils.headerSparkPage(ThriftServer, content, parent, Some(5000)) +UIUtils.headerSparkPage(JDBC/ODBC Server, content, parent, Some(5000)) } /** Generate basic stats of the thrift server program */ @@ -143,7 +143,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage( val headerRow = Seq(User, IP, Session ID, Start Time, Finish Time, Duration, Total Execute) def generateDataRow(session: SessionInfo): Seq[Node] = { -val sessionLink = %s/ThriftServer/session?id=%s +val sessionLink = %s/sql/session?id=%s .format(UIUtils.prependBaseUri(parent.basePath), session.sessionId) tr td {session.userName} /td http://git-wip-us.apache.org/repos/asf/spark/blob/bd9173c1/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala index 33ba038..d9d66dc 100644 --- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala @@ -55,7 +55,7 @@ private[ui] class ThriftServerSessionPage(parent: ThriftServerTab) Total run {sessionStat._2.totalExecution} SQL /h4 ++ generateSQLStatsTable(sessionStat._2.sessionId) -UIUtils.headerSparkPage(ThriftServer, content, parent, Some(5000)) +UIUtils.headerSparkPage(JDBC/ODBC Session, content, parent, Some(5000)) } /** Generate basic stats of the streaming program */ http://git-wip-us.apache.org/repos/asf/spark/blob/bd9173c1/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala
[1/2] spark git commit: Preparing Spark release v1.4.0-rc3
Repository: spark Updated Branches: refs/heads/branch-1.4 d83c2ee84 - 7c342bdd9 Preparing Spark release v1.4.0-rc3 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4983dfc8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4983dfc8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4983dfc8 Branch: refs/heads/branch-1.4 Commit: 4983dfc878cc58d182d0e51c8adc3d00c985362a Parents: d83c2ee Author: Patrick Wendell pwend...@gmail.com Authored: Wed May 27 22:36:23 2015 -0700 Committer: Patrick Wendell pwend...@gmail.com Committed: Wed May 27 22:36:23 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml| 2 +- yarn/pom.xml | 2 +- 30 files changed, 30 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4983dfc8/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 626c857..b8a821d 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.0-SNAPSHOT/version +version1.4.0/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/4983dfc8/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 1f3dec9..c1aa32b 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.0-SNAPSHOT/version +version1.4.0/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/4983dfc8/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index bfa49d0..8acb923 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.0-SNAPSHOT/version +version1.4.0/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/4983dfc8/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 5b04b4f..706a97d 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.0-SNAPSHOT/version +version1.4.0/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/4983dfc8/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 1f3e619..e8784eb 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.0-SNAPSHOT/version +version1.4.0/version relativePath../../pom.xml/relativePath /parent 
http://git-wip-us.apache.org/repos/asf/spark/blob/4983dfc8/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 8df7edb..1794f3e 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.4.0-rc3 [created] 4983dfc87
spark git commit: [SPARK-7907] [SQL] [UI] Rename tab ThriftServer to SQL.
Repository: spark Updated Branches: refs/heads/master a1e092eae - 3c1f1baaf [SPARK-7907] [SQL] [UI] Rename tab ThriftServer to SQL. This PR has three changes: 1. Renaming the table of `ThriftServer` to `SQL`; 2. Renaming the title of the tab from `ThriftServer` to `JDBC/ODBC Server`; and 3. Renaming the title of the session page from `ThriftServer` to `JDBC/ODBC Session`. https://issues.apache.org/jira/browse/SPARK-7907 Author: Yin Huai yh...@databricks.com Closes #6448 from yhuai/JDBCServer and squashes the following commits: eadcc3d [Yin Huai] Update test. 9168005 [Yin Huai] Use SQL as the tab name. 221831e [Yin Huai] Rename ThriftServer to JDBCServer. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3c1f1baa Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3c1f1baa Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3c1f1baa Branch: refs/heads/master Commit: 3c1f1baaf003d50786d3eee1e288f4bac69096f2 Parents: a1e092e Author: Yin Huai yh...@databricks.com Authored: Wed May 27 20:04:29 2015 -0700 Committer: Yin Huai yh...@databricks.com Committed: Wed May 27 20:04:29 2015 -0700 -- .../apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala | 4 ++-- .../spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala | 2 +- .../apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala | 4 +++- .../org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3c1f1baa/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala index 6a2be4a..7c48ff4 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala @@ -47,7 +47,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage( /h4 ++ generateSessionStatsTable() ++ generateSQLStatsTable() -UIUtils.headerSparkPage(ThriftServer, content, parent, Some(5000)) +UIUtils.headerSparkPage(JDBC/ODBC Server, content, parent, Some(5000)) } /** Generate basic stats of the thrift server program */ @@ -143,7 +143,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage( val headerRow = Seq(User, IP, Session ID, Start Time, Finish Time, Duration, Total Execute) def generateDataRow(session: SessionInfo): Seq[Node] = { -val sessionLink = %s/ThriftServer/session?id=%s +val sessionLink = %s/sql/session?id=%s .format(UIUtils.prependBaseUri(parent.basePath), session.sessionId) tr td {session.userName} /td http://git-wip-us.apache.org/repos/asf/spark/blob/3c1f1baa/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala index 33ba038..d9d66dc 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala +++ 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala @@ -55,7 +55,7 @@ private[ui] class ThriftServerSessionPage(parent: ThriftServerTab) Total run {sessionStat._2.totalExecution} SQL /h4 ++ generateSQLStatsTable(sessionStat._2.sessionId) -UIUtils.headerSparkPage(ThriftServer, content, parent, Some(5000)) +UIUtils.headerSparkPage(JDBC/ODBC Session, content, parent, Some(5000)) } /** Generate basic stats of the streaming program */ http://git-wip-us.apache.org/repos/asf/spark/blob/3c1f1baa/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala index 343031f..94fd8a6 100644 ---
spark git commit: [DOCS] Fix typo in documentation for Java UDF registration
Repository: spark Updated Branches: refs/heads/master bd11b01eb - 35410614d [DOCS] Fix typo in documentation for Java UDF registration This contribution is my original work and I license the work to the project under the project's open source license Author: Matt Wise mw...@quixey.com Closes #6447 from wisematthew/fix-typo-in-java-udf-registration-doc and squashes the following commits: e7ef5f7 [Matt Wise] Fix typo in documentation for Java UDF registration Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/35410614 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/35410614 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/35410614 Branch: refs/heads/master Commit: 35410614deb7feea1c9d5cca00a6fa7970404f21 Parents: bd11b01 Author: Matt Wise mw...@quixey.com Authored: Wed May 27 22:39:19 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Wed May 27 22:39:19 2015 -0700 -- docs/sql-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/35410614/docs/sql-programming-guide.md -- diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 5b41c0e..ab646f6 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -1939,7 +1939,7 @@ sqlContext.udf.register(strLen, (s: String) = s.length()) div data-lang=java markdown=1 {% highlight java %} -sqlContext.udf().register(strLen, (String s) - { s.length(); }); +sqlContext.udf().register(strLen, (String s) - s.length(), DataTypes.IntegerType); {% endhighlight %} /div - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
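For reference, the Scala side of the same doc section registers the UDF as shown in the diff context above; a short usage sketch (the table and column names are assumptions for illustration):

```scala
sqlContext.udf.register("strLen", (s: String) => s.length())
val result = sqlContext.sql("SELECT name, strLen(name) FROM people")
```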
spark git commit: [DOCS] Fix typo in documentation for Java UDF registration
Repository: spark Updated Branches: refs/heads/branch-1.4 7c342bdd9 - 63be026da [DOCS] Fix typo in documentation for Java UDF registration This contribution is my original work and I license the work to the project under the project's open source license Author: Matt Wise mw...@quixey.com Closes #6447 from wisematthew/fix-typo-in-java-udf-registration-doc and squashes the following commits: e7ef5f7 [Matt Wise] Fix typo in documentation for Java UDF registration (cherry picked from commit 35410614deb7feea1c9d5cca00a6fa7970404f21) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/63be026d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/63be026d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/63be026d Branch: refs/heads/branch-1.4 Commit: 63be026da3ebf6b77f37f2e950e3b8f516bdfcaa Parents: 7c342bd Author: Matt Wise mw...@quixey.com Authored: Wed May 27 22:39:19 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Wed May 27 22:39:24 2015 -0700 -- docs/sql-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/63be026d/docs/sql-programming-guide.md -- diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 5b41c0e..ab646f6 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -1939,7 +1939,7 @@ sqlContext.udf.register(strLen, (s: String) = s.length()) div data-lang=java markdown=1 {% highlight java %} -sqlContext.udf().register(strLen, (String s) - { s.length(); }); +sqlContext.udf().register(strLen, (String s) - s.length(), DataTypes.IntegerType); {% endhighlight %} /div - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-7850][BUILD] Hive 0.12.0 profile in POM should be removed
Repository: spark Updated Branches: refs/heads/branch-1.4 34e233f9c - 4e12cec8a [SPARK-7850][BUILD] Hive 0.12.0 profile in POM should be removed I grep'ed hive-0.12.0 in the source code and removed all the profiles and doc references. Author: Cheolsoo Park cheols...@netflix.com Closes #6393 from piaozhexiu/SPARK-7850 and squashes the following commits: fb429ce [Cheolsoo Park] Remove hive-0.13.1 profile 82bf09a [Cheolsoo Park] Remove hive 0.12.0 shim code f3722da [Cheolsoo Park] Remove hive-0.12.0 profile and references from POM and build docs (cherry picked from commit 6dd645870d34d97ac992032bfd6cf39f20a0c50f) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4e12cec8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4e12cec8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4e12cec8 Branch: refs/heads/branch-1.4 Commit: 4e12cec8a18631b6de5d9bb6a4467178444d66a9 Parents: 34e233f Author: Cheolsoo Park cheols...@netflix.com Authored: Wed May 27 00:18:42 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Wed May 27 00:18:54 2015 -0700 -- docs/building-spark.md | 6 +- pom.xml | 16 -- .../spark/sql/hive/thriftserver/Shim12.scala| 278 --- sql/hive/pom.xml| 10 - .../org/apache/spark/sql/hive/Shim12.scala | 265 -- 5 files changed, 1 insertion(+), 574 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4e12cec8/docs/building-spark.md -- diff --git a/docs/building-spark.md b/docs/building-spark.md index 4dbccb9..3ca7f27 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -118,14 +118,10 @@ mvn -Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0 -Dyarn.version=2.2.0 -DskipTests # Building With Hive and JDBC Support To enable Hive integration for Spark SQL along with its JDBC server and CLI, add the `-Phive` and `Phive-thriftserver` profiles to your existing build options. -By default Spark will build with Hive 0.13.1 bindings. You can also build for -Hive 0.12.0 using the `-Phive-0.12.0` profile. +By default Spark will build with Hive 0.13.1 bindings. 
{% highlight bash %} # Apache Hadoop 2.4.X with Hive 13 support mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -Phive -Phive-thriftserver -DskipTests clean package - -# Apache Hadoop 2.4.X with Hive 12 support -mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -Phive -Phive-0.12.0 -Phive-thriftserver -DskipTests clean package {% endhighlight %} # Building for Scala 2.11 http://git-wip-us.apache.org/repos/asf/spark/blob/4e12cec8/pom.xml -- diff --git a/pom.xml b/pom.xml index 6f525b6..9f99e28 100644 --- a/pom.xml +++ b/pom.xml @@ -1753,22 +1753,6 @@ modulesql/hive-thriftserver/module /modules /profile -profile - idhive-0.12.0/id - properties -hive.version0.12.0-protobuf-2.5/hive.version -hive.version.short0.12.0/hive.version.short -derby.version10.4.2.0/derby.version - /properties -/profile -profile - idhive-0.13.1/id - properties -hive.version0.13.1a/hive.version -hive.version.short0.13.1/hive.version.short -derby.version10.10.1.1/derby.version - /properties -/profile profile idscala-2.10/id http://git-wip-us.apache.org/repos/asf/spark/blob/4e12cec8/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala -- diff --git a/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala b/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala deleted file mode 100644 index b3a79ba..000 --- a/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the License); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, - * WITHOUT WARRANTIES OR
spark git commit: [SPARK-7697][SQL] Use LongType for unsigned int in JDBCRDD
Repository: spark Updated Branches: refs/heads/master 6dd645870 - 4f98d7a7f [SPARK-7697][SQL] Use LongType for unsigned int in JDBCRDD JIRA: https://issues.apache.org/jira/browse/SPARK-7697 The reported problem case is mysql. But for h2 db, there is no unsigned int. So it is not able to add corresponding test. Author: Liang-Chi Hsieh vii...@gmail.com Closes #6229 from viirya/unsignedint_as_long and squashes the following commits: dc4b5d8 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into unsignedint_as_long 608695b [Liang-Chi Hsieh] Use LongType for unsigned int in JDBCRDD. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f98d7a7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f98d7a7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f98d7a7 Branch: refs/heads/master Commit: 4f98d7a7f1715273bc91f1903bb7e0f287cc7394 Parents: 6dd6458 Author: Liang-Chi Hsieh vii...@gmail.com Authored: Wed May 27 00:27:39 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Wed May 27 00:27:39 2015 -0700 -- .../main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4f98d7a7/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index be03a23..244bd3e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -46,7 +46,11 @@ private[sql] object JDBCRDD extends Logging { * @param sqlType - A field of java.sql.Types * @return The Catalyst type corresponding to sqlType. */ - private def getCatalystType(sqlType: Int, precision: Int, scale: Int): DataType = { + private def getCatalystType( + sqlType: Int, + precision: Int, + scale: Int, + signed: Boolean): DataType = { val answer = sqlType match { case java.sql.Types.ARRAY = null case java.sql.Types.BIGINT= LongType @@ -64,7 +68,7 @@ private[sql] object JDBCRDD extends Logging { case java.sql.Types.DISTINCT = null case java.sql.Types.DOUBLE= DoubleType case java.sql.Types.FLOAT = FloatType - case java.sql.Types.INTEGER = IntegerType + case java.sql.Types.INTEGER = if (signed) { IntegerType } else { LongType } case java.sql.Types.JAVA_OBJECT = null case java.sql.Types.LONGNVARCHAR = StringType case java.sql.Types.LONGVARBINARY = BinaryType @@ -123,11 +127,12 @@ private[sql] object JDBCRDD extends Logging { val typeName = rsmd.getColumnTypeName(i + 1) val fieldSize = rsmd.getPrecision(i + 1) val fieldScale = rsmd.getScale(i + 1) + val isSigned = rsmd.isSigned(i + 1) val nullable = rsmd.isNullable(i + 1) != ResultSetMetaData.columnNoNulls val metadata = new MetadataBuilder().putString(name, columnName) val columnType = dialect.getCatalystType(dataType, typeName, fieldSize, metadata).getOrElse( - getCatalystType(dataType, fieldSize, fieldScale)) + getCatalystType(dataType, fieldSize, fieldScale, isSigned)) fields(i) = StructField(columnName, columnType, nullable, metadata.build()) i = i + 1 } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
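Analogous to the unsigned BIGINT case above, the reason for checking `isSigned` here is that an unsigned 32-bit INT can exceed `Int.MaxValue`, so it is widened to `LongType`. A small illustration (values only):

```scala
val maxSignedInt: Long   = Int.MaxValue.toLong   // 2147483647
val maxUnsignedInt: Long = (1L << 32) - 1        // 4294967295
assert(maxUnsignedInt > maxSignedInt)            // too big for IntegerType, fits in LongType
```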
spark git commit: [SPARK-7697][SQL] Use LongType for unsigned int in JDBCRDD
Repository: spark Updated Branches: refs/heads/branch-1.4 4e12cec8a - 01c3ef536 [SPARK-7697][SQL] Use LongType for unsigned int in JDBCRDD JIRA: https://issues.apache.org/jira/browse/SPARK-7697 The reported problem case is mysql. But for h2 db, there is no unsigned int. So it is not able to add corresponding test. Author: Liang-Chi Hsieh vii...@gmail.com Closes #6229 from viirya/unsignedint_as_long and squashes the following commits: dc4b5d8 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into unsignedint_as_long 608695b [Liang-Chi Hsieh] Use LongType for unsigned int in JDBCRDD. (cherry picked from commit 4f98d7a7f1715273bc91f1903bb7e0f287cc7394) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/01c3ef53 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/01c3ef53 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/01c3ef53 Branch: refs/heads/branch-1.4 Commit: 01c3ef536d60f21f4a63c76ffe4dad2fecaa797e Parents: 4e12cec Author: Liang-Chi Hsieh vii...@gmail.com Authored: Wed May 27 00:27:39 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Wed May 27 00:27:44 2015 -0700 -- .../main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/01c3ef53/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index be03a23..244bd3e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -46,7 +46,11 @@ private[sql] object JDBCRDD extends Logging { * @param sqlType - A field of java.sql.Types * @return The Catalyst type corresponding to sqlType. */ - private def getCatalystType(sqlType: Int, precision: Int, scale: Int): DataType = { + private def getCatalystType( + sqlType: Int, + precision: Int, + scale: Int, + signed: Boolean): DataType = { val answer = sqlType match { case java.sql.Types.ARRAY = null case java.sql.Types.BIGINT= LongType @@ -64,7 +68,7 @@ private[sql] object JDBCRDD extends Logging { case java.sql.Types.DISTINCT = null case java.sql.Types.DOUBLE= DoubleType case java.sql.Types.FLOAT = FloatType - case java.sql.Types.INTEGER = IntegerType + case java.sql.Types.INTEGER = if (signed) { IntegerType } else { LongType } case java.sql.Types.JAVA_OBJECT = null case java.sql.Types.LONGNVARCHAR = StringType case java.sql.Types.LONGVARBINARY = BinaryType @@ -123,11 +127,12 @@ private[sql] object JDBCRDD extends Logging { val typeName = rsmd.getColumnTypeName(i + 1) val fieldSize = rsmd.getPrecision(i + 1) val fieldScale = rsmd.getScale(i + 1) + val isSigned = rsmd.isSigned(i + 1) val nullable = rsmd.isNullable(i + 1) != ResultSetMetaData.columnNoNulls val metadata = new MetadataBuilder().putString(name, columnName) val columnType = dialect.getCatalystType(dataType, typeName, fieldSize, metadata).getOrElse( - getCatalystType(dataType, fieldSize, fieldScale)) + getCatalystType(dataType, fieldSize, fieldScale, isSigned)) fields(i) = StructField(columnName, columnType, nullable, metadata.build()) i = i + 1 } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org