Repository: spark
Updated Branches:
  refs/heads/master a1d1529da -> 6bdddb6f6


[SPARK-6361][SQL] support adding a column with metadata in DF

This is used by ML pipelines to embed ML attributes in columns created by ML 
transformers/estimators. marmbrus

Author: Xiangrui Meng <[email protected]>

Closes #5151 from mengxr/SPARK-6361 and squashes the following commits:

bb30de3 [Xiangrui Meng] support adding a column with metadata in DF


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6bdddb6f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6bdddb6f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6bdddb6f

Branch: refs/heads/master
Commit: 6bdddb6f6ffbd1bee4c45880904e9561b18764a7
Parents: a1d1529
Author: Xiangrui Meng <[email protected]>
Authored: Tue Mar 24 12:08:19 2015 -0700
Committer: Michael Armbrust <[email protected]>
Committed: Tue Mar 24 12:08:19 2015 -0700

----------------------------------------------------------------------
 .../catalyst/expressions/namedExpressions.scala | 20 +++++++++++++-------
 .../scala/org/apache/spark/sql/Column.scala     | 13 +++++++++++++
 .../spark/sql/ColumnExpressionSuite.scala       | 15 ++++++++++++---
 3 files changed, 38 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/6bdddb6f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
index 17f7f9f..3dd7d38 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
@@ -95,9 +95,12 @@ abstract class Attribute extends NamedExpression {
  * @param name the name to be associated with the result of computing 
[[child]].
  * @param exprId A globally unique id used to check if an 
[[AttributeReference]] refers to this
  *               alias. Auto-assigned if left blank.
+ * @param explicitMetadata Explicit metadata associated with this alias that 
overwrites child's.
  */
-case class Alias(child: Expression, name: String)
-    (val exprId: ExprId = NamedExpression.newExprId, val qualifiers: 
Seq[String] = Nil)
+case class Alias(child: Expression, name: String)(
+    val exprId: ExprId = NamedExpression.newExprId,
+    val qualifiers: Seq[String] = Nil,
+    val explicitMetadata: Option[Metadata] = None)
   extends NamedExpression with trees.UnaryNode[Expression] {
 
   override type EvaluatedType = Any
@@ -107,9 +110,11 @@ case class Alias(child: Expression, name: String)
   override def dataType = child.dataType
   override def nullable = child.nullable
   override def metadata: Metadata = {
-    child match {
-      case named: NamedExpression => named.metadata
-      case _ => Metadata.empty
+    explicitMetadata.getOrElse {
+      child match {
+        case named: NamedExpression => named.metadata
+        case _ => Metadata.empty
+      }
     }
   }
 
@@ -123,11 +128,12 @@ case class Alias(child: Expression, name: String)
 
   override def toString: String = s"$child AS $name#${exprId.id}$typeSuffix"
 
-  override protected final def otherCopyArgs = exprId :: qualifiers :: Nil
+  override protected final def otherCopyArgs = exprId :: qualifiers :: 
explicitMetadata :: Nil
 
   override def equals(other: Any): Boolean = other match {
     case a: Alias =>
-      name == a.name && exprId == a.exprId && child == a.child && qualifiers 
== a.qualifiers
+      name == a.name && exprId == a.exprId && child == a.child && qualifiers 
== a.qualifiers &&
+        explicitMetadata == a.explicitMetadata
     case _ => false
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/6bdddb6f/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index ec7d15f..2ae47e0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -595,6 +595,19 @@ class Column(protected[sql] val expr: Expression) {
   def as(alias: Symbol): Column = Alias(expr, alias.name)()
 
   /**
+   * Gives the column an alias with metadata.
+   * {{{
+   *   val metadata: Metadata = ...
+   *   df.select($"colA".as("colB", metadata))
+   * }}}
+   *
+   * @group expr_ops
+   */
+  def as(alias: String, metadata: Metadata): Column = {
+    Alias(expr, alias)(explicitMetadata = Some(metadata))
+  }
+
+  /**
    * Casts the column to a different data type.
    * {{{
    *   // Casts colA to IntegerType.

http://git-wip-us.apache.org/repos/asf/spark/blob/6bdddb6f/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index a53ae97..bc8fae1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -17,12 +17,10 @@
 
 package org.apache.spark.sql
 
-import org.apache.spark.sql.catalyst.expressions.NamedExpression
-import org.apache.spark.sql.catalyst.plans.logical.{Project, NoRelation}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.test.TestSQLContext
 import org.apache.spark.sql.test.TestSQLContext.implicits._
-import org.apache.spark.sql.types.{BooleanType, IntegerType, StructField, 
StructType}
+import org.apache.spark.sql.types._
 
 
 class ColumnExpressionSuite extends QueryTest {
@@ -322,4 +320,15 @@ class ColumnExpressionSuite extends QueryTest {
     assert('key.desc == 'key.desc)
     assert('key.desc != 'key.asc)
   }
+
+  test("alias with metadata") {
+    val metadata = new MetadataBuilder()
+      .putString("originName", "value")
+      .build()
+    val schema = testData
+      .select($"*", col("value").as("abc", metadata))
+      .schema
+    assert(schema("value").metadata === Metadata.empty)
+    assert(schema("abc").metadata === metadata)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to