10110346 closed pull request #23304: [SPARK-26353][SQL]Add typed aggregate
functions: max&&min
URL: https://github.com/apache/spark/pull/23304
This is a PR merged from a forked repository. Because GitHub hides the
original diff of a foreign (forked) pull request once it is merged, the
diff is reproduced below for the sake of provenance:
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/typedaggregators.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/typedaggregators.scala
index b6550bf3e4aac..2d08ea3fce6fb 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/typedaggregators.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/typedaggregators.scala
@@ -99,3 +99,71 @@ class TypedAverage[IN](val f: IN => Double) extends
Aggregator[IN, (Double, Long
toColumn.asInstanceOf[TypedColumn[IN, java.lang.Double]]
}
}
+
/**
 * Typed aggregator computing the maximum of a Double extracted from each input row.
 *
 * @param f function extracting the Double value to maximize from each input row
 */
class TypedMaxDouble[IN](val f: IN => Double) extends Aggregator[IN, Double, Double] {
  // NOTE(review): an empty group yields Double.MinValue (the zero element) rather
  // than null/NaN — confirm this is the intended semantics for empty input.
  override def zero: Double = Double.MinValue

  // Evaluate the (possibly expensive) user function once per row, not twice.
  override def reduce(b: Double, a: IN): Double = {
    val v = f(a)
    if (b > v) b else v
  }
  override def merge(b1: Double, b2: Double): Double = if (b1 > b2) b1 else b2
  override def finish(reduction: Double): Double = reduction

  override def bufferEncoder: Encoder[Double] = ExpressionEncoder[Double]()
  override def outputEncoder: Encoder[Double] = ExpressionEncoder[Double]()

  // Java API support: adapt a MapFunction to a Scala function, unboxing its result.
  def this(f: MapFunction[IN, java.lang.Double]) = this((x: IN) => f.call(x).asInstanceOf[Double])

  // Java-facing column with a boxed result type.
  def toColumnJava: TypedColumn[IN, java.lang.Double] = {
    toColumn.asInstanceOf[TypedColumn[IN, java.lang.Double]]
  }
}
+
/**
 * Typed aggregator computing the maximum of a Long extracted from each input row.
 *
 * @param f function extracting the Long value to maximize from each input row
 */
class TypedMaxLong[IN](val f: IN => Long) extends Aggregator[IN, Long, Long] {
  // NOTE(review): an empty group yields Long.MinValue (the zero element) rather
  // than null — confirm this is the intended semantics for empty input.
  override def zero: Long = Long.MinValue

  // Evaluate the (possibly expensive) user function once per row, not twice.
  override def reduce(b: Long, a: IN): Long = {
    val v = f(a)
    if (b > v) b else v
  }
  override def merge(b1: Long, b2: Long): Long = if (b1 > b2) b1 else b2
  override def finish(reduction: Long): Long = reduction

  override def bufferEncoder: Encoder[Long] = ExpressionEncoder[Long]()
  override def outputEncoder: Encoder[Long] = ExpressionEncoder[Long]()

  // Java API support: adapt a MapFunction to a Scala function, unboxing its result.
  def this(f: MapFunction[IN, java.lang.Long]) = this((x: IN) => f.call(x).asInstanceOf[Long])

  // Java-facing column with a boxed result type.
  def toColumnJava: TypedColumn[IN, java.lang.Long] = {
    toColumn.asInstanceOf[TypedColumn[IN, java.lang.Long]]
  }
}
+
/**
 * Typed aggregator computing the minimum of a Double extracted from each input row.
 *
 * @param f function extracting the Double value to minimize from each input row
 */
class TypedMinDouble[IN](val f: IN => Double) extends Aggregator[IN, Double, Double] {
  // NOTE(review): an empty group yields Double.MaxValue (the zero element) rather
  // than null/NaN — confirm this is the intended semantics for empty input.
  override def zero: Double = Double.MaxValue

  // Evaluate the (possibly expensive) user function once per row, not twice.
  override def reduce(b: Double, a: IN): Double = {
    val v = f(a)
    if (b < v) b else v
  }
  override def merge(b1: Double, b2: Double): Double = if (b1 < b2) b1 else b2
  override def finish(reduction: Double): Double = reduction

  override def bufferEncoder: Encoder[Double] = ExpressionEncoder[Double]()
  override def outputEncoder: Encoder[Double] = ExpressionEncoder[Double]()

  // Java API support: adapt a MapFunction to a Scala function, unboxing its result.
  def this(f: MapFunction[IN, java.lang.Double]) = this((x: IN) => f.call(x).asInstanceOf[Double])

  // Java-facing column with a boxed result type.
  def toColumnJava: TypedColumn[IN, java.lang.Double] = {
    toColumn.asInstanceOf[TypedColumn[IN, java.lang.Double]]
  }
}
+
/**
 * Typed aggregator computing the minimum of a Long extracted from each input row.
 *
 * @param f function extracting the Long value to minimize from each input row
 */
class TypedMinLong[IN](val f: IN => Long) extends Aggregator[IN, Long, Long] {
  // NOTE(review): an empty group yields Long.MaxValue (the zero element) rather
  // than null — confirm this is the intended semantics for empty input.
  override def zero: Long = Long.MaxValue

  // Evaluate the (possibly expensive) user function once per row, not twice.
  override def reduce(b: Long, a: IN): Long = {
    val v = f(a)
    if (b < v) b else v
  }
  override def merge(b1: Long, b2: Long): Long = if (b1 < b2) b1 else b2
  override def finish(reduction: Long): Long = reduction

  override def bufferEncoder: Encoder[Long] = ExpressionEncoder[Long]()
  override def outputEncoder: Encoder[Long] = ExpressionEncoder[Long]()

  // Java API support: adapt a MapFunction to a Scala function, unboxing its result.
  def this(f: MapFunction[IN, java.lang.Long]) = this((x: IN) => f.call(x).asInstanceOf[Long])

  // Java-facing column with a boxed result type.
  def toColumnJava: TypedColumn[IN, java.lang.Long] = {
    toColumn.asInstanceOf[TypedColumn[IN, java.lang.Long]]
  }
}
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
index 1cb579c4faa76..6a8336e01d6f6 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
@@ -77,14 +77,31 @@ object typed {
*/
def sumLong[IN](f: IN => Long): TypedColumn[IN, Long] = new
TypedSumLong[IN](f).toColumn
/**
 * Max aggregate function for floating point (double) type.
 *
 * @param f function extracting the Double value to maximize from each input row
 */
def max[IN](f: IN => Double): TypedColumn[IN, Double] = {
  val aggregator = new TypedMaxDouble[IN](f)
  aggregator.toColumn
}
+
/**
 * Max aggregate function for integral (long, i.e. 64 bit integer) type.
 *
 * @param f function extracting the Long value to maximize from each input row
 */
def maxLong[IN](f: IN => Long): TypedColumn[IN, Long] = {
  val aggregator = new TypedMaxLong[IN](f)
  aggregator.toColumn
}
+
/**
 * Min aggregate function for floating point (double) type.
 *
 * @param f function extracting the Double value to minimize from each input row
 */
def min[IN](f: IN => Double): TypedColumn[IN, Double] = {
  val aggregator = new TypedMinDouble[IN](f)
  aggregator.toColumn
}
+
/**
 * Min aggregate function for integral (long, i.e. 64 bit integer) type.
 *
 * @param f function extracting the Long value to minimize from each input row
 */
def minLong[IN](f: IN => Long): TypedColumn[IN, Long] = {
  val aggregator = new TypedMinLong[IN](f)
  aggregator.toColumn
}
+
// TODO:
// stddevOf: Double
// varianceOf: Double
// approx_count_distinct: Long
- // minOf: T
- // maxOf: T
-
// firstOf: T
// lastOf: T
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala
index 97c3f358c0e76..1eee4826f1eef 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala
@@ -323,6 +323,14 @@ class DatasetAggregatorSuite extends QueryTest with
SharedSQLContext {
("a", 2.0, 2L, 4.0, 4L), ("b", 3.0, 1L, 3.0, 3L))
}
test("typed aggregate: max, min") {
  // Two groups containing both positive and negative values, exercising all
  // four typed extreme-value aggregators in a single groupByKey pass.
  val input = Seq("a" -> 4, "a" -> -1, "b" -> -3, "b" -> 1, "b" -> -1).toDS()
  val aggregated = input.groupByKey(_._1)
    .agg(typed.max(_._2), typed.maxLong(_._2), typed.min(_._2), typed.minLong(_._2))
  checkDataset(
    aggregated,
    ("a", 4.0, 4L, -1.0, -1L),
    ("b", 1.0, 1L, -3.0, -3L))
}
+
test("generic typed sum") {
val ds = Seq("a" -> 1, "a" -> 3, "b" -> 3).toDS()
checkDataset(
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]