Repository: spark
Updated Branches:
  refs/heads/master 91903e0a5 -> e060d3ee2


SPARK-2226:  [SQL] transform HAVING clauses with aggregate expressions that 
aren't in the aggregation list

This change adds an analyzer rule to
  1. find expressions in `HAVING` clause filters that depend on unresolved 
attributes,
  2. push these expressions down to the underlying aggregates, and then
  3. project them away above the filter.

It also enables the `HAVING` queries in the Hive compatibility suite.

Author: William Benton <wi...@redhat.com>

Closes #1497 from willb/spark-2226 and squashes the following commits:

92c9a93 [William Benton] Removed unnecessary import
f1d4f34 [William Benton] Cleanups missed in prior commit
0e1624f [William Benton] Incorporated suggestions from @marmbrus; thanks!
541d4ee [William Benton] Cleanups from review
5a12647 [William Benton] Explanatory comments and stylistic cleanups.
c7f2b2c [William Benton] Whitelist HAVING queries.
29a26e3 [William Benton] Added rule to handle unresolved attributes in HAVING 
clauses (SPARK-2226)


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e060d3ee
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e060d3ee
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e060d3ee

Branch: refs/heads/master
Commit: e060d3ee2d910a5a802bb29630dca6f66cc0525d
Parents: 91903e0
Author: William Benton <wi...@redhat.com>
Authored: Wed Jul 23 16:25:32 2014 -0700
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Wed Jul 23 16:25:32 2014 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/analysis/Analyzer.scala  |  27 +-
 .../having-0-57f3f26c0203c29c2a91a7cca557ce55   |   0
 .../having-1-ef81808faeab6d212c3cf32abfc0d873   |  10 +
 .../having-2-a2b4f52cb92f730ddb912b063636d6c1   |   0
 .../having-3-3fa6387b6a4ece110ac340c7b893964e   | 308 +++++++++++++++++++
 .../having-4-e9918bd385cb35db4ebcbd4e398547f4   |   0
 .../having-5-4a0c4e521b8a6f6146151c13a2715ff    | 199 ++++++++++++
 .../having-6-9f50df5b5f31c7166b0396ab434dc095   |   0
 .../having-7-5ad96cb287df02080da1e2594f08d83e   | 125 ++++++++
 .../having-8-4aa7197e20b5a64461ca670a79488103   |   0
 .../having-9-a79743372d86d77b0ff53a71adcb1cff   | 199 ++++++++++++
 .../hive/execution/HiveCompatibilitySuite.scala |   2 +
 12 files changed, 869 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e060d3ee/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index c718846..02bdb64 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
 
-
 /**
  * A trivial [[Analyzer]] with an [[EmptyCatalog]] and 
[[EmptyFunctionRegistry]]. Used for testing
  * when all relations are already filled in and the analyser needs only to 
resolve attribute
@@ -54,6 +53,7 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, 
caseSensitive: Bool
       StarExpansion ::
       ResolveFunctions ::
       GlobalAggregates ::
+      UnresolvedHavingClauseAttributes :: 
       typeCoercionRules :_*),
     Batch("Check Analysis", Once,
       CheckResolution),
@@ -152,6 +152,31 @@ class Analyzer(catalog: Catalog, registry: 
FunctionRegistry, caseSensitive: Bool
   }
 
   /**
+   * This rule finds expressions in HAVING clause filters that depend on
+   * unresolved attributes.  It pushes these expressions down to the underlying
+   * aggregates and then projects them away above the filter.
+   */
+  object UnresolvedHavingClauseAttributes extends Rule[LogicalPlan] {
+    def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
+      case filter @ Filter(havingCondition, aggregate @ Aggregate(_, 
originalAggExprs, _)) 
+          if !filter.resolved && aggregate.resolved && 
containsAggregate(havingCondition) => {
+        val evaluatedCondition = Alias(havingCondition,  "havingCondition")()
+        val aggExprsWithHaving = evaluatedCondition +: originalAggExprs
+        
+        Project(aggregate.output,
+          Filter(evaluatedCondition.toAttribute,
+            aggregate.copy(aggregateExpressions = aggExprsWithHaving)))
+      }
+      
+    }
+    
+    protected def containsAggregate(condition: Expression): Boolean =
+      condition
+        .collect { case ae: AggregateExpression => ae }
+        .nonEmpty
+  }
+
+  /**
    * When a SELECT clause has only a single expression and that expression is a
    * [[catalyst.expressions.Generator Generator]] we convert the
    * [[catalyst.plans.logical.Project Project]] to a 
[[catalyst.plans.logical.Generate Generate]].

http://git-wip-us.apache.org/repos/asf/spark/blob/e060d3ee/sql/hive/src/test/resources/golden/having-0-57f3f26c0203c29c2a91a7cca557ce55
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/having-0-57f3f26c0203c29c2a91a7cca557ce55 
b/sql/hive/src/test/resources/golden/having-0-57f3f26c0203c29c2a91a7cca557ce55
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/e060d3ee/sql/hive/src/test/resources/golden/having-1-ef81808faeab6d212c3cf32abfc0d873
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/having-1-ef81808faeab6d212c3cf32abfc0d873 
b/sql/hive/src/test/resources/golden/having-1-ef81808faeab6d212c3cf32abfc0d873
new file mode 100644
index 0000000..704f1e6
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/having-1-ef81808faeab6d212c3cf32abfc0d873
@@ -0,0 +1,10 @@
+4
+4
+5
+4
+5
+5
+4
+4
+5
+4

http://git-wip-us.apache.org/repos/asf/spark/blob/e060d3ee/sql/hive/src/test/resources/golden/having-2-a2b4f52cb92f730ddb912b063636d6c1
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/having-2-a2b4f52cb92f730ddb912b063636d6c1 
b/sql/hive/src/test/resources/golden/having-2-a2b4f52cb92f730ddb912b063636d6c1
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/e060d3ee/sql/hive/src/test/resources/golden/having-3-3fa6387b6a4ece110ac340c7b893964e
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/having-3-3fa6387b6a4ece110ac340c7b893964e 
b/sql/hive/src/test/resources/golden/having-3-3fa6387b6a4ece110ac340c7b893964e
new file mode 100644
index 0000000..b56757a
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/having-3-3fa6387b6a4ece110ac340c7b893964e
@@ -0,0 +1,308 @@
+0      val_0
+2      val_2
+4      val_4
+5      val_5
+8      val_8
+9      val_9
+10     val_10
+11     val_11
+12     val_12
+15     val_15
+17     val_17
+18     val_18
+19     val_19
+20     val_20
+24     val_24
+26     val_26
+27     val_27
+28     val_28
+30     val_30
+33     val_33
+34     val_34
+35     val_35
+37     val_37
+41     val_41
+42     val_42
+43     val_43
+44     val_44
+47     val_47
+51     val_51
+53     val_53
+54     val_54
+57     val_57
+58     val_58
+64     val_64
+65     val_65
+66     val_66
+67     val_67
+69     val_69
+70     val_70
+72     val_72
+74     val_74
+76     val_76
+77     val_77
+78     val_78
+80     val_80
+82     val_82
+83     val_83
+84     val_84
+85     val_85
+86     val_86
+87     val_87
+90     val_90
+92     val_92
+95     val_95
+96     val_96
+97     val_97
+98     val_98
+100    val_100
+103    val_103
+104    val_104
+105    val_105
+111    val_111
+113    val_113
+114    val_114
+116    val_116
+118    val_118
+119    val_119
+120    val_120
+125    val_125
+126    val_126
+128    val_128
+129    val_129
+131    val_131
+133    val_133
+134    val_134
+136    val_136
+137    val_137
+138    val_138
+143    val_143
+145    val_145
+146    val_146
+149    val_149
+150    val_150
+152    val_152
+153    val_153
+155    val_155
+156    val_156
+157    val_157
+158    val_158
+160    val_160
+162    val_162
+163    val_163
+164    val_164
+165    val_165
+166    val_166
+167    val_167
+168    val_168
+169    val_169
+170    val_170
+172    val_172
+174    val_174
+175    val_175
+176    val_176
+177    val_177
+178    val_178
+179    val_179
+180    val_180
+181    val_181
+183    val_183
+186    val_186
+187    val_187
+189    val_189
+190    val_190
+191    val_191
+192    val_192
+193    val_193
+194    val_194
+195    val_195
+196    val_196
+197    val_197
+199    val_199
+200    val_200
+201    val_201
+202    val_202
+203    val_203
+205    val_205
+207    val_207
+208    val_208
+209    val_209
+213    val_213
+214    val_214
+216    val_216
+217    val_217
+218    val_218
+219    val_219
+221    val_221
+222    val_222
+223    val_223
+224    val_224
+226    val_226
+228    val_228
+229    val_229
+230    val_230
+233    val_233
+235    val_235
+237    val_237
+238    val_238
+239    val_239
+241    val_241
+242    val_242
+244    val_244
+247    val_247
+248    val_248
+249    val_249
+252    val_252
+255    val_255
+256    val_256
+257    val_257
+258    val_258
+260    val_260
+262    val_262
+263    val_263
+265    val_265
+266    val_266
+272    val_272
+273    val_273
+274    val_274
+275    val_275
+277    val_277
+278    val_278
+280    val_280
+281    val_281
+282    val_282
+283    val_283
+284    val_284
+285    val_285
+286    val_286
+287    val_287
+288    val_288
+289    val_289
+291    val_291
+292    val_292
+296    val_296
+298    val_298
+305    val_305
+306    val_306
+307    val_307
+308    val_308
+309    val_309
+310    val_310
+311    val_311
+315    val_315
+316    val_316
+317    val_317
+318    val_318
+321    val_321
+322    val_322
+323    val_323
+325    val_325
+327    val_327
+331    val_331
+332    val_332
+333    val_333
+335    val_335
+336    val_336
+338    val_338
+339    val_339
+341    val_341
+342    val_342
+344    val_344
+345    val_345
+348    val_348
+351    val_351
+353    val_353
+356    val_356
+360    val_360
+362    val_362
+364    val_364
+365    val_365
+366    val_366
+367    val_367
+368    val_368
+369    val_369
+373    val_373
+374    val_374
+375    val_375
+377    val_377
+378    val_378
+379    val_379
+382    val_382
+384    val_384
+386    val_386
+389    val_389
+392    val_392
+393    val_393
+394    val_394
+395    val_395
+396    val_396
+397    val_397
+399    val_399
+400    val_400
+401    val_401
+402    val_402
+403    val_403
+404    val_404
+406    val_406
+407    val_407
+409    val_409
+411    val_411
+413    val_413
+414    val_414
+417    val_417
+418    val_418
+419    val_419
+421    val_421
+424    val_424
+427    val_427
+429    val_429
+430    val_430
+431    val_431
+432    val_432
+435    val_435
+436    val_436
+437    val_437
+438    val_438
+439    val_439
+443    val_443
+444    val_444
+446    val_446
+448    val_448
+449    val_449
+452    val_452
+453    val_453
+454    val_454
+455    val_455
+457    val_457
+458    val_458
+459    val_459
+460    val_460
+462    val_462
+463    val_463
+466    val_466
+467    val_467
+468    val_468
+469    val_469
+470    val_470
+472    val_472
+475    val_475
+477    val_477
+478    val_478
+479    val_479
+480    val_480
+481    val_481
+482    val_482
+483    val_483
+484    val_484
+485    val_485
+487    val_487
+489    val_489
+490    val_490
+491    val_491
+492    val_492
+493    val_493
+494    val_494
+495    val_495
+496    val_496
+497    val_497
+498    val_498

http://git-wip-us.apache.org/repos/asf/spark/blob/e060d3ee/sql/hive/src/test/resources/golden/having-4-e9918bd385cb35db4ebcbd4e398547f4
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/having-4-e9918bd385cb35db4ebcbd4e398547f4 
b/sql/hive/src/test/resources/golden/having-4-e9918bd385cb35db4ebcbd4e398547f4
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/e060d3ee/sql/hive/src/test/resources/golden/having-5-4a0c4e521b8a6f6146151c13a2715ff
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/having-5-4a0c4e521b8a6f6146151c13a2715ff 
b/sql/hive/src/test/resources/golden/having-5-4a0c4e521b8a6f6146151c13a2715ff
new file mode 100644
index 0000000..2d7022e
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/having-5-4a0c4e521b8a6f6146151c13a2715ff
@@ -0,0 +1,199 @@
+4
+5
+8
+9
+26
+27
+28
+30
+33
+34
+35
+37
+41
+42
+43
+44
+47
+51
+53
+54
+57
+58
+64
+65
+66
+67
+69
+70
+72
+74
+76
+77
+78
+80
+82
+83
+84
+85
+86
+87
+90
+92
+95
+96
+97
+98
+256
+257
+258
+260
+262
+263
+265
+266
+272
+273
+274
+275
+277
+278
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+291
+292
+296
+298
+302
+305
+306
+307
+308
+309
+310
+311
+315
+316
+317
+318
+321
+322
+323
+325
+327
+331
+332
+333
+335
+336
+338
+339
+341
+342
+344
+345
+348
+351
+353
+356
+360
+362
+364
+365
+366
+367
+368
+369
+373
+374
+375
+377
+378
+379
+382
+384
+386
+389
+392
+393
+394
+395
+396
+397
+399
+400
+401
+402
+403
+404
+406
+407
+409
+411
+413
+414
+417
+418
+419
+421
+424
+427
+429
+430
+431
+432
+435
+436
+437
+438
+439
+443
+444
+446
+448
+449
+452
+453
+454
+455
+457
+458
+459
+460
+462
+463
+466
+467
+468
+469
+470
+472
+475
+477
+478
+479
+480
+481
+482
+483
+484
+485
+487
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498

http://git-wip-us.apache.org/repos/asf/spark/blob/e060d3ee/sql/hive/src/test/resources/golden/having-6-9f50df5b5f31c7166b0396ab434dc095
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/having-6-9f50df5b5f31c7166b0396ab434dc095 
b/sql/hive/src/test/resources/golden/having-6-9f50df5b5f31c7166b0396ab434dc095
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/e060d3ee/sql/hive/src/test/resources/golden/having-7-5ad96cb287df02080da1e2594f08d83e
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/having-7-5ad96cb287df02080da1e2594f08d83e 
b/sql/hive/src/test/resources/golden/having-7-5ad96cb287df02080da1e2594f08d83e
new file mode 100644
index 0000000..bd545cc
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/having-7-5ad96cb287df02080da1e2594f08d83e
@@ -0,0 +1,125 @@
+302
+305
+306
+307
+308
+309
+310
+311
+315
+316
+317
+318
+321
+322
+323
+325
+327
+331
+332
+333
+335
+336
+338
+339
+341
+342
+344
+345
+348
+351
+353
+356
+360
+362
+364
+365
+366
+367
+368
+369
+373
+374
+375
+377
+378
+379
+382
+384
+386
+389
+392
+393
+394
+395
+396
+397
+399
+400
+401
+402
+403
+404
+406
+407
+409
+411
+413
+414
+417
+418
+419
+421
+424
+427
+429
+430
+431
+432
+435
+436
+437
+438
+439
+443
+444
+446
+448
+449
+452
+453
+454
+455
+457
+458
+459
+460
+462
+463
+466
+467
+468
+469
+470
+472
+475
+477
+478
+479
+480
+481
+482
+483
+484
+485
+487
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498

http://git-wip-us.apache.org/repos/asf/spark/blob/e060d3ee/sql/hive/src/test/resources/golden/having-8-4aa7197e20b5a64461ca670a79488103
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/having-8-4aa7197e20b5a64461ca670a79488103 
b/sql/hive/src/test/resources/golden/having-8-4aa7197e20b5a64461ca670a79488103
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/e060d3ee/sql/hive/src/test/resources/golden/having-9-a79743372d86d77b0ff53a71adcb1cff
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/resources/golden/having-9-a79743372d86d77b0ff53a71adcb1cff 
b/sql/hive/src/test/resources/golden/having-9-a79743372d86d77b0ff53a71adcb1cff
new file mode 100644
index 0000000..d77586c
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/having-9-a79743372d86d77b0ff53a71adcb1cff
@@ -0,0 +1,199 @@
+4      val_4
+5      val_5
+8      val_8
+9      val_9
+26     val_26
+27     val_27
+28     val_28
+30     val_30
+33     val_33
+34     val_34
+35     val_35
+37     val_37
+41     val_41
+42     val_42
+43     val_43
+44     val_44
+47     val_47
+51     val_51
+53     val_53
+54     val_54
+57     val_57
+58     val_58
+64     val_64
+65     val_65
+66     val_66
+67     val_67
+69     val_69
+70     val_70
+72     val_72
+74     val_74
+76     val_76
+77     val_77
+78     val_78
+80     val_80
+82     val_82
+83     val_83
+84     val_84
+85     val_85
+86     val_86
+87     val_87
+90     val_90
+92     val_92
+95     val_95
+96     val_96
+97     val_97
+98     val_98
+256    val_256
+257    val_257
+258    val_258
+260    val_260
+262    val_262
+263    val_263
+265    val_265
+266    val_266
+272    val_272
+273    val_273
+274    val_274
+275    val_275
+277    val_277
+278    val_278
+280    val_280
+281    val_281
+282    val_282
+283    val_283
+284    val_284
+285    val_285
+286    val_286
+287    val_287
+288    val_288
+289    val_289
+291    val_291
+292    val_292
+296    val_296
+298    val_298
+302    val_302
+305    val_305
+306    val_306
+307    val_307
+308    val_308
+309    val_309
+310    val_310
+311    val_311
+315    val_315
+316    val_316
+317    val_317
+318    val_318
+321    val_321
+322    val_322
+323    val_323
+325    val_325
+327    val_327
+331    val_331
+332    val_332
+333    val_333
+335    val_335
+336    val_336
+338    val_338
+339    val_339
+341    val_341
+342    val_342
+344    val_344
+345    val_345
+348    val_348
+351    val_351
+353    val_353
+356    val_356
+360    val_360
+362    val_362
+364    val_364
+365    val_365
+366    val_366
+367    val_367
+368    val_368
+369    val_369
+373    val_373
+374    val_374
+375    val_375
+377    val_377
+378    val_378
+379    val_379
+382    val_382
+384    val_384
+386    val_386
+389    val_389
+392    val_392
+393    val_393
+394    val_394
+395    val_395
+396    val_396
+397    val_397
+399    val_399
+400    val_400
+401    val_401
+402    val_402
+403    val_403
+404    val_404
+406    val_406
+407    val_407
+409    val_409
+411    val_411
+413    val_413
+414    val_414
+417    val_417
+418    val_418
+419    val_419
+421    val_421
+424    val_424
+427    val_427
+429    val_429
+430    val_430
+431    val_431
+432    val_432
+435    val_435
+436    val_436
+437    val_437
+438    val_438
+439    val_439
+443    val_443
+444    val_444
+446    val_446
+448    val_448
+449    val_449
+452    val_452
+453    val_453
+454    val_454
+455    val_455
+457    val_457
+458    val_458
+459    val_459
+460    val_460
+462    val_462
+463    val_463
+466    val_466
+467    val_467
+468    val_468
+469    val_469
+470    val_470
+472    val_472
+475    val_475
+477    val_477
+478    val_478
+479    val_479
+480    val_480
+481    val_481
+482    val_482
+483    val_483
+484    val_484
+485    val_485
+487    val_487
+489    val_489
+490    val_490
+491    val_491
+492    val_492
+493    val_493
+494    val_494
+495    val_495
+496    val_496
+497    val_497
+498    val_498

http://git-wip-us.apache.org/repos/asf/spark/blob/e060d3ee/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index bd036fa..8b45197 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -391,6 +391,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with 
BeforeAndAfter {
     "groupby_sort_8",
     "groupby_sort_9",
     "groupby_sort_test_1",
+    "having",
+    "having1",
     "implicit_cast1",
     "innerjoin",
     "inoutdriver",

Reply via email to