This is an automated email from the ASF dual-hosted git repository.

gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 3922582  SQL: Fix too-strict check in SortProject. (#6403)
3922582 is described below

commit 3922582d8c53b2c82adc83d3cc5468b41595dbfb
Author: Gian Merlino <[email protected]>
AuthorDate: Sat Sep 29 13:54:34 2018 -0700

    SQL: Fix too-strict check in SortProject. (#6403)
    
    The "Duplicate field name" check on inputRowSignature is too strict:
    it is actually fine for a row signature to have the same field name
    twice. It happens when the same expression is selected twice, and
    both selections map to the same Druid object (dimension, aggregator,
    etc).
    
    I did not succeed in writing a test that triggers this, but I did see
    it occur in production for a complex query with hundreds of aggregators.
---
 .../apache/druid/sql/calcite/rel/DruidQuery.java   |  5 ++---
 .../apache/druid/sql/calcite/rel/SortProject.java  | 24 +++++++++-------------
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java 
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
index 70fda11..7e1a6fc 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
@@ -154,7 +154,7 @@ public class DruidQuery
       sortingInputRowSignature = sourceRowSignature;
     }
 
-    this.sortProject = computeSortProject(partialQuery, plannerContext, 
sortingInputRowSignature, grouping);
+    this.sortProject = computeSortProject(partialQuery, plannerContext, 
sortingInputRowSignature);
 
     // outputRowSignature is used only for scan and select query, and thus 
sort and grouping must be null
     this.outputRowSignature = sortProject == null ? sortingInputRowSignature : 
sortProject.getOutputRowSignature();
@@ -328,8 +328,7 @@ public class DruidQuery
   private SortProject computeSortProject(
       PartialDruidQuery partialQuery,
       PlannerContext plannerContext,
-      RowSignature sortingInputRowSignature,
-      Grouping grouping
+      RowSignature sortingInputRowSignature
   )
   {
     final Project sortProject = partialQuery.getSortProject();
diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/rel/SortProject.java 
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/SortProject.java
index a2b89df..f55fa18 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/SortProject.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/SortProject.java
@@ -27,6 +27,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Objects;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 public class SortProject
 {
@@ -44,26 +45,21 @@ public class SortProject
     this.postAggregators = Preconditions.checkNotNull(postAggregators, 
"postAggregators");
     this.outputRowSignature = Preconditions.checkNotNull(outputRowSignature, 
"outputRowSignature");
 
-    // Verify no collisions.
-    final Set<String> seen = new HashSet<>();
-    inputRowSignature.getRowOrder().forEach(field -> {
-      if (!seen.add(field)) {
-        throw new ISE("Duplicate field name: %s", field);
-      }
-    });
+    final Set<String> inputColumnNames = new 
HashSet<>(inputRowSignature.getRowOrder());
+    final Set<String> postAggregatorNames = postAggregators.stream()
+                                                           
.map(PostAggregator::getName)
+                                                           
.collect(Collectors.toSet());
 
-    for (PostAggregator postAggregator : postAggregators) {
-      if (postAggregator == null) {
-        throw new ISE("aggregation[%s] is not a postAggregator", 
postAggregator);
-      }
-      if (!seen.add(postAggregator.getName())) {
-        throw new ISE("Duplicate field name: %s", postAggregator.getName());
+    // Verify no collisions between inputs and outputs.
+    for (String postAggregatorName : postAggregatorNames) {
+      if (inputColumnNames.contains(postAggregatorName)) {
+        throw new ISE("Duplicate field name: %s", postAggregatorName);
       }
     }
 
     // Verify that items in the output signature exist.
     outputRowSignature.getRowOrder().forEach(field -> {
-      if (!seen.contains(field)) {
+      if (!inputColumnNames.contains(field) && 
!postAggregatorNames.contains(field)) {
         throw new ISE("Missing field in rowOrder: %s", field);
       }
     });


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to