This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new f793524c6 IMPALA-12800: Implement hashCode everywhere
f793524c6 is described below

commit f793524c66d02ca1d6590cec1b09cada548c6d5b
Author: Michael Smith <[email protected]>
AuthorDate: Mon Jun 24 13:52:51 2024 -0700

    IMPALA-12800: Implement hashCode everywhere
    
    Fixes a regression in earlier IMPALA-12800 commits where two decimal
    ScalarTypes could be equal, but have different hash codes. That led to
    different lookup behavior, where iterating through lhs_ (the old method)
    would find a similar expression via equals, but when looking up via the
    substitutions_ map it would resolve a different hash code and return
    null (no match). That caused analysis to fail on statements that used
    the same cast-to-decimal expression twice, as one of them would fail to
    find a result in ExprSubstitutionMap and lead to an unbound slot
    
      ERROR: AnalysisException: select list expression not produced by
      aggregation output (missing from GROUP BY clause?): <expr>...
    
    Overrides hashCode on every class that overrides equals to avoid other
    cases that might be untested, and avoid similar mistakes in the future.
    https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html
    requires "If two objects are equal according to the equals(Object)
    method, then calling the hashCode method on each of the two objects must
    produce the same integer result", which requires an explicit hashCode
    implementation anywhere we override equals.
    
    Adds a unit test covering the regression.
    
    Change-Id: I129bff6fd0968be135e23e0b24e273b2ea384eca
    Reviewed-on: http://gerrit.cloudera.org:8080/21550
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Michael Smith <[email protected]>
---
 .../main/java/org/apache/impala/analysis/ColumnDef.java | 17 +++++++++++++++++
 fe/src/main/java/org/apache/impala/analysis/Expr.java   |  8 ++++++--
 .../main/java/org/apache/impala/analysis/PlanHint.java  |  6 ++++++
 .../main/java/org/apache/impala/catalog/ArrayType.java  |  6 ++++++
 .../org/apache/impala/catalog/IcebergStructField.java   |  7 +++++++
 fe/src/main/java/org/apache/impala/catalog/MapType.java |  7 +++++++
 .../main/java/org/apache/impala/catalog/ScalarType.java | 16 ++++++++++++++++
 .../java/org/apache/impala/catalog/StructField.java     |  3 +++
 .../main/java/org/apache/impala/catalog/StructType.java |  5 +++++
 .../java/org/apache/impala/catalog/TopicUpdateLog.java  | 10 ++++++++++
 fe/src/main/java/org/apache/impala/catalog/Type.java    | 11 +++++++++++
 .../java/org/apache/impala/planner/DataPartition.java   |  6 ++++++
 .../org/apache/impala/analysis/AnalyzeExprsTest.java    |  9 +++++++++
 13 files changed, 109 insertions(+), 2 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/ColumnDef.java 
b/fe/src/main/java/org/apache/impala/analysis/ColumnDef.java
index b4e3e9b1d..4f31af4bb 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ColumnDef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ColumnDef.java
@@ -24,6 +24,7 @@ import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 
 import org.apache.commons.lang3.builder.EqualsBuilder;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -383,6 +384,22 @@ public class ColumnDef {
         .isEquals();
   }
 
+  @Override
+  public int hashCode() {
+    return Objects.hash(
+        colName_,
+        comment_,
+        isPrimaryKey_,
+        isPrimaryKeyUnique_,
+        typeDef_,
+        type_,
+        isNullable_,
+        encoding_,
+        compression_,
+        defaultValue_,
+        blockSize_);
+  }
+
   public TColumn toThrift() {
     TColumn col = new TColumn(getColName(), type_.toThrift());
     Integer blockSize =
diff --git a/fe/src/main/java/org/apache/impala/analysis/Expr.java 
b/fe/src/main/java/org/apache/impala/analysis/Expr.java
index 960c1775d..172c305a6 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Expr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Expr.java
@@ -1022,7 +1022,8 @@ abstract public class Expr extends TreeNode<Expr> 
implements ParseNode, Cloneabl
 
   /**
    * Returns true if two expressions are equal. The equality comparison works 
on analyzed
-   * as well as unanalyzed exprs by ignoring implicit casts.
+   * as well as unanalyzed exprs by ignoring implicit casts. If overridden by 
a subclass,
+   * also provide an appropriate implementation for hashCode.
    */
   @Override
   public final boolean equals(Object obj) {
@@ -1037,7 +1038,10 @@ abstract public class Expr extends TreeNode<Expr> 
implements ParseNode, Cloneabl
   }
 
   /**
-   * Returns a hash code based on the same keys used for equals.
+   * Returns a hash code based on the same keys used for equals. Any 
subclasses that
+   * override equals must ensure hashCode is defined such that a.equals(b) 
implies
+   * a.hashCode() == b.hashCode(), as required by
+   * https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html#hashCode--
    */
   @Override
   public int hashCode() {
diff --git a/fe/src/main/java/org/apache/impala/analysis/PlanHint.java 
b/fe/src/main/java/org/apache/impala/analysis/PlanHint.java
index f54931662..64b881669 100644
--- a/fe/src/main/java/org/apache/impala/analysis/PlanHint.java
+++ b/fe/src/main/java/org/apache/impala/analysis/PlanHint.java
@@ -20,6 +20,7 @@ package org.apache.impala.analysis;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Objects;
 
 import com.google.common.base.Joiner;
 
@@ -56,6 +57,11 @@ public class PlanHint {
     return name_.equals(oh.name_) && args_.equals(oh.args_);
   }
 
+  @Override
+  public int hashCode() {
+    return Objects.hash(name_, args_);
+  }
+
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
diff --git a/fe/src/main/java/org/apache/impala/catalog/ArrayType.java 
b/fe/src/main/java/org/apache/impala/catalog/ArrayType.java
index e635aa5aa..6e81e267d 100644
--- a/fe/src/main/java/org/apache/impala/catalog/ArrayType.java
+++ b/fe/src/main/java/org/apache/impala/catalog/ArrayType.java
@@ -49,6 +49,12 @@ public class ArrayType extends Type {
     return otherArrayType.itemType_.equals(itemType_);
   }
 
+  @Override
+  public int hashCode() {
+    // Add 1 to differentiate between e.g. array<int> and array<array<int>>.
+    return 1 + itemType_.hashCode();
+  }
+
   @Override
   public void toThrift(TColumnType container) {
     TTypeNode node = new TTypeNode();
diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergStructField.java 
b/fe/src/main/java/org/apache/impala/catalog/IcebergStructField.java
index ef271a5c0..43697fb79 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergStructField.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergStructField.java
@@ -17,6 +17,8 @@
 
 package org.apache.impala.catalog;
 
+import java.util.Objects;
+
 import org.apache.impala.thrift.TColumnType;
 import org.apache.impala.thrift.TStructField;
 import org.apache.impala.thrift.TTypeNode;
@@ -54,4 +56,9 @@ public class IcebergStructField extends StructField {
     return otherStructField.name_.equals(name_) && 
otherStructField.type_.equals(type_)
         && (otherStructField.fieldId_ == fieldId_);
   }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(name_, type_, fieldId_);
+  }
 }
diff --git a/fe/src/main/java/org/apache/impala/catalog/MapType.java 
b/fe/src/main/java/org/apache/impala/catalog/MapType.java
index 4235181d5..41e60ec1a 100644
--- a/fe/src/main/java/org/apache/impala/catalog/MapType.java
+++ b/fe/src/main/java/org/apache/impala/catalog/MapType.java
@@ -17,6 +17,8 @@
 
 package org.apache.impala.catalog;
 
+import java.util.Objects;
+
 import org.apache.commons.lang3.StringUtils;
 
 import org.apache.impala.thrift.TColumnType;
@@ -49,6 +51,11 @@ public class MapType extends Type {
         otherMapType.valueType_.equals(valueType_);
   }
 
+  @Override
+  public int hashCode() {
+    return Objects.hash(keyType_, valueType_);
+  }
+
   @Override
   public String toSql(int depth) {
     if (depth >= MAX_NESTING_DEPTH) return "MAP<...>";
diff --git a/fe/src/main/java/org/apache/impala/catalog/ScalarType.java 
b/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
index f466e2530..4872caabd 100644
--- a/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
+++ b/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
@@ -17,6 +17,8 @@
 
 package org.apache.impala.catalog;
 
+import java.util.Objects;
+
 import org.apache.commons.lang3.StringUtils;
 import org.apache.impala.analysis.TypesUtil;
 import org.apache.impala.thrift.TColumnType;
@@ -387,6 +389,20 @@ public class ScalarType extends Type {
     return true;
   }
 
+  @Override
+  public int hashCode() {
+    switch (type_) {
+      case CHAR:
+      case FIXED_UDA_INTERMEDIATE:
+      case VARCHAR:
+        return Objects.hash(type_, len_);
+      case DECIMAL:
+        return Objects.hash(type_, precision_, scale_);
+      default:
+        return Objects.hash(type_);
+    }
+  }
+
   public Type getMaxResolutionType() {
     // Dates got summed as BIGINT for AVG.
     if (isIntegerType() || type_ == PrimitiveType.DATE) {
diff --git a/fe/src/main/java/org/apache/impala/catalog/StructField.java 
b/fe/src/main/java/org/apache/impala/catalog/StructField.java
index fdfbb672f..ad611980e 100644
--- a/fe/src/main/java/org/apache/impala/catalog/StructField.java
+++ b/fe/src/main/java/org/apache/impala/catalog/StructField.java
@@ -95,6 +95,9 @@ public class StructField {
     type_.toThrift(container);
   }
 
+  /**
+   * Implements equals and hashCode so a.equals(b) implies 
a.hashCode()==b.hashCode().
+   */
   @Override
   public boolean equals(Object other) {
     if (!(other instanceof StructField)) return false;
diff --git a/fe/src/main/java/org/apache/impala/catalog/StructType.java 
b/fe/src/main/java/org/apache/impala/catalog/StructType.java
index dc637d040..c6c043f40 100644
--- a/fe/src/main/java/org/apache/impala/catalog/StructType.java
+++ b/fe/src/main/java/org/apache/impala/catalog/StructType.java
@@ -116,6 +116,11 @@ public class StructType extends Type {
     return otherStructType.getFields().equals(fields_);
   }
 
+  @Override
+  public int hashCode() {
+    return fields_.hashCode();
+  }
+
   @Override
   public void toThrift(TColumnType container) {
     TTypeNode node = new TTypeNode();
diff --git a/fe/src/main/java/org/apache/impala/catalog/TopicUpdateLog.java 
b/fe/src/main/java/org/apache/impala/catalog/TopicUpdateLog.java
index a20abd5ae..99006ebd2 100644
--- a/fe/src/main/java/org/apache/impala/catalog/TopicUpdateLog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/TopicUpdateLog.java
@@ -18,6 +18,7 @@
 package org.apache.impala.catalog;
 
 import java.util.Map;
+import java.util.Objects;
 import java.util.concurrent.ConcurrentHashMap;
 
 import org.slf4j.Logger;
@@ -103,6 +104,15 @@ public class TopicUpdateLog {
           && numSkippedUpdatesLockContention_ == entry
           .getNumSkippedUpdatesLockContention();
     }
+
+    @Override
+    public int hashCode() {
+      return Objects.hash(
+          numSkippedUpdates_,
+          lastSentVersion_,
+          lastSentTopicUpdate_,
+          numSkippedUpdatesLockContention_);
+    }
   }
 
   // Entries in the topic update log stored as a map of catalog object keys to
diff --git a/fe/src/main/java/org/apache/impala/catalog/Type.java 
b/fe/src/main/java/org/apache/impala/catalog/Type.java
index 40f4f53f5..640daad15 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Type.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Type.java
@@ -318,6 +318,17 @@ public abstract class Type {
    */
   public abstract void toThrift(TColumnType container);
 
+  /**
+   * Subclasses must provide consistent equals and hashCode implementations 
such that
+   * a.equals(b) implies a.hashCode() == b.hashCode().
+   */
+  public abstract boolean equals(Object other);
+
+  /**
+   * hashCode must be defined such that a.equals(b) implies a.hashCode() == 
b.hashCode().
+   */
+  public abstract int hashCode();
+
   /**
    * Returns true if this type is equal to t, or if t is a wildcard variant of 
this
    * type. Subclasses should override this as appropriate. The default 
implementation
diff --git a/fe/src/main/java/org/apache/impala/planner/DataPartition.java 
b/fe/src/main/java/org/apache/impala/planner/DataPartition.java
index 53a60634e..f266a9338 100644
--- a/fe/src/main/java/org/apache/impala/planner/DataPartition.java
+++ b/fe/src/main/java/org/apache/impala/planner/DataPartition.java
@@ -19,6 +19,7 @@ package org.apache.impala.planner;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Objects;
 
 import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.Expr;
@@ -104,6 +105,11 @@ public class DataPartition {
     return Expr.equalLists(partitionExprs_, other.partitionExprs_);
   }
 
+  @Override
+  public int hashCode() {
+    return Objects.hash(type_, partitionExprs_);
+  }
+
   public String debugString() {
     return MoreObjects.toStringHelper(this)
         .add("type_", type_)
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java 
b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
index fd9417ff2..a8a577202 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
@@ -3302,6 +3302,15 @@ public class AnalyzeExprsTest extends AnalyzerTest {
     RunCastFormatTestOnType("DATE");
   }
 
+  @Test
+  public void TestMatchingCasts() throws AnalysisException {
+    // IMPALA-12800: ensure identical cast-to-decimal expressions match 
hashCodes.
+    String clause =
+        "sum(CASE WHEN id IS NOT NULL THEN cast(0.4 as decimal(20,10)) ELSE 0 
END)";
+    AnalyzesOk("SELECT CASE WHEN (" + clause + ") > 0 " +
+      "THEN (" + clause + ") ELSE null END q FROM functional.alltypes");
+  }
+
   private void RunCastFormatTestOnType(String type) {
     String to_timestamp_cast = "cast('05-01-2017' as " + type + ")";
     AnalysisError(

Reply via email to