[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

2020-06-16 Thread GitBox


laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r441027147



##
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields 
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors ---
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+this.rel = rel;
+this.items = normalizeContents(items);
+this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+this.rel = rel;
+this.items = Collections.emptyList();
+this.digest = digest;
+this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel  The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+  RelNode rel,
+  List<Pair<String, Object>> contents,
+  boolean withType) {
+List<Object> hashCodeItems = new ArrayList<>();
+// The type name.
+hashCodeItems.add(rel.getRelTypeName());
+// The traits.
+hashCodeItems.addAll(rel.getTraitSet());
+// The hints.
+if (rel instanceof Hintable) {
+  hashCodeItems.addAll(((Hintable) rel).getHints());
+}
+if 

[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

2020-06-16 Thread GitBox


laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r441022602



##
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields 
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors ---
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+this.rel = rel;
+this.items = normalizeContents(items);
+this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+this.rel = rel;
+this.items = Collections.emptyList();
+this.digest = digest;
+this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel  The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+  RelNode rel,
+  List<Pair<String, Object>> contents,
+  boolean withType) {
+List<Object> hashCodeItems = new ArrayList<>();
+// The type name.
+hashCodeItems.add(rel.getRelTypeName());
+// The traits.
+hashCodeItems.addAll(rel.getTraitSet());
+// The hints.
+if (rel instanceof Hintable) {
+  hashCodeItems.addAll(((Hintable) rel).getHints());
+}
+if 

[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

2020-06-16 Thread GitBox


laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440999147



##
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields 
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors ---
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+this.rel = rel;
+this.items = normalizeContents(items);
+this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+this.rel = rel;
+this.items = Collections.emptyList();
+this.digest = digest;
+this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel  The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+  RelNode rel,
+  List<Pair<String, Object>> contents,
+  boolean withType) {
+List<Object> hashCodeItems = new ArrayList<>();
+// The type name.
+hashCodeItems.add(rel.getRelTypeName());
+// The traits.
+hashCodeItems.addAll(rel.getTraitSet());
+// The hints.
+if (rel instanceof Hintable) {
+  hashCodeItems.addAll(((Hintable) rel).getHints());
+}
+if 

[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

2020-06-16 Thread GitBox


laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440997075



##
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields 
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors ---
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+this.rel = rel;
+this.items = normalizeContents(items);
+this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+this.rel = rel;
+this.items = Collections.emptyList();
+this.digest = digest;
+this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel  The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+  RelNode rel,
+  List<Pair<String, Object>> contents,
+  boolean withType) {
+List<Object> hashCodeItems = new ArrayList<>();
+// The type name.
+hashCodeItems.add(rel.getRelTypeName());
+// The traits.
+hashCodeItems.addAll(rel.getTraitSet());
+// The hints.
+if (rel instanceof Hintable) {
+  hashCodeItems.addAll(((Hintable) rel).getHints());
+}
+if 

[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

2020-06-15 Thread GitBox


laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440573361



##
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields 
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors ---
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+this.rel = rel;
+this.items = normalizeContents(items);
+this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+this.rel = rel;
+this.items = Collections.emptyList();
+this.digest = digest;
+this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel  The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+  RelNode rel,
+  List<Pair<String, Object>> contents,
+  boolean withType) {
+List<Object> hashCodeItems = new ArrayList<>();
+// The type name.
+hashCodeItems.add(rel.getRelTypeName());
+// The traits.
+hashCodeItems.addAll(rel.getTraitSet());
+// The hints.
+if (rel instanceof Hintable) {
+  hashCodeItems.addAll(((Hintable) rel).getHints());
+}
+if 

[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

2020-06-15 Thread GitBox


laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440573361



##
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields 
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors ---
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+this.rel = rel;
+this.items = normalizeContents(items);
+this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+this.rel = rel;
+this.items = Collections.emptyList();
+this.digest = digest;
+this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel  The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+  RelNode rel,
+  List<Pair<String, Object>> contents,
+  boolean withType) {
+List<Object> hashCodeItems = new ArrayList<>();
+// The type name.
+hashCodeItems.add(rel.getRelTypeName());
+// The traits.
+hashCodeItems.addAll(rel.getTraitSet());
+// The hints.
+if (rel instanceof Hintable) {
+  hashCodeItems.addAll(((Hintable) rel).getHints());
+}
+if 

[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

2020-06-15 Thread GitBox


laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440571622



##
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so
+ * {@code RelNode}s that differ only in field names are treated as equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and
+ * {@code Project(input=rel#1,deptno=$0)} are equal.
+ *
+ * Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable {
+
+  //~ Instance fields 
+
+  final int hashCode;
+  final List> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors ---
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so 
on
+   */
+  private Digest(RelNode rel, List> items) {
+this.rel = rel;
+this.items = normalizeContents(items);
+this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+this.rel = rel;
+this.items = Collections.emptyList();
+this.digest = digest;
+this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest, which is used to speed up hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List> 
contents) {
+return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * Generally, the items used for hashCode and equals should be the same.
+   * The exception is the row type of the relational expression: the row type
+   * is needed because, during planning, new equivalent rels may be produced
+   * with changed field nullability (most of these come from rex
+   * simplification or constant reduction). This is expected to be a rare
+   * case, so the hash code is computed without the row type; when hash codes
+   * conflict, the comparison also involves the row type (sans field names).
+   *
+   * @param rel      The rel to compute the digest for
+   * @param contents The rel properties that should be considered in the digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+  RelNode rel,
+  List> contents,
+  boolean withType) {
+List hashCodeItems = new ArrayList<>();
+// The type name.
+hashCodeItems.add(rel.getRelTypeName());
+// The traits.
+hashCodeItems.addAll(rel.getTraitSet());
+// The hints.
+if (rel instanceof Hintable) {
+  hashCodeItems.addAll(((Hintable) rel).getHints());
+}
+if 

[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

2020-06-15 Thread GitBox


laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440571543



##
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so
+ * {@code RelNode}s that differ only in field names are treated as equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and
+ * {@code Project(input=rel#1,deptno=$0)} are equal.
+ *
+ * Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable {
+
+  //~ Instance fields 
+
+  final int hashCode;
+  final List> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors ---
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so 
on
+   */
+  private Digest(RelNode rel, List> items) {
+this.rel = rel;
+this.items = normalizeContents(items);
+this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+this.rel = rel;
+this.items = Collections.emptyList();
+this.digest = digest;
+this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest, which is used to speed up hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List> 
contents) {
+return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * Generally, the items used for hashCode and equals should be the same.
+   * The exception is the row type of the relational expression: the row type
+   * is needed because, during planning, new equivalent rels may be produced
+   * with changed field nullability (most of these come from rex
+   * simplification or constant reduction). This is expected to be a rare
+   * case, so the hash code is computed without the row type; when hash codes
+   * conflict, the comparison also involves the row type (sans field names).
+   *
+   * @param rel      The rel to compute the digest for
+   * @param contents The rel properties that should be considered in the digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+  RelNode rel,
+  List> contents,
+  boolean withType) {
+List hashCodeItems = new ArrayList<>();
+// The type name.
+hashCodeItems.add(rel.getRelTypeName());
+// The traits.
+hashCodeItems.addAll(rel.getTraitSet());
+// The hints.
+if (rel instanceof Hintable) {
+  hashCodeItems.addAll(((Hintable) rel).getHints());
+}
+if 

[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

2020-06-12 Thread GitBox


laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439694355



##
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so
+ * {@code RelNode}s that differ only in field names are treated as equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and
+ * {@code Project(input=rel#1,deptno=$0)} are equal.
+ *
+ * Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable {
+
+  //~ Instance fields 
+
+  final int hashCode;
+  final List> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors ---
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so 
on
+   */
+  private Digest(RelNode rel, List> items) {
+this.rel = rel;
+this.items = normalizeContents(items);
+this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+this.rel = rel;
+this.items = Collections.emptyList();
+this.digest = digest;
+this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest, which is used to speed up hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List> 
contents) {
+return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * Generally, the items used for hashCode and equals should be the same.
+   * The exception is the row type of the relational expression: the row type
+   * is needed because, during planning, new equivalent rels may be produced
+   * with changed field nullability (most of these come from rex
+   * simplification or constant reduction). This is expected to be a rare
+   * case, so the hash code is computed without the row type; when hash codes
+   * conflict, the comparison also involves the row type (sans field names).
+   *
+   * @param rel      The rel to compute the digest for
+   * @param contents The rel properties that should be considered in the digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+  RelNode rel,
+  List> contents,
+  boolean withType) {
+List hashCodeItems = new ArrayList<>();
+// The type name.
+hashCodeItems.add(rel.getRelTypeName());
+// The traits.
+hashCodeItems.addAll(rel.getTraitSet());
+// The hints.
+if (rel instanceof Hintable) {
+  hashCodeItems.addAll(((Hintable) rel).getHints());
+}
+if 

[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

2020-06-12 Thread GitBox


laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439693715



##
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so
+ * {@code RelNode}s that differ only in field names are treated as equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and
+ * {@code Project(input=rel#1,deptno=$0)} are equal.
+ *
+ * Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable {
+
+  //~ Instance fields 
+
+  final int hashCode;
+  final List> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors ---
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so 
on
+   */
+  private Digest(RelNode rel, List> items) {
+this.rel = rel;
+this.items = normalizeContents(items);
+this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+this.rel = rel;
+this.items = Collections.emptyList();
+this.digest = digest;
+this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest, which is used to speed up hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List> 
contents) {
+return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * Generally, the items used for hashCode and equals should be the same.
+   * The exception is the row type of the relational expression: the row type
+   * is needed because, during planning, new equivalent rels may be produced
+   * with changed field nullability (most of these come from rex
+   * simplification or constant reduction). This is expected to be a rare
+   * case, so the hash code is computed without the row type; when hash codes
+   * conflict, the comparison also involves the row type (sans field names).
+   *
+   * @param rel      The rel to compute the digest for
+   * @param contents The rel properties that should be considered in the digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+  RelNode rel,
+  List> contents,
+  boolean withType) {
+List hashCodeItems = new ArrayList<>();
+// The type name.
+hashCodeItems.add(rel.getRelTypeName());
+// The traits.
+hashCodeItems.addAll(rel.getTraitSet());
+// The hints.
+if (rel instanceof Hintable) {
+  hashCodeItems.addAll(((Hintable) rel).getHints());
+}
+if 

[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

2020-06-12 Thread GitBox


laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439693614



##
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so
+ * {@code RelNode}s that differ only in field names are treated as equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and
+ * {@code Project(input=rel#1,deptno=$0)} are equal.
+ *
+ * Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable {
+
+  //~ Instance fields 
+
+  final int hashCode;
+  final List> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors ---
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so 
on
+   */
+  private Digest(RelNode rel, List> items) {
+this.rel = rel;
+this.items = normalizeContents(items);
+this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+this.rel = rel;
+this.items = Collections.emptyList();
+this.digest = digest;
+this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest, which is used to speed up hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List> 
contents) {
+return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * Generally, the items used for hashCode and equals should be the same.
+   * The exception is the row type of the relational expression: the row type
+   * is needed because, during planning, new equivalent rels may be produced
+   * with changed field nullability (most of these come from rex
+   * simplification or constant reduction). This is expected to be a rare
+   * case, so the hash code is computed without the row type; when hash codes
+   * conflict, the comparison also involves the row type (sans field names).
+   *
+   * @param rel      The rel to compute the digest for
+   * @param contents The rel properties that should be considered in the digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+  RelNode rel,
+  List> contents,
+  boolean withType) {
+List hashCodeItems = new ArrayList<>();
+// The type name.
+hashCodeItems.add(rel.getRelTypeName());
+// The traits.
+hashCodeItems.addAll(rel.getTraitSet());
+// The hints.
+if (rel instanceof Hintable) {
+  hashCodeItems.addAll(((Hintable) rel).getHints());
+}
+if 

[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

2020-06-12 Thread GitBox


laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439507991



##
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so
+ * {@code RelNode}s that differ only in field names are treated as equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and
+ * {@code Project(input=rel#1,deptno=$0)} are equal.
+ *
+ * Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable {
+
+  //~ Instance fields 
+
+  final int hashCode;
+  final List> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors ---
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so 
on
+   */
+  private Digest(RelNode rel, List> items) {
+this.rel = rel;
+this.items = normalizeContents(items);
+this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+this.rel = rel;
+this.items = Collections.emptyList();
+this.digest = digest;
+this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest, which is used to speed up hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List> 
contents) {
+return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * Generally, the items used for hashCode and equals should be the same.
+   * The exception is the row type of the relational expression: the row type
+   * is needed because, during planning, new equivalent rels may be produced
+   * with changed field nullability (most of these come from rex
+   * simplification or constant reduction). This is expected to be a rare
+   * case, so the hash code is computed without the row type; when hash codes
+   * conflict, the comparison also involves the row type (sans field names).
+   *
+   * @param rel      The rel to compute the digest for
+   * @param contents The rel properties that should be considered in the digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+  RelNode rel,
+  List> contents,
+  boolean withType) {
+List hashCodeItems = new ArrayList<>();
+// The type name.
+hashCodeItems.add(rel.getRelTypeName());
+// The traits.
+hashCodeItems.addAll(rel.getTraitSet());
+// The hints.
+if (rel instanceof Hintable) {
+  hashCodeItems.addAll(((Hintable) rel).getHints());
+}
+if