[ 
https://issues.apache.org/jira/browse/BEAM-5092?focusedWorklogId=133943&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-133943
 ]

ASF GitHub Bot logged work on BEAM-5092:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 11/Aug/18 12:15
            Start Date: 11/Aug/18 12:15
    Worklog Time Spent: 10m 
      Work Description: reuvenlax closed pull request #6176: [[BEAM-5092] Row 
comparison should be faster when both are POJOs.
URL: https://github.com/apache/beam/pull/6176
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/JavaBeanSchema.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/JavaBeanSchema.java
index 0ea71a4695f..c9b29a406c6 100644
--- 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/JavaBeanSchema.java
+++ 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/JavaBeanSchema.java
@@ -31,8 +31,12 @@
  * <p>This provider finds (recursively) all public getters and setters in a 
Java object, and creates
  * schemas and rows that bind to those fields. The field order in the schema 
is not guaranteed to
  * match the method order in the class. The Java object is expected to have 
implemented a correct
- * .equals() method. TODO: Validate equals() method is provided, and if not 
generate a "slow" equals
- * method based on the schema.
+ * .equals() and .hashCode methods The equals method must be completely 
determined by the schema
+ * fields. i.e. if the object has hidden fields that are not reflected in the 
schema but are
+ * compared in equals, then results will be incorrect.
+ *
+ * <p>TODO: Validate equals() method is provided, and if not generate a "slow" 
equals method based
+ * on the schema.
  */
 @Experimental(Kind.SCHEMAS)
 public class JavaBeanSchema extends GetterBasedSchemaProvider {
diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/JavaFieldSchema.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/JavaFieldSchema.java
index 98bf86daecb..40016ede599 100644
--- 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/JavaFieldSchema.java
+++ 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/JavaFieldSchema.java
@@ -30,9 +30,13 @@
  *
  * <p>This provider finds all public fields (recursively) in a Java object, 
and creates schemas and
  * rows that bind to those fields. The field order in the schema is not 
guaranteed to match the
- * field order in the class. The Java object is expected to have implemented a 
correct .equals()
- * method. TODO: Validate equals() method is provided, and if not generate a 
"slow" equals method
- * based on the schema.
+ * field order in the class. The Java object is expected to have implemented a 
correct .equals() and
+ * .hashCode() methods. The equals method must be completely determined by the 
schema fields. i.e.
+ * if the object has hidden fields that are not reflected in the schema but 
are compared in equals,
+ * then results will be incorrect.
+ *
+ * <p>TODO: Validate equals() method is provided, and if not generate a "slow" 
equals method based
+ * on the schema.
  */
 @Experimental(Kind.SCHEMAS)
 public class JavaFieldSchema extends GetterBasedSchemaProvider {
diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaRegistry.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaRegistry.java
index ec26c421937..c074d936c60 100644
--- 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaRegistry.java
+++ 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaRegistry.java
@@ -163,7 +163,10 @@ public void registerSchemaProvider(SchemaProvider 
schemaProvider) {
    * Register a POJO type for automatic schema inference.
    *
    * <p>Currently schema field names will match field names in the POJO, and 
all fields must be
-   * mutable (i.e. no final fields).
+   * mutable (i.e. no final fields). The Java object is expected to have 
implemented a correct
+   * .equals() and .hashCode methods The equals method must be completely 
determined by the schema
+   * fields. i.e. if the object has hidden fields that are not reflected in 
the schema but are
+   * compared in equals, then results will be incorrect.
    */
   public <T> void registerPOJO(TypeDescriptor<T> typeDescriptor) {
     registerSchemaProvider(typeDescriptor, new JavaFieldSchema());
@@ -173,7 +176,10 @@ public void registerSchemaProvider(SchemaProvider 
schemaProvider) {
    * Register a JavaBean type for automatic schema inference.
    *
    * <p>Currently schema field names will match getter names in the bean, and 
all getters must have
-   * matching setters.
+   * matching setters. The Java object is expected to have implemented a 
correct .equals() and
+   * .hashCode methods The equals method must be completely determined by the 
schema fields. i.e. if
+   * the object has hidden fields that are not reflected in the schema but are 
compared in equals,
+   * then results will be incorrect.
    */
   public <T> void registerJavaBean(Class<T> clazz) {
     registerJavaBean(TypeDescriptor.of(clazz));
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/Row.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/Row.java
index 171f665a9ac..d745df74f8d 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/Row.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/Row.java
@@ -313,7 +313,7 @@ public boolean equals(Object o) {
     if (this == o) {
       return true;
     }
-    if (o == null || getClass() != o.getClass()) {
+    if (!(o instanceof Row)) {
       return false;
     }
     Row other = (Row) o;
diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/RowWithGetters.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/RowWithGetters.java
index 9990348ec06..faff7c72536 100644
--- 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/RowWithGetters.java
+++ 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/RowWithGetters.java
@@ -22,6 +22,7 @@
 import com.google.common.collect.Maps;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.stream.Collectors;
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.schemas.FieldValueGetter;
@@ -123,4 +124,27 @@ public int getFieldCount() {
   public Object getGetterTarget() {
     return getterTarget;
   }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null) {
+      return false;
+    }
+    if (o instanceof RowWithGetters) {
+      RowWithGetters other = (RowWithGetters) o;
+      return Objects.equals(getSchema(), other.getSchema())
+          && Objects.equals(getterTarget, other.getterTarget);
+    } else if ((o instanceof Row)) {
+      return super.equals(o);
+    }
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(getSchema(), getterTarget);
+  }
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 133943)
    Time Spent: 7h 40m  (was: 7.5h)

> Nexmark 10x performance regression
> ----------------------------------
>
>                 Key: BEAM-5092
>                 URL: https://issues.apache.org/jira/browse/BEAM-5092
>             Project: Beam
>          Issue Type: New Feature
>          Components: sdk-java-core
>            Reporter: Andrew Pilloud
>            Assignee: Reuven Lax
>            Priority: Critical
>          Time Spent: 7h 40m
>  Remaining Estimate: 0h
>
> There looks to be a 10x performance hit on the DirectRunner and Flink nexmark 
> jobs. It first showed up in this build:
> [https://builds.apache.org/view/A-D/view/Beam/job/beam_PostCommit_Java_Nexmark_Direct/151/changes]
> [https://apache-beam-testing.appspot.com/explore?dashboard=5084698770407424]
> [https://apache-beam-testing.appspot.com/explore?dashboard=5699257587728384]



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to