Author: rohini
Date: Thu Jan 12 00:09:29 2017
New Revision: 1778364

URL: http://svn.apache.org/viewvc?rev=1778364&view=rev
Log:
PIG-5088: HashValuePartitioner has skew when there is only map fields (rohini)

Modified:
    pig/branches/branch-0.16/CHANGES.txt
    pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java

Modified: pig/branches/branch-0.16/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/CHANGES.txt?rev=1778364&r1=1778363&r2=1778364&view=diff
==============================================================================
--- pig/branches/branch-0.16/CHANGES.txt (original)
+++ pig/branches/branch-0.16/CHANGES.txt Thu Jan 12 00:09:29 2017
@@ -32,6 +32,8 @@ OPTIMIZATIONS
 
 BUG FIXES
 
+PIG-5088: HashValuePartitioner has skew when there is only map fields (rohini)
+
 PIG-5043: Slowstart not applied in Tez with PARALLEL clause (rohini)
 
 PIG-4930: Skewed Join Breaks On Empty Sampled Input When Key is From Map (nkollar via rohini)

Modified: pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java?rev=1778364&r1=1778363&r2=1778364&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java (original)
+++ pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java Thu Jan 12 00:09:29 2017
@@ -17,8 +17,6 @@
  */
 package org.apache.pig.backend.hadoop.executionengine.tez.runtime;
 
-import java.util.Map;
-
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapreduce.Partitioner;
 import org.apache.pig.data.DataBag;
@@ -44,13 +42,10 @@ public class HashValuePartitioner extend
                 if (o != null) {
                     // Skip computing hashcode for bags.
                     // Order of elements in the map/bag may be different on each run
+                    // Can't even include size as some DataBag implementations
+                    // iterate through all elements in the bag to get the size.
                     if (o instanceof DataBag) {
                         hash = 31 * hash;
-                    } else if (o instanceof Map) {
-                        // Including size of map as it is easily available
-                        // Not doing for DataBag as some implementations actually
-                        // iterate through all elements in the bag to get the size.
-                        hash = 31 * hash + ((Map) o).size();
                     } else {
                         hash = 31 * hash + o.hashCode();
                     }


Reply via email to