Author: rohini
Date: Thu Jan 12 00:09:29 2017
New Revision: 1778364
URL: http://svn.apache.org/viewvc?rev=1778364&view=rev
Log:
PIG-5088: HashValuePartitioner has skew when there is only map fields (rohini)
Modified:
pig/branches/branch-0.16/CHANGES.txt
pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java
Modified: pig/branches/branch-0.16/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/branches/branch-0.16/CHANGES.txt?rev=1778364&r1=1778363&r2=1778364&view=diff
==============================================================================
--- pig/branches/branch-0.16/CHANGES.txt (original)
+++ pig/branches/branch-0.16/CHANGES.txt Thu Jan 12 00:09:29 2017
@@ -32,6 +32,8 @@ OPTIMIZATIONS
BUG FIXES
+PIG-5088: HashValuePartitioner has skew when there is only map fields (rohini)
+
PIG-5043: Slowstart not applied in Tez with PARALLEL clause (rohini)
PIG-4930: Skewed Join Breaks On Empty Sampled Input When Key is From Map
(nkollar via rohini)
Modified:
pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java
URL:
http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java?rev=1778364&r1=1778363&r2=1778364&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java (original)
+++ pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java Thu Jan 12 00:09:29 2017
@@ -17,8 +17,6 @@
*/
package org.apache.pig.backend.hadoop.executionengine.tez.runtime;
-import java.util.Map;
-
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.pig.data.DataBag;
@@ -44,13 +42,10 @@ public class HashValuePartitioner extend
if (o != null) {
// Skip computing hashcode for bags.
// Order of elements in the map/bag may be different on each run
+ // Can't even include size as some DataBag implementations
+ // iterate through all elements in the bag to get the size.
if (o instanceof DataBag) {
hash = 31 * hash;
- } else if (o instanceof Map) {
- // Including size of map as it is easily available
- // Not doing for DataBag as some implementations actually
- // iterate through all elements in the bag to get the size.
- hash = 31 * hash + ((Map) o).size();
} else {
hash = 31 * hash + o.hashCode();
}