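- MLHR-1868 Improved the indirectHashcode function in GPOUtils to produce better-distributed hashes by combining per-field hashes multiplicatively instead of XOR-ing them.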
Project: http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/commit/dbdc9cad Tree: http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/tree/dbdc9cad Diff: http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/diff/dbdc9cad Branch: refs/heads/master Commit: dbdc9cadce46e5a8d5ec420e6c8695349fc0ada0 Parents: e1a4550 Author: Timothy Farkas <[email protected]> Authored: Mon Oct 12 10:06:12 2015 -0700 Committer: Timothy Farkas <[email protected]> Committed: Wed Oct 14 14:58:26 2015 -0700 ---------------------------------------------------------------------- .../datatorrent/lib/appdata/gpo/GPOUtils.java | 42 +++++++++++++++----- 1 file changed, 31 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/dbdc9cad/library/src/main/java/com/datatorrent/lib/appdata/gpo/GPOUtils.java ---------------------------------------------------------------------- diff --git a/library/src/main/java/com/datatorrent/lib/appdata/gpo/GPOUtils.java b/library/src/main/java/com/datatorrent/lib/appdata/gpo/GPOUtils.java index 290d814..239dab7 100644 --- a/library/src/main/java/com/datatorrent/lib/appdata/gpo/GPOUtils.java +++ b/library/src/main/java/com/datatorrent/lib/appdata/gpo/GPOUtils.java @@ -1748,10 +1748,25 @@ public class GPOUtils return hashCode; } + /** + * This function computes the hashcode of a {@link GPOMutable} based on a specified subset of its data. + * <br/> + * <br/> + * <b>Note:</b> In some cases a {@link GPOMutable} object contains a field which is bucketed. In the case of + * bucketed fields, you may want to preserve the original value of the field, but only use the bucketed value + * of the field for computing a hashcode. 
In order to do this you can store the original value of {@link GPOMutable}'s + * field before calling this function, and replace it with the bucketed value. Then after the hashcode is computed, the + * original value of the field can be restored. + * + * @param gpo The {@link GPOMutable} to compute a hashcode for. + * @param indexSubset The subset of the {@link GPOMutable} used to compute the hashcode. + * @return The hashcode for the given {@link GPOMutable} computed from the specified subset of its data. + */ public static int indirectHashcode(GPOMutable gpo, IndexSubset indexSubset) { - int hashCode = 0; + int hashCode = 7; + final int hashMultiplier = 23; { String[] stringArray = gpo.getFieldsString(); @@ -1763,7 +1778,7 @@ public class GPOUtils if(srcIndex[index] == -1) { continue; } - hashCode ^= stringArray[srcIndex[index]].hashCode(); + hashCode = hashMultiplier * hashCode + stringArray[srcIndex[index]].hashCode(); } } } @@ -1778,7 +1793,7 @@ public class GPOUtils if(srcIndex[index] == -1) { continue; } - hashCode ^= booleanArray[srcIndex[index]] ? 1: 0; + hashCode = hashMultiplier * hashCode + (booleanArray[srcIndex[index]] ? 
1: 0); } } } @@ -1793,7 +1808,7 @@ public class GPOUtils if(srcIndex[index] == -1) { continue; } - hashCode ^= Character.getNumericValue(charArray[srcIndex[index]]); + hashCode = hashMultiplier * hashCode + Character.getNumericValue(charArray[srcIndex[index]]); } } } @@ -1808,7 +1823,7 @@ public class GPOUtils if(srcIndex[index] == -1) { continue; } - hashCode ^= byteArray[srcIndex[index]]; + hashCode = hashMultiplier * hashCode + byteArray[srcIndex[index]]; } } } @@ -1823,7 +1838,7 @@ public class GPOUtils if(srcIndex[index] == -1) { continue; } - hashCode ^= shortArray[srcIndex[index]]; + hashCode = hashMultiplier * hashCode + shortArray[srcIndex[index]]; } } } @@ -1838,7 +1853,7 @@ public class GPOUtils if(srcIndex[index] == -1) { continue; } - hashCode ^= integerArray[srcIndex[index]]; + hashCode = hashMultiplier * hashCode + integerArray[srcIndex[index]]; } } } @@ -1853,7 +1868,9 @@ public class GPOUtils if(srcIndex[index] == -1) { continue; } - hashCode ^= longArray[srcIndex[index]]; + long element = longArray[srcIndex[index]]; + int elementHash = (int) (element ^ (element >>> 32)); + hashCode = hashMultiplier * hashCode + elementHash; } } } @@ -1868,7 +1885,7 @@ public class GPOUtils if(srcIndex[index] == -1) { continue; } - hashCode ^= Float.floatToIntBits(floatArray[srcIndex[index]]); + hashCode = hashMultiplier * hashCode + Float.floatToIntBits(floatArray[srcIndex[index]]); } } } @@ -1883,7 +1900,9 @@ public class GPOUtils if(srcIndex[index] == -1) { continue; } - hashCode ^= Double.doubleToLongBits(doubleArray[srcIndex[index]]); + long element = Double.doubleToLongBits(doubleArray[srcIndex[index]]); + int elementHash = (int) (element ^ (element >>> 32)); + hashCode = hashMultiplier * hashCode + elementHash; } } } @@ -1898,7 +1917,8 @@ public class GPOUtils if(srcIndex[index] == -1) { continue; } - hashCode ^= objectArray[srcIndex[index]].hashCode(); + + hashCode = hashMultiplier * hashCode + objectArray[srcIndex[index]].hashCode(); } } }
