Author: jbellis
Date: Fri Apr 17 20:17:05 2009
New Revision: 766138
URL: http://svn.apache.org/viewvc?rev=766138&view=rev
Log:
replace JenkinsHash w/ MurmurHash. its hash distribution is just as good, and
it's faster
Added:
incubator/cassandra/trunk/src/org/apache/cassandra/utils/MurmurHash.java
Removed:
incubator/cassandra/trunk/src/org/apache/cassandra/utils/JenkinsHash.java
Added: incubator/cassandra/trunk/src/org/apache/cassandra/utils/MurmurHash.java
URL:
http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/org/apache/cassandra/utils/MurmurHash.java?rev=766138&view=auto
==============================================================================
--- incubator/cassandra/trunk/src/org/apache/cassandra/utils/MurmurHash.java
(added)
+++ incubator/cassandra/trunk/src/org/apache/cassandra/utils/MurmurHash.java
Fri Apr 17 20:17:05 2009
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.utils;
+
+/**
+ * This is a very fast, non-cryptographic hash suitable for general hash-based
+ * lookup. See http://murmurhash.googlepages.com/ for more details.
+ *
+ * <p>The C version of MurmurHash 2.0 found at that site was ported
+ * to Java by Andrzej Bialecki (ab at getopt org).</p>
+ */
+public class MurmurHash {
+ public int hash(byte[] data, int length, int seed) {
+ int m = 0x5bd1e995;
+ int r = 24;
+
+ int h = seed ^ length;
+
+ int len_4 = length >> 2;
+
+ for (int i = 0; i < len_4; i++) {
+ int i_4 = i << 2;
+ int k = data[i_4 + 3];
+ k = k << 8;
+ k = k | (data[i_4 + 2] & 0xff);
+ k = k << 8;
+ k = k | (data[i_4 + 1] & 0xff);
+ k = k << 8;
+ k = k | (data[i_4 + 0] & 0xff);
+ k *= m;
+ k ^= k >>> r;
+ k *= m;
+ h *= m;
+ h ^= k;
+ }
+
+ // avoid calculating modulo
+ int len_m = len_4 << 2;
+ int left = length - len_m;
+
+ if (left != 0) {
+ if (left >= 3) {
+ h ^= (int) data[length - 3] << 16;
+ }
+ if (left >= 2) {
+ h ^= (int) data[length - 2] << 8;
+ }
+ if (left >= 1) {
+ h ^= (int) data[length - 1];
+ }
+
+ h *= m;
+ }
+
+ h ^= h >>> 13;
+ h *= m;
+ h ^= h >>> 15;
+
+ return h;
+ }
+}