Any thoughts of the effect of this on the Commons Collections Bloom filter
proposal?

Gary

On Fri, Jan 17, 2020 at 9:00 PM <aherb...@apache.org> wrote:

> This is an automated email from the ASF dual-hosted git repository.
>
> aherbert pushed a commit to branch master
> in repository https://gitbox.apache.org/repos/asf/commons-codec.git
>
>
> The following commit(s) were added to refs/heads/master by this push:
>      new f5a61f0  [CODEC-264]: Ensure hash128 maintains the sign extension
> bug.
> f5a61f0 is described below
>
> commit f5a61f0cd029f18666163f414f848ba0e1b39976
> Author: Alex Herbert <aherb...@apache.org>
> AuthorDate: Sat Jan 18 01:59:55 2020 +0000
>
>     [CODEC-264]: Ensure hash128 maintains the sign extension bug.
>
>     The hash128(...) method was calling the public hash128x86(...) method
>     with an int seed when it should have called the private hash128x86(...)
>     with a long seed. Thus it did not have the sign extension error. The
>     internal method has been renamed to avoid a name clash with the public
>     API. The old hash128() behaviour is now restored as the seed is passed
>     to the hash method with implicit long conversion.
> ---
>  src/changes/changes.xml                            |  4 +++
>  .../apache/commons/codec/digest/MurmurHash3.java   | 11 +++---
>  .../commons/codec/digest/MurmurHash3Test.java      | 39
> +++++++++++++++++++++-
>  3 files changed, 49 insertions(+), 5 deletions(-)
>
> diff --git a/src/changes/changes.xml b/src/changes/changes.xml
> index 7c95625..c907953 100644
> --- a/src/changes/changes.xml
> +++ b/src/changes/changes.xml
> @@ -42,6 +42,10 @@ The <action> type attribute can be
> add,update,fix,remove.
>    </properties>
>    <body>
>
> +    <release version="1.15" date="YYYY-MM-DD" description="Feature and
> fix release.">
> +      <action issue="CODEC-264" dev="aherbert" due-to="Andy Seaborne"
> type="fix">MurmurHash3: Ensure hash128 maintains the sign extension
> bug.</action>
> +    </release>
> +
>      <release version="1.14" date="2019-12-30" description="Feature and
> fix release.">
>        <action issue="CODEC-261" dev="aherbert" type="fix">Hex: Allow
> encoding read-only ByteBuffer.</action>
>        <action issue="CODEC-259" dev="aherbert" type="fix">Hex: Only use
> an available ByteBuffer backing array if the length equals the remaining
> byte count.</action>
> diff --git
> a/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
> b/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
> index d6f45c1..ba73ef6 100644
> --- a/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
> +++ b/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
> @@ -800,9 +800,12 @@ public final class MurmurHash3 {
>      @Deprecated
>      public static long[] hash128(final byte[] data, final int offset,
> final int length, final int seed) {
>          // ************
> -        // Note: This fails to apply masking using 0xffffffffL to the
> seed.
> +        // Note: This deliberately fails to apply masking using
> 0xffffffffL to the seed
> +        // to maintain behavioural compatibility with the original
> version.
> +        // The implicit conversion to a long will extend a negative sign
> +        // bit through the upper 32-bits of the long seed. These should
> be zero.
>          // ************
> -        return hash128x64(data, offset, length, seed);
> +        return hash128x64Internal(data, offset, length, seed);
>      }
>
>      /**
> @@ -820,7 +823,7 @@ public final class MurmurHash3 {
>       */
>      public static long[] hash128x64(final byte[] data, final int offset,
> final int length, final int seed) {
>          // Use an unsigned 32-bit integer as the seed
> -        return hash128x64(data, offset, length, seed & 0xffffffffL);
> +        return hash128x64Internal(data, offset, length, seed &
> 0xffffffffL);
>      }
>
>      /**
> @@ -835,7 +838,7 @@ public final class MurmurHash3 {
>       * @param seed The initial seed value
>       * @return The 128-bit hash (2 longs)
>       */
> -    private static long[] hash128x64(final byte[] data, final int offset,
> final int length, final long seed) {
> +    private static long[] hash128x64Internal(final byte[] data, final int
> offset, final int length, final long seed) {
>          long h1 = seed;
>          long h2 = seed;
>          final int nblocks = length >> 4;
> diff --git
> a/src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java
> b/src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java
> index 05f78f7..6f31cf7 100644
> --- a/src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java
> +++ b/src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java
> @@ -559,7 +559,7 @@ public class MurmurHash3Test {
>              {1237530251176898868L, 6144786892208594932L},
> {2347717913548230384L, -7461066668225718223L},
>              {-7963311463560798404L, 8435801462986138227L},
> {-7493166089060196513L, 8163503673197886404L},
>              {6807249306539951962L, -1438886581269648819L},
> {6752656991043418179L, 6334147827922066123L},
> -            {-4534351735605790331L, -4530801663887858236L},
> {-7886946241830957955L, -6261339648449285315L},        };
> +            {-4534351735605790331L, -4530801663887858236L},
> {-7886946241830957955L, -6261339648449285315L},};
>          for (int i = 0; i < answers.length; i++) {
>              final byte[] bytes = Arrays.copyOf(RANDOM_BYTES, i);
>              Assert.assertArrayEquals(answers[i],
> MurmurHash3.hash128(bytes));
> @@ -606,6 +606,43 @@ public class MurmurHash3Test {
>      }
>
>      /**
> +     * Test the {@link MurmurHash3#hash128(byte[], int, int, int)}
> algorithm.
> +     *
> +     * <p>Explicit test for a negative seed. The original implementation
> has a sign extension error
> +     * for negative seeds. This test is here to maintain behavioural
> compatibility of the
> +     * broken deprecated method.
> +     */
> +    @Test
> +    public void testHash128WithOffsetLengthAndNegativeSeed() {
> +        // Seed can be negative
> +        final int seed = -42;
> +        final int offset = 13;
> +
> +        // Test with all sizes up to 31 bytes. This ensures a full round
> of 16-bytes plus up to
> +        // 15 bytes remaining.
> +        final long[][] answers = {{5954234972212089025L,
> 3342108296337967352L},
> +                {8501094764898402923L, 7873951092908129427L},
> {-3334322685492296196L, -2842715922956549478L},
> +                {-2177918982459519644L, -1612349961980368636L},
> {4172870320608886992L, -4177375712254136503L},
> +                {7546965006884307324L, -5222114032564054641L},
> {-2885083166621537267L, -2069868899915344482L},
> +                {-2397098497873118851L, 4990578036999888806L},
> {-706479374719025018L, 7531201699171849870L},
> +                {6487943141157228609L, 3576221902299447884L},
> {6671331646806999453L, -3428049860825046360L},
> +                {-8700221138601237020L, -2748450904559980545L},
> {-9028762509863648063L, 6130259301750313402L},
> +                {729958512305702590L, -36367317333638988L},
> {-3803232241584178983L, -4257744207892929651L},
> +                {5734013720237474696L, -760784490666078990L},
> {-6097477411153026656L, 625288777006549065L},
> +                {1320365359996757504L, -2251971390373072541L},
> {5551441703887653022L, -3516892619809375941L},
> +                {698875391638415033L, 3456972931370496131L},
> {5874956830271303805L, -6074126509360777023L},
> +                {-7030758398537734781L, -3174643657101295554L},
> {6835789852786226556L, 7245353136839389595L},
> +                {-7755767580598793204L, -6680491060945077989L},
> {-3099789923710523185L, -2751080516924952518L},
> +                {-7772046549951435453L, 5263206145535830491L},
> {7458715941971015543L, 5470582752171544854L},
> +                {-7753394773760064468L, -2330157750295630617L},
> {-5899278942232791979L, 6235686401271389982L},
> +                {4881732293467626532L, 2617335658565007304L},
> {-5722863941703478257L, -5424475653939430258L},
> +                {-3703319768293496315L, -2124426428486426443L},};
> +        for (int i = 0; i < answers.length; i++) {
> +            Assert.assertArrayEquals("Length: " + i, answers[i],
> MurmurHash3.hash128(RANDOM_BYTES, offset, i, seed));
> +        }
> +    }
> +
> +    /**
>       * Test the {@link MurmurHash3#hash128(String)} algorithm. This only
> tests it can return
>       * the same value as {@link MurmurHash3#hash128(byte[], int, int,
> int)} if the string
>       * is converted to bytes using the method {@link String#getBytes()}.
>
>

Reply via email to