On Sun, Dec 29, 2019 at 5:46 PM Alex Herbert <alex.d.herb...@gmail.com>
wrote:

> OK.
>
> If you are going to make the behaviour change for the string methods then
> the javadoc should be updated. It currently states:
>
> "The string is converted to bytes using the default encoding.”
>
> So perhaps switch this to:
>
> “Before 1.14 the string was converted using default encoding. Since 1.14
> the string is converted to bytes using UTF-8 encoding.”
>
> The MurmurHash3 methods should still be deprecated as they call the
> hash32/hash128 functions that have sign bugs. I deliberately did not add a
> new method accepting a String for the fixed murmur3 algorithm. IMO it is
> unwarranted API bloat.
>
> MurmurHash2 is not deprecated. In that case the String methods are just
> API bloat but the javadoc should be updated.
>
> Alex
>

Thank you Alex, please review Javadoc changes in git master.

Also, if there is anything else that can be improved in code, Javadoc, or
test coverage, please let me know so I can delay creating the release
candidate, which will likely happen tonight (US EST) time permitting.

Gary


>
> > On 29 Dec 2019, at 21:33, ggreg...@apache.org wrote:
> >
> > This is an automated email from the ASF dual-hosted git repository.
> >
> > ggregory pushed a commit to branch master
> > in repository https://gitbox.apache.org/repos/asf/commons-codec.git
> >
> >
> > The following commit(s) were added to refs/heads/master by this push:
> >     new f40005a  [CODEC-276] Reliance on default encoding in MurmurHash2
> and MurmurHash3.
> > f40005a is described below
> >
> > commit f40005ad5a9c892fa8d216b12029a49062224351
> > Author: Gary Gregory <garydgreg...@gmail.com>
> > AuthorDate: Sun Dec 29 16:33:14 2019 -0500
> >
> >    [CODEC-276] Reliance on default encoding in MurmurHash2 and
> MurmurHash3.
> > ---
> > src/changes/changes.xml                            |  1 +
> > .../apache/commons/codec/digest/MurmurHash2.java   | 22
> ++++++++++++++++------
> > .../apache/commons/codec/digest/MurmurHash3.java   | 18
> ++++++++++++++----
> > .../commons/codec/digest/MurmurHash3Test.java      |  8 ++++----
> > 4 files changed, 35 insertions(+), 14 deletions(-)
> >
> > diff --git a/src/changes/changes.xml b/src/changes/changes.xml
> > index 55ba445..9e1c3c4 100644
> > --- a/src/changes/changes.xml
> > +++ b/src/changes/changes.xml
> > @@ -55,6 +55,7 @@ The <action> type attribute can be
> add,update,fix,remove.
> >       <action issue="CODEC-273" dev="ggregory" type="add" due-to="Gary
> Gregory">Add Path APIs to org.apache.commons.codec.digest.DigestUtils
> similar to File APIs.</action>
> >       <action issue="CODEC-274" dev="ggregory" type="add" due-to="Gary
> Gregory">Add SHA-512/224 and SHA-512/256 to DigestUtils for Java 9 and
> up.</action>
> >       <action issue="CODEC-275" dev="ggregory" type="add" due-to="Claude
> Warren">Add missing note in javadoc when sign extension error is present
> #34.</action>
> > +      <action issue="CODEC-276" dev="ggregory" type="add" due-to="Gary
> Gregory">Reliance on default encoding in MurmurHash2 and
> MurmurHash3.</action>
> >     </release>
> >
> >     <release version="1.13" date="2019-07-20" description="Feature and
> fix release.">
> > diff --git
> a/src/main/java/org/apache/commons/codec/digest/MurmurHash2.java
> b/src/main/java/org/apache/commons/codec/digest/MurmurHash2.java
> > index 5b0a712..4dfeca9 100644
> > --- a/src/main/java/org/apache/commons/codec/digest/MurmurHash2.java
> > +++ b/src/main/java/org/apache/commons/codec/digest/MurmurHash2.java
> > @@ -17,6 +17,9 @@
> >
> > package org.apache.commons.codec.digest;
> >
> > +import java.nio.charset.Charset;
> > +import java.nio.charset.StandardCharsets;
> > +
> > /**
> >  * Implementation of the MurmurHash2 32-bit and 64-bit hash functions.
> >  *
> > @@ -48,6 +51,13 @@ package org.apache.commons.codec.digest;
> >  */
> > public final class MurmurHash2 {
> >
> > +    /**
> > +     * Default Charset used to convert strings into bytes.
> > +     *
> > +     * Consider private; package private for tests only.
> > +     */
> > +    static final Charset GET_BYTES_CHARSET = StandardCharsets.UTF_8;
> > +
> >     // Constants for 32-bit variant
> >     private static final int M32 = 0x5bd1e995;
> >     private static final int R32 = 24;
> > @@ -132,7 +142,7 @@ public final class MurmurHash2 {
> >      *
> >      * <pre>
> >      * int seed = 0x9747b28c;
> > -     * byte[] bytes = data.getBytes();
> > +     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
> >      * int hash = MurmurHash2.hash32(bytes, bytes.length, seed);
> >      * </pre>
> >      *
> > @@ -141,7 +151,7 @@ public final class MurmurHash2 {
> >      * @see #hash32(byte[], int, int)
> >      */
> >     public static int hash32(final String text) {
> > -        final byte[] bytes = text.getBytes();
> > +        final byte[] bytes = text.getBytes(GET_BYTES_CHARSET);
> >         return hash32(bytes, bytes.length);
> >     }
> >
> > @@ -152,7 +162,7 @@ public final class MurmurHash2 {
> >      *
> >      * <pre>
> >      * int seed = 0x9747b28c;
> > -     * byte[] bytes = text.substring(from, from + length).getBytes();
> > +     * byte[] bytes = text.substring(from, from +
> length).getBytes(StandardCharsets.UTF_8);
> >      * int hash = MurmurHash2.hash32(bytes, bytes.length, seed);
> >      * </pre>
> >      *
> > @@ -243,7 +253,7 @@ public final class MurmurHash2 {
> >      *
> >      * <pre>
> >      * int seed = 0xe17a1465;
> > -     * byte[] bytes = data.getBytes();
> > +     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
> >      * int hash = MurmurHash2.hash64(bytes, bytes.length, seed);
> >      * </pre>
> >      *
> > @@ -252,7 +262,7 @@ public final class MurmurHash2 {
> >      * @see #hash64(byte[], int, int)
> >      */
> >     public static long hash64(final String text) {
> > -        final byte[] bytes = text.getBytes();
> > +        final byte[] bytes = text.getBytes(GET_BYTES_CHARSET);
> >         return hash64(bytes, bytes.length);
> >     }
> >
> > @@ -263,7 +273,7 @@ public final class MurmurHash2 {
> >      *
> >      * <pre>
> >      * int seed = 0xe17a1465;
> > -     * byte[] bytes = text.substring(from, from + length).getBytes();
> > +     * byte[] bytes = text.substring(from, from +
> length).getBytes(StandardCharsets.UTF_8);
> >      * int hash = MurmurHash2.hash64(bytes, bytes.length, seed);
> >      * </pre>
> >      *
> > diff --git
> a/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
> b/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
> > index cfeb3d1..4509a8f 100644
> > --- a/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
> > +++ b/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
> > @@ -17,6 +17,9 @@
> >
> > package org.apache.commons.codec.digest;
> >
> > +import java.nio.charset.Charset;
> > +import java.nio.charset.StandardCharsets;
> > +
> > /**
> >  * Implementation of the MurmurHash3 32-bit and 128-bit hash functions.
> >  *
> > @@ -56,6 +59,13 @@ package org.apache.commons.codec.digest;
> > public final class MurmurHash3 {
> >
> >     /**
> > +     * Default Charset used to convert strings into bytes.
> > +     *
> > +     * Consider private; package private for tests only.
> > +     */
> > +    static final Charset GET_BYTES_CHARSET = StandardCharsets.UTF_8;
> > +
> > +    /**
> >      * A random number to use for a hash code.
> >      *
> >      * @deprecated This is not used internally and will be removed in a
> future release.
> > @@ -233,7 +243,7 @@ public final class MurmurHash3 {
> >      * <pre>
> >      * int offset = 0;
> >      * int seed = 104729;
> > -     * byte[] bytes = data.getBytes();
> > +     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
> >      * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
> >      * </pre>
> >      *
> > @@ -249,7 +259,7 @@ public final class MurmurHash3 {
> >      */
> >     @Deprecated
> >     public static int hash32(final String data) {
> > -        final byte[] bytes = data.getBytes();
> > +        final byte[] bytes = data.getBytes(GET_BYTES_CHARSET);
> >         return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
> >     }
> >
> > @@ -751,7 +761,7 @@ public final class MurmurHash3 {
> >      * <pre>
> >      * int offset = 0;
> >      * int seed = 104729;
> > -     * byte[] bytes = data.getBytes();
> > +     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
> >      * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
> >      * </pre>
> >      *
> > @@ -766,7 +776,7 @@ public final class MurmurHash3 {
> >      */
> >     @Deprecated
> >     public static long[] hash128(final String data) {
> > -        final byte[] bytes = data.getBytes();
> > +        final byte[] bytes = data.getBytes(GET_BYTES_CHARSET);
> >         return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
> >     }
> >
> > diff --git
> a/src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java
> b/src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java
> > index 4703f9f..61df7f2 100644
> > --- a/src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java
> > +++ b/src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java
> > @@ -355,7 +355,7 @@ public class MurmurHash3Test {
> >                 pos += Character.toChars(codePoint, chars, pos);
> >             }
> >             final String text = String.copyValueOf(chars, 0, pos);
> > -            final byte[] bytes = text.getBytes();
> > +            final byte[] bytes =
> text.getBytes(MurmurHash3.GET_BYTES_CHARSET);
> >             final int h1 = MurmurHash3.hash32(bytes, 0, bytes.length,
> seed);
> >             final int h2 = MurmurHash3.hash32(text);
> >             Assert.assertEquals(h1, h2);
> > @@ -455,7 +455,7 @@ public class MurmurHash3Test {
> >      */
> >     @Test
> >     public void testHash64() {
> > -        final byte[] origin = TEST_HASH64.getBytes();
> > +        final byte[] origin =
> TEST_HASH64.getBytes(MurmurHash3.GET_BYTES_CHARSET);
> >         final long hash = MurmurHash3.hash64(origin);
> >         Assert.assertEquals(5785358552565094607L, hash);
> >     }
> > @@ -466,7 +466,7 @@ public class MurmurHash3Test {
> >      */
> >     @Test
> >     public void testHash64WithOffsetAndLength() {
> > -        final byte[] origin = TEST_HASH64.getBytes();
> > +        final byte[] origin =
> TEST_HASH64.getBytes(MurmurHash3.GET_BYTES_CHARSET);
> >         final byte[] originOffset = new byte[origin.length + 150];
> >         Arrays.fill(originOffset, (byte) 123);
> >         System.arraycopy(origin, 0, originOffset, 150, origin.length);
> > @@ -627,7 +627,7 @@ public class MurmurHash3Test {
> >                 pos += Character.toChars(codePoint, chars, pos);
> >             }
> >             final String text = String.copyValueOf(chars, 0, pos);
> > -            final byte[] bytes = text.getBytes();
> > +            final byte[] bytes =
> text.getBytes(MurmurHash3.GET_BYTES_CHARSET);
> >             final long[] h1 = MurmurHash3.hash128(bytes, 0,
> bytes.length, seed);
> >             final long[] h2 = MurmurHash3.hash128(text);
> >             Assert.assertArrayEquals(h1, h2);
> >
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org
> For additional commands, e-mail: dev-h...@commons.apache.org
>
>

Reply via email to