Added:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Prim.java
==============================================================================
---
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Prim.java
(added)
+++
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Prim.java
Tue May 21 21:11:49 2024
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.memory.internal;
+
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_BOOLEAN_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_BOOLEAN_INDEX_SCALE;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_BYTE_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_BYTE_INDEX_SCALE;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_CHAR_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_CHAR_INDEX_SCALE;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_DOUBLE_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_DOUBLE_INDEX_SCALE;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_FLOAT_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_FLOAT_INDEX_SCALE;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_INT_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_INT_INDEX_SCALE;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_LONG_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_LONG_INDEX_SCALE;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_OBJECT_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_OBJECT_INDEX_SCALE;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_SHORT_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_SHORT_INDEX_SCALE;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.BOOLEAN_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.BYTE_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.CHAR_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.DOUBLE_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.FLOAT_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.INT_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.LONG_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.OBJECT_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.SHORT_SHIFT;
+
+/**
+ * Pairs each array element type with the three layout constants that {@link UnsafeUtil}
+ * defines for it: the array base offset, the array index scale and the size shift.
+ *
+ * @author Lee Rhodes
+ */
+public enum Prim {
+  BOOLEAN(ARRAY_BOOLEAN_BASE_OFFSET, ARRAY_BOOLEAN_INDEX_SCALE, BOOLEAN_SHIFT),
+  BYTE(ARRAY_BYTE_BASE_OFFSET, ARRAY_BYTE_INDEX_SCALE, BYTE_SHIFT),
+  CHAR(ARRAY_CHAR_BASE_OFFSET, ARRAY_CHAR_INDEX_SCALE, CHAR_SHIFT),
+  SHORT(ARRAY_SHORT_BASE_OFFSET, ARRAY_SHORT_INDEX_SCALE, SHORT_SHIFT),
+  INT(ARRAY_INT_BASE_OFFSET, ARRAY_INT_INDEX_SCALE, INT_SHIFT),
+  LONG(ARRAY_LONG_BASE_OFFSET, ARRAY_LONG_INDEX_SCALE, LONG_SHIFT),
+  FLOAT(ARRAY_FLOAT_BASE_OFFSET, ARRAY_FLOAT_INDEX_SCALE, FLOAT_SHIFT),
+  DOUBLE(ARRAY_DOUBLE_BASE_OFFSET, ARRAY_DOUBLE_INDEX_SCALE, DOUBLE_SHIFT),
+  OBJECT(ARRAY_OBJECT_BASE_OFFSET, ARRAY_OBJECT_INDEX_SCALE, OBJECT_SHIFT);
+
+  /** The {@code ARRAY_*_BASE_OFFSET} value given to this constant. */
+  private final long baseOffset;
+
+  /** The {@code ARRAY_*_INDEX_SCALE} value given to this constant. */
+  private final long indexScale;
+
+  /** The {@code *_SHIFT} value given to this constant. */
+  private final long sizeShift;
+
+  // Enum constructors are implicitly private.
+  Prim(final long baseOffset, final long indexScale, final long sizeShift) {
+    this.baseOffset = baseOffset;
+    this.indexScale = indexScale;
+    this.sizeShift = sizeShift;
+  }
+
+  /**
+   * Returns the array base offset associated with this type.
+   * @return the array base offset associated with this type.
+   */
+  public long off() {
+    return baseOffset;
+  }
+
+  /**
+   * Returns the array index scale associated with this type.
+   * @return the array index scale associated with this type.
+   */
+  public long scale() {
+    return indexScale;
+  }
+
+  /**
+   * Returns the size shift associated with this type.
+   * @return the size shift associated with this type.
+   */
+  public long shift() {
+    return sizeShift;
+  }
+
+}
Propchange:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Prim.java
------------------------------------------------------------------------------
svn:executable = *
Added:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/StepBoolean.java
==============================================================================
---
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/StepBoolean.java
(added)
+++
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/StepBoolean.java
Tue May 21 21:11:49 2024
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.memory.internal;
+
+import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
+
+/**
+ * A boolean that starts in a caller-chosen state and can flip to the opposite state
+ * exactly once. The flip is done with an atomic compare-and-set on a volatile field.
+ *
+ * @author Lee Rhodes
+ */
+public final class StepBoolean {
+  private static final int FALSE = 0;
+  private static final int TRUE = 1;
+
+  // Updater bound by name to the volatile "state" field below.
+  private static final AtomicIntegerFieldUpdater<StepBoolean> STATE_FIELD_UPDATER =
+      AtomicIntegerFieldUpdater.newUpdater(StepBoolean.class, "state");
+
+  private final int initialState;
+  private volatile int state;
+
+  /**
+   * Creates a step boolean starting in the given state.
+   * @param initialState the given initial state
+   */
+  public StepBoolean(final boolean initialState) {
+    if (initialState) {
+      this.initialState = TRUE;
+    } else {
+      this.initialState = FALSE;
+    }
+    state = this.initialState;
+  }
+
+  /**
+   * Gets the current state.
+   * @return the current state.
+   */
+  public boolean get() {
+    final int current = state;
+    return current == TRUE;
+  }
+
+  /**
+   * Flips the state to the opposite of the initial state, if that has not already happened.
+   * @return true if this call performed the flip; false if the state had already been changed
+   */
+  public boolean change() {
+    final int flipped;
+    if (initialState == TRUE) {
+      flipped = FALSE;
+    } else {
+      flipped = TRUE;
+    }
+    // Succeeds only while the state still equals the initial state.
+    return STATE_FIELD_UPDATER.compareAndSet(this, initialState, flipped);
+  }
+
+  /**
+   * Reports whether the state differs from the initial state.
+   * @return true if the state has changed from the initial state
+   */
+  public boolean hasChanged() {
+    final int current = state;
+    return current != initialState;
+  }
+}
Propchange:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/StepBoolean.java
------------------------------------------------------------------------------
svn:executable = *
Added:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/UnsafeUtil.java
==============================================================================
---
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/UnsafeUtil.java
(added)
+++
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/UnsafeUtil.java
Tue May 21 21:11:49 2024
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.memory.internal;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+
+import sun.misc.Unsafe;
+
+/**
+ * Provides access to the sun.misc.Unsafe class and its key static fields.
+ *
+ * @author Lee Rhodes
+ */
+@SuppressWarnings({"restriction"})
+public final class UnsafeUtil {
+  /** The Unsafe instance, created reflectively in the static initializer. */
+  public static final Unsafe unsafe;
+  public static final String JDK; //must be at least "1.8"
+  public static final int JDK_MAJOR; //8, 9, 10, 11, 12, etc
+
+  //not an indicator of whether compressed references are used.
+  public static final int ADDRESS_SIZE;
+
+  //For 64-bit JVMs: these offsets vary depending on coop: 16 for JVM <= 32GB; 24 for JVM > 32GB.
+  // Making this constant long-typed, rather than int, to exclude possibility of accidental
+  // overflow in expressions like arrayLength * ARRAY_BYTE_BASE_OFFSET, where arrayLength is
+  // int-typed. The same consideration for constants below: ARRAY_*_INDEX_SCALE, *_SHIFT.
+  public static final long ARRAY_BOOLEAN_BASE_OFFSET;
+  public static final long ARRAY_BYTE_BASE_OFFSET;
+  public static final long ARRAY_SHORT_BASE_OFFSET;
+  public static final long ARRAY_CHAR_BASE_OFFSET;
+  public static final long ARRAY_INT_BASE_OFFSET;
+  public static final long ARRAY_LONG_BASE_OFFSET;
+  public static final long ARRAY_FLOAT_BASE_OFFSET;
+  public static final long ARRAY_DOUBLE_BASE_OFFSET;
+  public static final long ARRAY_OBJECT_BASE_OFFSET;
+
+  //@formatter:off
+
+  // Setting those values directly instead of using unsafe.arrayIndexScale(), because it may be
+  // beneficial for runtime execution, those values are baked into generated machine code as
+  // constants. E. g. see https://shipilev.net/jvm-anatomy-park/14-constant-variables/
+  public static final int ARRAY_BOOLEAN_INDEX_SCALE = 1;
+  public static final int ARRAY_BYTE_INDEX_SCALE    = 1;
+  public static final long ARRAY_SHORT_INDEX_SCALE  = 2;
+  public static final long ARRAY_CHAR_INDEX_SCALE   = 2;
+  public static final long ARRAY_INT_INDEX_SCALE    = 4;
+  public static final long ARRAY_LONG_INDEX_SCALE   = 8;
+  public static final long ARRAY_FLOAT_INDEX_SCALE  = 4;
+  public static final long ARRAY_DOUBLE_INDEX_SCALE = 8;
+  public static final long ARRAY_OBJECT_INDEX_SCALE; // varies, 4 or 8 depending on coop
+
+  //Used to convert "type" to bytes: bytes = longs << LONG_SHIFT
+  public static final int BOOLEAN_SHIFT  = 0;
+  public static final int BYTE_SHIFT     = 0;
+  public static final long SHORT_SHIFT   = 1;
+  public static final long CHAR_SHIFT    = 1;
+  public static final long INT_SHIFT     = 2;
+  public static final long LONG_SHIFT    = 3;
+  public static final long FLOAT_SHIFT   = 2;
+  public static final long DOUBLE_SHIFT  = 3;
+  public static final long OBJECT_SHIFT; // varies, 2 or 3 depending on coop
+
+  public static final String LS = System.getProperty("line.separator");
+
+  //@formatter:on
+
+  static {
+    try {
+      final Constructor<Unsafe> unsafeConstructor = Unsafe.class.getDeclaredConstructor();
+      unsafeConstructor.setAccessible(true);
+      unsafe = unsafeConstructor.newInstance();
+
+      // Alternative, but may not work across different JVMs.
+      //      Field field = Unsafe.class.getDeclaredField("theUnsafe");
+      //      field.setAccessible(true);
+      //      unsafe = (Unsafe) field.get(null);
+
+    } catch (final InstantiationException | IllegalAccessException | IllegalArgumentException
+        | InvocationTargetException | NoSuchMethodException e) {
+      // Chain the reflective failure as the cause so its stack trace travels with the
+      // exception instead of being printed to stderr and then lost.
+      throw new RuntimeException("Unable to acquire Unsafe. " + e, e);
+    }
+
+    //4 on 32-bit systems. 4 on 64-bit systems < 32GB, otherwise 8.
+    //This alone is not an indicator of compressed ref (coop)
+    ADDRESS_SIZE = unsafe.addressSize();
+
+    ARRAY_BOOLEAN_BASE_OFFSET = unsafe.arrayBaseOffset(boolean[].class);
+    ARRAY_BYTE_BASE_OFFSET = unsafe.arrayBaseOffset(byte[].class);
+    ARRAY_SHORT_BASE_OFFSET = unsafe.arrayBaseOffset(short[].class);
+    ARRAY_CHAR_BASE_OFFSET = unsafe.arrayBaseOffset(char[].class);
+    ARRAY_INT_BASE_OFFSET = unsafe.arrayBaseOffset(int[].class);
+    ARRAY_LONG_BASE_OFFSET = unsafe.arrayBaseOffset(long[].class);
+    ARRAY_FLOAT_BASE_OFFSET = unsafe.arrayBaseOffset(float[].class);
+    ARRAY_DOUBLE_BASE_OFFSET = unsafe.arrayBaseOffset(double[].class);
+    ARRAY_OBJECT_BASE_OFFSET = unsafe.arrayBaseOffset(Object[].class);
+
+    ARRAY_OBJECT_INDEX_SCALE = unsafe.arrayIndexScale(Object[].class);
+    OBJECT_SHIFT = ARRAY_OBJECT_INDEX_SCALE == 4 ? 2 : 3;
+
+    final String jdkVer = System.getProperty("java.version");
+    final int[] p = parseJavaVersion(jdkVer);
+    JDK = p[0] + "." + p[1];
+    JDK_MAJOR = (p[0] == 1) ? p[1] : p[0]; //"1.8" style reports the major in the 2nd group
+  }
+
+  private UnsafeUtil() {}
+
+  /**
+   * Returns first two number groups of the java version string.
+   * @param jdkVer the java version string from System.getProperty("java.version").
+   * @return first two number groups of the java version string. The second group is 0 if
+   * the string has only one number group.
+   * @throws IllegalArgumentException if the leading number groups cannot be parsed.
+   */
+  public static int[] parseJavaVersion(final String jdkVer) {
+    final int p0, p1;
+    try {
+      String[] parts = jdkVer.trim().split("[^0-9\\.]");//grab only number groups and "."
+      parts = parts[0].split("\\."); //split out the number groups
+      p0 = Integer.parseInt(parts[0]); //the first number group
+      p1 = (parts.length > 1) ? Integer.parseInt(parts[1]) : 0; //2nd number group, or 0
+    } catch (final NumberFormatException | ArrayIndexOutOfBoundsException e) {
+      //keep the original failure attached as the cause
+      throw new IllegalArgumentException("Improper Java -version string: " + jdkVer + "\n" + e, e);
+    }
+    //checkJavaVersion(jdkVer, p0, p1); //TODO Optional to omit this.
+    return new int[] {p0, p1};
+  }
+
+  /**
+   * Checks that the given version groups denote a supported JDK: 1.8, or 8 through 13.
+   * @param jdkVer the original java version string, used only in the exception message.
+   * @param p0 the first number group of the version string
+   * @param p1 the second number group of the version string
+   * @throws IllegalArgumentException if the version is outside the supported range.
+   */
+  public static void checkJavaVersion(final String jdkVer, final int p0, final int p1) {
+    if ( (p0 < 1) || ((p0 == 1) && (p1 < 8)) || (p0 > 13) ) {
+      throw new IllegalArgumentException(
+          "Unsupported JDK Major Version, must be one of 1.8, 8, 9, 10, 11, 12, 13: " + jdkVer);
+    }
+  }
+
+  /**
+   * Returns the Unsafe object-field offset of the named field declared by the given class.
+   * @param c the class that declares the field
+   * @param fieldName the name of the declared field
+   * @return the value of {@link Unsafe#objectFieldOffset} for that field
+   * @throws IllegalStateException if the class does not declare a field with that name.
+   */
+  public static long getFieldOffset(final Class<?> c, final String fieldName) {
+    try {
+      return unsafe.objectFieldOffset(c.getDeclaredField(fieldName));
+    } catch (final NoSuchFieldException e) {
+      throw new IllegalStateException(e);
+    }
+  }
+
+  /**
+   * Like {@link Unsafe#arrayBaseOffset(Class)}, but caches return values for common array types.
+   * Useful because calling {@link Unsafe#arrayBaseOffset(Class)} directly incurs more overhead.
+   * @param c The given Class&lt;?&gt;.
+   * @return the base-offset
+   */
+  public static long getArrayBaseOffset(final Class<?> c) {
+    // Ordering here is roughly in order of what we expect to be most popular.
+    if (c == byte[].class) {
+      return ARRAY_BYTE_BASE_OFFSET;
+    } else if (c == int[].class) {
+      return ARRAY_INT_BASE_OFFSET;
+    } else if (c == long[].class) {
+      return ARRAY_LONG_BASE_OFFSET;
+    } else if (c == float[].class) {
+      return ARRAY_FLOAT_BASE_OFFSET;
+    } else if (c == double[].class) {
+      return ARRAY_DOUBLE_BASE_OFFSET;
+    } else if (c == boolean[].class) {
+      return ARRAY_BOOLEAN_BASE_OFFSET;
+    } else if (c == short[].class) {
+      return ARRAY_SHORT_BASE_OFFSET;
+    } else if (c == char[].class) {
+      return ARRAY_CHAR_BASE_OFFSET;
+    } else if (c == Object[].class) {
+      return ARRAY_OBJECT_BASE_OFFSET;
+    } else {
+      return unsafe.arrayBaseOffset(c);
+    }
+  }
+
+  /**
+   * Assert the requested offset and length against the allocated size.
+   * The invariants are: {@code 0 <= reqOff}, {@code 0 <= reqLen},
+   * {@code 0 <= reqOff + reqLen} (no overflow) and {@code reqOff + reqLen <= allocSize}.
+   * If any of these is violated and assertions are enabled, an {@link AssertionError} will
+   * be thrown.
+   * @param reqOff the requested offset
+   * @param reqLen the requested length
+   * @param allocSize the allocated size.
+   */
+  public static void assertBounds(final long reqOff, final long reqLen, final long allocSize) {
+    //OR-ing the four values leaves the sign bit set if any one of them is negative
+    assert ((reqOff | reqLen | (reqOff + reqLen) | (allocSize - (reqOff + reqLen))) >= 0) :
+      "reqOffset: " + reqOff + ", reqLength: " + reqLen
+      + ", (reqOff + reqLen): " + (reqOff + reqLen) + ", allocSize: " + allocSize;
+  }
+
+  /**
+   * Check the requested offset and length against the allocated size.
+   * The invariants are: {@code 0 <= reqOff}, {@code 0 <= reqLen},
+   * {@code 0 <= reqOff + reqLen} (no overflow) and {@code reqOff + reqLen <= allocSize}.
+   * If any of these is violated an {@link IllegalArgumentException} will be thrown.
+   * @param reqOff the requested offset
+   * @param reqLen the requested length
+   * @param allocSize the allocated size.
+   */
+  public static void checkBounds(final long reqOff, final long reqLen, final long allocSize) {
+    //OR-ing the four values leaves the sign bit set if any one of them is negative
+    if ((reqOff | reqLen | (reqOff + reqLen) | (allocSize - (reqOff + reqLen))) < 0) {
+      throw new IllegalArgumentException(
+          "reqOffset: " + reqOff + ", reqLength: " + reqLen
+              + ", (reqOff + reqLen): " + (reqOff + reqLen) + ", allocSize: " + allocSize);
+    }
+  }
+}
Propchange:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/UnsafeUtil.java
------------------------------------------------------------------------------
svn:executable = *
Added:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Utf8.java
==============================================================================
---
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Utf8.java
(added)
+++
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Utf8.java
Tue May 21 21:11:49 2024
@@ -0,0 +1,632 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.memory.internal;
+
+import static java.lang.Character.isSurrogate;
+import static java.lang.Character.isSurrogatePair;
+import static java.lang.Character.toCodePoint;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.unsafe;
+
+import java.io.IOException;
+import java.nio.BufferOverflowException;
+import java.nio.CharBuffer;
+
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.Utf8CodingException;
+import org.apache.datasketches.memory.WritableMemory;
+
+/**
+ * Encoding and decoding implementations of {@link
WritableMemory#putCharsToUtf8} and
+ * {@link Memory#getCharsFromUtf8}.
+ *
+ * <p>This is specifically designed to reduce the production of intermediate
objects (garbage),
+ * thus significantly reducing pressure on the JVM Garbage Collector.
+ *
+ * <p>UTF-8 encoding/decoding is adapted from
+ *
https://github.com/protocolbuffers/protobuf/blob/master/java/core/src/main/java/com/google/protobuf/Utf8.java
+ *
+ * <p>Copyright 2008 Google Inc. All rights reserved.
+ * https://developers.google.com/protocol-buffers/
+ * See LICENSE.
+ *
+ * @author Lee Rhodes
+ * @author Roman Leventov
+ */
+@SuppressWarnings("restriction")
+final class Utf8 {
+
+ private Utf8() { } // private: this class is not meant to be instantiated
+
+  //Decode
+  /**
+   * Decodes UTF-8 bytes into the given Appendable. A CharBuffer that is backed by an
+   * accessible array is handed to a dedicated array-based path; every other destination is
+   * appended to one char at a time.
+   *
+   * @param offsetBytes offset of the first UTF-8 byte, added to cumBaseOffset
+   * @param utf8LengthBytes number of UTF-8 bytes to decode
+   * @param dst the destination of the decoded chars
+   * @param cumBaseOffset base offset that is added to offsetBytes to form the Unsafe address
+   * @param unsafeObj the object passed to Unsafe as the base of the access
+   * @return for the Appendable path, the number of chars appended; for the array-backed
+   * CharBuffer path, whatever that path returns
+   * @throws IOException if dst.append throws it
+   * @throws Utf8CodingException declared by this method; not thrown directly here
+   */
+  static final int getCharsFromUtf8(final long offsetBytes, final int utf8LengthBytes,
+      final Appendable dst, final long cumBaseOffset, final Object unsafeObj)
+          throws IOException, Utf8CodingException {
+
+    if (dst instanceof CharBuffer) {
+      final CharBuffer heapCandidate = (CharBuffer) dst;
+      if (heapCandidate.hasArray()) {
+        return getCharBufferCharsFromUtf8(offsetBytes, heapCandidate, utf8LengthBytes,
+            cumBaseOffset, unsafeObj);
+      }
+    }
+
+    //Decode Direct CharBuffers and all other Appendables
+
+    final long startAddress = cumBaseOffset + offsetBytes;
+    final long endAddress = startAddress + utf8LengthBytes;
+
+    // Leading one-byte run: a small int-indexed loop that stops at the first byte for
+    // which DecodeUtil.isOneByte is false.
+    int asciiRun = 0;
+    for (; asciiRun < utf8LengthBytes; asciiRun++) {
+      final byte cur = unsafe.getByte(unsafeObj, startAddress + asciiRun);
+      if (!DecodeUtil.isOneByte(cur)) {
+        break;
+      }
+      dst.append((char) cur);
+    }
+
+    if (asciiRun != utf8LengthBytes) {
+      // The general decoder handles the rest; add the chars already appended above.
+      final int decodedRest = getNonAsciiCharsFromUtf8(dst, startAddress + asciiRun, endAddress,
+          unsafeObj, cumBaseOffset);
+      return decodedRest + asciiRun;
+    }
+    return asciiRun;
+  }
+
+  /**
+   * Decodes UTF-8 bytes directly into the backing array of a heap CharBuffer.
+   *
+   * <p>Optimize for heap CharBuffer manually, because Hotspot JIT doesn't itself unfold this
+   * abstraction well (doesn't hoist array bound checks, etc.)
+   *
+   * @param offsetBytes offset of the first UTF-8 byte, added to cumBaseOffset
+   * @param cbuf the destination buffer; its position is advanced past the chars written
+   * @param utf8LengthBytes number of UTF-8 bytes to decode
+   * @param cumBaseOffset base offset that is added to offsetBytes to form the Unsafe address
+   * @param unsafeObj the object passed to Unsafe as the base of the access
+   * @return the number of chars written into the buffer
+   * @throws BufferOverflowException if the buffer limit is reached before all bytes are decoded
+   */
+  private static int getCharBufferCharsFromUtf8(final long offsetBytes, final CharBuffer cbuf,
+      final int utf8LengthBytes, final long cumBaseOffset, final Object unsafeObj) {
+    final char[] carr = cbuf.array();
+    final int startCpos = cbuf.position() + cbuf.arrayOffset();
+    int cpos = startCpos;
+    final int clim = cbuf.arrayOffset() + cbuf.limit();
+    final long address = cumBaseOffset + offsetBytes;
+    int i = 0; //byte index
+
+    // Optimize for 100% ASCII (Hotspot loves small simple top-level loops like this).
+    // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
+    final int cbufNoCheckLimit = Math.min(utf8LengthBytes, clim - cpos);
+    // Need to keep this loop int-indexed, because it's faster for Hotspot JIT, it doesn't insert
+    // safepoint polls on each iteration.
+    for (; i < cbufNoCheckLimit; i++) {
+      final byte b = unsafe.getByte(unsafeObj, address + i);
+      if (!DecodeUtil.isOneByte(b)) {
+        break;
+      }
+      // Not checking CharBuffer bounds! cbufNoCheckLimit already caps i at the free space.
+      carr[cpos++] = (char) b;
+    }
+
+    // Continue the one-byte run past the no-check limit, now checking the buffer limit.
+    for (; i < utf8LengthBytes; i++) {
+      final byte b = unsafe.getByte(unsafeObj, address + i);
+      if (!DecodeUtil.isOneByte(b)) {
+        break;
+      }
+      checkCharBufferPos(cbuf, cpos, clim);
+      carr[cpos++] = (char) b;
+    }
+    if (i == utf8LengthBytes) {
+      cbuf.position(cpos - cbuf.arrayOffset());
+      return cpos - startCpos;
+    }
+
+    // The callee returns the final absolute index into carr. Subtract the starting index so
+    // this path reports a char count, exactly like the all-ASCII return above. (Subtracting
+    // only arrayOffset would give the buffer position, which is wrong whenever the buffer
+    // did not start at position 0.)
+    return getCharBufferNonAsciiCharsFromUtf8(cbuf, carr, cpos, clim, address + i,
+        address + utf8LengthBytes, unsafeObj, cumBaseOffset) - startCpos;
+  }
+
+  /**
+   * Decodes UTF-8 bytes from {@code address} up to {@code addressLimit} into the backing
+   * array of a CharBuffer, handling one- to four-byte sequences. Before every exit, normal or
+   * exceptional, the buffer position is set to match the current array index.
+   *
+   * @param cbuf the destination buffer
+   * @param carr the backing array of cbuf
+   * @param cpos index into carr of the next char to write
+   * @param clim index into carr that must not be written
+   * @param address Unsafe address of the next byte to read
+   * @param addressLimit Unsafe address one past the last byte to read
+   * @param unsafeObj the object passed to Unsafe as the base of the access
+   * @param cumBaseOffset subtracted from addresses when reporting offsets in exceptions
+   * @return the final index into carr (not a char count)
+   */
+  private static int getCharBufferNonAsciiCharsFromUtf8(final CharBuffer cbuf, final char[] carr,
+      int cpos, final int clim, long address, final long addressLimit, final Object unsafeObj,
+      final long cumBaseOffset) {
+
+    while (address < addressLimit) {
+      final byte lead = unsafe.getByte(unsafeObj, address++);
+
+      if (DecodeUtil.isOneByte(lead)) {
+        checkCharBufferPos(cbuf, cpos, clim);
+        carr[cpos++] = (char) lead;
+        // One-byte chars tend to come in runs, so consume the rest of the run right here.
+        while (address < addressLimit) {
+          final byte next = unsafe.getByte(unsafeObj, address);
+          if (!DecodeUtil.isOneByte(next)) {
+            break;
+          }
+          address++;
+          checkCharBufferPos(cbuf, cpos, clim);
+          carr[cpos++] = (char) next;
+        }
+        continue;
+      }
+
+      // Total length of the multi-byte sequence introduced by the lead byte.
+      final int seqLen;
+      if (DecodeUtil.isTwoBytes(lead)) {
+        seqLen = 2;
+      } else if (DecodeUtil.isThreeBytes(lead)) {
+        seqLen = 3;
+      } else {
+        seqLen = 4;
+      }
+
+      // The lead byte is already consumed; (seqLen - 1) more bytes must lie below the limit.
+      if (address >= (addressLimit - (seqLen - 2))) {
+        cbuf.position(cpos - cbuf.arrayOffset());
+        final long off = address - cumBaseOffset;
+        final long limit = addressLimit - cumBaseOffset;
+        throw Utf8CodingException.shortUtf8DecodeByteSequence(lead, off, limit, seqLen);
+      }
+
+      if (seqLen == 4) {
+        // This branch advances cpos by two, so two free slots are required.
+        if (cpos >= (clim - 1)) {
+          cbuf.position(cpos - cbuf.arrayOffset());
+          throw new BufferOverflowException();
+        }
+        DecodeUtil.handleFourBytesCharBuffer(
+            lead,
+            /* byte2 */ unsafe.getByte(unsafeObj, address++),
+            /* byte3 */ unsafe.getByte(unsafeObj, address++),
+            /* byte4 */ unsafe.getByte(unsafeObj, address++),
+            cbuf, carr, cpos);
+        cpos += 2;
+        continue;
+      }
+
+      checkCharBufferPos(cbuf, cpos, clim);
+      if (seqLen == 2) {
+        DecodeUtil.handleTwoBytesCharBuffer(
+            lead,
+            /* byte2 */ unsafe.getByte(unsafeObj, address++),
+            cbuf, carr, cpos);
+      } else {
+        DecodeUtil.handleThreeBytesCharBuffer(
+            lead,
+            /* byte2 */ unsafe.getByte(unsafeObj, address++),
+            /* byte3 */ unsafe.getByte(unsafeObj, address++),
+            cbuf, carr, cpos);
+      }
+      cpos++;
+    }
+    cbuf.position(cpos - cbuf.arrayOffset());
+    return cpos;
+  }
+
+  /**
+   * Decodes UTF-8 bytes from {@code address} up to {@code addressLimit} into an Appendable,
+   * handling one- to four-byte sequences.
+   *
+   * @param dst the destination of the decoded chars
+   * @param address Unsafe address of the next byte to read
+   * @param addressLimit Unsafe address one past the last byte to read
+   * @param unsafeObj the object passed to Unsafe as the base of the access
+   * @param cumBaseOffset subtracted from addresses when reporting offsets in exceptions
+   * @return the number of chars counted as decoded: one per one-, two- or three-byte
+   * sequence and two per four-byte sequence
+   * @throws IOException if dst.append throws it
+   */
+  private static int getNonAsciiCharsFromUtf8(final Appendable dst, long address,
+      final long addressLimit, final Object unsafeObj, final long cumBaseOffset)
+          throws IOException {
+    int decoded = 0;
+    while (address < addressLimit) {
+      final byte lead = unsafe.getByte(unsafeObj, address++);
+
+      if (DecodeUtil.isOneByte(lead)) {
+        dst.append((char) lead);
+        decoded++;
+        // One-byte chars tend to come in runs, so consume the rest of the run right here.
+        while (address < addressLimit) {
+          final byte next = unsafe.getByte(unsafeObj, address);
+          if (!DecodeUtil.isOneByte(next)) {
+            break;
+          }
+          address++;
+          dst.append((char) next);
+          decoded++;
+        }
+        continue;
+      }
+
+      // Total length of the multi-byte sequence introduced by the lead byte.
+      final int seqLen;
+      if (DecodeUtil.isTwoBytes(lead)) {
+        seqLen = 2;
+      } else if (DecodeUtil.isThreeBytes(lead)) {
+        seqLen = 3;
+      } else {
+        seqLen = 4;
+      }
+
+      // The lead byte is already consumed; (seqLen - 1) more bytes must lie below the limit.
+      if (address >= (addressLimit - (seqLen - 2))) {
+        final long off = address - cumBaseOffset;
+        final long limit = addressLimit - cumBaseOffset;
+        throw Utf8CodingException.shortUtf8DecodeByteSequence(lead, off, limit, seqLen);
+      }
+
+      switch (seqLen) {
+        case 2:
+          DecodeUtil.handleTwoBytes(
+              lead,
+              /* byte2 */ unsafe.getByte(unsafeObj, address++),
+              dst);
+          decoded++;
+          break;
+        case 3:
+          DecodeUtil.handleThreeBytes(
+              lead,
+              /* byte2 */ unsafe.getByte(unsafeObj, address++),
+              /* byte3 */ unsafe.getByte(unsafeObj, address++),
+              dst);
+          decoded++;
+          break;
+        default:
+          DecodeUtil.handleFourBytes(
+              lead,
+              /* byte2 */ unsafe.getByte(unsafeObj, address++),
+              /* byte3 */ unsafe.getByte(unsafeObj, address++),
+              /* byte4 */ unsafe.getByte(unsafeObj, address++),
+              dst);
+          decoded += 2;
+          break;
+      }
+    }
+    return decoded;
+  }
+
+  /**
+   * Throws if the array index has reached the array limit. Before throwing, the buffer
+   * position is set to match the given array index.
+   *
+   * @param cbuf the buffer whose position is updated before throwing
+   * @param cpos the current index into the buffer's backing array
+   * @param clim the limit index into the buffer's backing array
+   * @throws BufferOverflowException if cpos equals clim
+   */
+  private static void checkCharBufferPos(final CharBuffer cbuf, final int cpos, final int clim) {
+    if (cpos != clim) {
+      return;
+    }
+    final int bufferPos = cpos - cbuf.arrayOffset();
+    cbuf.position(bufferPos);
+    throw new BufferOverflowException();
+  }
+
+  /******************/
+  //Encode
+  /**
+   * Encodes the given chars as UTF-8 bytes written through Unsafe.
+   *
+   * @param offsetBytes offset of the first byte to write, added to cumBaseOffset
+   * @param src the source chars
+   * @param capacityBytes added to cumBaseOffset to form the exclusive limit for writes
+   * @param cumBaseOffset base offset that is added to offsetBytes to form the Unsafe address
+   * @param unsafeObj the object passed to Unsafe as the base of the access
+   * @return the number of bytes written
+   * @throws Utf8CodingException if space runs out or if src contains an unpaired or
+   * illegally paired surrogate
+   */
+  static long putCharsToUtf8(final long offsetBytes, final CharSequence src,
+      final long capacityBytes, final long cumBaseOffset, final Object unsafeObj) {
+
+    int cIdx = 0; //src character index
+    long bIdx = cumBaseOffset + offsetBytes; //byte index
+    long bCnt = 0; //bytes inserted
+
+    final long byteLimit = cumBaseOffset + capacityBytes; //unsafe index limit
+
+    final int utf16Length = src.length();
+    //Quickly dispatch an ASCII sequence
+    for (char c;
+        (cIdx < utf16Length) && ((cIdx + bIdx) < byteLimit) && ((c = src.charAt(cIdx)) < 0x80);
+        cIdx++, bCnt++) {
+      unsafe.putByte(unsafeObj, bIdx + cIdx, (byte) c);
+    }
+    //either all chars are done, or we hit a non-ascii character or the byte limit
+    if (cIdx == utf16Length) { //done.
+      // next relative byte index in memory is (bIdx + utf16Length) - cumBaseOffset.
+      return bCnt;
+    }
+    bIdx += cIdx; //bytes == characters for ascii
+
+    for (char c; cIdx < utf16Length; cIdx++) { //process the remaining characters
+      c = src.charAt(cIdx);
+
+      if ((c < 0x80) && (bIdx < byteLimit)) {
+        //Encode ASCII, 0 through 0x007F.
+        unsafe.putByte(unsafeObj, bIdx++, (byte) c);
+        bCnt++;
+      }
+
+      else
+        //c MUST BE >= 0x0080 || bIdx >= byteLimit
+
+        if ((c < 0x800) && (bIdx < (byteLimit - 1))) {
+        //Encode 0x80 through 0x7FF.
+        //This is for almost all Latin-script alphabets plus Greek, Cyrillic, Hebrew, Arabic, etc.
+        //We must have target space for at least 2 Utf8 bytes.
+        //The (byte) cast drops the high bits of (0xF << 6), leaving the 0xC0 lead-byte prefix.
+        unsafe.putByte(unsafeObj, bIdx++, (byte) ((0xF << 6) | (c >>> 6)));
+        unsafe.putByte(unsafeObj, bIdx++, (byte) (0x80 | (0x3F & c)));
+        bCnt += 2;
+      }
+
+      else
+        //c >= 0x800 || bIdx >= byteLimit - 1
+
+        if ( !isSurrogate(c) && (bIdx < (byteLimit - 2)) ) {
+        //Encode the remainder of the BMP that are not surrogates:
+        //  0x0800 thru 0xD7FF; 0xE000 thru 0xFFFF, the max single-char code point
+        //We must have target space for at least 3 Utf8 bytes.
+        //The (byte) cast drops the high bit of (0xF << 5), leaving the 0xE0 lead-byte prefix.
+        unsafe.putByte(unsafeObj, bIdx++, (byte) ((0xF << 5) | (c >>> 12)));
+        unsafe.putByte(unsafeObj, bIdx++, (byte) (0x80 | (0x3F & (c >>> 6))));
+        unsafe.putByte(unsafeObj, bIdx++, (byte) (0x80 | (0x3F & c)));
+        bCnt += 3;
+      }
+
+      else {
+        //c is a surrogate || bIdx >= byteLimit - 2
+
+        //At this point we are either:
+        // 1) Attempting to encode Code Points outside the BMP.
+        //
+        //    The only way to properly encode code points outside the BMP into Utf8 bytes is to
+        //    use High/Low pairs of surrogate characters. Therefore, we must have at least 2
+        //    source characters remaining, at least 4 bytes of memory space remaining, and the
+        //    next 2 characters must be a valid surrogate pair.
+        //
+        // 2) There is insufficient MemoryImpl space to encode the current character from one
+        //    of the ifs above.
+        //
+        // We proceed assuming (1). If the following test fails, we move to an exception.
+
+        final char low;
+        if ( (cIdx <= (utf16Length - 2))
+            && (bIdx <= (byteLimit - 4))
+            && isSurrogatePair(c, low = src.charAt(cIdx + 1)) ) { //we are good
+          cIdx++; //skip over low surrogate
+          final int codePoint = toCodePoint(c, low);
+          unsafe.putByte(unsafeObj, bIdx++, (byte) ((0xF << 4) | (codePoint >>> 18)));
+          unsafe.putByte(unsafeObj, bIdx++, (byte) (0x80 | (0x3F & (codePoint >>> 12))));
+          unsafe.putByte(unsafeObj, bIdx++, (byte) (0x80 | (0x3F & (codePoint >>> 6))));
+          unsafe.putByte(unsafeObj, bIdx++, (byte) (0x80 | (0x3F & codePoint)));
+          bCnt += 4;
+        }
+
+        else {
+          //We are going to throw an exception. So we have time to figure out
+          // what was wrong and hopefully throw an intelligent message!
+
+          //check the BMP code point cases and their required memory limits
+          //The last range is inclusive so that U+FFFF, which is a non-surrogate 3-byte
+          // character, is reported as out-of-memory rather than as a surrogate problem.
+          if (   ((c < 0X0080) && (bIdx >= byteLimit))
+              || ((c < 0x0800) && (bIdx >= (byteLimit - 1)))
+              || ((c <= 0xFFFF) && (bIdx >= (byteLimit - 2))) ) {
+            throw Utf8CodingException.outOfMemory();
+          }
+
+          if (cIdx > (utf16Length - 2)) { //the last char is an unpaired surrogate
+            throw Utf8CodingException.unpairedSurrogate(c);
+          }
+
+          if (bIdx > (byteLimit - 4)) {
+            //4 MemoryImpl bytes required to encode a surrogate pair.
+            final int remaining = (int) ((bIdx - byteLimit) + 4L);
+            throw Utf8CodingException.shortUtf8EncodeByteLength(remaining);
+          }
+
+          if (!isSurrogatePair(c, src.charAt(cIdx + 1)) ) {
+            //Not a surrogate pair.
+            throw Utf8CodingException.illegalSurrogatePair(c, src.charAt(cIdx + 1));
+          }
+
+          //This should not happen :)
+          throw new IllegalArgumentException("Unknown Utf8 encoding exception");
+        }
+      }
+    }
+    //final long localOffsetBytes = bIdx - cumBaseOffset;
+    return bCnt;
+  }
+
+ /*****************/
+  /**
+   * Static helpers that turn already-fetched UTF-8 bytes into UTF-16 chars. Callers are
+   * responsible for extracting the bytes (possibly using Unsafe methods) and for checking that
+   * enough bytes remain. The byte-sequence validity checks and the code point conversions are
+   * performed in this class.
+   *
+   * @see <a href="https://en.wikipedia.org/wiki/UTF-8">Wikipedia: UTF-8</a>
+   */
+  private static class DecodeUtil {
+
+    /**
+     * Tests whether the given byte is a single-byte (ASCII) UTF-8 encoding.
+     *
+     * <p>Code Plane 0, Code Point range U+0000 to U+007F.
+     *
+     * <p>Bit Patterns:
+     * <ul><li>Byte 1: '0xxxxxxx'</li>
+     * </ul>
+     * @param b the byte being tested
+     * @return true if the high bit of b is clear, i.e., b as a signed byte is {@code >= 0}.
+     */
+    static boolean isOneByte(final byte b) {
+      return (b & 0x80) == 0;
+    }
+
+    /**
+     * Tests whether the given byte starts a two-byte UTF-8 encoding. The one-byte case must
+     * already have been excluded by the caller.
+     * Two-byte encodings cover almost all Latin-script alphabets plus Greek, Cyrillic, Hebrew,
+     * Arabic, etc.
+     *
+     * <p>Code Plane 0, Code Point range U+0080 to U+07FF.
+     *
+     * <p>Bit Patterns:
+     * <ul><li>Byte 1: '110xxxxx'</li>
+     * <li>Byte 2: '10xxxxxx'</li>
+     * </ul>
+     *
+     * @param b the byte being tested
+     * @return true if b, compared as a signed byte, is less than {@code (byte) 0xE0}.
+     */
+    static boolean isTwoBytes(final byte b) {
+      final byte threeByteLeadStart = (byte) 0xE0;
+      return b < threeByteLeadStart;
+    }
+
+    /**
+     * Tests whether the given byte starts a three-byte UTF-8 encoding. The one-byte and two-byte
+     * cases must already have been excluded by the caller.
+     * Three-byte encodings cover the rest of the BMP, which includes most common Chinese,
+     * Japanese and Korean characters.
+     *
+     * <p>Code Plane 0, Code Point range U+0800 to U+FFFF.
+     *
+     * <p>Bit Patterns:
+     * <ul><li>Byte 1: '1110xxxx'</li>
+     * <li>Byte 2: '10xxxxxx'</li>
+     * <li>Byte 3: '10xxxxxx'</li>
+     * </ul>
+     *
+     * @param b the byte being tested
+     * @return true if b, compared as a signed byte, is less than {@code (byte) 0xF0}.
+     */
+    static boolean isThreeBytes(final byte b) {
+      final byte fourByteLeadStart = (byte) 0xF0;
+      return b < fourByteLeadStart;
+    }
+
+    /*
+     * There is no isFourBytes() test: once the three-byte case has been excluded, a leading byte
+     * that is not less than 0xF0 is treated as the start of a four-byte UTF-8 encoding.
+     * Four-byte encodings cover the less common CJKV characters, historic scripts, math symbols,
+     * emoji, etc.
+     *
+     * Code Plane 1 through 16, Code Point range U+10000 to U+10FFFF.
+     *
+     * Bit Patterns:
+     *   Byte 1: '11110xxx'
+     *   Byte 2: '10xxxxxx'
+     *   Byte 3: '10xxxxxx'
+     *   Byte 4: '10xxxxxx'
+     */
+
+    /**
+     * Validates a two-byte UTF-8 sequence and appends the decoded char to dst.
+     *
+     * @param byte1 the leading byte
+     * @param byte2 the trailing byte
+     * @param dst receives the decoded char
+     * @throws IOException if thrown by {@code dst.append}
+     * @throws Utf8CodingException if the two bytes are not a legal two-byte sequence
+     */
+    static void handleTwoBytes(
+        final byte byte1, final byte byte2,
+        final Appendable dst)
+        throws IOException, Utf8CodingException {
+      if (isIllegalTwoBytes(byte1, byte2)) {
+        throw Utf8CodingException.illegalUtf8DecodeByteSequence(new byte[] {byte1, byte2});
+      }
+      dst.append(composeTwoBytes(byte1, byte2));
+    }
+
+    /**
+     * Validates a two-byte UTF-8 sequence and stores the decoded char at {@code ca[cp]}.
+     * On an illegal sequence the position of cb is set to {@code cp - cb.arrayOffset()} before
+     * the exception is thrown.
+     *
+     * @param byte1 the leading byte
+     * @param byte2 the trailing byte
+     * @param cb the CharBuffer whose position is updated on failure
+     * @param ca the destination char array (presumably the backing array of cb; confirm
+     *     against the caller)
+     * @param cp the index into ca at which the decoded char is written
+     * @throws Utf8CodingException if the two bytes are not a legal two-byte sequence
+     */
+    static void handleTwoBytesCharBuffer(
+        final byte byte1, final byte byte2,
+        final CharBuffer cb, final char[] ca, final int cp)
+        throws Utf8CodingException {
+      if (isIllegalTwoBytes(byte1, byte2)) {
+        cb.position(cp - cb.arrayOffset());
+        throw Utf8CodingException.illegalUtf8DecodeByteSequence(new byte[] {byte1, byte2});
+      }
+      ca[cp] = composeTwoBytes(byte1, byte2);
+    }
+
+    /**
+     * Validates a three-byte UTF-8 sequence and appends the decoded char to dst.
+     *
+     * @param byte1 the leading byte
+     * @param byte2 the first trailing byte
+     * @param byte3 the second trailing byte
+     * @param dst receives the decoded char
+     * @throws IOException if thrown by {@code dst.append}
+     * @throws Utf8CodingException if the three bytes are not a legal three-byte sequence
+     */
+    static void handleThreeBytes(
+        final byte byte1, final byte byte2, final byte byte3,
+        final Appendable dst)
+        throws IOException, Utf8CodingException {
+      if (isIllegalThreeBytes(byte1, byte2, byte3)) {
+        throw Utf8CodingException.illegalUtf8DecodeByteSequence(
+            new byte[] {byte1, byte2, byte3});
+      }
+      dst.append(composeThreeBytes(byte1, byte2, byte3));
+    }
+
+    /**
+     * Validates a three-byte UTF-8 sequence and stores the decoded char at {@code ca[cp]}.
+     * On an illegal sequence the position of cb is set to {@code cp - cb.arrayOffset()} before
+     * the exception is thrown.
+     *
+     * @param byte1 the leading byte
+     * @param byte2 the first trailing byte
+     * @param byte3 the second trailing byte
+     * @param cb the CharBuffer whose position is updated on failure
+     * @param ca the destination char array (presumably the backing array of cb; confirm
+     *     against the caller)
+     * @param cp the index into ca at which the decoded char is written
+     * @throws Utf8CodingException if the three bytes are not a legal three-byte sequence
+     */
+    static void handleThreeBytesCharBuffer(
+        final byte byte1, final byte byte2, final byte byte3,
+        final CharBuffer cb, final char[] ca, final int cp)
+        throws Utf8CodingException {
+      if (isIllegalThreeBytes(byte1, byte2, byte3)) {
+        cb.position(cp - cb.arrayOffset());
+        throw Utf8CodingException.illegalUtf8DecodeByteSequence(
+            new byte[] {byte1, byte2, byte3});
+      }
+      ca[cp] = composeThreeBytes(byte1, byte2, byte3);
+    }
+
+    /**
+     * Validates a four-byte UTF-8 sequence and appends the decoded code point to dst as a
+     * high surrogate followed by a low surrogate.
+     *
+     * @param byte1 the leading byte
+     * @param byte2 the first trailing byte
+     * @param byte3 the second trailing byte
+     * @param byte4 the third trailing byte
+     * @param dst receives the two surrogate chars
+     * @throws IOException if thrown by {@code dst.append}
+     * @throws Utf8CodingException if the four bytes are not a legal four-byte sequence
+     */
+    static void handleFourBytes(
+        final byte byte1, final byte byte2, final byte byte3, final byte byte4,
+        final Appendable dst)
+        throws IOException, Utf8CodingException {
+      if (isIllegalFourBytes(byte1, byte2, byte3, byte4)) {
+        throw Utf8CodingException.illegalUtf8DecodeByteSequence(
+            new byte[] { byte1, byte2, byte3, byte4 });
+      }
+      final int codepoint = composeFourBytes(byte1, byte2, byte3, byte4);
+      dst.append(highSurrogate(codepoint));
+      dst.append(lowSurrogate(codepoint));
+    }
+
+    /**
+     * Validates a four-byte UTF-8 sequence and stores the decoded code point as a high
+     * surrogate at {@code ca[cp]} and a low surrogate at {@code ca[cp + 1]}.
+     * On an illegal sequence the position of cb is set to {@code cp - cb.arrayOffset()} before
+     * the exception is thrown.
+     *
+     * @param byte1 the leading byte
+     * @param byte2 the first trailing byte
+     * @param byte3 the second trailing byte
+     * @param byte4 the third trailing byte
+     * @param cb the CharBuffer whose position is updated on failure
+     * @param ca the destination char array (presumably the backing array of cb; confirm
+     *     against the caller)
+     * @param cp the index into ca at which the high surrogate is written
+     * @throws Utf8CodingException if the four bytes are not a legal four-byte sequence
+     */
+    static void handleFourBytesCharBuffer(
+        final byte byte1, final byte byte2, final byte byte3, final byte byte4,
+        final CharBuffer cb, final char[] ca, final int cp)
+        throws Utf8CodingException {
+      if (isIllegalFourBytes(byte1, byte2, byte3, byte4)) {
+        cb.position(cp - cb.arrayOffset());
+        throw Utf8CodingException.illegalUtf8DecodeByteSequence(
+            new byte[] { byte1, byte2, byte3, byte4 });
+      }
+      final int codepoint = composeFourBytes(byte1, byte2, byte3, byte4);
+      ca[cp] = highSurrogate(codepoint);
+      ca[cp + 1] = lowSurrogate(codepoint);
+    }
+
+    /*
+     * Returns true if (lead, trail) is not a legal two-byte sequence.
+     * The lead test simultaneously rejects a trailing byte in the leading position and the
+     * overlong two-byte leads 0xC0 and 0xC1.
+     */
+    private static boolean isIllegalTwoBytes(final byte lead, final byte trail) {
+      return (lead < (byte) 0xC2) || isNotTrailingByte(trail);
+    }
+
+    /*
+     * Returns true if (lead, trail1, trail2) is not a legal three-byte sequence.
+     */
+    private static boolean isIllegalThreeBytes(
+        final byte lead, final byte trail1, final byte trail2) {
+      if (isNotTrailingByte(trail1) || isNotTrailingByte(trail2)) {
+        return true;
+      }
+      // overlong? 5 most significant bits must not all be zero
+      if ((lead == (byte) 0xE0) && (trail1 < (byte) 0xA0)) {
+        return true;
+      }
+      // illegal surrogate codepoints
+      return (lead == (byte) 0xED) && (trail1 >= (byte) 0xA0);
+    }
+
+    /*
+     * Returns true if (lead, trail1, trail2, trail3) is not a legal four-byte sequence.
+     */
+    private static boolean isIllegalFourBytes(
+        final byte lead, final byte trail1, final byte trail2, final byte trail3) {
+      if (isNotTrailingByte(trail1) || isNotTrailingByte(trail2) || isNotTrailingByte(trail3)) {
+        return true;
+      }
+      // Check that 1 <= plane <= 16. Tricky optimized form of:
+      //   valid 4-byte leading byte?
+      //     lead > (byte) 0xF4 ||
+      //   overlong? 4 most significant bits must not all be zero
+      //     lead == (byte) 0xF0 && trail1 < (byte) 0x90 ||
+      //   codepoint larger than the highest code point (U+10FFFF)?
+      //     lead == (byte) 0xF4 && trail1 > (byte) 0x8F
+      final int planeCheck = (lead << 28) + (trail1 - (byte) 0x90);
+      return (planeCheck >> 30) != 0;
+    }
+
+    /*
+     * Combines the 5 payload bits of the lead with the 6 payload bits of the trailing byte.
+     */
+    private static char composeTwoBytes(final byte lead, final byte trail) {
+      return (char) (((lead & 0x1F) << 6) | trailingByteValue(trail));
+    }
+
+    /*
+     * Combines the 4 payload bits of the lead with the payload bits of both trailing bytes.
+     */
+    private static char composeThreeBytes(
+        final byte lead, final byte trail1, final byte trail2) {
+      return (char)
+          (((lead & 0x0F) << 12) | (trailingByteValue(trail1) << 6) | trailingByteValue(trail2));
+    }
+
+    /*
+     * Combines the 3 payload bits of the lead with the payload bits of the three trailing
+     * bytes into a code point.
+     */
+    private static int composeFourBytes(
+        final byte lead, final byte trail1, final byte trail2, final byte trail3) {
+      return ((lead & 0x07) << 18)
+          | (trailingByteValue(trail1) << 12)
+          | (trailingByteValue(trail2) << 6)
+          | trailingByteValue(trail3);
+    }
+
+    /*
+     * Returns whether the byte is not a valid continuation of the form '10XXXXXX'.
+     * As signed bytes the continuation bytes 0x80..0xBF are exactly the values <= (byte) 0xBF.
+     */
+    private static boolean isNotTrailingByte(final byte b) {
+      return b > (byte) 0xBF;
+    }
+
+    /*
+     * Returns the actual value of the trailing byte (removes the prefix '10') for composition.
+     */
+    private static int trailingByteValue(final byte b) {
+      return b & 0x3F;
+    }
+
+    /*
+     * Returns the high (leading) surrogate char for the given supplementary code point.
+     */
+    private static char highSurrogate(final int codePoint) {
+      final int base =
+          Character.MIN_HIGH_SURROGATE - (Character.MIN_SUPPLEMENTARY_CODE_POINT >>> 10);
+      return (char) (base + (codePoint >>> 10));
+    }
+
+    /*
+     * Returns the low (trailing) surrogate char for the given supplementary code point.
+     */
+    private static char lowSurrogate(final int codePoint) {
+      return (char) (Character.MIN_LOW_SURROGATE + (codePoint & 0x3ff));
+    }
+  }
+
+}
Propchange:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Utf8.java
------------------------------------------------------------------------------
svn:executable = *
Added:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Util.java
==============================================================================
---
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Util.java
(added)
+++
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Util.java
Tue May 21 21:11:49 2024
@@ -0,0 +1,355 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.memory.internal;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.ByteOrder;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Objects;
+import java.util.Random;
+
+import org.apache.datasketches.memory.Memory;
+
+/**
+ * Miscellaneous static utilities used inside the Memory library: byte-order helpers, a binary
+ * search over longs held in a {@link Memory}, string padding, bit-mask tests, random code point
+ * generation for UTF-8 testing, simple argument checks and resource-file access.
+ *
+ * @author Lee Rhodes
+ */
+public final class Util {
+  /** The platform line separator, read from the "line.separator" system property. */
+  public static final String LS = System.getProperty("line.separator");
+
+  //Byte Order related
+
+  /** The byte order that is the opposite of {@link ByteOrder#nativeOrder()}. */
+  public static final ByteOrder NON_NATIVE_BYTE_ORDER =
+      ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN
+      ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN;
+
+  /**
+   * Returns the byte order that is not the given one.
+   * @param order the given byte order
+   * @return the non-native byte order if the given order is the native order, otherwise the
+   *     native byte order.
+   */
+  public static ByteOrder otherByteOrder(final ByteOrder order) {
+    return (order == ByteOrder.nativeOrder()) ? NON_NATIVE_BYTE_ORDER : ByteOrder.nativeOrder();
+  }
+
+  /**
+   * Don't use sun.misc.Unsafe#copyMemory to copy blocks of memory larger than this
+   * threshold, because internally it doesn't have safepoint polls, that may cause long
+   * "Time To Safe Point" pauses in the application. This has been fixed in JDK 9 (see
+   * https://bugs.openjdk.java.net/browse/JDK-8149596 and
+   * https://bugs.openjdk.java.net/browse/JDK-8141491), but not in JDK 8, so the Memory library
+   * should keep having this boilerplate as long as it supports Java 8.
+   *
+   * <p>A reference to this can be found in java.nio.Bits.</p>
+   */
+  public static final int UNSAFE_COPY_THRESHOLD_BYTES = 1024 * 1024;
+
+  private Util() { }
+
+  //Byte Order Related
+
+  /**
+   * Returns true if the given byteOrder is the same as the native byte order.
+   * @param byteOrder the given byte order
+   * @return true if the given byteOrder is the same as the native byte order.
+   * @throws IllegalArgumentException if byteOrder is null
+   */
+  public static boolean isNativeByteOrder(final ByteOrder byteOrder) {
+    if (byteOrder == null) {
+      throw new IllegalArgumentException("ByteOrder parameter cannot be null.");
+    }
+    return ByteOrder.nativeOrder() == byteOrder;
+  }
+
+  /**
+   * Searches a range of longs stored in the given Memory for the specified value using the
+   * binary search algorithm. The range must be sorted prior to making this call.
+   * If it is not sorted, the results are undefined. If the range contains
+   * multiple elements with the specified value, there is no guarantee which one will be found.
+   *
+   * <p>Indices are in units of longs; each index is converted to a byte offset by
+   * multiplying by 8 before the Memory is read or bounds-checked.</p>
+   *
+   * @param mem the Memory to be searched
+   * @param fromLongIndex the index of the first element (inclusive) to be searched
+   * @param toLongIndex the index of the last element (exclusive) to be searched
+   * @param key the value to be searched for
+   * @return index of the search key, if it is contained in the array within the specified range;
+   *     otherwise, (-(insertion point) - 1). The insertion point is defined as the point at
+   *     which the key would be inserted into the array: the index of the first element in the
+   *     range greater than the key, or toLongIndex if all elements in the range are less than
+   *     the specified key. Note that this guarantees that the return value will be &ge; 0 if
+   *     and only if the key is found.
+   */
+  public static long binarySearchLongs(final Memory mem, final long fromLongIndex,
+      final long toLongIndex, final long key) {
+    UnsafeUtil.checkBounds(fromLongIndex << 3, (toLongIndex - fromLongIndex) << 3,
+        mem.getCapacity());
+    long low = fromLongIndex;
+    long high = toLongIndex - 1L;
+
+    while (low <= high) {
+      final long mid = (low + high) >>> 1; //unsigned shift avoids overflow of (low + high)
+      final long midVal = mem.getLong(mid << 3);
+
+      if (midVal < key)      { low = mid + 1; }
+      else if (midVal > key) { high = mid - 1; }
+      else                   { return mid; } // key found
+    }
+    return -(low + 1); // key not found.
+  }
+
+  /**
+   * Prepend the given string with zeros. If the given string is equal or greater than the given
+   * field length, it will be returned without modification.
+   * @param s the given string
+   * @param fieldLength desired total field length including the given string
+   * @return the given string prepended with zeros.
+   */
+  public static final String zeroPad(final String s, final int fieldLength) {
+    return characterPad(s, fieldLength, '0', false);
+  }
+
+  /**
+   * Prepend or postpend the given string with the given character to fill the given field
+   * length. If the given string is equal or greater than the given field length, it will be
+   * returned without modification.
+   * @param s the given string
+   * @param fieldLength the desired field length
+   * @param padChar the desired pad character
+   * @param postpend if true append the pad characters to the end of the string.
+   * @return prepended or postpended given string with the given character to fill the given
+   *     field length.
+   */
+  public static final String characterPad(final String s, final int fieldLength,
+      final char padChar, final boolean postpend) {
+    final int sLen = s.length();
+    if (sLen >= fieldLength) {
+      return s;
+    }
+    final int padLen = fieldLength - sLen;
+    final char[] out = new char[fieldLength];
+    //the text goes first when postpending, otherwise it follows the padding
+    final int textStart = postpend ? 0 : padLen;
+    final int padStart = postpend ? sLen : 0;
+    s.getChars(0, sLen, out, textStart);
+    for (int i = padStart; i < (padStart + padLen); i++) {
+      out[i] = padChar;
+    }
+    return String.valueOf(out);
+  }
+
+  /**
+   * Return true if all the masked bits of value are zero
+   * @param value the value to be tested
+   * @param bitMask defines the bits of interest
+   * @return true if all the masked bits of value are zero
+   */
+  public static final boolean isAllBitsClear(final long value, final long bitMask) {
+    return (~value & bitMask) == bitMask;
+  }
+
+  /**
+   * Return true if all the masked bits of value are one
+   * @param value the value to be tested
+   * @param bitMask defines the bits of interest
+   * @return true if all the masked bits of value are one
+   */
+  public static final boolean isAllBitsSet(final long value, final long bitMask) {
+    return (value & bitMask) == bitMask;
+  }
+
+  /**
+   * Return true if any of the masked bits of value are zero
+   * @param value the value to be tested
+   * @param bitMask defines the bits of interest
+   * @return true if any of the masked bits of value are zero
+   */
+  public static final boolean isAnyBitsClear(final long value, final long bitMask) {
+    return (~value & bitMask) != 0;
+  }
+
+  /**
+   * Return true if any of the masked bits of value are one
+   * @param value the value to be tested
+   * @param bitMask defines the bits of interest
+   * @return true if any of the masked bits of value are one
+   */
+  public static final boolean isAnyBitsSet(final long value, final long bitMask) {
+    return (value & bitMask) != 0;
+  }
+
+  /**
+   * Creates random valid Character Code Points (as integers). By definition, valid CodePoints
+   * are integers in the range 0 to Character.MAX_CODE_POINT, and exclude the surrogate values.
+   * This is used in unit testing and characterization testing of the UTF8 class. Because the
+   * characterization tools are in a separate package, this must remain public.
+   *
+   * @author Lee Rhodes
+   */
+  public static class RandomCodePoints {
+    private final Random rand;
+    private static final int ALL_CP = Character.MAX_CODE_POINT + 1;
+    private static final int MIN_SUR = Character.MIN_SURROGATE;
+    private static final int MAX_SUR = Character.MAX_SURROGATE;
+
+    /**
+     * @param deterministic if true, configure java.util.Random with a fixed seed.
+     */
+    public RandomCodePoints(final boolean deterministic) {
+      rand = deterministic ? new Random(0) : new Random();
+    }
+
+    /**
+     * Fills the given array with random valid Code Points from 0, inclusive, to
+     * <i>Character.MAX_CODE_POINT</i>, inclusive.
+     * The surrogate range, which is from <i>Character.MIN_SURROGATE</i>, inclusive, to
+     * <i>Character.MAX_SURROGATE</i>, inclusive, is always <u>excluded</u>.
+     * @param cpArr the array to fill
+     */
+    public final void fillCodePointArray(final int[] cpArr) {
+      fillCodePointArray(cpArr, 0, ALL_CP);
+    }
+
+    /**
+     * Fills the given array with random valid Code Points from <i>startCP</i>, inclusive, to
+     * <i>endCP</i>, exclusive.
+     * The surrogate range, which is from <i>Character.MIN_SURROGATE</i>, inclusive, to
+     * <i>Character.MAX_SURROGATE</i>, inclusive, is always <u>excluded</u>.
+     * @param cpArr the array to fill
+     * @param startCP the starting Code Point, included. Negative values are treated as 0.
+     * @param endCP the ending Code Point, excluded. Values above 0x110000 are treated as
+     *     0x110000.
+     * @throws IllegalArgumentException if the array is not empty and the range is empty or
+     *     contains only surrogates
+     */
+    public final void fillCodePointArray(final int[] cpArr, final int startCP, final int endCP) {
+      //one getCodePoint call per slot draws from rand exactly as a local retry loop would
+      for (int idx = 0; idx < cpArr.length; idx++) {
+        cpArr[idx] = getCodePoint(startCP, endCP);
+      }
+    }
+
+    /**
+     * Return a single valid random Code Point from 0, inclusive, to
+     * <i>Character.MAX_CODE_POINT</i>, inclusive.
+     * The surrogate range, which is from <i>Character.MIN_SURROGATE</i>, inclusive, to
+     * <i>Character.MAX_SURROGATE</i>, inclusive, is always <u>excluded</u>.
+     * @return a single valid random CodePoint.
+     */
+    public final int getCodePoint() {
+      return getCodePoint(0, ALL_CP);
+    }
+
+    /**
+     * Return a single valid random Code Point from <i>startCP</i>, inclusive, to
+     * <i>endCP</i>, exclusive.
+     * The surrogate range, which is from <i>Character.MIN_SURROGATE</i>, inclusive, to
+     * <i>Character.MAX_SURROGATE</i>, inclusive, is always <u>excluded</u>.
+     * @param startCP the starting Code Point, included. Negative values are treated as 0.
+     * @param endCP the ending Code Point, excluded. Values above 0x110000 are treated as
+     *     0x110000.
+     * @return a single valid random CodePoint.
+     * @throws IllegalArgumentException if the range is empty or contains only surrogates
+     */
+    public final int getCodePoint(final int startCP, final int endCP) {
+      //Clamp both ends to the legal code point range. The width must be measured from the
+      // clamped start; measuring it from min(0, startCP) made the result overshoot endCP
+      // whenever startCP was positive.
+      final int first = Math.max(0, startCP);
+      final int limit = Math.min(endCP, ALL_CP);
+      if (first >= limit) {
+        throw new IllegalArgumentException(
+            "Empty code point range: startCP = " + startCP + ", endCP = " + endCP);
+      }
+      if ((first >= MIN_SUR) && (limit <= (MAX_SUR + 1))) {
+        //every candidate would be rejected below, so the retry loop would never terminate
+        throw new IllegalArgumentException(
+            "Code point range contains only surrogates: startCP = " + startCP
+            + ", endCP = " + endCP);
+      }
+      final int numCP = limit - first;
+      while (true) {
+        final int cp = first + rand.nextInt(numCP);
+        if ((cp < MIN_SUR) || (cp > MAX_SUR)) {
+          return cp;
+        }
+      }
+    }
+  } //End class RandomCodePoints
+
+  /**
+   * Checks that the given value is strictly positive.
+   * @param value the value to check
+   * @param arg the name of the argument, used in the exception message
+   * @throws IllegalArgumentException if value is negative or zero
+   */
+  public static final void zeroCheck(final long value, final String arg) {
+    if (value <= 0) {
+      throw new IllegalArgumentException("The argument '" + arg + "' may not be negative or zero.");
+    }
+  }
+
+  /**
+   * Checks that the given value is not negative.
+   * @param value the value to check
+   * @param arg the name of the argument, used in the exception message
+   * @throws IllegalArgumentException if value is negative
+   */
+  public static final void negativeCheck(final long value, final String arg) {
+    if (value < 0) {
+      throw new IllegalArgumentException("The argument '" + arg + "' may not be negative.");
+    }
+  }
+
+  /**
+   * Checks that the given object is not null.
+   * @param obj the object to check
+   * @param arg the name of the argument, used in the exception message
+   * @throws IllegalArgumentException if obj is null
+   */
+  public static final void nullCheck(final Object obj, final String arg) {
+    if (obj == null) {
+      throw new IllegalArgumentException("The argument '" + arg + "' may not be null.");
+    }
+  }
+
+  //Resources NOTE: these 3 methods are duplicated in Java/ datasketches/Util
+
+  /**
+   * Gets the absolute path of the given resource file's shortName.
+   *
+   * <p>Note that the ClassLoader.getResource(shortName) returns a URL,
+   * which can have special characters, e.g., "%20" for spaces. This method
+   * obtains the URL, converts it to a URI, then does a uri.getPath(), which
+   * decodes any special characters in the URI path. This is required to make
+   * obtaining resources operating-system independent.</p>
+   *
+   * @param shortFileName the last name in the pathname's name sequence.
+   * @return the absolute path of the given resource file's shortName.
+   * @throws NullPointerException if shortFileName is null or the resource cannot be found
+   * @throws IllegalArgumentException if the resource URL cannot be converted to a URI
+   */
+  public static String getResourcePath(final String shortFileName) {
+    //name the parameter: concatenating the (null) value itself produced a confusing message
+    Objects.requireNonNull(shortFileName, "input parameter shortFileName cannot be null.");
+    try {
+      final URL url = Util.class.getClassLoader().getResource(shortFileName);
+      Objects.requireNonNull(url, "resource " + shortFileName + " could not be acquired.");
+      final URI uri = url.toURI();
+      //decodes any special characters
+      return uri.isAbsolute() ? Paths.get(uri).toAbsolutePath().toString() : uri.getPath();
+    } catch (final URISyntaxException e) {
+      //keep the original failure as the cause so the stack trace is not lost
+      throw new IllegalArgumentException("Cannot find resource: " + shortFileName + LS + e, e);
+    }
+  }
+
+  /**
+   * Gets the file defined by the given resource file's shortFileName.
+   * @param shortFileName the last name in the pathname's name sequence.
+   * @return the file defined by the given resource file's shortFileName.
+   */
+  public static File getResourceFile(final String shortFileName) {
+    return new File(getResourcePath(shortFileName));
+  }
+
+  /**
+   * Returns a byte array of the contents of the file defined by the given resource file's
+   * shortFileName.
+   * @param shortFileName the last name in the pathname's name sequence.
+   * @return a byte array of the contents of the file defined by the given resource file's
+   *     shortFileName.
+   * @throws IllegalArgumentException if the file cannot be read
+   */
+  public static byte[] getResourceBytes(final String shortFileName) {
+    try {
+      return Files.readAllBytes(Paths.get(getResourcePath(shortFileName)));
+    } catch (final IOException e) {
+      //keep the original failure as the cause so the stack trace is not lost
+      throw new IllegalArgumentException("Cannot read resource: " + shortFileName + LS + e, e);
+    }
+  }
+
+}
Propchange:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Util.java
------------------------------------------------------------------------------
svn:executable = *
Added:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/VirtualMachineMemory.java
==============================================================================
---
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/VirtualMachineMemory.java
(added)
+++
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/VirtualMachineMemory.java
Tue May 21 21:11:49 2024
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.memory.internal;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+/**
+ * Extracts a version-dependent reference to the `sun.misc.VM` into a standalone
+ * class. The package name for VM has changed in later versions. The appropriate
+ * class will be loaded by the class loader depending on the Java version that
+ * is used.
+ * For more information, see: https://openjdk.java.net/jeps/238
+ *
+ * <p>Both values are read reflectively, once, in the static initializer. If the class or
+ * either method cannot be reached, class initialization fails with a RuntimeException
+ * that carries the underlying reflection failure as its cause.</p>
+ */
+public final class VirtualMachineMemory {
+
+  private static final Class<?> VM_CLASS;
+  private static final Method VM_MAX_DIRECT_MEMORY_METHOD;
+  private static final Method VM_IS_DIRECT_MEMORY_PAGE_ALIGNED_METHOD;
+  private static final long maxDBBMemory;
+  private static final boolean isPageAligned;
+
+  static {
+    try {
+      VM_CLASS = Class.forName("sun.misc.VM");
+      VM_MAX_DIRECT_MEMORY_METHOD = VM_CLASS.getDeclaredMethod("maxDirectMemory");
+      VM_MAX_DIRECT_MEMORY_METHOD.setAccessible(true);
+      maxDBBMemory = (long) VM_MAX_DIRECT_MEMORY_METHOD.invoke(null); // static method
+
+      VM_IS_DIRECT_MEMORY_PAGE_ALIGNED_METHOD =
+          VM_CLASS.getDeclaredMethod("isDirectMemoryPageAligned");
+      VM_IS_DIRECT_MEMORY_PAGE_ALIGNED_METHOD.setAccessible(true);
+      isPageAligned = (boolean) VM_IS_DIRECT_MEMORY_PAGE_ALIGNED_METHOD.invoke(null); // static method
+    } catch (final ClassNotFoundException | NoSuchMethodException | IllegalAccessException
+        | IllegalArgumentException | InvocationTargetException | SecurityException e) {
+      // Chain the cause: the message only names the exception class, and for an
+      // InvocationTargetException the real failure is reachable only through the cause.
+      throw new RuntimeException("Could not acquire sun.misc.VM class: " + e.getClass(), e);
+    }
+  }
+
+  /**
+   * Returns the maximum amount of allocatable direct buffer memory. The
+   * directMemory variable is initialized during system initialization.
+   *
+   * @return the maximum amount of allocatable direct buffer memory.
+   */
+  public static long getMaxDBBMemory() {
+    return maxDBBMemory;
+  }
+
+  /**
+   * Returns true if the direct buffers should be page aligned.
+   *
+   * @return flag that determines whether direct buffers should be page aligned.
+   */
+  public static boolean getIsPageAligned() {
+    return isPageAligned;
+  }
+}
Propchange:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/VirtualMachineMemory.java
------------------------------------------------------------------------------
svn:executable = *
Added:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/WritableDirectHandleImpl.java
==============================================================================
---
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/WritableDirectHandleImpl.java
(added)
+++
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/WritableDirectHandleImpl.java
Tue May 21 21:11:49 2024
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.memory.internal;
+
+import org.apache.datasketches.memory.Handle;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableHandle;
+import org.apache.datasketches.memory.WritableMemory;
+
+/**
+ * The Handle returned for a writable, directly allocated memory resource.
+ * It pairs the AllocateDirect resource, which is closed through this AutoCloseable
+ * WritableHandle, with the WritableMemory view onto it.
+ * Please read Javadocs for {@link Handle}.
+ *
+ * @author Lee Rhodes
+ * @author Roman Leventov
+ */
+public final class WritableDirectHandleImpl implements WritableHandle {
+
+  /**
+   * The underlying direct allocation. Per the original authors' note, having at least one
+   * final field makes this class safe for concurrent publication.
+   */
+  final AllocateDirect direct;
+
+  /** The writable view handed out by this handle; cleared once the allocation is closed. */
+  private BaseWritableMemoryImpl wMem;
+
+  WritableDirectHandleImpl(final AllocateDirect allocatedDirect,
+      final BaseWritableMemoryImpl wMem) {
+    this.direct = allocatedDirect;
+    this.wMem = wMem;
+  }
+
+  /**
+   * Returns the memory as a read-only {@link Memory} reference.
+   * This is null after a close that released the allocation.
+   */
+  @Override
+  public Memory get() {
+    return this.wMem;
+  }
+
+  /**
+   * Returns the memory as a {@link WritableMemory} reference.
+   * This is null after a close that released the allocation.
+   */
+  @Override
+  public WritableMemory getWritable() {
+    return this.wMem;
+  }
+
+  //AutoCloseable
+
+  /**
+   * Asks the direct allocation to close and, only if it reports that it did,
+   * drops the reference to the memory view.
+   */
+  @Override
+  public void close() {
+    final boolean closed = direct.doClose();
+    if (!closed) {
+      return;
+    }
+    wMem = null;
+  }
+}
Propchange:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/WritableDirectHandleImpl.java
------------------------------------------------------------------------------
svn:executable = *
Added:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/WritableMapHandleImpl.java
==============================================================================
---
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/WritableMapHandleImpl.java
(added)
+++
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/WritableMapHandleImpl.java
Tue May 21 21:11:49 2024
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.memory.internal;
+
+import org.apache.datasketches.memory.Handle;
+import org.apache.datasketches.memory.WritableMapHandle;
+import org.apache.datasketches.memory.WritableMemory;
+
+/**
+ * A Handle for a memory-mapped, writable file resource.
+ * Joins a WritableHandle with an AutoCloseable WritableMap resource.
+ * Please read Javadocs for {@link Handle}.
+ *
+ * <p>This class adds only the writable view and {@link #force()} on top of
+ * {@code MapHandleImpl}; everything else is inherited from that superclass.
+ *
+ * @author Roman Leventov
+ * @author Lee Rhodes
+ */
+public final class WritableMapHandleImpl extends MapHandleImpl
+ implements WritableMapHandle {
+
+ /**
+ * Passes the writable map and its memory view straight to the superclass
+ * constructor.
+ *
+ * @param dirWmap the writable memory-mapped resource
+ * @param wMem the memory view associated with the map
+ */
+ WritableMapHandleImpl(
+ final AllocateDirectWritableMap dirWmap,
+ final BaseWritableMemoryImpl wMem) {
+ super(dirWmap, wMem);
+ }
+
+ /**
+ * Returns the memory view as a {@link WritableMemory}.
+ *
+ * @return the {@code wMem} reference (presumably the field that MapHandleImpl
+ * stores from the constructor argument - confirm in MapHandleImpl)
+ */
+ @Override
+ public WritableMemory getWritable() {
+ return wMem;
+ }
+
+ /**
+ * Delegates to {@code AllocateDirectWritableMap.force()} on the underlying map.
+ */
+ @Override
+ public void force() {
+ // dirMap is presumably declared with a read-only map type in MapHandleImpl;
+ // the constructor above only ever supplies an AllocateDirectWritableMap,
+ // which is what this downcast relies on - confirm in MapHandleImpl.
+ ((AllocateDirectWritableMap)dirMap).force();
+ }
+}
Propchange:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/WritableMapHandleImpl.java
------------------------------------------------------------------------------
svn:executable = *
Added:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/XxHash64.java
==============================================================================
---
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/XxHash64.java
(added)
+++
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/XxHash64.java
Tue May 21 21:11:49 2024
@@ -0,0 +1,335 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.memory.internal;
+
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_BOOLEAN_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_BYTE_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_CHAR_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_DOUBLE_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_FLOAT_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_INT_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_LONG_BASE_OFFSET;
+import static
org.apache.datasketches.memory.internal.UnsafeUtil.ARRAY_SHORT_BASE_OFFSET;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.CHAR_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.DOUBLE_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.FLOAT_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.INT_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.LONG_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.SHORT_SHIFT;
+import static org.apache.datasketches.memory.internal.UnsafeUtil.unsafe;
+
+/**
+ * The XxHash is a fast, non-cryptographic, 64-bit hash function that has
+ * excellent avalanche and 2-way bit independence properties.
+ * This java version adapted the C++ version and the
+ * OpenHFT/Zero-Allocation-Hashing implementation
+ * referenced below as inspiration.
+ *
+ * <p>The C++ source repository:
+ * <a href="https://github.com/Cyan4973/xxHash">
+ * https://github.com/Cyan4973/xxHash</a>. It has a BSD 2-Clause License:
+ * <a href="http://www.opensource.org/licenses/bsd-license.php">
+ * http://www.opensource.org/licenses/bsd-license.php</a>. See LICENSE.
+ *
+ * <p>Portions of this code were adapted from
+ * <a href="https://github.com/OpenHFT/Zero-Allocation-Hashing/blob/master/src/main/java/net/openhft/hashing/XxHash.java">
+ * OpenHFT/Zero-Allocation-Hashing</a>, which has an Apache 2 license as does
+ * this site. See LICENSE.
+ *
+ * <p>All methods are static. The public array and string methods translate
+ * their element offset and length into a byte offset and byte length and then
+ * delegate to the package-private {@code hash(Object, long, long, long)}.
+ *
+ * @author Lee Rhodes
+ */
+@SuppressWarnings("restriction")
+public class XxHash64 {
+ // Unsigned, 64-bit primes
+ private static final long P1 = -7046029288634856825L;
+ private static final long P2 = -4417276706812531889L;
+ private static final long P3 = 1609587929392839161L;
+ private static final long P4 = -8796714831421723037L;
+ private static final long P5 = 2870177450012600261L;
+
+ /**
+ * Returns the 64-bit hash of the sequence of bytes in the unsafeObject
+ * specified by <i>cumOffsetBytes</i>, <i>lengthBytes</i> and a <i>seed</i>.
+ *
+ * <p>The input is consumed in four stages: 32-byte stripes (only when at
+ * least 32 bytes are present), then 8-byte words, then at most one 4-byte
+ * word, then single bytes. The result is passed through a final mixing step.
+ *
+ * <p>NOTE(review): this method performs no bounds or null checks of its own
+ * before reading through {@code unsafe}; the requested range is presumably
+ * expected to be validated by the caller - confirm at the call sites.
+ *
+ * @param unsafeObj A reference to the object parameter required by unsafe.
+ * It may be null.
+ * @param cumOffsetBytes cumulative offset in bytes of this object from the
+ * backing resource including any user given offsetBytes. This offset may
+ * also include other offset components such as the native off-heap memory
+ * address, DirectByteBuffer split offsets, region offsets,
+ * and unsafe arrayBaseOffsets.
+ * @param lengthBytes the length in bytes of the sequence to be hashed
+ * @param seed a given seed
+ * @return the 64-bit hash of the sequence of bytes in the unsafeObject
+ * specified by <i>cumOffsetBytes</i>, <i>lengthBytes</i> and a <i>seed</i>.
+ */
+ static long hash(final Object unsafeObj, long cumOffsetBytes, final long
lengthBytes,
+ final long seed) {
+ long hash;
+ long remaining = lengthBytes;
+
+ if (remaining >= 32) {
+ // Four independent accumulators, each seeded differently from seed, P1 and P2.
+ long v1 = seed + P1 + P2;
+ long v2 = seed + P2;
+ long v3 = seed;
+ long v4 = seed - P1;
+
+ // Each iteration consumes one 32-byte stripe: one long per accumulator.
+ do {
+ v1 += unsafe.getLong(unsafeObj, cumOffsetBytes) * P2;
+ v1 = Long.rotateLeft(v1, 31);
+ v1 *= P1;
+
+ v2 += unsafe.getLong(unsafeObj, cumOffsetBytes + 8L) * P2;
+ v2 = Long.rotateLeft(v2, 31);
+ v2 *= P1;
+
+ v3 += unsafe.getLong(unsafeObj, cumOffsetBytes + 16L) * P2;
+ v3 = Long.rotateLeft(v3, 31);
+ v3 *= P1;
+
+ v4 += unsafe.getLong(unsafeObj, cumOffsetBytes + 24L) * P2;
+ v4 = Long.rotateLeft(v4, 31);
+ v4 *= P1;
+
+ cumOffsetBytes += 32;
+ remaining -= 32;
+ } while (remaining >= 32);
+
+ // Combine the four accumulators into a single value using distinct rotations.
+ hash = Long.rotateLeft(v1, 1)
+ + Long.rotateLeft(v2, 7)
+ + Long.rotateLeft(v3, 12)
+ + Long.rotateLeft(v4, 18);
+
+ // Fold each accumulator into the hash once more: scramble it (multiply,
+ // rotate, multiply), xor it in, then apply hash * P1 + P4.
+ v1 *= P2;
+ v1 = Long.rotateLeft(v1, 31);
+ v1 *= P1;
+ hash ^= v1;
+ hash = (hash * P1) + P4;
+
+ v2 *= P2;
+ v2 = Long.rotateLeft(v2, 31);
+ v2 *= P1;
+ hash ^= v2;
+ hash = (hash * P1) + P4;
+
+ v3 *= P2;
+ v3 = Long.rotateLeft(v3, 31);
+ v3 *= P1;
+ hash ^= v3;
+ hash = (hash * P1) + P4;
+
+ v4 *= P2;
+ v4 = Long.rotateLeft(v4, 31);
+ v4 *= P1;
+ hash ^= v4;
+ hash = (hash * P1) + P4;
+ } //end remaining >= 32
+ else {
+ // Fewer than 32 bytes: no stripe processing, start directly from the seed.
+ hash = seed + P5;
+ }
+
+ // The total input length (not the remaining count) is mixed into the hash.
+ hash += lengthBytes;
+
+ // Consume the remaining input 8 bytes at a time.
+ while (remaining >= 8) {
+ long k1 = unsafe.getLong(unsafeObj, cumOffsetBytes);
+ k1 *= P2;
+ k1 = Long.rotateLeft(k1, 31);
+ k1 *= P1;
+ hash ^= k1;
+ hash = (Long.rotateLeft(hash, 27) * P1) + P4;
+ cumOffsetBytes += 8;
+ remaining -= 8;
+ }
+
+ // At most one 4-byte word can remain here because remaining is now < 8.
+ if (remaining >= 4) { //treat as unsigned ints
+ hash ^= (unsafe.getInt(unsafeObj, cumOffsetBytes) & 0XFFFF_FFFFL) * P1;
+ hash = (Long.rotateLeft(hash, 23) * P2) + P3;
+ cumOffsetBytes += 4;
+ remaining -= 4;
+ }
+
+ // Consume whatever is left (0 to 3 bytes) one byte at a time.
+ while (remaining != 0) { //treat as unsigned bytes
+ hash ^= (unsafe.getByte(unsafeObj, cumOffsetBytes) & 0XFFL) * P5;
+ hash = Long.rotateLeft(hash, 11) * P1;
+ --remaining;
+ ++cumOffsetBytes;
+ }
+
+ return finalize(hash);
+ }
+
+ /**
+ * Returns a 64-bit hash from a single long. This method has been optimized
+ * for speed when only a single hash of a long is required.
+ *
+ * <p>The steps mirror what {@code hash(Object, long, long, long)} does for a
+ * length of 8 bytes (start from {@code seed + P5}, add the length, one 8-byte
+ * round, final mix), with <i>in</i> taking the place of the long read from
+ * memory.
+ *
+ * @param in A long.
+ * @param seed A long valued seed.
+ * @return the hash.
+ */
+ public static long hash(final long in, final long seed) {
+ long hash = seed + P5;
+ // 8 is the input length in bytes of a single long.
+ hash += 8;
+ long k1 = in;
+ k1 *= P2;
+ k1 = Long.rotateLeft(k1, 31);
+ k1 *= P1;
+ hash ^= k1;
+ hash = (Long.rotateLeft(hash, 27) * P1) + P4;
+ return finalize(hash);
+ }
+
+ /**
+ * Applies the final mixing step shared by both hash methods: alternating
+ * xor with an unsigned right shift of itself and multiplication by a prime.
+ *
+ * @param hash the accumulated hash value before final mixing
+ * @return the mixed 64-bit hash
+ */
+ private static long finalize(long hash) {
+ hash ^= hash >>> 33;
+ hash *= P2;
+ hash ^= hash >>> 29;
+ hash *= P3;
+ hash ^= hash >>> 32;
+ return hash;
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the
+ * given length using the given seed.
+ * The offset and length are used unshifted, that is, as byte counts.
+ * No bounds checks are performed in this method.
+ * @param arr the given array
+ * @param offsetBooleans starting at this offset
+ * @param lengthBooleans continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashBooleans(final boolean[] arr, final long
offsetBooleans,
+ final long lengthBooleans, final long seed) {
+ return hash(arr, ARRAY_BOOLEAN_BASE_OFFSET + offsetBooleans,
lengthBooleans, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the
+ * given length using the given seed.
+ * No bounds checks are performed in this method.
+ * @param arr the given array
+ * @param offsetBytes starting at this offset
+ * @param lengthBytes continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashBytes(final byte[] arr, final long offsetBytes,
+ final long lengthBytes, final long seed) {
+ return hash(arr, ARRAY_BYTE_BASE_OFFSET + offsetBytes, lengthBytes, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the
+ * given length using the given seed.
+ * The offset and length are given in shorts and are shifted left by
+ * SHORT_SHIFT (presumably log2 of the element size - confirm in UnsafeUtil)
+ * before being used as byte values. No bounds checks are performed in this
+ * method.
+ * @param arr the given array
+ * @param offsetShorts starting at this offset
+ * @param lengthShorts continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashShorts(final short[] arr, final long offsetShorts,
+ final long lengthShorts, final long seed) {
+ return hash(arr, ARRAY_SHORT_BASE_OFFSET + (offsetShorts << SHORT_SHIFT),
+ lengthShorts << SHORT_SHIFT, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the
+ * given length using the given seed.
+ * The offset and length are given in chars and are shifted left by
+ * CHAR_SHIFT (presumably log2 of the element size - confirm in UnsafeUtil)
+ * before being used as byte values. No bounds checks are performed in this
+ * method.
+ * @param arr the given array
+ * @param offsetChars starting at this offset
+ * @param lengthChars continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashChars(final char[] arr, final long offsetChars,
+ final long lengthChars, final long seed) {
+ return hash(arr, ARRAY_CHAR_BASE_OFFSET + (offsetChars << CHAR_SHIFT),
+ lengthChars << CHAR_SHIFT, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the
+ * given length using the given seed.
+ * The offset and length are given in ints and are shifted left by
+ * INT_SHIFT (presumably log2 of the element size - confirm in UnsafeUtil)
+ * before being used as byte values. No bounds checks are performed in this
+ * method.
+ * @param arr the given array
+ * @param offsetInts starting at this offset
+ * @param lengthInts continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashInts(final int[] arr, final long offsetInts,
+ final long lengthInts, final long seed) {
+ return hash(arr, ARRAY_INT_BASE_OFFSET + (offsetInts << INT_SHIFT),
+ lengthInts << INT_SHIFT, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the
+ * given length using the given seed.
+ * The offset and length are given in longs and are shifted left by
+ * LONG_SHIFT (presumably log2 of the element size - confirm in UnsafeUtil)
+ * before being used as byte values. No bounds checks are performed in this
+ * method.
+ * @param arr the given array
+ * @param offsetLongs starting at this offset
+ * @param lengthLongs continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashLongs(final long[] arr, final long offsetLongs,
+ final long lengthLongs, final long seed) {
+ return hash(arr, ARRAY_LONG_BASE_OFFSET + (offsetLongs << LONG_SHIFT),
+ lengthLongs << LONG_SHIFT, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the
+ * given length using the given seed.
+ * The offset and length are given in floats and are shifted left by
+ * FLOAT_SHIFT (presumably log2 of the element size - confirm in UnsafeUtil)
+ * before being used as byte values. No bounds checks are performed in this
+ * method.
+ * @param arr the given array
+ * @param offsetFloats starting at this offset
+ * @param lengthFloats continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashFloats(final float[] arr, final long offsetFloats,
+ final long lengthFloats, final long seed) {
+ return hash(arr, ARRAY_FLOAT_BASE_OFFSET + (offsetFloats << FLOAT_SHIFT),
+ lengthFloats << FLOAT_SHIFT, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the
+ * given length using the given seed.
+ * The offset and length are given in doubles and are shifted left by
+ * DOUBLE_SHIFT (presumably log2 of the element size - confirm in UnsafeUtil)
+ * before being used as byte values. No bounds checks are performed in this
+ * method.
+ * @param arr the given array
+ * @param offsetDoubles starting at this offset
+ * @param lengthDoubles continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashDoubles(final double[] arr, final long offsetDoubles,
+ final long lengthDoubles, final long seed) {
+ return hash(arr, ARRAY_DOUBLE_BASE_OFFSET + (offsetDoubles <<
DOUBLE_SHIFT),
+ lengthDoubles << DOUBLE_SHIFT, seed);
+ }
+
+ /**
+ * Hash the given string starting at the given offset and continuing for the
+ * given length using the given seed.
+ * The whole string is first copied into a new char array via
+ * {@code toCharArray()}, and that array is then hashed with
+ * {@code hashChars}, so the offset and length are measured in chars.
+ * @param str the given string
+ * @param offsetChars starting at this offset
+ * @param lengthChars continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashString(final String str, final long offsetChars,
+ final long lengthChars, final long seed) {
+ return hashChars(str.toCharArray(), offsetChars, lengthChars, seed);
+ }
+
+}
+
Propchange:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/XxHash64.java
------------------------------------------------------------------------------
svn:executable = *
Added:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/package-info.java
==============================================================================
---
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/package-info.java
(added)
+++
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/package-info.java
Tue May 21 21:11:49 2024
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * @author Lee Rhodes
+ */
+package org.apache.datasketches.memory.internal;
Propchange:
dev/datasketches/memory/2.2.0-RC1/apache-datasketches-memory-2.2.0-src/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/package-info.java
------------------------------------------------------------------------------
svn:executable = *
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]