This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch IntegrateJava17_v2
in repository https://gitbox.apache.org/repos/asf/datasketches-memory.git
The following commit(s) were added to refs/heads/IntegrateJava17_v2 by this push:
new 51c71b2 Interim -- Eclipse works but Maven does not. Will live with this temporarily.
51c71b2 is described below
commit 51c71b261dd6ceecd09522b3eb1f7754b13dc1e5
Author: Lee Rhodes <[email protected]>
AuthorDate: Tue Dec 20 16:03:22 2022 -0800
Interim -- Eclipse works but Maven does not. Will live with this temporarily.
---
.gitignore | 1 +
datasketches-memory-java17/pom.xml | 20 +-
.../src/main/java17/module-info.java | 5 +
.../org/apache/datasketches/memory/Resource.java | 31 +-
.../datasketches/memory/internal/ResourceImpl.java | 47 +-
.../datasketches/memory/internal/Buffer2Test.java | 10 +-
datasketches-memory-java8/pom.xml | 10 +-
.../org/apache/datasketches/memory/Memory.java | 37 --
.../org/apache/datasketches/memory/Resource.java | 29 +-
.../apache/datasketches/memory/WritableMemory.java | 15 +-
.../memory/internal/AllocateDirectMap.java | 4 +-
.../memory/internal/BaseWritableMemoryImpl.java | 29 -
.../datasketches/memory/internal/ResourceImpl.java | 21 +-
.../apache/datasketches/memory/internal/Utf8.java | 632 ---------------------
.../datasketches/memory/internal/Buffer2Test.java | 1 -
.../internal/NativeWritableBufferImplTest.java | 1 -
.../internal/NativeWritableMemoryImplTest.java | 1 -
.../datasketches/memory/internal/ResourceTest.java | 1 -
.../datasketches/memory/internal/Utf8Test.java | 517 -----------------
pom.xml | 13 +-
20 files changed, 105 insertions(+), 1320 deletions(-)
diff --git a/.gitignore b/.gitignore
index f36d4f4..05a52c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,6 +49,7 @@ local/
reports/
.pmd
tmp
+doc/
# Build artifacts
target/
diff --git a/datasketches-memory-java17/pom.xml b/datasketches-memory-java17/pom.xml
index ddf40bc..c90ad7c 100644
--- a/datasketches-memory-java17/pom.xml
+++ b/datasketches-memory-java17/pom.xml
@@ -35,23 +35,16 @@
<properties>
<java.version>17</java.version>
<jdk-toolchain.version>17</jdk-toolchain.version>
- <maven.compiler.source>${java.version}</maven.compiler.source>
- <maven.compiler.target>${java.version}</maven.compiler.target>
+ <maven.compiler.source>17</maven.compiler.source>
+ <maven.compiler.target>17</maven.compiler.target>
</properties>
-
<dependencies>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<scope>test</scope>
</dependency>
- <dependency>
- <!-- Used for UTF8 testing -->
- <groupId>com.google.protobuf</groupId>
- <artifactId>protobuf-java</artifactId>
- <scope>test</scope>
- </dependency>
<dependency>
<!-- Used for xxHash testing -->
<groupId>net.openhft</groupId>
@@ -61,12 +54,13 @@
</dependency>
</dependencies>
-
<build>
<sourceDirectory>${project.basedir}/src/main/java17</sourceDirectory>
<testSourceDirectory>${project.basedir}/src/test/java17</testSourceDirectory>
<testResources>
- <directory>${project.basedir}/src/test/resources</directory>
+ <testResource>
+ <directory>${project.basedir}/src/test/resources</directory>
+ </testResource>
</testResources>
<pluginManagement>
<plugins>
@@ -77,8 +71,10 @@
<configuration>
<compilerArgs>
<arg>--add-exports</arg>
- <arg>java.base/jdk.internal.ref=org.apache.datasketches.memory</arg>
+ <arg>java.base/jdk.incubator.foreign=org.apache.datasketches.memory</arg>
</compilerArgs>
+ <source>17</source>
+ <target>17</target>
</configuration>
</plugin>
</plugins>
diff --git a/datasketches-memory-java17/src/main/java17/module-info.java b/datasketches-memory-java17/src/main/java17/module-info.java
index 125759f..6524094 100644
--- a/datasketches-memory-java17/src/main/java17/module-info.java
+++ b/datasketches-memory-java17/src/main/java17/module-info.java
@@ -16,10 +16,15 @@
* specific language governing permissions and limitations
* under the License.
*/
+/**
+ * The DataSketches Memory Java Module.
+ * @author lrhodes
+ */
module org.apache.datasketches.memory {
requires java.base;
requires java.logging;
requires jdk.unsupported;
requires transitive jdk.incubator.foreign;
+
exports org.apache.datasketches.memory;
}
\ No newline at end of file
diff --git a/datasketches-memory-java17/src/main/java17/org/apache/datasketches/memory/Resource.java b/datasketches-memory-java17/src/main/java17/org/apache/datasketches/memory/Resource.java
index 950923e..a2da9ba 100644
--- a/datasketches-memory-java17/src/main/java17/org/apache/datasketches/memory/Resource.java
+++ b/datasketches-memory-java17/src/main/java17/org/apache/datasketches/memory/Resource.java
@@ -26,8 +26,7 @@ import jdk.incubator.foreign.MemorySegment;
import jdk.incubator.foreign.ResourceScope;
/**
- * Keeps key configuration state for Memory and Buffer plus some common static
variables
- * and check methods.
+ * Keeps key configuration state for Memory and Buffer plus some common
methods.
*
* @author Lee Rhodes
*/
@@ -156,6 +155,16 @@ public interface Resource {
*/
boolean isRegionView();
+ /**
+ * Returns true if the backing resource of <i>this</i> is identical with the
backing resource
+ * of <i>that</i>. The capacities must be the same. If <i>this</i> is a
region,
+ * the region offset must also be the same.
+ * @param that A different non-null object
+ * @return true if the backing resource of <i>this</i> is the same as the
backing resource
+ * of <i>that</i>.
+ */
+ boolean isSameResource(Resource that);
+
/**
* Returns a description of this object with an optional formatted hex
string of the data
* for the specified a range. Used primarily for testing.
@@ -230,28 +239,22 @@ public interface Resource {
/**
* Loads the contents of this mapped segment into physical memory. Please
refer to
* <a
href="https://docs.oracle.com/en/java/javase/17/docs/api/jdk.incubator.foreign/jdk/incubator/foreign/MemorySegment.html#load()">load()</a>
+ *
+ * @throws IllegalStateException if the scope associated with the underlying
MemorySegment has been closed,
+ * or if access occurs from a thread other than the thread owning that scope.
+ * @throws UnsupportedOperationException if this segment is not a mapped
memory segment, e.g. if
+ * {@code isMapped() == false}.
*/
void load();
/**
- * See <a
href="https://docs.oracle.com/en/java/javase/17/docs/api/jdk.incubator.foreign/jdk/incubator/foreign/MemorySegment.html#mismatch(jdk.incubator.foreign.MemorySegment)>mismatch</a>
+ * See <a
href="https://docs.oracle.com/en/java/javase/17/docs/api/jdk.incubator.foreign/jdk/incubator/foreign/MemorySegment.html#mismatch(jdk.incubator.foreign.MemorySegment)">mismatch</a>
* @param that the other Resource
* @return the relative offset, in bytes, of the first mismatch between this
and the given other Resource object,
* otherwise -1 if no mismatch
*/
long mismatch(Resource that);
- /**
- * Returns a positive number if <i>this</i> overlaps <i>that</i> and
<i>this</i> base address is ≤ <i>that</i>
- * base address.
- * Returns a negative number if <i>this</i> overlaps <i>that</i> and
<i>this</i> base address is > <i>that</i>
- * base address.
- * Returns a zero if there is no overlap or if one or both objects are null,
not active or on heap.
- * @param that the other Resource object
- * @return a long value representing the ordering and size of overlap
between <i>this</i> and <i>that</i>.
- */
- long nativeOverlap(Resource that);
-
/**
* Returns the resource scope associated with this memory segment.
* @return the resource scope associated with this memory segment.
diff --git a/datasketches-memory-java17/src/main/java17/org/apache/datasketches/memory/internal/ResourceImpl.java b/datasketches-memory-java17/src/main/java17/org/apache/datasketches/memory/internal/ResourceImpl.java
index fedec19..3bb686d 100644
--- a/datasketches-memory-java17/src/main/java17/org/apache/datasketches/memory/internal/ResourceImpl.java
+++ b/datasketches-memory-java17/src/main/java17/org/apache/datasketches/memory/internal/ResourceImpl.java
@@ -35,8 +35,7 @@ import jdk.incubator.foreign.MemorySegment;
import jdk.incubator.foreign.ResourceScope;
/**
- * Keeps key configuration state for MemoryImpl and BufferImpl plus some
common static variables
- * and check methods.
+ * Implements the root Resource methods.
*
* @author Lee Rhodes
*/
@@ -303,9 +302,8 @@ abstract class ResourceImpl implements Resource {
return byteBuf;
}
- //@SuppressWarnings("resource")
@Override //Java 17 only
- public void close() { //moved here
+ public void close() {
if (seg != null && seg.scope().isAlive() && !seg.scope().isImplicit()) {
if (seg.isNative() || seg.isMapped()) {
seg.scope().close();
@@ -321,7 +319,9 @@ abstract class ResourceImpl implements Resource {
}
@Override //Java 17 only
- public void force() { seg.force(); } //moved here
+ public void force() {
+ if (seg != null && seg.scope().isAlive() && seg.isMapped()) { seg.force();
}
+ }
@Override
public final ByteOrder getByteOrder() {
@@ -367,7 +367,12 @@ abstract class ResourceImpl implements Resource {
}
@Override //Java 17 only
- public boolean isLoaded() { return seg.isLoaded(); }
+ public boolean isLoaded() {
+ if (seg != null && seg.scope().isAlive() && seg.isMapped()) {
+ return seg.isLoaded();
+ }
+ return false;
+ }
@Override
public boolean isMemoryMappedFileResource() {
@@ -396,8 +401,18 @@ abstract class ResourceImpl implements Resource {
return (typeId & REGION) > 0;
}
+ @Override
+ public boolean isSameResource(Resource that) {
+ long thisCap = getCapacity();
+ long thatCap = that.getCapacity();
+ long overlap = nativeOverlap(that);
+ return (thisCap == thatCap && thisCap == overlap);
+ }
+
@Override //Java 17 only
- public void load() { seg.load(); } //moved here
+ public void load() {
+ if (seg != null && seg.scope().isAlive() && seg.isMapped()) { seg.load(); }
+ }
@Override
public long mismatch(final Resource that) { //Java 17 only
@@ -407,8 +422,16 @@ abstract class ResourceImpl implements Resource {
return seg.mismatch(thatBSI.seg);
}
- @Override //Java 17 only
- public final long nativeOverlap(final Resource that) { //Java 17 only
+ /**
+ * Returns a positive number if <i>this</i> overlaps <i>that</i> and
<i>this</i> base address is ≤ <i>that</i>
+ * base address.
+ * Returns a negative number if <i>this</i> overlaps <i>that</i> and
<i>this</i> base address is > <i>that</i>
+ * base address.
+ * Returns a zero if there is no overlap or if one or both objects are null,
not active or on heap.
+ * @param that the other Resource object
+ * @return a long value representing the ordering and size of overlap
between <i>this</i> and <i>that</i>.
+ */
+ final long nativeOverlap(final Resource that) { //Java 17 only
if (that == null) { return 0; }
if (!that.isAlive()) { return 0; }
ResourceImpl thatBSI = (ResourceImpl) that;
@@ -478,7 +501,11 @@ abstract class ResourceImpl implements Resource {
}
@Override //Java 17 only
- public void unload() { seg.unload(); } //moved here
+ public void unload() {
+ if (seg != null && seg.scope().isAlive() && seg.isMapped()) {
+ seg.unload();
+ }
+ }
@Override
public final long xxHash64(final long in, final long seed) {
diff --git a/datasketches-memory-java17/src/test/java17/org/apache/datasketches/memory/internal/Buffer2Test.java b/datasketches-memory-java17/src/test/java17/org/apache/datasketches/memory/internal/Buffer2Test.java
index 572b5d7..3078710 100644
--- a/datasketches-memory-java17/src/test/java17/org/apache/datasketches/memory/internal/Buffer2Test.java
+++ b/datasketches-memory-java17/src/test/java17/org/apache/datasketches/memory/internal/Buffer2Test.java
@@ -27,9 +27,9 @@ import static org.testng.Assert.assertTrue;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
-import org.apache.datasketches.memory.Resource;
import org.apache.datasketches.memory.Buffer;
import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.Resource;
import org.apache.datasketches.memory.WritableBuffer;
import org.apache.datasketches.memory.WritableMemory;
import org.testng.annotations.Test;
@@ -398,25 +398,25 @@ public class Buffer2Test {
}
@Test
- public void checkIndependence() {
+ public void checkIsSameResource() {
int cap = 64;
ResourceScope scope = ResourceScope.newImplicitScope();
WritableMemory wmem = WritableMemory.allocateDirect(cap, scope, null);
WritableBuffer wbuf1 = wmem.asWritableBuffer();
WritableBuffer wbuf2 = wmem.asWritableBuffer();
assertFalse(wbuf1 == wbuf2);
- assertTrue(wbuf1.nativeOverlap(wbuf2) == cap);
+ assertTrue(wbuf1.isSameResource(wbuf2));
WritableMemory reg1 = wmem.writableRegion(0, cap);
WritableMemory reg2 = wmem.writableRegion(0, cap);
assertFalse(reg1 == reg2);
- assertTrue(reg1.nativeOverlap(reg2) == cap);
+ assertTrue(reg1.isSameResource(reg2));
WritableBuffer wbuf3 = wbuf1.writableRegion();
WritableBuffer wbuf4 = wbuf1.writableRegion();
assertFalse(wbuf3 == wbuf4);
- assertTrue(wbuf3.nativeOverlap(wbuf4) == cap);
+ assertTrue(wbuf3.isSameResource(wbuf4));
}
@Test
diff --git a/datasketches-memory-java8/pom.xml b/datasketches-memory-java8/pom.xml
index 45c44ec..d47e358 100644
--- a/datasketches-memory-java8/pom.xml
+++ b/datasketches-memory-java8/pom.xml
@@ -47,12 +47,6 @@
<artifactId>testng</artifactId>
<scope>test</scope>
</dependency>
- <dependency>
- <!-- Used for UTF8 testing -->
- <groupId>com.google.protobuf</groupId>
- <artifactId>protobuf-java</artifactId>
- <scope>test</scope>
- </dependency>
<dependency>
<!-- Used for xxHash testing -->
<groupId>net.openhft</groupId>
@@ -66,7 +60,9 @@
<sourceDirectory>${project.basedir}/src/main/java</sourceDirectory>
<testSourceDirectory>${project.basedir}/src/test/java</testSourceDirectory>
<testResources>
- <directory>${project.basedir}/src/test/resources</directory>
+ <testResource>
+ <directory>${project.basedir}/src/test/resources</directory>
+ </testResource>
</testResources>
<pluginManagement>
<plugins>
diff --git a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/Memory.java b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/Memory.java
index 90c53bd..9237e4e 100644
--- a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/Memory.java
+++ b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/Memory.java
@@ -325,43 +325,6 @@ public interface Memory extends Resource {
*/
void getCharArray(long offsetBytes, char[] dstArray, int dstOffsetChars, int
lengthChars);
- /**
- * Gets UTF-8 encoded bytes from this Memory, starting at offsetBytes to a
length of
- * utf8LengthBytes, decodes them into characters and appends them to the
given Appendable.
- * This is specifically designed to reduce the production of intermediate
objects (garbage),
- * thus significantly reducing pressure on the JVM Garbage Collector.
- * @param offsetBytes offset bytes relative to the Memory start
- * @param utf8LengthBytes the number of encoded UTF-8 bytes to decode. It is
assumed that the
- * caller has the correct number of utf8 bytes required to decode the number
of characters
- * to be appended to dst. Characters outside the ASCII range can require 2,
3 or 4 bytes per
- * character to decode.
- * @param dst the destination Appendable to append the decoded characters to.
- * @return the number of characters decoded
- * @throws IOException if dst.append() throws IOException
- * @throws Utf8CodingException in case of malformed or illegal UTF-8 input
- */
- int getCharsFromUtf8(long offsetBytes, int utf8LengthBytes, Appendable dst)
- throws IOException, Utf8CodingException;
-
- /**
- * Gets UTF-8 encoded bytes from this Memory, starting at offsetBytes to a
length of
- * utf8LengthBytes, decodes them into characters and appends them to the
given StringBuilder.
- * This method does *not* reset the length of the destination StringBuilder
before appending
- * characters to it.
- * This is specifically designed to reduce the production of intermediate
objects (garbage),
- * thus significantly reducing pressure on the JVM Garbage Collector.
- * @param offsetBytes offset bytes relative to the Memory start
- * @param utf8LengthBytes the number of encoded UTF-8 bytes to decode. It is
assumed that the
- * caller has the correct number of utf8 bytes required to decode the number
of characters
- * to be appended to dst. Characters outside the ASCII range can require 2,
3 or 4 bytes per
- * character to decode.
- * @param dst the destination StringBuilder to append decoded characters to.
- * @return the number of characters decoded.
- * @throws Utf8CodingException in case of malformed or illegal UTF-8 input
- */
- int getCharsFromUtf8(long offsetBytes, int utf8LengthBytes, StringBuilder
dst)
- throws Utf8CodingException;
-
/**
* Gets the double value at the given offset
* @param offsetBytes offset bytes relative to this Memory start
diff --git a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/Resource.java b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/Resource.java
index 1b56dc0..b13d4b7 100644
--- a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/Resource.java
+++ b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/Resource.java
@@ -22,8 +22,7 @@ package org.apache.datasketches.memory;
import java.nio.ByteOrder;
/**
- * Keeps key configuration state for Memory and Buffer plus some common static
variables
- * and check methods.
+ * Keeps key configuration state for Memory and Buffer plus some common
methods.
*
* @author Lee Rhodes
*/
@@ -152,6 +151,16 @@ public interface Resource {
*/
boolean isRegionView();
+ /**
+ * Returns true if the backing resource of <i>this</i> is identical with the
backing resource
+ * of <i>that</i>. The capacities must be the same. If <i>this</i> is a
region,
+ * the region offset must also be the same.
+ * @param that A different non-null object
+ * @return true if the backing resource of <i>this</i> is the same as the
backing resource
+ * of <i>that</i>.
+ */
+ boolean isSameResource(Resource that);
+
/**
* Returns a description of this object with an optional formatted hex
string of the data
* for the specified a range. Used primarily for testing.
@@ -185,19 +194,7 @@ public interface Resource {
*/
long xxHash64(long offsetBytes, long lengthBytes, long seed);
- //DEPRECATED. NOT SUPPORTED AS OF JAVA 17+ VERSIONS
-
- /**
- * Returns true if the backing resource of <i>this</i> is identical with the
backing resource
- * of <i>that</i>. The capacities must be the same. If <i>this</i> is a
region,
- * the region offset must also be the same.
- * @param that A different non-null object
- * @return true if the backing resource of <i>this</i> is the same as the
backing resource
- * of <i>that</i>.
- * @deprecated no longer supported as of Java 17 versions.
- * With Java 17 use nativeOverlap(other) instead.
- */
- @Deprecated
- boolean isSameResource(Object that);
+ //How to configure MemoryRequestServer, default, set and get
+ //Resort ResourceImpls
}
diff --git a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/WritableMemory.java b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/WritableMemory.java
index 3aee9cc..2cc22d4 100644
--- a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/WritableMemory.java
+++ b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/WritableMemory.java
@@ -449,20 +449,6 @@ public interface WritableMemory extends Memory {
*/
void putCharArray(long offsetBytes, char[] srcArray, int srcOffsetChars, int
lengthChars);
- /**
- * Encodes characters from the given CharSequence into UTF-8 bytes and puts
them into this
- * <i>WritableMemory</i> begining at the given offsetBytes.
- * This is specifically designed to reduce the production of intermediate
objects (garbage),
- * thus significantly reducing pressure on the JVM Garbage Collector.
- * @param offsetBytes offset bytes relative to this <i>WritableMemory</i>
start
- * @param src The source CharSequence to be encoded and put into this
WritableMemory. It is
- * the responsibility of the caller to provide sufficient capacity in this
- * <i>WritableMemory</i> for the encoded Utf8 bytes. Characters outside the
ASCII range can
- * require 2, 3 or 4 bytes per character to encode.
- * @return the number of bytes encoded
- */
- long putCharsToUtf8(long offsetBytes, CharSequence src);
-
/**
* Puts the double value at the given offset
* @param offsetBytes offset bytes relative to this <i>WritableMemory</i>
start
@@ -635,6 +621,7 @@ public interface WritableMemory extends Memory {
* in the test tree.
* @return the MemoryRequestServer object or null.
*/
+ @Override
MemoryRequestServer getMemoryRequestServer();
}
diff --git a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/AllocateDirectMap.java b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/AllocateDirectMap.java
index 3d2aee6..af76bff 100644
--- a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/AllocateDirectMap.java
+++ b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/AllocateDirectMap.java
@@ -69,7 +69,7 @@ class AllocateDirectMap implements Map {
static final Method MAPPED_BYTE_BUFFER_FORCE0_METHOD;
static {
- try { //The FileChannelImpl methods map0 and unmap0 still exist in 16
+ try { //The FileChannelImpl methods map0 and unmap0 still exist in JDK16
FILE_CHANNEL_IMPL_MAP0_METHOD = FileChannelImpl.class
.getDeclaredMethod("map0", int.class, long.class, long.class);
//JDK14 add boolean.class
FILE_CHANNEL_IMPL_MAP0_METHOD.setAccessible(true);
@@ -79,7 +79,7 @@ class AllocateDirectMap implements Map {
FILE_CHANNEL_IMPL_UNMAP0_METHOD.setAccessible(true);
- //The MappedByteBuffer methods load0, isLoaded0 and force0 are removed
in 15
+ //The MappedByteBuffer methods load0, isLoaded0 and force0 are removed
in JDK15
MAPPED_BYTE_BUFFER_LOAD0_METHOD = MappedByteBuffer.class
.getDeclaredMethod("load0", long.class, long.class); //JDK15 removed
MAPPED_BYTE_BUFFER_LOAD0_METHOD.setAccessible(true);
diff --git a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/BaseWritableMemoryImpl.java b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/BaseWritableMemoryImpl.java
index ed5b092..70b3702 100644
--- a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/BaseWritableMemoryImpl.java
+++ b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/BaseWritableMemoryImpl.java
@@ -41,7 +41,6 @@ import org.apache.datasketches.memory.Buffer;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.MemoryRequestServer;
import org.apache.datasketches.memory.ReadOnlyException;
-import org.apache.datasketches.memory.Utf8CodingException;
import org.apache.datasketches.memory.WritableBuffer;
import org.apache.datasketches.memory.WritableHandle;
import org.apache.datasketches.memory.WritableMapHandle;
@@ -246,27 +245,6 @@ public abstract class BaseWritableMemoryImpl extends ResourceImpl implements Wri
copyBytes);
}
- @Override
- public final int getCharsFromUtf8(final long offsetBytes, final int
utf8LengthBytes,
- final Appendable dst) throws IOException, Utf8CodingException {
- checkAlive();
- checkBounds(offsetBytes, utf8LengthBytes, capacityBytes_);
- return Utf8.getCharsFromUtf8(offsetBytes, utf8LengthBytes, dst,
getCumulativeOffset(0),
- getUnsafeObject());
- }
-
- @Override
- public final int getCharsFromUtf8(final long offsetBytes, final int
utf8LengthBytes,
- final StringBuilder dst) throws Utf8CodingException {
- try {
- // Ensure that we do at most one resize of internal StringBuilder's char
array
- dst.ensureCapacity(dst.length() + utf8LengthBytes);
- return getCharsFromUtf8(offsetBytes, utf8LengthBytes, (Appendable) dst);
- } catch (final IOException e) {
- throw new RuntimeException("Should not happen", e);
- }
- }
-
//PRIMITIVE getX() Native Endian (used by both endians)
final char getNativeOrderedChar(final long offsetBytes) {
checkAlive();
@@ -374,13 +352,6 @@ public abstract class BaseWritableMemoryImpl extends ResourceImpl implements Wri
);
}
- @Override
- public final long putCharsToUtf8(final long offsetBytes, final CharSequence
src) {
- checkAlive();
- return Utf8.putCharsToUtf8(offsetBytes, src, getCapacity(),
getCumulativeOffset(0),
- getUnsafeObject());
- }
-
//PRIMITIVE putX() Native Endian (used by both endians)
final void putNativeOrderedChar(final long offsetBytes, final char value) {
checkAlive();
diff --git a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/ResourceImpl.java b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/ResourceImpl.java
index 9bbd7fe..b7d56d9 100644
--- a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/ResourceImpl.java
+++ b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/ResourceImpl.java
@@ -31,8 +31,7 @@ import org.apache.datasketches.memory.ReadOnlyException;
import org.apache.datasketches.memory.Resource;
/**
- * Keeps key configuration state for MemoryImpl and BufferImpl plus some
common static variables
- * and check methods.
+ * Implements the root Resource methods.
*
* @author Lee Rhodes
*/
@@ -420,17 +419,17 @@ public abstract class ResourceImpl implements Resource {
}
@Override //Java 8 & 11 only
- public final boolean isSameResource(final Object that) {
+ public final boolean isSameResource(final Resource that) {
checkAlive();
if (that == null) { return false; }
- final ResourceImpl that1 = (ResourceImpl) that;
- that1.checkAlive();
- if (this == that1) { return true; }
-
- return cumBaseOffset_ == that1.cumBaseOffset_
- && capacityBytes_ == that1.capacityBytes_
- && getUnsafeObject() == that1.getUnsafeObject()
- && getByteBuffer() == that1.getByteBuffer();
+ final ResourceImpl thatR = (ResourceImpl) that;
+ thatR.checkAlive();
+ if (this == thatR) { return true; }
+
+ return cumBaseOffset_ == thatR.cumBaseOffset_
+ && capacityBytes_ == thatR.capacityBytes_
+ && getUnsafeObject() == thatR.getUnsafeObject()
+ && getByteBuffer() == thatR.getByteBuffer();
}
@Override
diff --git a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Utf8.java b/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Utf8.java
deleted file mode 100644
index d8fb52d..0000000
--- a/datasketches-memory-java8/src/main/java/org/apache/datasketches/memory/internal/Utf8.java
+++ /dev/null
@@ -1,632 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.memory.internal;
-
-import static java.lang.Character.isSurrogate;
-import static java.lang.Character.isSurrogatePair;
-import static java.lang.Character.toCodePoint;
-import static org.apache.datasketches.memory.internal.UnsafeUtil.unsafe;
-
-import java.io.IOException;
-import java.nio.BufferOverflowException;
-import java.nio.CharBuffer;
-
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.Utf8CodingException;
-import org.apache.datasketches.memory.WritableMemory;
-
-/**
- * Encoding and decoding implementations of {@link WritableMemory#putCharsToUtf8} and
- * {@link Memory#getCharsFromUtf8}.
- *
- * <p>This is specifically designed to reduce the production of intermediate
objects (garbage),
- * thus significantly reducing pressure on the JVM Garbage Collector.
- *
- * <p>UTF-8 encoding/decoding is adapted from
- * https://github.com/protocolbuffers/protobuf/blob/master/java/core/src/main/java/com/google/protobuf/Utf8.java
- *
- * <p>Copyright 2008 Google Inc. All rights reserved.
- * https://developers.google.com/protocol-buffers/
- * See LICENSE.
- *
- * @author Lee Rhodes
- * @author Roman Leventov
- */
-@SuppressWarnings("restriction")
-final class Utf8 {
-
- private Utf8() { }
-
- //Decode
- static final int getCharsFromUtf8(final long offsetBytes, final int
utf8LengthBytes,
- final Appendable dst, final long cumBaseOffset, final Object unsafeObj)
- throws IOException, Utf8CodingException {
-
- if ((dst instanceof CharBuffer) && ((CharBuffer) dst).hasArray()) {
- return getCharBufferCharsFromUtf8(offsetBytes, ((CharBuffer) dst),
utf8LengthBytes,
- cumBaseOffset, unsafeObj);
- }
-
- //Decode Direct CharBuffers and all other Appendables
-
- final long address = cumBaseOffset + offsetBytes;
-
- // Optimize for 100% ASCII (Hotspot loves small simple top-level loops
like this).
- // This simple loop stops when we encounter a byte >= 0x80 (i.e.
non-ASCII).
- // Need to keep this loop int-indexed, because it's faster for Hotspot
JIT, it doesn't insert
- // savepoint polls on each iteration.
- int i = 0;
- for (; i < utf8LengthBytes; i++) {
- final byte b = unsafe.getByte(unsafeObj, address + i);
- if (!DecodeUtil.isOneByte(b)) {
- break;
- }
- dst.append((char) b);
- }
- if (i == utf8LengthBytes) {
- return i;
- }
- return getNonAsciiCharsFromUtf8(dst, address + i, address +
utf8LengthBytes, unsafeObj,
- cumBaseOffset) + i;
- }
-
- /*
- * Optimize for heap CharBuffer manually, because Hotspot JIT doesn't itself
unfold this
- * abstraction well (doesn't hoist array bound checks, etc.)
- */
- private static int getCharBufferCharsFromUtf8(final long offsetBytes, final
CharBuffer cbuf,
- final int utf8LengthBytes, final long cumBaseOffset, final Object
unsafeObj) {
- final char[] carr = cbuf.array();
- final int startCpos = cbuf.position() + cbuf.arrayOffset();
- int cpos = startCpos;
- final int clim = cbuf.arrayOffset() + cbuf.limit();
- final long address = cumBaseOffset + offsetBytes;
- int i = 0; //byte index
-
- // Optimize for 100% ASCII (Hotspot loves small simple top-level loops
like this).
- // This simple loop stops when we encounter a byte >= 0x80 (i.e.
non-ASCII).
- final int cbufNoCheckLimit = Math.min(utf8LengthBytes, clim - cpos);
- // Need to keep this loop int-indexed, because it's faster for Hotspot
JIT, it doesn't insert
- // savepoint polls on each iteration.
- for (; i < cbufNoCheckLimit; i++) {
- final byte b = unsafe.getByte(unsafeObj, address + i);
- if (!DecodeUtil.isOneByte(b)) {
- break;
- }
- // Not checking CharBuffer bounds!
- carr[cpos++] = (char) b;
- }
-
- for (; i < utf8LengthBytes; i++) {
- final byte b = unsafe.getByte(unsafeObj, address + i);
- if (!DecodeUtil.isOneByte(b)) {
- break;
- }
- checkCharBufferPos(cbuf, cpos, clim);
- carr[cpos++] = (char) b;
- }
- if (i == utf8LengthBytes) {
- cbuf.position(cpos - cbuf.arrayOffset());
- return cpos - startCpos;
- }
-
- return getCharBufferNonAsciiCharsFromUtf8(cbuf, carr, cpos, clim, address
+ i,
- address + utf8LengthBytes, unsafeObj, cumBaseOffset) -
cbuf.arrayOffset();
- }
-
- private static int getCharBufferNonAsciiCharsFromUtf8(final CharBuffer cbuf,
final char[] carr,
- int cpos, final int clim, long address, final long addressLimit, final
Object unsafeObj,
- final long cumBaseOffset) {
-
- while (address < addressLimit) {
- final byte byte1 = unsafe.getByte(unsafeObj, address++);
- if (DecodeUtil.isOneByte(byte1)) {
- checkCharBufferPos(cbuf, cpos, clim);
- carr[cpos++] = (char) byte1;
- // It's common for there to be multiple ASCII characters in a run
mixed in, so add an
- // extra optimized loop to take care of these runs.
- while (address < addressLimit) {
- final byte b = unsafe.getByte(unsafeObj, address);
- if (!DecodeUtil.isOneByte(b)) {
- break;
- }
- address++;
- checkCharBufferPos(cbuf, cpos, clim);
- carr[cpos++] = (char) b;
- }
- }
- else if (DecodeUtil.isTwoBytes(byte1)) {
- if (address >= addressLimit) {
- cbuf.position(cpos - cbuf.arrayOffset());
- final long off = address - cumBaseOffset;
- final long limit = addressLimit - cumBaseOffset;
- throw Utf8CodingException.shortUtf8DecodeByteSequence(byte1, off,
limit, 2);
- }
- checkCharBufferPos(cbuf, cpos, clim);
- DecodeUtil.handleTwoBytesCharBuffer(
- byte1,
- /* byte2 */ unsafe.getByte(unsafeObj, address++),
- cbuf, carr, cpos);
- cpos++;
- }
- else if (DecodeUtil.isThreeBytes(byte1)) {
- if (address >= (addressLimit - 1)) {
- cbuf.position(cpos - cbuf.arrayOffset());
- final long off = address - cumBaseOffset;
- final long limit = addressLimit - cumBaseOffset;
- throw Utf8CodingException.shortUtf8DecodeByteSequence(byte1, off,
limit, 3);
- }
- checkCharBufferPos(cbuf, cpos, clim);
- DecodeUtil.handleThreeBytesCharBuffer(
- byte1,
- /* byte2 */ unsafe.getByte(unsafeObj, address++),
- /* byte3 */ unsafe.getByte(unsafeObj, address++),
- cbuf, carr, cpos);
- cpos++;
- }
- else {
- if (address >= (addressLimit - 2)) {
- cbuf.position(cpos - cbuf.arrayOffset());
- final long off = address - cumBaseOffset;
- final long limit = addressLimit - cumBaseOffset;
- throw Utf8CodingException.shortUtf8DecodeByteSequence(byte1, off,
limit, 4);
- }
- if (cpos >= (clim - 1)) {
- cbuf.position(cpos - cbuf.arrayOffset());
- throw new BufferOverflowException();
- }
- DecodeUtil.handleFourBytesCharBuffer(
- byte1,
- /* byte2 */ unsafe.getByte(unsafeObj, address++),
- /* byte3 */ unsafe.getByte(unsafeObj, address++),
- /* byte4 */ unsafe.getByte(unsafeObj, address++),
- cbuf, carr, cpos);
- cpos += 2;
- }
- }
- cbuf.position(cpos - cbuf.arrayOffset());
- return cpos;
- }
-
- //Decodes into Appendable destination
- //returns num of chars decoded
- private static int getNonAsciiCharsFromUtf8(final Appendable dst, long
address,
- final long addressLimit, final Object unsafeObj, final long
cumBaseOffset)
- throws IOException {
- int chars = 0;
- while (address < addressLimit) {
- final byte byte1 = unsafe.getByte(unsafeObj, address++);
- if (DecodeUtil.isOneByte(byte1)) {
- dst.append((char) byte1);
- chars++;
- // It's common for there to be multiple ASCII characters in a run
mixed in, so add an
- // extra optimized loop to take care of these runs.
- while (address < addressLimit) {
- final byte b = unsafe.getByte(unsafeObj, address);
- if (!DecodeUtil.isOneByte(b)) {
- break;
- }
- address++;
- dst.append((char) b);
- chars++;
- }
- }
- else if (DecodeUtil.isTwoBytes(byte1)) {
- if (address >= addressLimit) {
- final long off = address - cumBaseOffset;
- final long limit = addressLimit - cumBaseOffset;
- throw Utf8CodingException.shortUtf8DecodeByteSequence(byte1, off,
limit, 2);
- }
- DecodeUtil.handleTwoBytes(
- byte1,
- /* byte2 */ unsafe.getByte(unsafeObj, address++),
- dst);
- chars++;
- }
- else if (DecodeUtil.isThreeBytes(byte1)) {
- if (address >= (addressLimit - 1)) {
- final long off = address - cumBaseOffset;
- final long limit = addressLimit - cumBaseOffset;
- throw Utf8CodingException.shortUtf8DecodeByteSequence(byte1, off,
limit, 3);
- }
- DecodeUtil.handleThreeBytes(
- byte1,
- /* byte2 */ unsafe.getByte(unsafeObj, address++),
- /* byte3 */ unsafe.getByte(unsafeObj, address++),
- dst);
- chars++;
- }
- else {
- if (address >= (addressLimit - 2)) {
- final long off = address - cumBaseOffset;
- final long limit = addressLimit - cumBaseOffset;
- throw Utf8CodingException.shortUtf8DecodeByteSequence(byte1, off,
limit, 4);
- }
- DecodeUtil.handleFourBytes(
- byte1,
- /* byte2 */ unsafe.getByte(unsafeObj, address++),
- /* byte3 */ unsafe.getByte(unsafeObj, address++),
- /* byte4 */ unsafe.getByte(unsafeObj, address++),
- dst);
- chars += 2;
- }
- }
- return chars;
- }
-
- private static void checkCharBufferPos(final CharBuffer cbuf, final int
cpos, final int clim) {
- if (cpos == clim) {
- cbuf.position(cpos - cbuf.arrayOffset());
- throw new BufferOverflowException();
- }
- }
-
- /******************/
- //Encode
- static long putCharsToUtf8(final long offsetBytes, final CharSequence src,
- final long capacityBytes, final long cumBaseOffset, final Object
unsafeObj) {
-
-
- int cIdx = 0; //src character index
- long bIdx = cumBaseOffset + offsetBytes; //byte index
- long bCnt = 0; //bytes inserted
-
- final long byteLimit = cumBaseOffset + capacityBytes; //unsafe index limit
-
- final int utf16Length = src.length();
- //Quickly dispatch an ASCII sequence
- for (char c;
- (cIdx < utf16Length) && ((cIdx + bIdx) < byteLimit) && ((c =
src.charAt(cIdx)) < 0x80);
- cIdx++, bCnt++) {
- unsafe.putByte(unsafeObj, bIdx + cIdx, (byte) c);
- }
- //encountered a non-ascii character
- if (cIdx == utf16Length) { //done.
- // next relative byte index in memory is (bIdx + utf16Length) -
cumBaseOffset.
- return bCnt;
- }
- bIdx += cIdx; //bytes == characters for ascii
-
- for (char c; cIdx < utf16Length; cIdx++) { //process the remaining
characters
- c = src.charAt(cIdx);
-
- if ((c < 0x80) && (bIdx < byteLimit)) {
- //Encode ASCII, 0 through 0x007F.
- unsafe.putByte(unsafeObj, bIdx++, (byte) c);
- bCnt++;
- }
-
- else
- //c MUST BE >= 0x0080 || j >= byteLimit
-
- if ((c < 0x800) && (bIdx < (byteLimit - 1))) {
- //Encode 0x80 through 0x7FF.
- //This is for almost all Latin-script alphabets plus Greek, Cyrillic,
Hebrew, Arabic, etc.
- //We must have target space for at least 2 Utf8 bytes.
- unsafe.putByte(unsafeObj, bIdx++, (byte) ((0xF << 6) | (c >>> 6)));
- unsafe.putByte(unsafeObj, bIdx++, (byte) (0x80 | (0x3F & c)));
- bCnt += 2;
- }
-
- else
- //c > 0x800 || j >= byteLimit - 1 || j >= byteLimit
-
- if ( !isSurrogate(c) && (bIdx < (byteLimit - 2)) ) {
- //Encode the remainder of the BMP that are not surrogates:
- // 0x0800 thru 0xD7FF; 0xE000 thru 0xFFFF, the max single-char code
point
- //We must have target space for at least 3 Utf8 bytes.
- unsafe.putByte(unsafeObj, bIdx++, (byte) ((0xF << 5) | (c >>> 12)));
- unsafe.putByte(unsafeObj, bIdx++, (byte) (0x80 | (0x3F & (c >>> 6))));
- unsafe.putByte(unsafeObj, bIdx++, (byte) (0x80 | (0x3F & c)));
- bCnt += 3;
- }
-
- else {
- //c is a surrogate || j >= byteLimit - 2 || j >= byteLimit - 1 || j >=
byteLimit
-
- //At this point we are either:
- // 1) Attempting to encode Code Points outside the BMP.
- //
- // The only way to properly encode code points outside the BMP into
Utf8 bytes is to use
- // High/Low pairs of surrogate characters. Therefore, we must have
at least 2 source
- // characters remaining, at least 4 bytes of memory space
remaining, and the next 2
- // characters must be a valid surrogate pair.
- //
- // 2) There is insufficient MemoryImpl space to encode the current
character from one of the
- // ifs above.
- //
- // We proceed assuming (1). If the following test fails, we move to an
exception.
-
- final char low;
- if ( (cIdx <= (utf16Length - 2))
- && (bIdx <= (byteLimit - 4))
- && isSurrogatePair(c, low = src.charAt(cIdx + 1)) ) { //we are good
- cIdx++; //skip over low surrogate
- final int codePoint = toCodePoint(c, low);
- unsafe.putByte(unsafeObj, bIdx++, (byte) ((0xF << 4) | (codePoint
>>> 18)));
- unsafe.putByte(unsafeObj, bIdx++, (byte) (0x80 | (0x3F & (codePoint
>>> 12))));
- unsafe.putByte(unsafeObj, bIdx++, (byte) (0x80 | (0x3F & (codePoint
>>> 6))));
- unsafe.putByte(unsafeObj, bIdx++, (byte) (0x80 | (0x3F &
codePoint)));
- bCnt += 4;
- }
-
- else {
- //We are going to throw an exception. So we have time to figure out
- // what was wrong and hopefully throw an intelligent message!
-
- //check the BMP code point cases and their required memory limits
- if ( ((c < 0X0080) && (bIdx >= byteLimit))
- || ((c < 0x0800) && (bIdx >= (byteLimit - 1)))
- || ((c < 0xFFFF) && (bIdx >= (byteLimit - 2))) ) {
- throw Utf8CodingException.outOfMemory();
- }
-
- if (cIdx > (utf16Length - 2)) { //the last char is an unpaired
surrogate
- throw Utf8CodingException.unpairedSurrogate(c);
- }
-
- if (bIdx > (byteLimit - 4)) {
- //4 MemoryImpl bytes required to encode a surrogate pair.
- final int remaining = (int) ((bIdx - byteLimit) + 4L);
- throw Utf8CodingException.shortUtf8EncodeByteLength(remaining);
- }
-
- if (!isSurrogatePair(c, src.charAt(cIdx + 1)) ) {
- //Not a surrogate pair.
- throw Utf8CodingException.illegalSurrogatePair(c, src.charAt(cIdx
+ 1));
- }
-
- //This should not happen :)
- throw new IllegalArgumentException("Unknown Utf8 encoding exception");
- }
- }
- }
- //final long localOffsetBytes = bIdx - cumBaseOffset;
- return bCnt;
- }
-
- /*****************/
- /**
- * Utility methods for decoding UTF-8 bytes into {@link String}. Callers are
responsible for
- * extracting bytes (possibly using Unsafe methods), and checking remaining
bytes. All other
- * UTF-8 validity checks and codepoint conversions happen in this class.
- *
- * @see <a href="https://en.wikipedia.org/wiki/UTF-8">Wikipedia: UTF-8</a>
- */
- private static class DecodeUtil {
-
- /**
- * Returns whether this is a single-byte UTF-8 encoding.
- * This is for ASCII.
- *
- * <p>Code Plane 0, Code Point range U+0000 to U+007F.
- *
- * <p>Bit Patterns:
- * <ul><li>Byte 1: '0xxxxxxx'<li>
- * </ul>
- * @param b the byte being tested
- * @return true if this is a single-byte UTF-8 encoding, i.e., b is ≥ 0.
- */
- static boolean isOneByte(final byte b) {
- return b >= 0;
- }
-
- /**
- * Returns whether this is the start of a two-byte UTF-8 encoding.
One-byte encoding must
- * already be excluded.
- * This is for almost all Latin-script alphabets plus Greek, Cyrillic,
Hebrew, Arabic, etc.
- *
- * <p>Code Plane 0, Code Point range U+0080 to U+07FF.
- *
- * <p>Bit Patterns:
- * <ul><li>Byte 1: '110xxxxx'</li>
- * <li>Byte 2: '10xxxxxx'</li>
- * </ul>
- *
- * <p>All bytes must be < 0xE0.
- *
- * @param b the byte being tested
- * @return true if this is the start of a two-byte UTF-8 encoding.
- */
- static boolean isTwoBytes(final byte b) {
- return b < (byte) 0xE0;
- }
-
- /**
- * Returns whether this is the start of a three-byte UTF-8 encoding.
Two-byte encoding must
- * already be excluded.
- * This is for the rest of the BMP, which includes most common Chinese,
Japanese and Korean
- * characters.
- *
- * <p>Code Plane 0, Code Point range U+0800 to U+FFFF.
- *
- * <p>Bit Patterns:
- * <ul><li>Byte 1: '1110xxxx'</li>
- * <li>Byte 2: '10xxxxxx'</li>
- * <li>Byte 3: '10xxxxxx'</li>
- * </ul>
- * All bytes must be less than 0xF0.
- *
- * @param b the byte being tested
- * @return true if this is the start of a three-byte UTF-8 encoding, i.e.,
b ≥ 0XF0.
- */
- static boolean isThreeBytes(final byte b) {
- return b < (byte) 0xF0;
- }
-
- /*
- * Note that if three-byte UTF-8 coding has been excluded and if the
current byte is
- * ≥ 0XF0, it must be the start of a four-byte UTF-8 encoding.
- * This is for the less common CJKV characters, historic scripts, math
symbols, emoji, etc.
- *
- * <p>Code Plane 1 through 16, Code Point range U+10000 to U+10FFFF.
- *
- * <p>Bit Patterns:
- * <ul><li>Byte 1: '11110xxx'</li>
- * <li>Byte 2: '10xxxxxx'</li>
- * <li>Byte 3: '10xxxxxx'</li>
- * <li>Byte 4: '10xxxxxx'</li>
- * </ul>
- */
-
- static void handleTwoBytes(
- final byte byte1, final byte byte2,
- final Appendable dst)
- throws IOException, Utf8CodingException {
- // Simultaneously checks for illegal trailing-byte in leading position
(<= '11000000') and
- // overlong 2-byte, '11000001'.
- if ((byte1 < (byte) 0xC2)
- || isNotTrailingByte(byte2)) {
- final byte[] out = new byte[] {byte1, byte2};
- throw Utf8CodingException.illegalUtf8DecodeByteSequence(out);
- }
- dst.append((char) (((byte1 & 0x1F) << 6) | trailingByteValue(byte2)));
- }
-
- static void handleTwoBytesCharBuffer(
- final byte byte1, final byte byte2,
- final CharBuffer cb, final char[] ca, final int cp)
- throws Utf8CodingException {
- // Simultaneously checks for illegal trailing-byte in leading position
(<= '11000000') and
- // overlong 2-byte, '11000001'.
- if ((byte1 < (byte) 0xC2)
- || isNotTrailingByte(byte2)) {
- final byte[] out = new byte[] {byte1, byte2};
- cb.position(cp - cb.arrayOffset());
- throw Utf8CodingException.illegalUtf8DecodeByteSequence(out);
- }
- ca[cp] = (char) (((byte1 & 0x1F) << 6) | trailingByteValue(byte2));
- }
-
- static void handleThreeBytes(
- final byte byte1, final byte byte2, final byte byte3,
- final Appendable dst)
- throws IOException, Utf8CodingException {
- if (isNotTrailingByte(byte2)
- // overlong? 5 most significant bits must not all be zero
- || ((byte1 == (byte) 0xE0) && (byte2 < (byte) 0xA0))
- // check for illegal surrogate codepoints
- || ((byte1 == (byte) 0xED) && (byte2 >= (byte) 0xA0))
- || isNotTrailingByte(byte3)) {
- final byte[] out = new byte[] {byte1, byte2, byte3};
- throw Utf8CodingException.illegalUtf8DecodeByteSequence(out);
- }
- dst.append((char)
- (((byte1 & 0x0F) << 12) | (trailingByteValue(byte2) << 6) |
trailingByteValue(byte3)));
- }
-
- static void handleThreeBytesCharBuffer(
- final byte byte1, final byte byte2, final byte byte3,
- final CharBuffer cb, final char[] ca, final int cp)
- throws Utf8CodingException {
- if (isNotTrailingByte(byte2)
- // overlong? 5 most significant bits must not all be zero
- || ((byte1 == (byte) 0xE0) && (byte2 < (byte) 0xA0))
- // check for illegal surrogate codepoints
- || ((byte1 == (byte) 0xED) && (byte2 >= (byte) 0xA0))
- || isNotTrailingByte(byte3)) {
- cb.position(cp - cb.arrayOffset());
- final byte[] out = new byte[] {byte1, byte2, byte3};
- throw Utf8CodingException.illegalUtf8DecodeByteSequence(out);
- }
- ca[cp] = (char)
- (((byte1 & 0x0F) << 12) | (trailingByteValue(byte2) << 6) |
trailingByteValue(byte3));
- }
-
- static void handleFourBytes(
- final byte byte1, final byte byte2, final byte byte3, final byte byte4,
- final Appendable dst)
- throws IOException, Utf8CodingException {
- if (isNotTrailingByte(byte2)
- // Check that 1 <= plane <= 16. Tricky optimized form of:
- // valid 4-byte leading byte?
- // if (byte1 > (byte) 0xF4 ||
- // overlong? 4 most significant bits must not all be zero
- // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
- // codepoint larger than the highest code point (U+10FFFF)?
- // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
- || ((((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0)
- || isNotTrailingByte(byte3)
- || isNotTrailingByte(byte4)) {
- final byte[] out = new byte[] { byte1, byte2, byte3, byte4 };
- throw Utf8CodingException.illegalUtf8DecodeByteSequence(out);
- }
- final int codepoint = ((byte1 & 0x07) << 18)
- | (trailingByteValue(byte2) << 12)
- | (trailingByteValue(byte3) << 6)
- | trailingByteValue(byte4);
- dst.append(DecodeUtil.highSurrogate(codepoint));
- dst.append(DecodeUtil.lowSurrogate(codepoint));
- }
-
- static void handleFourBytesCharBuffer(
- final byte byte1, final byte byte2, final byte byte3, final byte byte4,
- final CharBuffer cb, final char[] ca, final int cp)
- throws Utf8CodingException {
- if (isNotTrailingByte(byte2)
- // Check that 1 <= plane <= 16. Tricky optimized form of:
- // valid 4-byte leading byte?
- // if (byte1 > (byte) 0xF4 ||
- // overlong? 4 most significant bits must not all be zero
- // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
- // codepoint larger than the highest code point (U+10FFFF)?
- // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
- || ((((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0)
- || isNotTrailingByte(byte3)
- || isNotTrailingByte(byte4)) {
- cb.position(cp - cb.arrayOffset());
- final byte[] out = new byte[] { byte1, byte2, byte3, byte4 };
- throw Utf8CodingException.illegalUtf8DecodeByteSequence(out);
- }
- final int codepoint = ((byte1 & 0x07) << 18)
- | (trailingByteValue(byte2) << 12)
- | (trailingByteValue(byte3) << 6)
- | trailingByteValue(byte4);
- ca[cp] = DecodeUtil.highSurrogate(codepoint);
- ca[cp + 1] = DecodeUtil.lowSurrogate(codepoint);
- }
-
- /*
- * Returns whether the byte is not a valid continuation of the form
'10XXXXXX'.
- */
- private static boolean isNotTrailingByte(final byte b) {
- return b > (byte) 0xBF;
- }
-
- /*
- * Returns the actual value of the trailing byte (removes the prefix '10')
for composition.
- */
- private static int trailingByteValue(final byte b) {
- return b & 0x3F;
- }
-
- private static char highSurrogate(final int codePoint) {
- return (char)
- ((Character.MIN_HIGH_SURROGATE
- - (Character.MIN_SUPPLEMENTARY_CODE_POINT >>> 10))
- + (codePoint >>> 10));
- }
-
- private static char lowSurrogate(final int codePoint) {
- return (char) (Character.MIN_LOW_SURROGATE + (codePoint & 0x3ff));
- }
- }
-
-}
diff --git
a/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/Buffer2Test.java
b/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/Buffer2Test.java
index a45537c..21a97b7 100644
---
a/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/Buffer2Test.java
+++
b/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/Buffer2Test.java
@@ -35,7 +35,6 @@ import org.apache.datasketches.memory.WritableBuffer;
import org.apache.datasketches.memory.WritableMemory;
import org.testng.annotations.Test;
-@SuppressWarnings("deprecation")
public class Buffer2Test {
@Test
diff --git
a/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/NativeWritableBufferImplTest.java
b/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/NativeWritableBufferImplTest.java
index 06832d8..ec4402a 100644
---
a/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/NativeWritableBufferImplTest.java
+++
b/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/NativeWritableBufferImplTest.java
@@ -39,7 +39,6 @@ import org.apache.datasketches.memory.WritableMemory;
import org.testng.Assert;
import org.testng.annotations.Test;
-@SuppressWarnings("deprecation")
public class NativeWritableBufferImplTest {
//Simple Native direct
diff --git
a/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/NativeWritableMemoryImplTest.java
b/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/NativeWritableMemoryImplTest.java
index 506094a..9d8da97 100644
---
a/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/NativeWritableMemoryImplTest.java
+++
b/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/NativeWritableMemoryImplTest.java
@@ -38,7 +38,6 @@ import org.apache.datasketches.memory.WritableHandle;
import org.apache.datasketches.memory.WritableMemory;
import org.testng.annotations.Test;
-@SuppressWarnings("deprecation")
public class NativeWritableMemoryImplTest {
//Simple Native direct
diff --git
a/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/ResourceTest.java
b/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/ResourceTest.java
index 564bbd9..fc2f506 100644
---
a/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/ResourceTest.java
+++
b/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/ResourceTest.java
@@ -35,7 +35,6 @@ import org.apache.datasketches.memory.WritableBuffer;
import org.apache.datasketches.memory.WritableMemory;
import org.testng.annotations.Test;
-@SuppressWarnings("deprecation")
public class ResourceTest {
@Test
diff --git
a/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/Utf8Test.java
b/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/Utf8Test.java
deleted file mode 100644
index 98d5ee5..0000000
---
a/datasketches-memory-java8/src/test/java/org/apache/datasketches/memory/internal/Utf8Test.java
+++ /dev/null
@@ -1,517 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.memory.internal;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.fail;
-
-import java.io.IOException;
-import java.nio.CharBuffer;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.datasketches.memory.BoundsException;
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.Utf8CodingException;
-import org.apache.datasketches.memory.WritableMemory;
-import org.apache.datasketches.memory.internal.Util.RandomCodePoints;
-import org.testng.annotations.Test;
-
-import com.google.protobuf.ByteString;
-
-/**
- * Adapted version of
- * https://github.com/protocolbuffers/protobuf/blob/master/java/core/src/test/java/com/google/protobuf/DecodeUtf8Test.java
- *
- * Copyright 2008 Google Inc. All rights reserved.
- * https://developers.google.com/protocol-buffers/
- * See LICENSE.
- */
-public class Utf8Test {
-
- @Test
- public void testRoundTripAllValidCodePoints() throws IOException { //the
non-surrogate code pts
- for (int cp = Character.MIN_CODE_POINT; cp < Character.MAX_CODE_POINT;
cp++) {
- if (!isSurrogateCodePoint(cp)) {
- String refStr = new String(Character.toChars(cp));
- assertRoundTrips(refStr);
- }
- }
- }
-
- @Test
- public void testPutInvalidChars() { //The surrogates must be a pair, thus
invalid alone
- WritableMemory mem = WritableMemory.allocate(10);
- WritableMemory emptyMem = WritableMemory.allocate(0);
- for (int c = Character.MIN_SURROGATE; c <= Character.MAX_SURROGATE; c++) {
- confirmSurrogate(mem, (char) c);
- confirmSurrogate(emptyMem, (char) c);
- }
- }
-
- private static void confirmSurrogate(WritableMemory mem, char c) {
- try {
- mem.putCharsToUtf8(0, new String(new char[] {c}));
- fail();
- } catch (Utf8CodingException e) {
- // Expected.
- }
- }
-
- @Test
- public void testPutInvaidSurrogatePairs() {
- WritableMemory mem = WritableMemory.allocate(4);
- StringBuilder sb = new StringBuilder();
- sb.append(Character.MIN_HIGH_SURROGATE);
- sb.append(Character.MAX_HIGH_SURROGATE);
- try {
- mem.putCharsToUtf8(0, sb);
- } catch (Utf8CodingException e) {
- //Expected;
- }
- }
-
- @Test
- public void testPutHighBMP() {
- WritableMemory mem = WritableMemory.allocate(2);
- StringBuilder sb = new StringBuilder();
- sb.append("\uE000");
- try {
- mem.putCharsToUtf8(0, sb);
- } catch (Utf8CodingException e) {
- //Expected;
- }
- }
-
- @Test
- public void testPutExtendedAscii() {
- WritableMemory mem = WritableMemory.allocate(1);
- StringBuilder sb = new StringBuilder();
- sb.append("\u07FF");
- try {
- mem.putCharsToUtf8(0, sb);
- } catch (Utf8CodingException e) {
- //Expected;
- }
- }
-
- @Test
- public void testPutOneAsciiToEmpty() {
- WritableMemory mem = WritableMemory.allocate(0);
- StringBuilder sb = new StringBuilder();
- sb.append("a");
- try {
- mem.putCharsToUtf8(0, sb);
- } catch (Utf8CodingException e) {
- //Expected;
- }
- }
-
- @Test
- public void testPutValidSurrogatePair() {
- WritableMemory mem = WritableMemory.allocate(4);
- StringBuilder sb = new StringBuilder();
- sb.append(Character.MIN_HIGH_SURROGATE);
- sb.append(Character.MIN_LOW_SURROGATE);
- mem.putCharsToUtf8(0, sb);
- }
-
- // Test all 1, 2, 3 invalid byte combinations. Valid ones would have been
covered above.
-
- @Test
- public void testOneByte() {
- int valid = 0;
- for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
- ByteString bs = ByteString.copyFrom(new byte[] {(byte) i });
- if (!bs.isValidUtf8()) { //from -128 to -1
- assertInvalid(bs.toByteArray());
- } else {
- valid++; //from 0 to 127
- }
- }
- assertEquals(IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT,
valid);
- }
-
- @Test
- public void testTwoBytes() {
- int valid = 0;
- for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
- for (int j = Byte.MIN_VALUE; j <= Byte.MAX_VALUE; j++) {
- ByteString bs = ByteString.copyFrom(new byte[]{(byte) i, (byte) j});
- if (!bs.isValidUtf8()) {
- assertInvalid(bs.toByteArray());
- } else {
- valid++;
- }
- }
- }
- assertEquals(IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT,
valid);
- }
-
- //@Test
- //This test is very long, and doesn't cover the 4-byte combinations.
- // This is replaced by the test following which does cover some 4-byte
combinations.
- public void testThreeBytes() {
- // Travis' OOM killer doesn't like this test
- if (System.getenv("TRAVIS") == null) {
- int count = 0;
- int valid = 0;
- for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
- for (int j = Byte.MIN_VALUE; j <= Byte.MAX_VALUE; j++) {
- for (int k = Byte.MIN_VALUE; k <= Byte.MAX_VALUE; k++) {
- byte[] bytes = new byte[]{(byte) i, (byte) j, (byte) k};
- ByteString bs = ByteString.copyFrom(bytes);
- if (!bs.isValidUtf8()) {
- assertInvalid(bytes);
- } else {
- valid++;
- }
- count++;
- if ((count % 1000000L) == 0) {
- println("Processed " + (count / 1000000L) + " million characters");
- }
- }
- }
- }
- assertEquals(IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT, valid);
- }
- }
-
- /* These code points can be used by the following test to customize
different regions of the
- * Code Point space. This randomized test can replace the exhaustive
- * combinatorially explosive previous test, which doesn't cover the 4 byte
combinations.
- */
- static final int min1ByteCP = 0; //ASCII
- static final int min2ByteCP = 0X000080;
- static final int min3ByteCP = 0X000800;
- static final int min4ByteCP = Character.MIN_SUPPLEMENTARY_CODE_POINT;
//0X010000;
- static final int minPlane2CP = 0X020000;
- static final int maxCodePoint = Character.MAX_CODE_POINT;
//0X10FFFF
- static final int minSurr = Character.MIN_SURROGATE;
//0X00D800;
- static final int maxSurr = Character.MAX_SURROGATE;
//0X00E000;
-
- @Test
- //randomly selects CP from a range that include 1, 2, 3 and 4 byte encodings.
- // with 50% coming from plane 0 and 50% coming from plane 1.
- public void checkRandomValidCodePoints() {
- RandomCodePoints rcp = new RandomCodePoints(true);
- int numCP = 1000;
- int[] cpArr = new int[numCP];
- rcp.fillCodePointArray(cpArr, 0, minPlane2CP);
- String rcpStr = new String(cpArr, 0, numCP);
- //println(rcpStr);
- WritableMemory wmem = WritableMemory.allocate(4 * numCP);
- int utf8Bytes = (int) wmem.putCharsToUtf8(0, rcpStr);
-
- StringBuilder sb = new StringBuilder();
- try {
- wmem.getCharsFromUtf8(0L, utf8Bytes, (Appendable) sb);
- } catch (IOException | Utf8CodingException e) {
- throw new RuntimeException(e);
- }
- checkStrings(sb.toString(), rcpStr);
-
- CharBuffer cb = CharBuffer.allocate(rcpStr.length());
- try {
- wmem.getCharsFromUtf8(0L, utf8Bytes, cb);
- } catch (IOException | Utf8CodingException e) {
- throw new RuntimeException(e);
- }
- String cbStr = sb.toString();
- assertEquals(cbStr.length(), rcpStr.length());
- checkStrings(cbStr, rcpStr);
- }
-
- @Test
- public void checkRandomValidCodePoints2() {
- //checks the non-deterministic constructor
- @SuppressWarnings("unused")
- RandomCodePoints rcp = new RandomCodePoints(false);
- }
-
-
- /**
- * Tests that round tripping of a sample of four byte permutations work.
- */
- @Test
- public void testInvalid_4BytesSamples() {
- // Bad trailing bytes
- assertInvalid(0xF0, 0xA4, 0xAD, 0x7F);
- assertInvalid(0xF0, 0xA4, 0xAD, 0xC0);
-
- // Special cases for byte2
- assertInvalid(0xF0, 0x8F, 0xAD, 0xA2);
- assertInvalid(0xF4, 0x90, 0xAD, 0xA2);
- }
-
- @Test
- public void testRealStrings() throws IOException {
- // English
- assertRoundTrips("The quick brown fox jumps over the lazy dog");
- // German
- assertRoundTrips("Quizdeltagerne spiste jordb\u00e6r med fl\u00f8de, mens
cirkusklovnen");
- // Japanese
- assertRoundTrips(
-
"\u3044\u308d\u306f\u306b\u307b\u3078\u3068\u3061\u308a\u306c\u308b\u3092");
- // Hebrew
- assertRoundTrips(
- "\u05d3\u05d2 \u05e1\u05e7\u05e8\u05df \u05e9\u05d8 \u05d1\u05d9\u05dd
"
- + "\u05de\u05d0\u05d5\u05db\u05d6\u05d1 \u05d5\u05dc\u05e4\u05ea\u05e2"
- + " \u05de\u05e6\u05d0 \u05dc\u05d5 \u05d7\u05d1\u05e8\u05d4 "
- + "\u05d0\u05d9\u05da \u05d4\u05e7\u05dc\u05d9\u05d8\u05d4");
- // Thai
- assertRoundTrips(
- " \u0e08\u0e07\u0e1d\u0e48\u0e32\u0e1f\u0e31\u0e19\u0e1e\u0e31\u0e12"
- + "\u0e19\u0e32\u0e27\u0e34\u0e0a\u0e32\u0e01\u0e32\u0e23");
- // Chinese
- assertRoundTrips(
-
"\u8fd4\u56de\u94fe\u4e2d\u7684\u4e0b\u4e00\u4e2a\u4ee3\u7406\u9879\u9009\u62e9\u5668");
- // Chinese with 4-byte chars
-
assertRoundTrips("\uD841\uDF0E\uD841\uDF31\uD841\uDF79\uD843\uDC53\uD843\uDC78"
- +
"\uD843\uDC96\uD843\uDCCF\uD843\uDCD5\uD843\uDD15\uD843\uDD7C\uD843\uDD7F"
- +
"\uD843\uDE0E\uD843\uDE0F\uD843\uDE77\uD843\uDE9D\uD843\uDEA2");
- // Mixed
- assertRoundTrips(
- "The quick brown
\u3044\u308d\u306f\u306b\u307b\u3078\u8fd4\u56de\u94fe"
- + "\u4e2d\u7684\u4e0b\u4e00");
- }
-
- @Test
- public void checkNonEmptyDestinationForDecode() {
- StringBuilder sb = new StringBuilder();
- sb.append("abc"); //current contents of destination
- int startChars = sb.toString().toCharArray().length;
- String refStr = "Quizdeltagerne spiste jordb\u00e6r med fl\u00f8de, mens
cirkusklovnen";
- byte[] refByteArr = refStr.getBytes(UTF_8);
- int addBytes = refByteArr.length;
- WritableMemory refMem = WritableMemory.writableWrap(refByteArr);
- int decodedChars = refMem.getCharsFromUtf8(0, addBytes, sb);
- String finalStr = sb.toString();
- int finalChars = finalStr.toCharArray().length;
- assertEquals(decodedChars + startChars, finalChars);
- println("Decoded chars: " + decodedChars);
- println("Final chars: " + finalChars);
- println(sb.toString());
- }
-
- @Test
- public void checkNonEmptyDestinationForEncode() {
- String refStr = "Quizdeltagerne spiste jordb\u00e6r med fl\u00f8de, mens
cirkusklovnen";
- byte[] refByteArr = refStr.getBytes(UTF_8);
- int refBytes = refByteArr.length;
- int offset = 100;
- WritableMemory tgtMem = WritableMemory.allocate(refBytes + offset);
- long bytesEncoded = tgtMem.putCharsToUtf8(offset, refStr);
- assertEquals(bytesEncoded, refBytes);
- }
-
- @Test
- public void testOverlong() {
- assertInvalid(0xc0, 0xaf);
- assertInvalid(0xe0, 0x80, 0xaf);
- assertInvalid(0xf0, 0x80, 0x80, 0xaf);
-
- // Max overlong
- assertInvalid(0xc1, 0xbf);
- assertInvalid(0xe0, 0x9f, 0xbf);
- assertInvalid(0xf0 ,0x8f, 0xbf, 0xbf);
-
- // null overlong
- assertInvalid(0xc0, 0x80);
- assertInvalid(0xe0, 0x80, 0x80);
- assertInvalid(0xf0, 0x80, 0x80, 0x80);
- }
-
- @Test
- public void testIllegalCodepoints() {
- // Single surrogate
- assertInvalid(0xed, 0xa0, 0x80);
- assertInvalid(0xed, 0xad, 0xbf);
- assertInvalid(0xed, 0xae, 0x80);
- assertInvalid(0xed, 0xaf, 0xbf);
- assertInvalid(0xed, 0xb0, 0x80);
- assertInvalid(0xed, 0xbe, 0x80);
- assertInvalid(0xed, 0xbf, 0xbf);
-
- // Paired surrogates
- assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80);
- assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf);
- assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80);
- assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf);
- assertInvalid(0xed, 0xae, 0x80, 0xed, 0xb0, 0x80);
- assertInvalid(0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf);
- assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80);
- assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf);
- }
-
- @Test
- public void testBufferSlice() throws IOException {
- String str = "The quick brown fox jumps over the lazy dog";
- assertRoundTrips(str, 4, 10, 4);
- assertRoundTrips(str, 0, str.length(), 0);
- }
-
- @Test
- public void testInvalidBufferSlice() { //these are pure Memory bounds
violations
- byte[] bytes = "The quick brown fox jumps over the lazy
dog".getBytes(UTF_8);
- assertInvalidSlice(bytes, bytes.length - 3, 4);
- assertInvalidSlice(bytes, bytes.length, 1);
- assertInvalidSlice(bytes, bytes.length + 1, 0);
- assertInvalidSlice(bytes, 0, bytes.length + 1);
- }
-
- private static void assertInvalid(int... bytesAsInt) { //invalid byte
sequences
- byte[] bytes = new byte[bytesAsInt.length];
- for (int i = 0; i < bytesAsInt.length; i++) {
- bytes[i] = (byte) bytesAsInt[i];
- }
- assertInvalid(bytes);
- }
-
- private static void assertInvalid(byte[] bytes) {
- int bytesLen = bytes.length;
- try {
- Memory.wrap(bytes).getCharsFromUtf8(0, bytesLen, new StringBuilder());
- fail();
- } catch (Utf8CodingException e) {
- // Expected.
- }
- try {
- CharBuffer cb = CharBuffer.allocate(bytesLen);
- Memory.wrap(bytes).getCharsFromUtf8(0, bytesLen, cb);
- fail();
- } catch (Utf8CodingException | IOException e) {
- // Expected.
- }
- }
-
- private static void assertInvalidSlice(byte[] bytes, int index, int size) {
- try {
- Memory mem = Memory.wrap(bytes);
- mem.getCharsFromUtf8(index, size, new StringBuilder());
- fail();
- } catch (BoundsException e) { //Pure bounds violation
- // Expected.
- }
- }
-
- /**
- * Performs round-trip test using the given reference string
- * @param refStr the reference string
- * @throws IOException
- */
- private static void assertRoundTrips(String refStr) throws IOException {
- assertRoundTrips(refStr, refStr.toCharArray().length, 0, -1);
- }
-
- /**
- * Performs round-trip test using the given reference string
- * @param refStr the reference string
- * @param refSubCharLen the number of characters expected to be decoded
- * @param offsetBytes starting utf8 byte offset
- * @param utf8LengthBytes length of utf8 bytes
- * @throws IOException
- */
- private static void assertRoundTrips(String refStr, int refSubCharLen, int
offsetBytes,
- int utf8LengthBytes) throws IOException {
- byte[] refByteArr = refStr.getBytes(UTF_8);
- if (utf8LengthBytes == -1) {
- utf8LengthBytes = refByteArr.length;
- }
- Memory refMem = Memory.wrap(refByteArr);
-
- byte[] refByteArr2 = new byte[refByteArr.length + 1];
- System.arraycopy(refByteArr, 0, refByteArr2, 1, refByteArr.length);
- Memory refReg = Memory.wrap(refByteArr2).region(1, refByteArr.length);
-
- WritableMemory dstMem = WritableMemory.allocate(refByteArr.length);
- WritableMemory dstMem2 =
- WritableMemory.allocate(refByteArr.length + 1).writableRegion(1,
refByteArr.length);
-
- // Test with Memory objects, where base offset != 0
- assertRoundTrips(refStr, refSubCharLen, offsetBytes, utf8LengthBytes,
refByteArr, refMem, dstMem);
- assertRoundTrips(refStr, refSubCharLen, offsetBytes, utf8LengthBytes,
refByteArr, refMem, dstMem2);
- assertRoundTrips(refStr, refSubCharLen, offsetBytes, utf8LengthBytes,
refByteArr, refReg, dstMem);
- assertRoundTrips(refStr, refSubCharLen, offsetBytes, utf8LengthBytes,
refByteArr, refReg, dstMem2);
- }
-
- private static void assertRoundTrips(String refStr, int refSubCharLen, int
offsetBytes,
- int utf8LengthBytes, byte[] refByteArr, Memory refMem, WritableMemory
dstMem)
- throws IOException {
- StringBuilder sb = new StringBuilder();
-
- int charPos = refMem.getCharsFromUtf8(offsetBytes, utf8LengthBytes, sb);
- checkStrings(sb.toString(), new String(refByteArr, offsetBytes,
utf8LengthBytes, UTF_8));
- assertEquals(charPos, refSubCharLen);
-
- CharBuffer cb = CharBuffer.allocate(refByteArr.length + 1);
- cb.position(1);
- // Make CharBuffer 1-based, to check correct offset handling
- cb = cb.slice();
- refMem.getCharsFromUtf8(offsetBytes, utf8LengthBytes, cb);
- cb.flip();
- checkStrings(cb.toString(), new String(refByteArr, offsetBytes,
utf8LengthBytes, UTF_8));
-
- long encodedUtf8Bytes = dstMem.putCharsToUtf8(0, refStr); //encodes entire
refStr
- assertEquals(encodedUtf8Bytes, refByteArr.length); //compares bytes length
- //compare the actual bytes encoded
- assertEquals(0, dstMem.compareTo(0, refByteArr.length, refMem, 0,
refByteArr.length));
-
- // Test write overflow
- WritableMemory writeMem2 = WritableMemory.allocate(refByteArr.length - 1);
- try {
- writeMem2.putCharsToUtf8(0, refStr);
- fail();
- } catch (Utf8CodingException e) {
- // Expected.
- }
- }
-
- private static boolean isSurrogateCodePoint(final int cp) {
- return (cp >= Character.MIN_SURROGATE) && (cp <= Character.MAX_SURROGATE);
- }
-
- private static void checkStrings(String actual, String expected) {
- if (!expected.equals(actual)) {
- fail("Failure: Expected (" + codepoints(expected) + ") Actual (" +
codepoints(actual) + ")");
- }
- }
-
- private static List<String> codepoints(String str) {
- List<String> codepoints = new ArrayList<>();
- for (int i = 0; i < str.length(); i++) {
- codepoints.add(Long.toHexString(str.charAt(i)));
- }
- return codepoints;
- }
-
- @Test
- public void printlnTest() {
- println("PRINTING: "+this.getClass().getName());
- }
-
- /**
- * @param s value to print
- */
- static void println(String s) {
- //System.out.println(s); //disable here
- }
-}
diff --git a/pom.xml b/pom.xml
index a15297e..24b4272 100644
--- a/pom.xml
+++ b/pom.xml
@@ -102,7 +102,7 @@ under the License.
<testng.version>7.5</testng.version>
<!-- System-wide properties -->
- <maven.version>3.5.0</maven.version>
+ <maven.version>3.8.6</maven.version>
<java.version>1.8</java.version>
<jdk-toolchain.version>8</jdk-toolchain.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
@@ -120,7 +120,7 @@ under the License.
<!-- Maven Plugins -->
<maven-assembly-plugin.version>3.3.0</maven-assembly-plugin.version> <!--
overrides parent -->
- <maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version> <!--
overrides parent -->
+ <maven-compiler-plugin.version>3.10.1</maven-compiler-plugin.version> <!--
overrides parent -->
<maven-deploy-plugin.version>3.0.0-M1</maven-deploy-plugin.version> <!--
overrides parent -->
<maven-enforcer-plugin.version>3.0.0</maven-enforcer-plugin.version> <!--
overrides parent -->
<maven-jar-plugin.version>3.2.0</maven-jar-plugin.version> <!-- overrides
parent -->
@@ -179,13 +179,6 @@ under the License.
<version>${testng.version}</version>
<scope>test</scope>
</dependency>
- <dependency>
- <!-- Used for UTF8 testing -->
- <groupId>com.google.protobuf</groupId>
- <artifactId>protobuf-java</artifactId>
- <version>${protobuf-java.version}</version>
- <scope>test</scope>
- </dependency>
<dependency>
<!-- Used for xxHash testing -->
<groupId>net.openhft</groupId>
@@ -380,7 +373,7 @@ under the License.
<configuration>
<toolchains>
<jdk>
- <version>[1.8,1.9),[8],[11,14),[17,18)</version>
+ <version>[1.8,1.9),[8],[11,12),[17,18)</version>
</jdk>
</toolchains>
</configuration>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]