This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push:
new 57a9eae2 feat(java): reduce metastring hashcode payload for small
string(<=16 bytes) (#1909)
57a9eae2 is described below
commit 57a9eae2bab89b7f8ab9ae900359d9374c3b697f
Author: Shawn Yang <[email protected]>
AuthorDate: Sun Oct 27 10:05:35 2024 +0800
feat(java): reduce metastring hashcode payload for small string(<=16 bytes)
(#1909)
## What does this PR do?
If a meta string is at most 16 bytes long, we skip writing its 8-byte
hashcode and write only a 1-byte encoding header, which reduces space cost.
This brings a big gain, since most metastring-encoded class names and enum
names are no longer than 16 bytes. There are also far fewer distinct package
names than class names, so package names already save space through shared
dictionary encoding.
## Does this PR introduce any user-facing change?
<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fury/issues/new/choose) describing the
need to do so and update the document if necessary.
-->
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
// old size 391
// Benchmark (bufferType) (objectType) (references) Mode Cnt Score Error
Units
// UserTypeDeserializeSuite.fury_deserialize array MEDIA_CONTENT false
thrpt 100 2751601.402 ± 28811.825 ops/s
// new size: 377
// Benchmark (bufferType) (objectType) (references) Mode Cnt Score Error
Units
// UserTypeDeserializeSuite.fury_deserialize array MEDIA_CONTENT false
thrpt 100 2748329.241 ± 28163.821 ops/s
---
LICENSE | 1 +
go/fury/type.go | 29 ++--
.../org/apache/fury/collection/FuryObjectMap.java | 3 +-
.../org/apache/fury/collection/LongLongMap.java | 157 +++++++++++++++++++++
.../java/org/apache/fury/collection/LongMap.java | 4 +-
.../org/apache/fury/collection/ObjectIntMap.java | 4 +-
.../org/apache/fury/io/BlockedStreamUtils.java | 1 +
.../java/org/apache/fury/memory/LittleEndian.java | 7 +
.../java/org/apache/fury/memory/MemoryBuffer.java | 36 +++++
.../org/apache/fury/resolver/ClassResolver.java | 4 +
.../org/apache/fury/resolver/MetaStringBytes.java | 28 +++-
.../apache/fury/resolver/MetaStringResolver.java | 131 +++++++++++++----
java/fury-core/src/main/resources/META-INF/LICENSE | 1 +
.../fury-core/native-image.properties | 1 +
.../apache/fury/collection/LongLongMapTest.java | 45 ++++++
.../org/apache/fury/memory/MemoryBufferTest.java | 12 ++
.../fury/resolver/MetaStringResolverTest.java | 20 +++
licenserc.toml | 1 +
python/pyfury/_fury.py | 41 ++++--
python/pyfury/_serialization.pyx | 59 +++++---
python/pyfury/_serializer.py | 1 +
python/pyfury/_util.pxd | 2 +
python/pyfury/_util.pyx | 14 ++
23 files changed, 528 insertions(+), 74 deletions(-)
diff --git a/LICENSE b/LICENSE
index 7683b460..ce70ac93 100644
--- a/LICENSE
+++ b/LICENSE
@@ -267,6 +267,7 @@ The text of each license is also included in
licenses/LICENSE-[project].txt.
java/fury-core/src/main/java/org/apache/fury/collection/IdentityMap.java
java/fury-core/src/main/java/org/apache/fury/collection/IdentityObjectIntMap.java
java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java
+ java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java
java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java
java/fury-core/src/main/java/org/apache/fury/type/Generics.java
java/fury-core/src/test/java/org/apache/fury/type/GenericsTest.java
diff --git a/go/fury/type.go b/go/fury/type.go
index ac19d74f..98315991 100644
--- a/go/fury/type.go
+++ b/go/fury/type.go
@@ -19,6 +19,7 @@ package fury
import (
"fmt"
+ "github.com/apache/fury/go/fury/meta"
"hash/fnv"
"reflect"
"regexp"
@@ -136,6 +137,7 @@ const (
NotSupportCrossLanguage = 0
useStringValue = 0
useStringId = 1
+ SMALL_STRING_THRESHOLD = 16
)
var (
@@ -551,14 +553,19 @@ func (r *typeResolver) writeMetaString(buffer
*ByteBuffer, str string) error {
dynamicStringId := r.dynamicStringId
r.dynamicStringId += 1
r.dynamicStringToId[str] = dynamicStringId
- buffer.WriteVarInt32(int32(len(str) << 1))
- // TODO this hash should be unique, since we don't compare data
equality for performance
- h := fnv.New64a()
- if _, err := h.Write([]byte(str)); err != nil {
- return err
+ length := len(str)
+ buffer.WriteVarInt32(int32(length << 1))
+ if length <= SMALL_STRING_THRESHOLD {
+ buffer.WriteByte_(uint8(meta.UTF_8))
+ } else {
+ // TODO this hash should be unique, since we don't
compare data equality for performance
+ h := fnv.New64a()
+ if _, err := h.Write([]byte(str)); err != nil {
+ return err
+ }
+ hash := int64(h.Sum64() & 0xffffffffffffff00)
+ buffer.WriteInt64(hash)
}
- hash := int64(h.Sum64() & 0xffffffffffffff00)
- buffer.WriteInt64(hash)
if len(str) > MaxInt16 {
return fmt.Errorf("too long string: %s", str)
}
@@ -573,8 +580,12 @@ func (r *typeResolver) readMetaString(buffer *ByteBuffer)
(string, error) {
header := buffer.ReadVarInt32()
var length = int(header >> 1)
if header&0b1 == 0 {
- // TODO support use computed hash
- buffer.ReadInt64()
+ if length <= SMALL_STRING_THRESHOLD {
+ buffer.ReadByte_()
+ } else {
+ // TODO support use computed hash
+ buffer.ReadInt64()
+ }
str := string(buffer.ReadBinary(length))
dynamicStringId := r.dynamicStringId
r.dynamicStringId += 1
diff --git
a/java/fury-core/src/main/java/org/apache/fury/collection/FuryObjectMap.java
b/java/fury-core/src/main/java/org/apache/fury/collection/FuryObjectMap.java
index 528fc643..72804235 100644
--- a/java/fury-core/src/main/java/org/apache/fury/collection/FuryObjectMap.java
+++ b/java/fury-core/src/main/java/org/apache/fury/collection/FuryObjectMap.java
@@ -51,6 +51,7 @@ import org.apache.fury.util.Preconditions;
*/
@SuppressWarnings("unchecked")
public class FuryObjectMap<K, V> {
+ static final long MASK_NUMBER = 0x9E3779B97F4A7C15L;
static final Object dummy = new Object();
public int size;
@@ -135,7 +136,7 @@ public class FuryObjectMap<K, V> {
* {@code return item.hashCode() & mask;}
*/
protected int place(K item) {
- return (int) (item.hashCode() * 0x9E3779B97F4A7C15L >>> shift);
+ return (int) (item.hashCode() * MASK_NUMBER >>> shift);
}
/**
diff --git
a/java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java
b/java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java
new file mode 100644
index 00000000..0e94ef9e
--- /dev/null
+++ b/java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java
@@ -0,0 +1,157 @@
+/* Copyright (c) 2008-2023, Nathan Sweet
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided with the
distribution.
+ * - Neither the name of Esoteric Software nor the names of its contributors
may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+
+package org.apache.fury.collection;
+
+import static org.apache.fury.collection.FuryObjectMap.MASK_NUMBER;
+
+import org.apache.fury.annotation.Internal;
+import org.apache.fury.util.Preconditions;
+
+/**
+ * A fast linear-probing hash map whose key is a pair of long values {@code (long k1, long k2)}.
+ *
+ * <p>Lookups take the two key halves as primitives, so callers never build a key object just to
+ * query the map. Inserting a new key allocates one small internal entry holding the pair.
+ */
+// The linear probed hash is derived from
+// https://github.com/EsotericSoftware/kryo/blob/135df69526615bb3f6b34846e58ba3fec3b631c3/src/com/esotericsoftware/kryo/util/IntMap.java.
+@SuppressWarnings("unchecked")
+@Internal
+public final class LongLongMap<V> {
+  /** Immutable holder for the two halves of a key stored in the key table. */
+  private static final class LongLongKey {
+    private final long k1;
+    private final long k2;
+
+    public LongLongKey(long k1, long k2) {
+      this.k1 = k1;
+      this.k2 = k2;
+    }
+
+    @Override
+    public String toString() {
+      return "LongLongKey{" + "k1=" + k1 + ", k2=" + k2 + '}';
+    }
+  }
+
+  /** Number of entries currently stored. */
+  public int size;
+
+  LongLongKey[] keyTable;
+  V[] valueTable;
+  private final float loadFactor;
+
+  /** Entry count at which the backing tables are doubled. */
+  private int threshold;
+
+  /** Right shift applied to the multiplied hash so that the result is a table index. */
+  private int shift;
+
+  /** {@code keyTable.length - 1}; used to wrap the probe index around the table. */
+  private int mask;
+
+  /**
+   * Creates a new map with the specified initial capacity and load factor. This map will hold
+   * initialCapacity items before growing the backing table.
+   *
+   * @param initialCapacity If not a power of two, it is increased to the next nearest power of two.
+   * @param loadFactor fraction of the table that may be filled before it is doubled; must be in
+   *     {@code (0, 1]}. Zero is rejected because it yields a threshold of 0, which makes every
+   *     {@link #put} double the table.
+   */
+  public LongLongMap(int initialCapacity, float loadFactor) {
+    Preconditions.checkArgument(
+        0 < loadFactor && loadFactor <= 1, "loadFactor %s must be > 0 and <= 1", loadFactor);
+    this.loadFactor = loadFactor;
+    int tableSize = FuryObjectMap.tableSize(initialCapacity, loadFactor);
+    threshold = (int) (tableSize * loadFactor);
+    mask = tableSize - 1;
+    shift = Long.numberOfLeadingZeros(mask);
+    keyTable = new LongLongKey[tableSize];
+    valueTable = (V[]) new Object[tableSize];
+  }
+
+  /** Returns the preferred table index for the key pair. */
+  private int place(long k1, long k2) {
+    return (int) ((k1 * 31 + k2) * MASK_NUMBER >>> shift);
+  }
+
+  /**
+   * Returns the index of the key if already present, else -(index + 1) for the next empty index.
+   * Keys are compared by the value of both halves.
+   */
+  private int locateKey(long k1, long k2) {
+    LongLongKey[] keyTable = this.keyTable;
+    int mask = this.mask;
+    for (int i = place(k1, k2); ; i = i + 1 & mask) {
+      LongLongKey other = keyTable[i];
+      if (other == null) {
+        return -(i + 1); // Empty space is available.
+      }
+      if (other.k1 == k1 && other.k2 == k2) {
+        return i; // Same key was found.
+      }
+    }
+  }
+
+  /**
+   * Associates {@code value} with the key pair.
+   *
+   * @return the value previously stored for the key, or {@code null} if the key was absent
+   */
+  public V put(long k1, long k2, V value) {
+    int i = locateKey(k1, k2);
+    if (i >= 0) { // Existing key was found.
+      V[] valueTable = this.valueTable;
+      V oldValue = valueTable[i];
+      valueTable[i] = value;
+      return oldValue;
+    }
+    i = -(i + 1); // Empty space was found.
+    keyTable[i] = new LongLongKey(k1, k2);
+    valueTable[i] = value;
+    if (++size >= threshold) {
+      resize(keyTable.length << 1);
+    }
+    return null;
+  }
+
+  /** Returns the value stored for the key pair, or {@code null} if the key is absent. */
+  public V get(long k1, long k2) {
+    LongLongKey[] keyTable = this.keyTable;
+    int mask = this.mask;
+    for (int i = place(k1, k2); ; i = i + 1 & mask) {
+      LongLongKey other = keyTable[i];
+      if (other == null) {
+        return null;
+      }
+      if (other.k1 == k1 && other.k2 == k2) {
+        return valueTable[i];
+      }
+    }
+  }
+
+  /** Replaces the backing tables with tables of {@code newSize} slots and re-inserts all entries. */
+  private void resize(int newSize) {
+    int oldCapacity = keyTable.length;
+    threshold = (int) (newSize * loadFactor);
+    mask = newSize - 1;
+    shift = Long.numberOfLeadingZeros(mask);
+    LongLongKey[] oldKeyTable = keyTable;
+    V[] oldValueTable = valueTable;
+    keyTable = new LongLongKey[newSize];
+    valueTable = (V[]) new Object[newSize];
+    if (size > 0) {
+      for (int i = 0; i < oldCapacity; i++) {
+        LongLongKey key = oldKeyTable[i];
+        if (key != null) {
+          for (int j = place(key.k1, key.k2); ; j = (j + 1) & mask) {
+            if (keyTable[j] == null) {
+              // Keys are immutable, so the existing entry object is moved rather than copied.
+              keyTable[j] = key;
+              valueTable[j] = oldValueTable[i];
+              break;
+            }
+          }
+        }
+      }
+    }
+  }
+}
diff --git
a/java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java
b/java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java
index c7995566..fb13dba8 100644
--- a/java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java
+++ b/java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java
@@ -19,6 +19,8 @@
package org.apache.fury.collection;
+import static org.apache.fury.collection.FuryObjectMap.MASK_NUMBER;
+
import java.util.Arrays;
// Derived from
@@ -141,7 +143,7 @@ public class LongMap<V> {
* {@code return item.hashCode() & mask;}
*/
protected int place(long item) {
- return (int) (item * 0x9E3779B97F4A7C15L >>> shift);
+ return (int) (item * MASK_NUMBER >>> shift);
}
/**
diff --git
a/java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java
b/java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java
index 5c8ebab4..bdc8a178 100644
--- a/java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java
+++ b/java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java
@@ -19,6 +19,8 @@
package org.apache.fury.collection;
+import static org.apache.fury.collection.FuryObjectMap.MASK_NUMBER;
+
import java.util.HashMap;
import java.util.Map;
import java.util.function.BiConsumer;
@@ -59,7 +61,7 @@ public class ObjectIntMap<K> {
}
protected int place(K item) {
- return (int) (item.hashCode() * 0x9E3779B97F4A7C15L >>> shift);
+ return (int) (item.hashCode() * MASK_NUMBER >>> shift);
}
int locateKey(K key) {
diff --git
a/java/fury-core/src/main/java/org/apache/fury/io/BlockedStreamUtils.java
b/java/fury-core/src/main/java/org/apache/fury/io/BlockedStreamUtils.java
index 961b65f0..c03707bd 100644
--- a/java/fury-core/src/main/java/org/apache/fury/io/BlockedStreamUtils.java
+++ b/java/fury-core/src/main/java/org/apache/fury/io/BlockedStreamUtils.java
@@ -96,6 +96,7 @@ public class BlockedStreamUtils {
Fury fury, ReadableByteChannel channel, Function<MemoryBuffer, Object>
action) {
try {
MemoryBuffer buf = fury.getBuffer();
+ buf.readerIndex(0);
ByteBuffer byteBuffer = ByteBuffer.allocate(4);
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
readByteBuffer(channel, byteBuffer, 4);
diff --git
a/java/fury-core/src/main/java/org/apache/fury/memory/LittleEndian.java
b/java/fury-core/src/main/java/org/apache/fury/memory/LittleEndian.java
index d519fdf9..b0eb8754 100644
--- a/java/fury-core/src/main/java/org/apache/fury/memory/LittleEndian.java
+++ b/java/fury-core/src/main/java/org/apache/fury/memory/LittleEndian.java
@@ -72,6 +72,13 @@ public class LittleEndian {
return Platform.IS_LITTLE_ENDIAN ? v : Long.reverseBytes(v);
}
+ public static void putInt64(byte[] o, int index, long value) {
+ if (!Platform.IS_LITTLE_ENDIAN) {
+ value = Long.reverseBytes(value);
+ }
+ Platform.putLong(o, Platform.BYTE_ARRAY_OFFSET + index, value);
+ }
+
public static void putFloat32(Object o, long pos, float value) {
int v = Float.floatToRawIntBits(value);
if (!Platform.IS_LITTLE_ENDIAN) {
diff --git
a/java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java
b/java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java
index 87b56e6e..9bfa7efe 100644
--- a/java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java
+++ b/java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java
@@ -2174,6 +2174,42 @@ public final class MemoryBuffer {
readBytes(dst, 0, dst.length);
}
+  /**
+   * Read {@code len} bytes into a long using little-endian order: the first byte read becomes the
+   * least significant byte of the result and the unread high bytes are zero.
+   *
+   * @param len number of bytes to consume; the masking below assumes {@code 0 <= len <= 8}
+   * @return the value assembled from the {@code len} bytes, {@code 0} when {@code len} is 0
+   */
+  public long readBytesAsInt64(int len) {
+    int readerIdx = readerIndex;
+    // use subtract to avoid overflow
+    int remaining = size - readerIdx;
+    if (remaining >= 8) {
+      if (len == 0) {
+        // Java masks long shift distances to 6 bits, so `>>> 64` would leave the mask all ones
+        // and the 8 bytes that follow would leak into the result.
+        return 0;
+      }
+      readerIndex = readerIdx + len;
+      long v = UNSAFE.getLong(heapMemory, address + readerIdx);
+      if (!LITTLE_ENDIAN) {
+        // Normalize the byte order before masking, otherwise the mask keeps the wrong bytes.
+        v = Long.reverseBytes(v);
+      }
+      return v & (0xffffffffffffffffL >>> ((8 - len) * 8));
+    }
+    return slowReadBytesAsInt64(remaining, len);
+  }
+
+ private long slowReadBytesAsInt64(int remaining, int len) {
+ if (remaining < len) {
+ streamReader.fillBuffer(len - remaining);
+ }
+ int readerIdx = readerIndex;
+ readerIndex = readerIdx + len;
+ long result = 0;
+ byte[] heapMemory = this.heapMemory;
+ if (heapMemory != null) {
+ for (int i = 0, start = heapOffset + readerIdx; i < len; i++) {
+ result |= (((long) heapMemory[start + i]) & 0xff) << (i * 8);
+ }
+ } else {
+ long start = address + readerIdx;
+ for (int i = 0; i < len; i++) {
+ result |= ((long) UNSAFE.getByte(null, start + i) & 0xff) << (i * 8);
+ }
+ }
+ return result;
+ }
+
public int read(ByteBuffer dst) {
int readerIdx = readerIndex;
int len = dst.remaining();
diff --git
a/java/fury-core/src/main/java/org/apache/fury/resolver/ClassResolver.java
b/java/fury-core/src/main/java/org/apache/fury/resolver/ClassResolver.java
index 5ca4ae70..dbaf4bb4 100644
--- a/java/fury-core/src/main/java/org/apache/fury/resolver/ClassResolver.java
+++ b/java/fury-core/src/main/java/org/apache/fury/resolver/ClassResolver.java
@@ -1619,6 +1619,8 @@ public class ClassResolver {
if (classInfo.classId != NO_CLASS_ID) {
buffer.writeVarUint32(classInfo.classId << 1);
} else {
+ // let the lowermost bit of next byte be set, so the deserialization can
know
+ // whether need to read class by name in advance
metaStringResolver.writeMetaStringBytesWithFlag(buffer,
classInfo.packageNameBytes);
metaStringResolver.writeMetaStringBytes(buffer,
classInfo.classNameBytes);
}
@@ -1634,6 +1636,8 @@ public class ClassResolver {
int header = buffer.readVarUint32Small14();
final ClassInfo classInfo;
if ((header & 0b1) != 0) {
+ // let the lowermost bit of next byte be set, so the deserialization can
know
+ // whether need to read class by name in advance
MetaStringBytes packageBytes =
metaStringResolver.readMetaStringBytesWithFlag(buffer, header);
MetaStringBytes simpleClassNameBytes =
metaStringResolver.readMetaStringBytes(buffer);
classInfo = loadBytesToClassInfo(packageBytes, simpleClassNameBytes);
diff --git
a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
index 56301645..1612bcaf 100644
--- a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
+++ b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
@@ -21,6 +21,8 @@ package org.apache.fury.resolver;
import java.util.Arrays;
import org.apache.fury.annotation.Internal;
+import org.apache.fury.memory.LittleEndian;
+import org.apache.fury.memory.Platform;
import org.apache.fury.meta.MetaString;
import org.apache.fury.meta.MetaStringDecoder;
import org.apache.fury.util.MurmurHash3;
@@ -32,6 +34,9 @@ public final class MetaStringBytes {
final byte[] bytes;
final long hashCode;
+ final MetaString.Encoding encoding;
+ final long first8Bytes;
+ final long second8Bytes;
short dynamicWriteStringId = DEFAULT_DYNAMIC_WRITE_STRING_ID;
/**
@@ -41,14 +46,23 @@ public final class MetaStringBytes {
* @param hashCode String hash code. This should be unique and has no hash
collision, and be
* deterministic, so we can use cache to reduce hash loop up for read.
*/
- public MetaStringBytes(byte[] bytes, long hashCode) {
+ MetaStringBytes(final byte[] bytes, long hashCode) {
assert hashCode != 0;
this.bytes = bytes;
this.hashCode = hashCode;
+ int header = (int) (hashCode & HEADER_MASK); // header bits of the hash carry the encoding id
+ this.encoding = MetaString.Encoding.fromInt(header);
+ byte[] data = bytes;
+ if (bytes.length < 16) { // zero-pad short payloads so two full longs can always be read
+ data = new byte[16];
+ System.arraycopy(bytes, 0, data, 0, bytes.length);
+ }
+ first8Bytes = LittleEndian.getInt64(data, Platform.BYTE_ARRAY_OFFSET); // bytes 0..7 of data
+ second8Bytes = LittleEndian.getInt64(data, Platform.BYTE_ARRAY_OFFSET + 8); // bytes 8..15 of data
}
- public MetaStringBytes(MetaString metaString) {
- this.bytes = metaString.getBytes();
+ static MetaStringBytes of(MetaString metaString) {
+ byte[] bytes = metaString.getBytes();
// Set seed to ensure hash is deterministic.
long hashCode = MurmurHash3.murmurhash3_x64_128(bytes, 0, bytes.length,
47)[0];
if (hashCode == 0) {
@@ -56,8 +70,10 @@ public final class MetaStringBytes {
hashCode += 256; // last byte is reserved for header.
}
hashCode &= 0xffffffffffffff00L;
- int header = metaString.getEncoding().getValue() & HEADER_MASK;
- this.hashCode = hashCode | header;
+ MetaString.Encoding encoding = metaString.getEncoding();
+ int header = encoding.getValue() & HEADER_MASK;
+ hashCode = hashCode | header;
+ return new MetaStringBytes(bytes, hashCode);
}
public String decode(char specialChar1, char specialChar2) {
@@ -65,8 +81,6 @@ public final class MetaStringBytes {
}
public String decode(MetaStringDecoder decoder) {
- int header = (int) (hashCode & HEADER_MASK);
- MetaString.Encoding encoding = MetaString.Encoding.values()[header];
return decoder.decode(bytes, encoding);
}
diff --git
a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
index c3322d16..af24ce8e 100644
---
a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
+++
b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
@@ -19,11 +19,15 @@
package org.apache.fury.resolver;
+import java.util.Arrays;
+import org.apache.fury.collection.LongLongMap;
import org.apache.fury.collection.LongMap;
import org.apache.fury.collection.ObjectMap;
+import org.apache.fury.memory.LittleEndian;
import org.apache.fury.memory.MemoryBuffer;
import org.apache.fury.meta.Encoders;
import org.apache.fury.meta.MetaString;
+import org.apache.fury.util.MurmurHash3;
/**
* A resolver for limited string value writing. Currently, we only support
classname dynamic
@@ -35,12 +39,15 @@ public final class MetaStringResolver {
private static final int initialCapacity = 8;
// use a lower load factor to minimize hash collision
private static final float furyMapLoadFactor = 0.25f;
+ private static final int SMALL_STRING_THRESHOLD = 16;
// Every deserialization for unregistered string will query it, performance
is important.
private final ObjectMap<MetaStringBytes, String> metaStringBytes2StringMap =
new ObjectMap<>(initialCapacity, furyMapLoadFactor);
private final LongMap<MetaStringBytes> hash2MetaStringBytesMap =
new LongMap<>(initialCapacity, furyMapLoadFactor);
+ private final LongLongMap<MetaStringBytes> longLongMap =
+ new LongLongMap<>(initialCapacity, furyMapLoadFactor);
// Every enum bytes should be singleton at every fury, since we keep state
in it.
private final ObjectMap<MetaString, MetaStringBytes> metaString2BytesMap =
new ObjectMap<>(initialCapacity, furyMapLoadFactor);
@@ -57,7 +64,7 @@ public final class MetaStringResolver {
public MetaStringBytes getOrCreateMetaStringBytes(MetaString str) {
MetaStringBytes metaStringBytes = metaString2BytesMap.get(str);
if (metaStringBytes == null) {
- metaStringBytes = new MetaStringBytes(str);
+ metaStringBytes = MetaStringBytes.of(str);
metaString2BytesMap.put(str, metaStringBytes);
}
return metaStringBytes;
@@ -66,6 +73,7 @@ public final class MetaStringResolver {
public void writeMetaStringBytesWithFlag(MemoryBuffer buffer,
MetaStringBytes byteString) {
short id = byteString.dynamicWriteStringId;
if (id == MetaStringBytes.DEFAULT_DYNAMIC_WRITE_STRING_ID) {
+ // noinspection Duplicates
id = dynamicWriteStringId++;
byteString.dynamicWriteStringId = id;
MetaStringBytes[] dynamicWrittenMetaString = this.dynamicWrittenString;
@@ -73,8 +81,13 @@ public final class MetaStringResolver {
dynamicWrittenMetaString = growWrite(id);
}
dynamicWrittenMetaString[id] = byteString;
- buffer.writeVarUint32Small7(byteString.bytes.length << 2 | 0b1);
- buffer.writeInt64(byteString.hashCode);
+ int length = byteString.bytes.length;
+ buffer.writeVarUint32Small7(length << 2 | 0b1);
+ if (length > SMALL_STRING_THRESHOLD) {
+ buffer.writeInt64(byteString.hashCode);
+ } else {
+ buffer.writeByte(byteString.encoding.getValue());
+ }
buffer.writeBytes(byteString.bytes);
} else {
buffer.writeVarUint32Small7(((id + 1) << 2) | 0b11);
@@ -84,6 +97,7 @@ public final class MetaStringResolver {
public void writeMetaStringBytes(MemoryBuffer buffer, MetaStringBytes
byteString) {
short id = byteString.dynamicWriteStringId;
if (id == MetaStringBytes.DEFAULT_DYNAMIC_WRITE_STRING_ID) {
+ // noinspection Duplicates
id = dynamicWriteStringId++;
byteString.dynamicWriteStringId = id;
MetaStringBytes[] dynamicWrittenMetaString = this.dynamicWrittenString;
@@ -91,8 +105,13 @@ public final class MetaStringResolver {
dynamicWrittenMetaString = growWrite(id);
}
dynamicWrittenMetaString[id] = byteString;
- buffer.writeVarUint32Small7(byteString.bytes.length << 1);
- buffer.writeInt64(byteString.hashCode);
+ int length = byteString.bytes.length;
+ buffer.writeVarUint32Small7(length << 1);
+ if (length > SMALL_STRING_THRESHOLD) {
+ buffer.writeInt64(byteString.hashCode);
+ } else {
+ buffer.writeByte(byteString.encoding.getValue());
+ }
buffer.writeBytes(byteString.bytes);
} else {
buffer.writeVarUint32Small7(((id + 1) << 1) | 1);
@@ -119,8 +138,10 @@ public final class MetaStringResolver {
public MetaStringBytes readMetaStringBytesWithFlag(MemoryBuffer buffer, int
header) {
int len = header >>> 2;
if ((header & 0b10) == 0) {
- long hashCode = buffer.readInt64();
- MetaStringBytes byteString = trySkipMetaStringBytes(buffer, len,
hashCode);
+ MetaStringBytes byteString =
+ len <= SMALL_STRING_THRESHOLD
+ ? readSmallMetaStringBytes(buffer, len)
+ : readBigMetaStringBytes(buffer, len, buffer.readInt64());
updateDynamicString(byteString);
return byteString;
} else {
@@ -132,14 +153,10 @@ public final class MetaStringResolver {
MemoryBuffer buffer, MetaStringBytes cache, int header) {
int len = header >>> 2;
if ((header & 0b10) == 0) {
- long hashCode = buffer.readInt64();
- if (cache.hashCode == hashCode) {
- // skip byteString data
- buffer.increaseReaderIndex(len);
- updateDynamicString(cache);
- return cache;
- }
- MetaStringBytes byteString = trySkipMetaStringBytes(buffer, len,
hashCode);
+ MetaStringBytes byteString =
+ len <= SMALL_STRING_THRESHOLD
+ ? readSmallMetaStringBytes(buffer, cache, len)
+ : readBigMetaStringBytes(buffer, cache, len);
updateDynamicString(byteString);
return byteString;
} else {
@@ -151,8 +168,10 @@ public final class MetaStringResolver {
int header = buffer.readVarUint32Small7();
int len = header >>> 1;
if ((header & 0b1) == 0) {
- long hashCode = buffer.readInt64();
- MetaStringBytes byteString = trySkipMetaStringBytes(buffer, len,
hashCode);
+ MetaStringBytes byteString =
+ len > SMALL_STRING_THRESHOLD
+ ? readBigMetaStringBytes(buffer, len, buffer.readInt64())
+ : readSmallMetaStringBytes(buffer, len);
updateDynamicString(byteString);
return byteString;
} else {
@@ -164,24 +183,31 @@ public final class MetaStringResolver {
int header = buffer.readVarUint32Small7();
int len = header >>> 1;
if ((header & 0b1) == 0) {
- long hashCode = buffer.readInt64();
- if (cache.hashCode == hashCode) {
- // skip byteString data
- buffer.increaseReaderIndex(len);
- updateDynamicString(cache);
- return cache;
- } else {
- MetaStringBytes byteString = trySkipMetaStringBytes(buffer, len,
hashCode);
- updateDynamicString(byteString);
- return byteString;
- }
+ MetaStringBytes byteString =
+ len <= SMALL_STRING_THRESHOLD
+ ? readSmallMetaStringBytes(buffer, cache, len)
+ : readBigMetaStringBytes(buffer, cache, len);
+ updateDynamicString(byteString);
+ return byteString;
} else {
return dynamicReadStringIds[len - 1];
}
}
+ private MetaStringBytes readBigMetaStringBytes(
+ MemoryBuffer buffer, MetaStringBytes cache, int len) {
+ long hashCode = buffer.readInt64();
+ if (cache.hashCode == hashCode) {
+ // skip byteString data
+ buffer.increaseReaderIndex(len);
+ return cache;
+ } else {
+ return readBigMetaStringBytes(buffer, len, hashCode);
+ }
+ }
+
/** Read enum string by try to reuse previous read {@link MetaStringBytes}
object. */
- private MetaStringBytes trySkipMetaStringBytes(MemoryBuffer buffer, int len,
long hashCode) {
+ private MetaStringBytes readBigMetaStringBytes(MemoryBuffer buffer, int len,
long hashCode) {
MetaStringBytes byteString = hash2MetaStringBytesMap.get(hashCode);
if (byteString == null) {
byteString = new MetaStringBytes(buffer.readBytes(len), hashCode);
@@ -193,6 +219,53 @@ public final class MetaStringResolver {
return byteString;
}
+  /**
+   * Reads a meta string of at most 16 bytes: one encoding byte followed by {@code len} payload
+   * bytes, which are packed into two little-endian longs and used as the lookup key.
+   *
+   * <p>The two longs are zero-padded, so payloads that differ only in length or in encoding share
+   * a key. A cached entry is therefore reused only when its length and encoding also match;
+   * otherwise a fresh entry is created, which replaces the colliding one in the map.
+   */
+  private MetaStringBytes readSmallMetaStringBytes(MemoryBuffer buffer, int len) {
+    long v1, v2 = 0;
+    byte encoding = buffer.readByte();
+    if (len <= 8) {
+      v1 = buffer.readBytesAsInt64(len);
+    } else {
+      v1 = buffer.readInt64();
+      v2 = buffer.readBytesAsInt64(len - 8);
+    }
+    MetaStringBytes byteString = longLongMap.get(v1, v2);
+    if (byteString == null
+        || byteString.bytes.length != len
+        || byteString.encoding.getValue() != encoding) {
+      byteString = createSmallMetaStringBytes(len, encoding, v1, v2);
+    }
+    return byteString;
+  }
+
+  /**
+   * Reads a meta string of at most 16 bytes, returning {@code cache} when the payload is the one
+   * it holds.
+   *
+   * <p>{@code cache} may hold a string longer than 16 bytes whose first 16 bytes equal this
+   * payload, and zero-padding makes payloads of different length or encoding share the same two
+   * longs. Length and encoding are therefore compared as well, both for {@code cache} and for the
+   * entry found in the map. A mismatching map entry is replaced by a freshly created one.
+   */
+  private MetaStringBytes readSmallMetaStringBytes(
+      MemoryBuffer buffer, MetaStringBytes cache, int len) {
+    long v1, v2 = 0;
+    byte encoding = buffer.readByte();
+    if (len <= 8) {
+      v1 = buffer.readBytesAsInt64(len);
+    } else {
+      v1 = buffer.readInt64();
+      v2 = buffer.readBytesAsInt64(len - 8);
+    }
+    if (cache.first8Bytes == v1
+        && cache.second8Bytes == v2
+        && cache.bytes.length == len
+        && cache.encoding.getValue() == encoding) {
+      return cache;
+    }
+    MetaStringBytes byteString = longLongMap.get(v1, v2);
+    if (byteString == null
+        || byteString.bytes.length != len
+        || byteString.encoding.getValue() != encoding) {
+      byteString = createSmallMetaStringBytes(len, encoding, v1, v2);
+    }
+    return byteString;
+  }
+
+  /**
+   * Rebuilds a {@link MetaStringBytes} from the two little-endian longs of a small meta string
+   * and registers it in {@code longLongMap}.
+   *
+   * @param len number of payload bytes actually carried by {@code v1}/{@code v2}
+   * @param encoding encoding byte read from the buffer; stored in the low byte of the hash
+   */
+  private MetaStringBytes createSmallMetaStringBytes(int len, byte encoding, long v1, long v2) {
+    byte[] data = new byte[16];
+    LittleEndian.putInt64(data, 0, v1);
+    LittleEndian.putInt64(data, 8, v2);
+    long hashCode = MurmurHash3.murmurhash3_x64_128(data, 0, len, 47)[0];
+    if (hashCode == 0) {
+      // Same guard as MetaStringBytes.of: the constructor asserts a non-zero hash.
+      hashCode += 256; // last byte is reserved for header.
+    }
+    // Mask the byte so a value >= 0x80 cannot sign-extend over the hash bits.
+    hashCode = (hashCode & 0xffffffffffffff00L) | (encoding & 0xff);
+    MetaStringBytes metaStringBytes = new MetaStringBytes(Arrays.copyOf(data, len), hashCode);
+    longLongMap.put(v1, v2, metaStringBytes);
+    return metaStringBytes;
+  }
+
private void updateDynamicString(MetaStringBytes byteString) {
short currentDynamicReadId = dynamicReadStringId++;
MetaStringBytes[] dynamicReadStringIds = this.dynamicReadStringIds;
diff --git a/java/fury-core/src/main/resources/META-INF/LICENSE
b/java/fury-core/src/main/resources/META-INF/LICENSE
index 3fe4b8e3..29f6f372 100644
--- a/java/fury-core/src/main/resources/META-INF/LICENSE
+++ b/java/fury-core/src/main/resources/META-INF/LICENSE
@@ -243,6 +243,7 @@ The text of each license is also included in
licenses/LICENSE-[project].txt.
java/fury-core/src/main/java/org/apache/fury/collection/IdentityMap.java
java/fury-core/src/main/java/org/apache/fury/collection/IdentityObjectIntMap.java
java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java
+ java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java
java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java
java/fury-core/src/main/java/org/apache/fury/type/Generics.java
diff --git
a/java/fury-core/src/main/resources/META-INF/native-image/org.apache.fury/fury-core/native-image.properties
b/java/fury-core/src/main/resources/META-INF/native-image/org.apache.fury/fury-core/native-image.properties
index eb578bd5..7c751490 100644
---
a/java/fury-core/src/main/resources/META-INF/native-image/org.apache.fury/fury-core/native-image.properties
+++
b/java/fury-core/src/main/resources/META-INF/native-image/org.apache.fury/fury-core/native-image.properties
@@ -204,6 +204,7 @@
Args=--initialize-at-build-time=org.apache.fury.memory.MemoryBuffer,\
org.apache.fury.collection.IntArray,\
org.apache.fury.collection.LazyMap,\
org.apache.fury.collection.LongMap,\
+ org.apache.fury.collection.LongLongMap,\
org.apache.fury.collection.MapStatistics,\
org.apache.fury.collection.MultiKeyWeakMap,\
org.apache.fury.collection.ObjectArray,\
diff --git
a/java/fury-core/src/test/java/org/apache/fury/collection/LongLongMapTest.java
b/java/fury-core/src/test/java/org/apache/fury/collection/LongLongMapTest.java
new file mode 100644
index 00000000..524c4a21
--- /dev/null
+++
b/java/fury-core/src/test/java/org/apache/fury/collection/LongLongMapTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.fury.collection;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class LongLongMapTest {
+
+ @Test
+ public void testPut() {
+ LongLongMap<String> map = new LongLongMap<>(10, 0.5f);
+ map.put(1, 1, "a");
+ map.put(1, 2, "b");
+ map.put(1, 3, "c");
+ map.put(2, 1, "d");
+ map.put(3, 1, "f");
+ Assert.assertEquals(map.get(1, 1), "a");
+ Assert.assertEquals(map.get(1, 2), "b");
+ Assert.assertEquals(map.get(1, 3), "c");
+ Assert.assertEquals(map.get(2, 1), "d");
+ Assert.assertEquals(map.get(3, 1), "f");
+ for (int i = 1; i < 100; i++) {
+ map.put(i, i, "a" + i);
+ Assert.assertEquals(map.get(i, i), "a" + i);
+ }
+ }
+}
diff --git a/java/fury-core/src/test/java/org/apache/fury/memory/MemoryBufferTest.java b/java/fury-core/src/test/java/org/apache/fury/memory/MemoryBufferTest.java
index 48ce9962..42728202 100644
--- a/java/fury-core/src/test/java/org/apache/fury/memory/MemoryBufferTest.java
+++ b/java/fury-core/src/test/java/org/apache/fury/memory/MemoryBufferTest.java
@@ -634,4 +634,16 @@ public class MemoryBufferTest {
assertEquals(buf.readVarUint36Small(), 0); // overflow
}
}
+
+ @Test
+ public void testReadBytesAsInt64() {
+ for (MemoryBuffer buffer :
+ new MemoryBuffer[] {
+ MemoryUtils.buffer(16), MemoryUtils.wrap(ByteBuffer.allocateDirect(32)),
+ }) {
+ buffer.writeByte(10);
+ buffer.writeByte(20);
+ assertEquals(buffer.readBytesAsInt64(2), (20 << 8) | 10);
+ }
+ }
}
diff --git a/java/fury-core/src/test/java/org/apache/fury/resolver/MetaStringResolverTest.java b/java/fury-core/src/test/java/org/apache/fury/resolver/MetaStringResolverTest.java
index 9d8446d3..4fea7ef3 100644
--- a/java/fury-core/src/test/java/org/apache/fury/resolver/MetaStringResolverTest.java
+++ b/java/fury-core/src/test/java/org/apache/fury/resolver/MetaStringResolverTest.java
@@ -22,6 +22,7 @@ package org.apache.fury.resolver;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
+import java.nio.ByteBuffer;
import org.apache.fury.memory.MemoryBuffer;
import org.apache.fury.memory.MemoryUtils;
import org.apache.fury.meta.MetaString;
@@ -48,4 +49,23 @@ public class MetaStringResolverTest {
}
assertTrue(buffer.writerIndex() < str.getBytes().length + 128 * 4);
}
+
+ @Test
+ public void testWriteSmallMetaString() {
+ for (MemoryBuffer buffer :
+ new MemoryBuffer[] {
+ MemoryUtils.buffer(32), MemoryUtils.wrap(ByteBuffer.allocateDirect(32)),
+ }) {
+ for (int i = 0; i < 32; i++) {
+ String str = StringUtils.random(i, 0);
+ MetaStringResolver resolver = new MetaStringResolver();
+ resolver.writeMetaStringBytes(
+ buffer,
+ resolver.getOrCreateMetaStringBytes(new MetaStringEncoder('.', '_').encode(str)));
+ String metaString2 = resolver.readMetaString(buffer);
+ assertEquals(metaString2.hashCode(), str.hashCode());
+ assertEquals(metaString2.getBytes(), str.getBytes());
+ }
+ }
+ }
}
diff --git a/licenserc.toml b/licenserc.toml
index 46cd3e58..39821976 100644
--- a/licenserc.toml
+++ b/licenserc.toml
@@ -40,6 +40,7 @@ excludes = [
"java/fury-core/src/main/java/org/apache/fury/collection/IdentityMap.java",
"java/fury-core/src/main/java/org/apache/fury/collection/IdentityObjectIntMap.java",
"java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java",
+ "java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java",
"java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java",
"java/fury-core/src/main/java/org/apache/fury/io/ClassLoaderObjectInputStream.java",
"java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java",
diff --git a/python/pyfury/_fury.py b/python/pyfury/_fury.py
index 6552fe36..19a624ed 100644
--- a/python/pyfury/_fury.py
+++ b/python/pyfury/_fury.py
@@ -29,6 +29,7 @@ from typing import Dict, Tuple, TypeVar, Union, Iterable
from pyfury.lib import mmh3
from pyfury.buffer import Buffer
+from pyfury.meta.metastring import Encoding
from pyfury.resolver import (
MapRefResolver,
NoRefResolver,
@@ -58,6 +59,7 @@ from pyfury._serializer import (
PICKLE_STRONG_CACHE_CLASS_ID,
PICKLE_CACHE_CLASS_ID,
PickleCacheStub,
+ SMALL_STRING_THRESHOLD,
)
from pyfury.type import (
FuryType,
@@ -510,7 +512,11 @@ class ClassResolver:
self._dynamic_write_string_id += 1
self._dynamic_written_enum_string.append(enum_string_bytes)
buffer.write_varint32(enum_string_bytes.length << 1)
- buffer.write_int64(enum_string_bytes.hashcode)
+ if enum_string_bytes.length <= SMALL_STRING_THRESHOLD:
+ # TODO(chaokunyang) support meta string encoding
+ buffer.write_int8(Encoding.UTF_8.value)
+ else:
+ buffer.write_int64(enum_string_bytes.hashcode)
buffer.write_bytes(enum_string_bytes.data)
else:
buffer.write_varint32(((dynamic_write_string_id + 1) << 1) | 1)
@@ -520,15 +526,30 @@ class ClassResolver:
length = header >> 1
if header & 0b1 != 0:
return self._dynamic_id_to_enum_str_list[length - 1]
- hashcode = buffer.read_int64()
- reader_index = buffer.reader_index
- buffer.check_bound(reader_index, length)
- buffer.reader_index = reader_index + length
- enum_str = self._hash_to_enum_string.get(hashcode)
- if enum_str is None:
- str_bytes = buffer.get_bytes(reader_index, length)
- enum_str = MetaStringBytes(str_bytes, hashcode=hashcode)
- self._hash_to_enum_string[hashcode] = enum_str
+ if length <= SMALL_STRING_THRESHOLD:
+ buffer.read_int8()
+ if length <= 8:
+ v1 = buffer.read_bytes_as_int64(length)
+ v2 = 0
+ else:
+ v1 = buffer.read_int64()
+ v2 = buffer.read_bytes_as_int64(length - 8)
+ hashcode = v1 * 31 + v2
+ enum_str = self._hash_to_enum_string.get(hashcode)
+ if enum_str is None:
+ str_bytes = buffer.get_bytes(buffer.reader_index - length, length)
+ enum_str = MetaStringBytes(str_bytes, hashcode=hashcode)
+ self._hash_to_enum_string[hashcode] = enum_str
+ else:
+ hashcode = buffer.read_int64()
+ reader_index = buffer.reader_index
+ buffer.check_bound(reader_index, length)
+ buffer.reader_index = reader_index + length
+ enum_str = self._hash_to_enum_string.get(hashcode)
+ if enum_str is None:
+ str_bytes = buffer.get_bytes(reader_index, length)
+ enum_str = MetaStringBytes(str_bytes, hashcode=hashcode)
+ self._hash_to_enum_string[hashcode] = enum_str
self._dynamic_id_to_enum_str_list.append(enum_str)
return enum_str
diff --git a/python/pyfury/_serialization.pyx b/python/pyfury/_serialization.pyx
index 00641a42..3f2c6041 100644
--- a/python/pyfury/_serialization.pyx
+++ b/python/pyfury/_serialization.pyx
@@ -35,6 +35,7 @@ from pyfury._fury import _PicklerStub, _UnpicklerStub, Pickler, Unpickler
from pyfury._fury import _ENABLE_CLASS_REGISTRATION_FORCIBLY
from pyfury.error import ClassNotCompatibleError
from pyfury.lib import mmh3
+from pyfury.meta.metastring import Encoding
from pyfury.type import is_primitive_type, FuryType, Int8Type, Int16Type, Int32Type, \
Int64Type, Float32Type, Float64Type, Int16ArrayType, Int32ArrayType, \
Int64ArrayType, Float32ArrayType, Float64ArrayType, infer_field, load_class
@@ -45,6 +46,7 @@ from libcpp.vector cimport vector
from cpython cimport PyObject
from cpython.ref cimport *
from libcpp cimport bool as c_bool
+from libcpp.utility cimport pair
from cython.operator cimport dereference as deref
from pyfury._util cimport Buffer
from pyfury.includes.libabsl cimport flat_hash_map
@@ -233,6 +235,7 @@ cdef int32_t NOT_NULL_PYBOOL_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | \
(PYBOOL_CLASS_ID << 9)
cdef int32_t NOT_NULL_STRING_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | \
(STRING_CLASS_ID << 9)
+cdef int32_t SMALL_STRING_THRESHOLD = 16
cdef class BufferObject:
@@ -307,6 +310,7 @@ cdef class ClassResolver:
flat_hash_map[int64_t, PyObject*] _c_hash_to_classinfo
# hash -> MetaStringBytes
flat_hash_map[int64_t, PyObject*] _c_hash_to_enum_string_bytes
+ flat_hash_map[pair[int64_t, int64_t], PyObject*] _c_hash_to_small_metastring_bytes
# classname MetaStringBytes address -> class
flat_hash_map[uint64_t, PyObject*] _c_str_bytes_to_class
# classname MetaStringBytes address -> str
@@ -659,13 +663,17 @@ cdef class ClassResolver:
cdef inline _write_enum_string_bytes(
self, Buffer buffer, MetaStringBytes enum_string_bytes):
cdef int16_t dynamic_class_id = enum_string_bytes.dynamic_write_string_id
+ cdef int32_t length = enum_string_bytes.length
if dynamic_class_id == DEFAULT_DYNAMIC_WRITE_STRING_ID:
dynamic_class_id = self.dynamic_write_string_id
enum_string_bytes.dynamic_write_string_id = dynamic_class_id
self.dynamic_write_string_id += 1
self._c_dynamic_written_enum_string.push_back(<PyObject*>enum_string_bytes)
- buffer.write_varint32(enum_string_bytes.length << 1)
- buffer.write_int64(enum_string_bytes.hashcode)
+ buffer.write_varint32(length << 1)
+ if length <= SMALL_STRING_THRESHOLD:
+ buffer.write_int8(Encoding.UTF_8.value)
+ else:
+ buffer.write_int64(enum_string_bytes.hashcode)
buffer.write_bytes(enum_string_bytes.data)
else:
buffer.write_varint32(((dynamic_class_id + 1) << 1) | 1)
@@ -675,21 +683,40 @@ cdef class ClassResolver:
cdef int32_t length = header >> 1
if header & 0b1 != 0:
return <MetaStringBytes>self._c_dynamic_id_to_enum_string_vec[length - 1]
- cdef int64_t hashcode = buffer.read_int64()
- cdef int32_t reader_index = buffer.reader_index
- buffer.check_bound(reader_index, length)
- buffer.reader_index = reader_index + length
- cdef PyObject* enum_str_ptr = self._c_hash_to_enum_string_bytes[hashcode]
- if enum_str_ptr != NULL:
- self._c_dynamic_id_to_enum_string_vec.push_back(enum_str_ptr)
- return <MetaStringBytes>enum_str_ptr
- cdef bytes str_bytes = buffer.get_bytes(reader_index, length)
- cdef MetaStringBytes enum_str = MetaStringBytes(str_bytes, hashcode=hashcode)
- self._enum_str_set.add(enum_str)
- enum_str_ptr = <PyObject*>enum_str
- self._c_hash_to_enum_string_bytes[hashcode] = enum_str_ptr
+ cdef int64_t v1 = 0, v2 = 0, hashcode
+ cdef PyObject* enum_str_ptr
+ cdef int32_t reader_index
+ if length <= SMALL_STRING_THRESHOLD:
+ # TODO(chaokunyang) support metastring encoding
+ buffer.read_int8()
+ if length <= 8:
+ v1 = buffer.read_bytes_as_int64(length)
+ else:
+ v1 = buffer.read_int64()
+ v2 = buffer.read_bytes_as_int64(length - 8)
+ hashcode = v1 * 31 + v2
+ enum_str_ptr = self._c_hash_to_small_metastring_bytes[pair[int64_t, int64_t](v1, v2)]
+ if enum_str_ptr == NULL:
+ reader_index = buffer.reader_index
+ str_bytes = buffer.get_bytes(reader_index - length, length)
+ enum_str = MetaStringBytes(str_bytes, hashcode=hashcode)
+ self._enum_str_set.add(enum_str)
+ enum_str_ptr = <PyObject*>enum_str
+ self._c_hash_to_small_metastring_bytes[pair[int64_t, int64_t](v1, v2)] = enum_str_ptr
+ else:
+ hashcode = buffer.read_int64()
+ reader_index = buffer.reader_index
+ buffer.check_bound(reader_index, length)
+ buffer.reader_index = reader_index + length
+ enum_str_ptr = self._c_hash_to_enum_string_bytes[hashcode]
+ if enum_str_ptr == NULL:
+ str_bytes = buffer.get_bytes(reader_index, length)
+ enum_str = MetaStringBytes(str_bytes, hashcode=hashcode)
+ self._enum_str_set.add(enum_str)
+ enum_str_ptr = <PyObject*>enum_str
+ self._c_hash_to_enum_string_bytes[hashcode] = enum_str_ptr
self._c_dynamic_id_to_enum_string_vec.push_back(enum_str_ptr)
- return enum_str
+ return <MetaStringBytes>enum_str_ptr
cpdef inline xwrite_class(self, Buffer buffer, cls):
cdef PyObject* classinfo_ptr = self._c_classes_info[<uintptr_t><PyObject*>cls]
diff --git a/python/pyfury/_serializer.py b/python/pyfury/_serializer.py
index e3c8dbea..c6f39b16 100644
--- a/python/pyfury/_serializer.py
+++ b/python/pyfury/_serializer.py
@@ -59,6 +59,7 @@ NOT_NULL_PYINT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYINT_CLASS_ID << 9)
NOT_NULL_PYFLOAT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYFLOAT_CLASS_ID << 9)
NOT_NULL_PYBOOL_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYBOOL_CLASS_ID << 9)
NOT_NULL_STRING_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (STRING_CLASS_ID << 9)
+SMALL_STRING_THRESHOLD = 16
class _PickleStub:
diff --git a/python/pyfury/_util.pxd b/python/pyfury/_util.pxd
index 76ebe0fc..77f76ace 100644
--- a/python/pyfury/_util.pxd
+++ b/python/pyfury/_util.pxd
@@ -152,6 +152,8 @@ cdef class Buffer:
cpdef inline bytes read_bytes(self, int32_t length)
+ cpdef inline int64_t read_bytes_as_int64(self, int32_t length)
+
cpdef inline put_bytes(self, uint32_t offset, bytes value)
cpdef inline bytes get_bytes(self, uint32_t offset, uint32_t nbytes)
diff --git a/python/pyfury/_util.pyx b/python/pyfury/_util.pyx
index 57a913e8..b7587056 100644
--- a/python/pyfury/_util.pyx
+++ b/python/pyfury/_util.pyx
@@ -237,6 +237,20 @@ cdef class Buffer:
self.reader_index += length
return value
+ cpdef inline int64_t read_bytes_as_int64(self, int32_t length):
+ cdef int32_t size_ = self.c_buffer.get().size()
+ cdef int64_t result
+ cdef int32_t i
+ # if offset + length > size_:
+ if size_- (self.reader_index + 8) > 0:
+ result = self.get_int64(self.reader_index)
+ result = result & (0xffffffffffffffffL >> ((8 - length) * 8))
+ else:
+ for i in range(length):
+ result = result | (<int64_t>(self.read_int8()) & 0xff) << (i * 8)
+ self.reader_index += length
+ return result
+
cpdef inline put_bytes(self, uint32_t offset, bytes value):
cdef const unsigned char[:] data = value
cdef int32_t length = data.nbytes
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]