This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git


The following commit(s) were added to refs/heads/main by this push:
     new 57a9eae2 feat(java): reduce metastring hashcode payload for small 
string(<=16 bytes) (#1909)
57a9eae2 is described below

commit 57a9eae2bab89b7f8ab9ae900359d9374c3b697f
Author: Shawn Yang <[email protected]>
AuthorDate: Sun Oct 27 10:05:35 2024 +0800

    feat(java): reduce metastring hashcode payload for small string(<=16 bytes) 
(#1909)
    
    ## What does this PR do?
    If a meta string is at most 16 bytes long, we skip writing its hashcode to
    reduce space cost. This brings a big gain, since most metastring-encoded
    ClassName/EnumName values are no longer than 16 bytes. Package names are
    far fewer than class names, so they can save space through shared dict
    encoding.
    
    ## Does this PR introduce any user-facing change?
    
    <!--
    If any user-facing interface changes, please [open an
    issue](https://github.com/apache/fury/issues/new/choose) describing the
    need to do so and update the document if necessary.
    -->
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
        // old size 391
    // Benchmark (bufferType) (objectType) (references) Mode Cnt Score Error
    Units
    // UserTypeDeserializeSuite.fury_deserialize array MEDIA_CONTENT false
    thrpt 100 2751601.402 ± 28811.825 ops/s
    
        // new size: 377
    // Benchmark (bufferType) (objectType) (references) Mode Cnt Score Error
    Units
    // UserTypeDeserializeSuite.fury_deserialize array MEDIA_CONTENT false
    thrpt 100 2748329.241 ± 28163.821 ops/s
---
 LICENSE                                            |   1 +
 go/fury/type.go                                    |  29 ++--
 .../org/apache/fury/collection/FuryObjectMap.java  |   3 +-
 .../org/apache/fury/collection/LongLongMap.java    | 157 +++++++++++++++++++++
 .../java/org/apache/fury/collection/LongMap.java   |   4 +-
 .../org/apache/fury/collection/ObjectIntMap.java   |   4 +-
 .../org/apache/fury/io/BlockedStreamUtils.java     |   1 +
 .../java/org/apache/fury/memory/LittleEndian.java  |   7 +
 .../java/org/apache/fury/memory/MemoryBuffer.java  |  36 +++++
 .../org/apache/fury/resolver/ClassResolver.java    |   4 +
 .../org/apache/fury/resolver/MetaStringBytes.java  |  28 +++-
 .../apache/fury/resolver/MetaStringResolver.java   | 131 +++++++++++++----
 java/fury-core/src/main/resources/META-INF/LICENSE |   1 +
 .../fury-core/native-image.properties              |   1 +
 .../apache/fury/collection/LongLongMapTest.java    |  45 ++++++
 .../org/apache/fury/memory/MemoryBufferTest.java   |  12 ++
 .../fury/resolver/MetaStringResolverTest.java      |  20 +++
 licenserc.toml                                     |   1 +
 python/pyfury/_fury.py                             |  41 ++++--
 python/pyfury/_serialization.pyx                   |  59 +++++---
 python/pyfury/_serializer.py                       |   1 +
 python/pyfury/_util.pxd                            |   2 +
 python/pyfury/_util.pyx                            |  14 ++
 23 files changed, 528 insertions(+), 74 deletions(-)

diff --git a/LICENSE b/LICENSE
index 7683b460..ce70ac93 100644
--- a/LICENSE
+++ b/LICENSE
@@ -267,6 +267,7 @@ The text of each license is also included in 
licenses/LICENSE-[project].txt.
       java/fury-core/src/main/java/org/apache/fury/collection/IdentityMap.java
       
java/fury-core/src/main/java/org/apache/fury/collection/IdentityObjectIntMap.java
       java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java
+      java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java
       java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java
       java/fury-core/src/main/java/org/apache/fury/type/Generics.java
       java/fury-core/src/test/java/org/apache/fury/type/GenericsTest.java
diff --git a/go/fury/type.go b/go/fury/type.go
index ac19d74f..98315991 100644
--- a/go/fury/type.go
+++ b/go/fury/type.go
@@ -19,6 +19,7 @@ package fury
 
 import (
        "fmt"
+       "github.com/apache/fury/go/fury/meta"
        "hash/fnv"
        "reflect"
        "regexp"
@@ -136,6 +137,7 @@ const (
        NotSupportCrossLanguage = 0
        useStringValue          = 0
        useStringId             = 1
+       SMALL_STRING_THRESHOLD  = 16
 )
 
 var (
@@ -551,14 +553,19 @@ func (r *typeResolver) writeMetaString(buffer 
*ByteBuffer, str string) error {
                dynamicStringId := r.dynamicStringId
                r.dynamicStringId += 1
                r.dynamicStringToId[str] = dynamicStringId
-               buffer.WriteVarInt32(int32(len(str) << 1))
-               // TODO this hash should be unique, since we don't compare data 
equality for performance
-               h := fnv.New64a()
-               if _, err := h.Write([]byte(str)); err != nil {
-                       return err
+               length := len(str)
+               buffer.WriteVarInt32(int32(length << 1))
+               if length <= SMALL_STRING_THRESHOLD {
+                       buffer.WriteByte_(uint8(meta.UTF_8))
+               } else {
+                       // TODO this hash should be unique, since we don't 
compare data equality for performance
+                       h := fnv.New64a()
+                       if _, err := h.Write([]byte(str)); err != nil {
+                               return err
+                       }
+                       hash := int64(h.Sum64() & 0xffffffffffffff00)
+                       buffer.WriteInt64(hash)
                }
-               hash := int64(h.Sum64() & 0xffffffffffffff00)
-               buffer.WriteInt64(hash)
                if len(str) > MaxInt16 {
                        return fmt.Errorf("too long string: %s", str)
                }
@@ -573,8 +580,12 @@ func (r *typeResolver) readMetaString(buffer *ByteBuffer) 
(string, error) {
        header := buffer.ReadVarInt32()
        var length = int(header >> 1)
        if header&0b1 == 0 {
-               // TODO support use computed hash
-               buffer.ReadInt64()
+               if length <= SMALL_STRING_THRESHOLD {
+                       buffer.ReadByte_()
+               } else {
+                       // TODO support use computed hash
+                       buffer.ReadInt64()
+               }
                str := string(buffer.ReadBinary(length))
                dynamicStringId := r.dynamicStringId
                r.dynamicStringId += 1
diff --git 
a/java/fury-core/src/main/java/org/apache/fury/collection/FuryObjectMap.java 
b/java/fury-core/src/main/java/org/apache/fury/collection/FuryObjectMap.java
index 528fc643..72804235 100644
--- a/java/fury-core/src/main/java/org/apache/fury/collection/FuryObjectMap.java
+++ b/java/fury-core/src/main/java/org/apache/fury/collection/FuryObjectMap.java
@@ -51,6 +51,7 @@ import org.apache.fury.util.Preconditions;
  */
 @SuppressWarnings("unchecked")
 public class FuryObjectMap<K, V> {
+  static final long MASK_NUMBER = 0x9E3779B97F4A7C15L;
   static final Object dummy = new Object();
 
   public int size;
@@ -135,7 +136,7 @@ public class FuryObjectMap<K, V> {
    * {@code return item.hashCode() & mask;}
    */
   protected int place(K item) {
-    return (int) (item.hashCode() * 0x9E3779B97F4A7C15L >>> shift);
+    return (int) (item.hashCode() * MASK_NUMBER >>> shift);
   }
 
   /**
diff --git 
a/java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java 
b/java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java
new file mode 100644
index 00000000..0e94ef9e
--- /dev/null
+++ b/java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java
@@ -0,0 +1,157 @@
+/* Copyright (c) 2008-2023, Nathan Sweet
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice, 
this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, 
this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided with the 
distribution.
+ * - Neither the name of Esoteric Software nor the names of its contributors 
may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+
+package org.apache.fury.collection;
+
+import static org.apache.fury.collection.FuryObjectMap.MASK_NUMBER;
+
+import org.apache.fury.annotation.Internal;
+import org.apache.fury.util.Preconditions;
+
+/**
+ * A fast linear hash probe based map whose key is two long values `(long k1, 
long k2)`. This map
+ * can avoid creating a java object for key to save memory/cpu cost.
+ */
+// The linear probed hash is derived from
+// 
https://github.com/EsotericSoftware/kryo/blob/135df69526615bb3f6b34846e58ba3fec3b631c3/src/com/esotericsoftware/kryo/util/IntMap.java.
+@SuppressWarnings("unchecked")
+@Internal
+public final class LongLongMap<V> {
+  private static final class LongLongKey {
+    private final long k1;
+
+    public LongLongKey(long k1, long k2) {
+      this.k1 = k1;
+      this.k2 = k2;
+    }
+
+    private final long k2;
+
+    @Override
+    public String toString() {
+      return "LongLongKey{" + "k1=" + k1 + ", k2=" + k2 + '}';
+    }
+  }
+
+  public int size;
+  LongLongKey[] keyTable;
+  V[] valueTable;
+  private final float loadFactor;
+  private int threshold;
+
+  private int shift;
+
+  private int mask;
+
+  /**
+   * Creates a new map with the specified initial capacity and load factor. 
This map will hold
+   * initialCapacity items before growing the backing table.
+   *
+   * @param initialCapacity If not a power of two, it is increased to the next 
nearest power of two.
+   */
+  public LongLongMap(int initialCapacity, float loadFactor) {
+    Preconditions.checkArgument(
+        0 <= loadFactor && loadFactor <= 1, "loadFactor %s must be > 0 and < 
1", loadFactor);
+    this.loadFactor = loadFactor;
+    int tableSize = FuryObjectMap.tableSize(initialCapacity, loadFactor);
+    threshold = (int) (tableSize * loadFactor);
+    mask = tableSize - 1;
+    shift = Long.numberOfLeadingZeros(mask);
+    keyTable = new LongLongKey[tableSize];
+    valueTable = (V[]) new Object[tableSize];
+  }
+
+  private int place(long k1, long k2) {
+    return (int) ((k1 * 31 + k2) * MASK_NUMBER >>> shift);
+  }
+
+  /**
+   * Returns the index of the key if already present, else -(index + 1) for 
the next empty index.
+   * This can be overridden in this pacakge to compare for equality 
differently than {@link
+   * Object#equals(Object)}.
+   */
+  private int locateKey(long k1, long k2) {
+    LongLongKey[] keyTable = this.keyTable;
+    int mask = this.mask;
+    for (int i = place(k1, k2); ; i = i + 1 & mask) {
+      LongLongKey other = keyTable[i];
+      if (other == null) {
+        return -(i + 1); // Empty space is available.
+      }
+      if (other.k1 == k1 && other.k2 == k2) {
+        return i; // Same key was found.
+      }
+    }
+  }
+
+  public V put(long k1, long k2, V value) {
+    int i = locateKey(k1, k2);
+    if (i >= 0) { // Existing key was found.
+      V[] valueTable = this.valueTable;
+      V oldValue = valueTable[i];
+      valueTable[i] = value;
+      return oldValue;
+    }
+    i = -(i + 1); // Empty space was found.
+    keyTable[i] = new LongLongKey(k1, k2);
+    valueTable[i] = value;
+    if (++size >= threshold) {
+      resize(keyTable.length << 1);
+    }
+    return null;
+  }
+
+  public V get(long k1, long k2) {
+    LongLongKey[] keyTable = this.keyTable;
+    for (int i = place(k1, k2); ; i = i + 1 & mask) {
+      LongLongKey other = keyTable[i];
+      if (other == null) {
+        return null;
+      }
+      if (other.k1 == k1 && other.k2 == k2) {
+        return valueTable[i];
+      }
+    }
+  }
+
+  private void resize(int newSize) {
+    int oldCapacity = keyTable.length;
+    threshold = (int) (newSize * loadFactor);
+    mask = newSize - 1;
+    shift = Long.numberOfLeadingZeros(mask);
+    LongLongKey[] oldKeyTable = keyTable;
+    V[] oldValueTable = valueTable;
+    keyTable = new LongLongKey[newSize];
+    valueTable = (V[]) new Object[newSize];
+    if (size > 0) {
+      for (int i = 0; i < oldCapacity; i++) {
+        LongLongKey key = oldKeyTable[i];
+        if (key != null) {
+          for (int j = place(key.k1, key.k2); ; j = (j + 1) & mask) {
+            if (keyTable[j] == null) {
+              keyTable[j] = new LongLongKey(key.k1, key.k2);
+              valueTable[j] = oldValueTable[i];
+              break;
+            }
+          }
+        }
+      }
+    }
+  }
+}
diff --git 
a/java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java 
b/java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java
index c7995566..fb13dba8 100644
--- a/java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java
+++ b/java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java
@@ -19,6 +19,8 @@
 
 package org.apache.fury.collection;
 
+import static org.apache.fury.collection.FuryObjectMap.MASK_NUMBER;
+
 import java.util.Arrays;
 
 // Derived from
@@ -141,7 +143,7 @@ public class LongMap<V> {
    * {@code return item.hashCode() & mask;}
    */
   protected int place(long item) {
-    return (int) (item * 0x9E3779B97F4A7C15L >>> shift);
+    return (int) (item * MASK_NUMBER >>> shift);
   }
 
   /**
diff --git 
a/java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java 
b/java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java
index 5c8ebab4..bdc8a178 100644
--- a/java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java
+++ b/java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java
@@ -19,6 +19,8 @@
 
 package org.apache.fury.collection;
 
+import static org.apache.fury.collection.FuryObjectMap.MASK_NUMBER;
+
 import java.util.HashMap;
 import java.util.Map;
 import java.util.function.BiConsumer;
@@ -59,7 +61,7 @@ public class ObjectIntMap<K> {
   }
 
   protected int place(K item) {
-    return (int) (item.hashCode() * 0x9E3779B97F4A7C15L >>> shift);
+    return (int) (item.hashCode() * MASK_NUMBER >>> shift);
   }
 
   int locateKey(K key) {
diff --git 
a/java/fury-core/src/main/java/org/apache/fury/io/BlockedStreamUtils.java 
b/java/fury-core/src/main/java/org/apache/fury/io/BlockedStreamUtils.java
index 961b65f0..c03707bd 100644
--- a/java/fury-core/src/main/java/org/apache/fury/io/BlockedStreamUtils.java
+++ b/java/fury-core/src/main/java/org/apache/fury/io/BlockedStreamUtils.java
@@ -96,6 +96,7 @@ public class BlockedStreamUtils {
       Fury fury, ReadableByteChannel channel, Function<MemoryBuffer, Object> 
action) {
     try {
       MemoryBuffer buf = fury.getBuffer();
+      buf.readerIndex(0);
       ByteBuffer byteBuffer = ByteBuffer.allocate(4);
       byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
       readByteBuffer(channel, byteBuffer, 4);
diff --git 
a/java/fury-core/src/main/java/org/apache/fury/memory/LittleEndian.java 
b/java/fury-core/src/main/java/org/apache/fury/memory/LittleEndian.java
index d519fdf9..b0eb8754 100644
--- a/java/fury-core/src/main/java/org/apache/fury/memory/LittleEndian.java
+++ b/java/fury-core/src/main/java/org/apache/fury/memory/LittleEndian.java
@@ -72,6 +72,13 @@ public class LittleEndian {
     return Platform.IS_LITTLE_ENDIAN ? v : Long.reverseBytes(v);
   }
 
+  public static void putInt64(byte[] o, int index, long value) {
+    if (!Platform.IS_LITTLE_ENDIAN) {
+      value = Long.reverseBytes(value);
+    }
+    Platform.putLong(o, Platform.BYTE_ARRAY_OFFSET + index, value);
+  }
+
   public static void putFloat32(Object o, long pos, float value) {
     int v = Float.floatToRawIntBits(value);
     if (!Platform.IS_LITTLE_ENDIAN) {
diff --git 
a/java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java 
b/java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java
index 87b56e6e..9bfa7efe 100644
--- a/java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java
+++ b/java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java
@@ -2174,6 +2174,42 @@ public final class MemoryBuffer {
     readBytes(dst, 0, dst.length);
   }
 
+  /** Read {@code len} bytes into a long using little-endian order. */
+  public long readBytesAsInt64(int len) {
+    int readerIdx = readerIndex;
+    // use subtract to avoid overflow
+    int remaining = size - readerIdx;
+    if (remaining >= 8) {
+      readerIndex = readerIdx + len;
+      long v =
+          UNSAFE.getLong(heapMemory, address + readerIdx)
+              & (0xffffffffffffffffL >>> ((8 - len) * 8));
+      return LITTLE_ENDIAN ? v : Long.reverseBytes(v);
+    }
+    return slowReadBytesAsInt64(remaining, len);
+  }
+
+  private long slowReadBytesAsInt64(int remaining, int len) {
+    if (remaining < len) {
+      streamReader.fillBuffer(len - remaining);
+    }
+    int readerIdx = readerIndex;
+    readerIndex = readerIdx + len;
+    long result = 0;
+    byte[] heapMemory = this.heapMemory;
+    if (heapMemory != null) {
+      for (int i = 0, start = heapOffset + readerIdx; i < len; i++) {
+        result |= (((long) heapMemory[start + i]) & 0xff) << (i * 8);
+      }
+    } else {
+      long start = address + readerIdx;
+      for (int i = 0; i < len; i++) {
+        result |= ((long) UNSAFE.getByte(null, start + i) & 0xff) << (i * 8);
+      }
+    }
+    return result;
+  }
+
   public int read(ByteBuffer dst) {
     int readerIdx = readerIndex;
     int len = dst.remaining();
diff --git 
a/java/fury-core/src/main/java/org/apache/fury/resolver/ClassResolver.java 
b/java/fury-core/src/main/java/org/apache/fury/resolver/ClassResolver.java
index 5ca4ae70..dbaf4bb4 100644
--- a/java/fury-core/src/main/java/org/apache/fury/resolver/ClassResolver.java
+++ b/java/fury-core/src/main/java/org/apache/fury/resolver/ClassResolver.java
@@ -1619,6 +1619,8 @@ public class ClassResolver {
     if (classInfo.classId != NO_CLASS_ID) {
       buffer.writeVarUint32(classInfo.classId << 1);
     } else {
+      // let the lowermost bit of next byte be set, so the deserialization can 
know
+      // whether need to read class by name in advance
       metaStringResolver.writeMetaStringBytesWithFlag(buffer, 
classInfo.packageNameBytes);
       metaStringResolver.writeMetaStringBytes(buffer, 
classInfo.classNameBytes);
     }
@@ -1634,6 +1636,8 @@ public class ClassResolver {
     int header = buffer.readVarUint32Small14();
     final ClassInfo classInfo;
     if ((header & 0b1) != 0) {
+      // let the lowermost bit of next byte be set, so the deserialization can 
know
+      // whether need to read class by name in advance
       MetaStringBytes packageBytes = 
metaStringResolver.readMetaStringBytesWithFlag(buffer, header);
       MetaStringBytes simpleClassNameBytes = 
metaStringResolver.readMetaStringBytes(buffer);
       classInfo = loadBytesToClassInfo(packageBytes, simpleClassNameBytes);
diff --git 
a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java 
b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
index 56301645..1612bcaf 100644
--- a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
+++ b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
@@ -21,6 +21,8 @@ package org.apache.fury.resolver;
 
 import java.util.Arrays;
 import org.apache.fury.annotation.Internal;
+import org.apache.fury.memory.LittleEndian;
+import org.apache.fury.memory.Platform;
 import org.apache.fury.meta.MetaString;
 import org.apache.fury.meta.MetaStringDecoder;
 import org.apache.fury.util.MurmurHash3;
@@ -32,6 +34,9 @@ public final class MetaStringBytes {
 
   final byte[] bytes;
   final long hashCode;
+  final MetaString.Encoding encoding;
+  final long first8Bytes;
+  final long second8Bytes;
   short dynamicWriteStringId = DEFAULT_DYNAMIC_WRITE_STRING_ID;
 
   /**
@@ -41,14 +46,23 @@ public final class MetaStringBytes {
    * @param hashCode String hash code. This should be unique and has no hash 
collision, and be
    *     deterministic, so we can use cache to reduce hash loop up for read.
    */
-  public MetaStringBytes(byte[] bytes, long hashCode) {
+  MetaStringBytes(final byte[] bytes, long hashCode) {
     assert hashCode != 0;
     this.bytes = bytes;
     this.hashCode = hashCode;
+    int header = (int) (hashCode & HEADER_MASK);
+    this.encoding = MetaString.Encoding.fromInt(header);
+    byte[] data = bytes;
+    if (bytes.length < 16) {
+      data = new byte[16];
+      System.arraycopy(bytes, 0, data, 0, bytes.length);
+    }
+    first8Bytes = LittleEndian.getInt64(data, Platform.BYTE_ARRAY_OFFSET);
+    second8Bytes = LittleEndian.getInt64(data, Platform.BYTE_ARRAY_OFFSET + 8);
   }
 
-  public MetaStringBytes(MetaString metaString) {
-    this.bytes = metaString.getBytes();
+  static MetaStringBytes of(MetaString metaString) {
+    byte[] bytes = metaString.getBytes();
     // Set seed to ensure hash is deterministic.
     long hashCode = MurmurHash3.murmurhash3_x64_128(bytes, 0, bytes.length, 
47)[0];
     if (hashCode == 0) {
@@ -56,8 +70,10 @@ public final class MetaStringBytes {
       hashCode += 256; // last byte is reserved for header.
     }
     hashCode &= 0xffffffffffffff00L;
-    int header = metaString.getEncoding().getValue() & HEADER_MASK;
-    this.hashCode = hashCode | header;
+    MetaString.Encoding encoding = metaString.getEncoding();
+    int header = encoding.getValue() & HEADER_MASK;
+    hashCode = hashCode | header;
+    return new MetaStringBytes(bytes, hashCode);
   }
 
   public String decode(char specialChar1, char specialChar2) {
@@ -65,8 +81,6 @@ public final class MetaStringBytes {
   }
 
   public String decode(MetaStringDecoder decoder) {
-    int header = (int) (hashCode & HEADER_MASK);
-    MetaString.Encoding encoding = MetaString.Encoding.values()[header];
     return decoder.decode(bytes, encoding);
   }
 
diff --git 
a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java 
b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
index c3322d16..af24ce8e 100644
--- 
a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
+++ 
b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
@@ -19,11 +19,15 @@
 
 package org.apache.fury.resolver;
 
+import java.util.Arrays;
+import org.apache.fury.collection.LongLongMap;
 import org.apache.fury.collection.LongMap;
 import org.apache.fury.collection.ObjectMap;
+import org.apache.fury.memory.LittleEndian;
 import org.apache.fury.memory.MemoryBuffer;
 import org.apache.fury.meta.Encoders;
 import org.apache.fury.meta.MetaString;
+import org.apache.fury.util.MurmurHash3;
 
 /**
  * A resolver for limited string value writing. Currently, we only support 
classname dynamic
@@ -35,12 +39,15 @@ public final class MetaStringResolver {
   private static final int initialCapacity = 8;
   // use a lower load factor to minimize hash collision
   private static final float furyMapLoadFactor = 0.25f;
+  private static final int SMALL_STRING_THRESHOLD = 16;
 
   // Every deserialization for unregistered string will query it, performance 
is important.
   private final ObjectMap<MetaStringBytes, String> metaStringBytes2StringMap =
       new ObjectMap<>(initialCapacity, furyMapLoadFactor);
   private final LongMap<MetaStringBytes> hash2MetaStringBytesMap =
       new LongMap<>(initialCapacity, furyMapLoadFactor);
+  private final LongLongMap<MetaStringBytes> longLongMap =
+      new LongLongMap<>(initialCapacity, furyMapLoadFactor);
   // Every enum bytes should be singleton at every fury, since we keep state 
in it.
   private final ObjectMap<MetaString, MetaStringBytes> metaString2BytesMap =
       new ObjectMap<>(initialCapacity, furyMapLoadFactor);
@@ -57,7 +64,7 @@ public final class MetaStringResolver {
   public MetaStringBytes getOrCreateMetaStringBytes(MetaString str) {
     MetaStringBytes metaStringBytes = metaString2BytesMap.get(str);
     if (metaStringBytes == null) {
-      metaStringBytes = new MetaStringBytes(str);
+      metaStringBytes = MetaStringBytes.of(str);
       metaString2BytesMap.put(str, metaStringBytes);
     }
     return metaStringBytes;
@@ -66,6 +73,7 @@ public final class MetaStringResolver {
   public void writeMetaStringBytesWithFlag(MemoryBuffer buffer, 
MetaStringBytes byteString) {
     short id = byteString.dynamicWriteStringId;
     if (id == MetaStringBytes.DEFAULT_DYNAMIC_WRITE_STRING_ID) {
+      // noinspection Duplicates
       id = dynamicWriteStringId++;
       byteString.dynamicWriteStringId = id;
       MetaStringBytes[] dynamicWrittenMetaString = this.dynamicWrittenString;
@@ -73,8 +81,13 @@ public final class MetaStringResolver {
         dynamicWrittenMetaString = growWrite(id);
       }
       dynamicWrittenMetaString[id] = byteString;
-      buffer.writeVarUint32Small7(byteString.bytes.length << 2 | 0b1);
-      buffer.writeInt64(byteString.hashCode);
+      int length = byteString.bytes.length;
+      buffer.writeVarUint32Small7(length << 2 | 0b1);
+      if (length > SMALL_STRING_THRESHOLD) {
+        buffer.writeInt64(byteString.hashCode);
+      } else {
+        buffer.writeByte(byteString.encoding.getValue());
+      }
       buffer.writeBytes(byteString.bytes);
     } else {
       buffer.writeVarUint32Small7(((id + 1) << 2) | 0b11);
@@ -84,6 +97,7 @@ public final class MetaStringResolver {
   public void writeMetaStringBytes(MemoryBuffer buffer, MetaStringBytes 
byteString) {
     short id = byteString.dynamicWriteStringId;
     if (id == MetaStringBytes.DEFAULT_DYNAMIC_WRITE_STRING_ID) {
+      // noinspection Duplicates
       id = dynamicWriteStringId++;
       byteString.dynamicWriteStringId = id;
       MetaStringBytes[] dynamicWrittenMetaString = this.dynamicWrittenString;
@@ -91,8 +105,13 @@ public final class MetaStringResolver {
         dynamicWrittenMetaString = growWrite(id);
       }
       dynamicWrittenMetaString[id] = byteString;
-      buffer.writeVarUint32Small7(byteString.bytes.length << 1);
-      buffer.writeInt64(byteString.hashCode);
+      int length = byteString.bytes.length;
+      buffer.writeVarUint32Small7(length << 1);
+      if (length > SMALL_STRING_THRESHOLD) {
+        buffer.writeInt64(byteString.hashCode);
+      } else {
+        buffer.writeByte(byteString.encoding.getValue());
+      }
       buffer.writeBytes(byteString.bytes);
     } else {
       buffer.writeVarUint32Small7(((id + 1) << 1) | 1);
@@ -119,8 +138,10 @@ public final class MetaStringResolver {
   public MetaStringBytes readMetaStringBytesWithFlag(MemoryBuffer buffer, int 
header) {
     int len = header >>> 2;
     if ((header & 0b10) == 0) {
-      long hashCode = buffer.readInt64();
-      MetaStringBytes byteString = trySkipMetaStringBytes(buffer, len, 
hashCode);
+      MetaStringBytes byteString =
+          len <= SMALL_STRING_THRESHOLD
+              ? readSmallMetaStringBytes(buffer, len)
+              : readBigMetaStringBytes(buffer, len, buffer.readInt64());
       updateDynamicString(byteString);
       return byteString;
     } else {
@@ -132,14 +153,10 @@ public final class MetaStringResolver {
       MemoryBuffer buffer, MetaStringBytes cache, int header) {
     int len = header >>> 2;
     if ((header & 0b10) == 0) {
-      long hashCode = buffer.readInt64();
-      if (cache.hashCode == hashCode) {
-        // skip byteString data
-        buffer.increaseReaderIndex(len);
-        updateDynamicString(cache);
-        return cache;
-      }
-      MetaStringBytes byteString = trySkipMetaStringBytes(buffer, len, 
hashCode);
+      MetaStringBytes byteString =
+          len <= SMALL_STRING_THRESHOLD
+              ? readSmallMetaStringBytes(buffer, cache, len)
+              : readBigMetaStringBytes(buffer, cache, len);
       updateDynamicString(byteString);
       return byteString;
     } else {
@@ -151,8 +168,10 @@ public final class MetaStringResolver {
     int header = buffer.readVarUint32Small7();
     int len = header >>> 1;
     if ((header & 0b1) == 0) {
-      long hashCode = buffer.readInt64();
-      MetaStringBytes byteString = trySkipMetaStringBytes(buffer, len, 
hashCode);
+      MetaStringBytes byteString =
+          len > SMALL_STRING_THRESHOLD
+              ? readBigMetaStringBytes(buffer, len, buffer.readInt64())
+              : readSmallMetaStringBytes(buffer, len);
       updateDynamicString(byteString);
       return byteString;
     } else {
@@ -164,24 +183,31 @@ public final class MetaStringResolver {
     int header = buffer.readVarUint32Small7();
     int len = header >>> 1;
     if ((header & 0b1) == 0) {
-      long hashCode = buffer.readInt64();
-      if (cache.hashCode == hashCode) {
-        // skip byteString data
-        buffer.increaseReaderIndex(len);
-        updateDynamicString(cache);
-        return cache;
-      } else {
-        MetaStringBytes byteString = trySkipMetaStringBytes(buffer, len, 
hashCode);
-        updateDynamicString(byteString);
-        return byteString;
-      }
+      MetaStringBytes byteString =
+          len <= SMALL_STRING_THRESHOLD
+              ? readSmallMetaStringBytes(buffer, cache, len)
+              : readBigMetaStringBytes(buffer, cache, len);
+      updateDynamicString(byteString);
+      return byteString;
     } else {
       return dynamicReadStringIds[len - 1];
     }
   }
 
+  private MetaStringBytes readBigMetaStringBytes(
+      MemoryBuffer buffer, MetaStringBytes cache, int len) {
+    long hashCode = buffer.readInt64();
+    if (cache.hashCode == hashCode) {
+      // skip byteString data
+      buffer.increaseReaderIndex(len);
+      return cache;
+    } else {
+      return readBigMetaStringBytes(buffer, len, hashCode);
+    }
+  }
+
   /** Read enum string by try to reuse previous read {@link MetaStringBytes} 
object. */
-  private MetaStringBytes trySkipMetaStringBytes(MemoryBuffer buffer, int len, 
long hashCode) {
+  private MetaStringBytes readBigMetaStringBytes(MemoryBuffer buffer, int len, 
long hashCode) {
     MetaStringBytes byteString = hash2MetaStringBytesMap.get(hashCode);
     if (byteString == null) {
       byteString = new MetaStringBytes(buffer.readBytes(len), hashCode);
@@ -193,6 +219,53 @@ public final class MetaStringResolver {
     return byteString;
   }
 
+  private MetaStringBytes readSmallMetaStringBytes(MemoryBuffer buffer, int 
len) {
+    long v1, v2 = 0;
+    byte encoding = buffer.readByte();
+    if (len <= 8) {
+      v1 = buffer.readBytesAsInt64(len);
+    } else {
+      v1 = buffer.readInt64();
+      v2 = buffer.readBytesAsInt64(len - 8);
+    }
+    MetaStringBytes byteString = longLongMap.get(v1, v2);
+    if (byteString == null) {
+      byteString = createSmallMetaStringBytes(len, encoding, v1, v2);
+    }
+    return byteString;
+  }
+
+  private MetaStringBytes readSmallMetaStringBytes(
+      MemoryBuffer buffer, MetaStringBytes cache, int len) {
+    long v1, v2 = 0;
+    byte encoding = buffer.readByte();
+    if (len <= 8) {
+      v1 = buffer.readBytesAsInt64(len);
+    } else {
+      v1 = buffer.readInt64();
+      v2 = buffer.readBytesAsInt64(len - 8);
+    }
+    if (cache.first8Bytes == v1 && cache.second8Bytes == v2) {
+      return cache;
+    }
+    MetaStringBytes byteString = longLongMap.get(v1, v2);
+    if (byteString == null) {
+      byteString = createSmallMetaStringBytes(len, encoding, v1, v2);
+    }
+    return byteString;
+  }
+
+  /**
+   * Builds and registers the {@link MetaStringBytes} for a small meta string whose payload was
+   * read as two little-endian longs.
+   *
+   * @param len payload length in bytes (at most 16, the size of the scratch array)
+   * @param encoding encoding byte read from the wire; stored in the low 8 bits of the hash code
+   * @param v1 first eight payload bytes packed as a long
+   * @param v2 remaining payload bytes packed as a long, or 0 when {@code len <= 8}
+   * @return the newly created instance, also stored in {@code longLongMap} under (v1, v2)
+   */
+  private MetaStringBytes createSmallMetaStringBytes(
+      int len, byte encoding, long v1, long v2) {
+    byte[] data = new byte[16];
+    LittleEndian.putInt64(data, 0, v1);
+    LittleEndian.putInt64(data, 8, v2);
+    long hashCode = MurmurHash3.murmurhash3_x64_128(data, 0, len, 47)[0];
+    // Mask the byte before OR-ing it in: a byte with the high bit set would otherwise be
+    // sign-extended to a long of all ones above bit 7 and overwrite the upper 56 hash bits.
+    hashCode = (hashCode & 0xffffffffffffff00L) | (encoding & 0xffL);
+    MetaStringBytes metaStringBytes = new MetaStringBytes(Arrays.copyOf(data, len), hashCode);
+    longLongMap.put(v1, v2, metaStringBytes);
+    return metaStringBytes;
+  }
+
   private void updateDynamicString(MetaStringBytes byteString) {
     short currentDynamicReadId = dynamicReadStringId++;
     MetaStringBytes[] dynamicReadStringIds = this.dynamicReadStringIds;
diff --git a/java/fury-core/src/main/resources/META-INF/LICENSE 
b/java/fury-core/src/main/resources/META-INF/LICENSE
index 3fe4b8e3..29f6f372 100644
--- a/java/fury-core/src/main/resources/META-INF/LICENSE
+++ b/java/fury-core/src/main/resources/META-INF/LICENSE
@@ -243,6 +243,7 @@ The text of each license is also included in 
licenses/LICENSE-[project].txt.
       java/fury-core/src/main/java/org/apache/fury/collection/IdentityMap.java
       
java/fury-core/src/main/java/org/apache/fury/collection/IdentityObjectIntMap.java
       java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java
+      java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java
       java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java
       java/fury-core/src/main/java/org/apache/fury/type/Generics.java
 
diff --git 
a/java/fury-core/src/main/resources/META-INF/native-image/org.apache.fury/fury-core/native-image.properties
 
b/java/fury-core/src/main/resources/META-INF/native-image/org.apache.fury/fury-core/native-image.properties
index eb578bd5..7c751490 100644
--- 
a/java/fury-core/src/main/resources/META-INF/native-image/org.apache.fury/fury-core/native-image.properties
+++ 
b/java/fury-core/src/main/resources/META-INF/native-image/org.apache.fury/fury-core/native-image.properties
@@ -204,6 +204,7 @@ 
Args=--initialize-at-build-time=org.apache.fury.memory.MemoryBuffer,\
     org.apache.fury.collection.IntArray,\
     org.apache.fury.collection.LazyMap,\
     org.apache.fury.collection.LongMap,\
+    org.apache.fury.collection.LongLongMap,\
     org.apache.fury.collection.MapStatistics,\
     org.apache.fury.collection.MultiKeyWeakMap,\
     org.apache.fury.collection.ObjectArray,\
diff --git 
a/java/fury-core/src/test/java/org/apache/fury/collection/LongLongMapTest.java 
b/java/fury-core/src/test/java/org/apache/fury/collection/LongLongMapTest.java
new file mode 100644
index 00000000..524c4a21
--- /dev/null
+++ 
b/java/fury-core/src/test/java/org/apache/fury/collection/LongLongMapTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.fury.collection;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class LongLongMapTest {
+
+  @Test
+  public void testPut() {
+    LongLongMap<String> map = new LongLongMap<>(10, 0.5f);
+    map.put(1, 1, "a");
+    map.put(1, 2, "b");
+    map.put(1, 3, "c");
+    map.put(2, 1, "d");
+    map.put(3, 1, "f");
+    Assert.assertEquals(map.get(1, 1), "a");
+    Assert.assertEquals(map.get(1, 2), "b");
+    Assert.assertEquals(map.get(1, 3), "c");
+    Assert.assertEquals(map.get(2, 1), "d");
+    Assert.assertEquals(map.get(3, 1), "f");
+    for (int i = 1; i < 100; i++) {
+      map.put(i, i, "a" + i);
+      Assert.assertEquals(map.get(i, i), "a" + i);
+    }
+  }
+}
diff --git 
a/java/fury-core/src/test/java/org/apache/fury/memory/MemoryBufferTest.java 
b/java/fury-core/src/test/java/org/apache/fury/memory/MemoryBufferTest.java
index 48ce9962..42728202 100644
--- a/java/fury-core/src/test/java/org/apache/fury/memory/MemoryBufferTest.java
+++ b/java/fury-core/src/test/java/org/apache/fury/memory/MemoryBufferTest.java
@@ -634,4 +634,16 @@ public class MemoryBufferTest {
       assertEquals(buf.readVarUint36Small(), 0); // overflow
     }
   }
+
+  @Test
+  public void testReadBytesAsInt64() {
+    for (MemoryBuffer buffer :
+        new MemoryBuffer[] {
+          MemoryUtils.buffer(16), 
MemoryUtils.wrap(ByteBuffer.allocateDirect(32)),
+        }) {
+      buffer.writeByte(10);
+      buffer.writeByte(20);
+      assertEquals(buffer.readBytesAsInt64(2), (20 << 8) | 10);
+    }
+  }
 }
diff --git 
a/java/fury-core/src/test/java/org/apache/fury/resolver/MetaStringResolverTest.java
 
b/java/fury-core/src/test/java/org/apache/fury/resolver/MetaStringResolverTest.java
index 9d8446d3..4fea7ef3 100644
--- 
a/java/fury-core/src/test/java/org/apache/fury/resolver/MetaStringResolverTest.java
+++ 
b/java/fury-core/src/test/java/org/apache/fury/resolver/MetaStringResolverTest.java
@@ -22,6 +22,7 @@ package org.apache.fury.resolver;
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertTrue;
 
+import java.nio.ByteBuffer;
 import org.apache.fury.memory.MemoryBuffer;
 import org.apache.fury.memory.MemoryUtils;
 import org.apache.fury.meta.MetaString;
@@ -48,4 +49,23 @@ public class MetaStringResolverTest {
     }
     assertTrue(buffer.writerIndex() < str.getBytes().length + 128 * 4);
   }
+
+  @Test
+  public void testWriteSmallMetaString() {
+    for (MemoryBuffer buffer :
+        new MemoryBuffer[] {
+          MemoryUtils.buffer(32), 
MemoryUtils.wrap(ByteBuffer.allocateDirect(32)),
+        }) {
+      for (int i = 0; i < 32; i++) {
+        String str = StringUtils.random(i, 0);
+        MetaStringResolver resolver = new MetaStringResolver();
+        resolver.writeMetaStringBytes(
+            buffer,
+            resolver.getOrCreateMetaStringBytes(new MetaStringEncoder('.', 
'_').encode(str)));
+        String metaString2 = resolver.readMetaString(buffer);
+        assertEquals(metaString2.hashCode(), str.hashCode());
+        assertEquals(metaString2.getBytes(), str.getBytes());
+      }
+    }
+  }
 }
diff --git a/licenserc.toml b/licenserc.toml
index 46cd3e58..39821976 100644
--- a/licenserc.toml
+++ b/licenserc.toml
@@ -40,6 +40,7 @@ excludes = [
     "java/fury-core/src/main/java/org/apache/fury/collection/IdentityMap.java",
     
"java/fury-core/src/main/java/org/apache/fury/collection/IdentityObjectIntMap.java",
     "java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java",
+    "java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java",
     
"java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java",
     
"java/fury-core/src/main/java/org/apache/fury/io/ClassLoaderObjectInputStream.java",
     "java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java",
diff --git a/python/pyfury/_fury.py b/python/pyfury/_fury.py
index 6552fe36..19a624ed 100644
--- a/python/pyfury/_fury.py
+++ b/python/pyfury/_fury.py
@@ -29,6 +29,7 @@ from typing import Dict, Tuple, TypeVar, Union, Iterable
 from pyfury.lib import mmh3
 
 from pyfury.buffer import Buffer
+from pyfury.meta.metastring import Encoding
 from pyfury.resolver import (
     MapRefResolver,
     NoRefResolver,
@@ -58,6 +59,7 @@ from pyfury._serializer import (
     PICKLE_STRONG_CACHE_CLASS_ID,
     PICKLE_CACHE_CLASS_ID,
     PickleCacheStub,
+    SMALL_STRING_THRESHOLD,
 )
 from pyfury.type import (
     FuryType,
@@ -510,7 +512,11 @@ class ClassResolver:
             self._dynamic_write_string_id += 1
             self._dynamic_written_enum_string.append(enum_string_bytes)
             buffer.write_varint32(enum_string_bytes.length << 1)
-            buffer.write_int64(enum_string_bytes.hashcode)
+            if enum_string_bytes.length <= SMALL_STRING_THRESHOLD:
+                # TODO(chaokunyang) support meta string encoding
+                buffer.write_int8(Encoding.UTF_8.value)
+            else:
+                buffer.write_int64(enum_string_bytes.hashcode)
             buffer.write_bytes(enum_string_bytes.data)
         else:
             buffer.write_varint32(((dynamic_write_string_id + 1) << 1) | 1)
@@ -520,15 +526,30 @@ class ClassResolver:
         length = header >> 1
         if header & 0b1 != 0:
             return self._dynamic_id_to_enum_str_list[length - 1]
-        hashcode = buffer.read_int64()
-        reader_index = buffer.reader_index
-        buffer.check_bound(reader_index, length)
-        buffer.reader_index = reader_index + length
-        enum_str = self._hash_to_enum_string.get(hashcode)
-        if enum_str is None:
-            str_bytes = buffer.get_bytes(reader_index, length)
-            enum_str = MetaStringBytes(str_bytes, hashcode=hashcode)
-            self._hash_to_enum_string[hashcode] = enum_str
+        if length <= SMALL_STRING_THRESHOLD:
+            buffer.read_int8()
+            if length <= 8:
+                v1 = buffer.read_bytes_as_int64(length)
+                v2 = 0
+            else:
+                v1 = buffer.read_int64()
+                v2 = buffer.read_bytes_as_int64(length - 8)
+            hashcode = v1 * 31 + v2
+            enum_str = self._hash_to_enum_string.get(hashcode)
+            if enum_str is None:
+                str_bytes = buffer.get_bytes(buffer.reader_index - length, 
length)
+                enum_str = MetaStringBytes(str_bytes, hashcode=hashcode)
+                self._hash_to_enum_string[hashcode] = enum_str
+        else:
+            hashcode = buffer.read_int64()
+            reader_index = buffer.reader_index
+            buffer.check_bound(reader_index, length)
+            buffer.reader_index = reader_index + length
+            enum_str = self._hash_to_enum_string.get(hashcode)
+            if enum_str is None:
+                str_bytes = buffer.get_bytes(reader_index, length)
+                enum_str = MetaStringBytes(str_bytes, hashcode=hashcode)
+                self._hash_to_enum_string[hashcode] = enum_str
         self._dynamic_id_to_enum_str_list.append(enum_str)
         return enum_str
 
diff --git a/python/pyfury/_serialization.pyx b/python/pyfury/_serialization.pyx
index 00641a42..3f2c6041 100644
--- a/python/pyfury/_serialization.pyx
+++ b/python/pyfury/_serialization.pyx
@@ -35,6 +35,7 @@ from pyfury._fury import _PicklerStub, _UnpicklerStub, 
Pickler, Unpickler
 from pyfury._fury import _ENABLE_CLASS_REGISTRATION_FORCIBLY
 from pyfury.error import ClassNotCompatibleError
 from pyfury.lib import mmh3
+from pyfury.meta.metastring import Encoding
 from pyfury.type import is_primitive_type, FuryType, Int8Type, Int16Type, 
Int32Type, \
     Int64Type, Float32Type, Float64Type, Int16ArrayType, Int32ArrayType, \
     Int64ArrayType, Float32ArrayType, Float64ArrayType, infer_field, load_class
@@ -45,6 +46,7 @@ from libcpp.vector cimport vector
 from cpython cimport PyObject
 from cpython.ref cimport *
 from libcpp cimport bool as c_bool
+from libcpp.utility cimport pair
 from cython.operator cimport dereference as deref
 from pyfury._util cimport Buffer
 from pyfury.includes.libabsl cimport flat_hash_map
@@ -233,6 +235,7 @@ cdef int32_t NOT_NULL_PYBOOL_FLAG = NOT_NULL_VALUE_FLAG & 
0b11111111 | \
                                     (PYBOOL_CLASS_ID << 9)
 cdef int32_t NOT_NULL_STRING_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | \
                                     (STRING_CLASS_ID << 9)
+cdef int32_t SMALL_STRING_THRESHOLD = 16
 
 
 cdef class BufferObject:
@@ -307,6 +310,7 @@ cdef class ClassResolver:
         flat_hash_map[int64_t, PyObject*] _c_hash_to_classinfo
         # hash -> MetaStringBytes
         flat_hash_map[int64_t, PyObject*] _c_hash_to_enum_string_bytes
+        flat_hash_map[pair[int64_t, int64_t], PyObject*] 
_c_hash_to_small_metastring_bytes
         # classname MetaStringBytes address -> class
         flat_hash_map[uint64_t, PyObject*] _c_str_bytes_to_class
         # classname MetaStringBytes address -> str
@@ -659,13 +663,17 @@ cdef class ClassResolver:
     cdef inline _write_enum_string_bytes(
             self, Buffer buffer, MetaStringBytes enum_string_bytes):
         cdef int16_t dynamic_class_id = 
enum_string_bytes.dynamic_write_string_id
+        cdef int32_t length = enum_string_bytes.length
         if dynamic_class_id == DEFAULT_DYNAMIC_WRITE_STRING_ID:
             dynamic_class_id = self.dynamic_write_string_id
             enum_string_bytes.dynamic_write_string_id = dynamic_class_id
             self.dynamic_write_string_id += 1
             
self._c_dynamic_written_enum_string.push_back(<PyObject*>enum_string_bytes)
-            buffer.write_varint32(enum_string_bytes.length << 1)
-            buffer.write_int64(enum_string_bytes.hashcode)
+            buffer.write_varint32(length << 1)
+            if length <= SMALL_STRING_THRESHOLD:
+                buffer.write_int8(Encoding.UTF_8.value)
+            else:
+                buffer.write_int64(enum_string_bytes.hashcode)
             buffer.write_bytes(enum_string_bytes.data)
         else:
             buffer.write_varint32(((dynamic_class_id + 1) << 1) | 1)
@@ -675,21 +683,40 @@ cdef class ClassResolver:
         cdef int32_t length = header >> 1
         if header & 0b1 != 0:
             return 
<MetaStringBytes>self._c_dynamic_id_to_enum_string_vec[length - 1]
-        cdef int64_t hashcode = buffer.read_int64()
-        cdef int32_t reader_index = buffer.reader_index
-        buffer.check_bound(reader_index, length)
-        buffer.reader_index = reader_index + length
-        cdef PyObject* enum_str_ptr = 
self._c_hash_to_enum_string_bytes[hashcode]
-        if enum_str_ptr != NULL:
-            self._c_dynamic_id_to_enum_string_vec.push_back(enum_str_ptr)
-            return <MetaStringBytes>enum_str_ptr
-        cdef bytes str_bytes = buffer.get_bytes(reader_index, length)
-        cdef MetaStringBytes enum_str = MetaStringBytes(str_bytes, 
hashcode=hashcode)
-        self._enum_str_set.add(enum_str)
-        enum_str_ptr = <PyObject*>enum_str
-        self._c_hash_to_enum_string_bytes[hashcode] = enum_str_ptr
+        cdef int64_t v1 = 0, v2 = 0, hashcode
+        cdef PyObject* enum_str_ptr
+        cdef int32_t reader_index
+        if length <= SMALL_STRING_THRESHOLD:
+            # TODO(chaokunyang) support metastring encoding
+            buffer.read_int8()
+            if length <= 8:
+                v1 = buffer.read_bytes_as_int64(length)
+            else:
+                v1 = buffer.read_int64()
+                v2 = buffer.read_bytes_as_int64(length - 8)
+            hashcode = v1 * 31 + v2
+            enum_str_ptr = 
self._c_hash_to_small_metastring_bytes[pair[int64_t, int64_t](v1, v2)]
+            if enum_str_ptr == NULL:
+                reader_index = buffer.reader_index
+                str_bytes = buffer.get_bytes(reader_index - length, length)
+                enum_str = MetaStringBytes(str_bytes, hashcode=hashcode)
+                self._enum_str_set.add(enum_str)
+                enum_str_ptr = <PyObject*>enum_str
+                self._c_hash_to_small_metastring_bytes[pair[int64_t, 
int64_t](v1, v2)] = enum_str_ptr
+        else:
+            hashcode = buffer.read_int64()
+            reader_index = buffer.reader_index
+            buffer.check_bound(reader_index, length)
+            buffer.reader_index = reader_index + length
+            enum_str_ptr = self._c_hash_to_enum_string_bytes[hashcode]
+            if enum_str_ptr == NULL:
+                str_bytes = buffer.get_bytes(reader_index, length)
+                enum_str = MetaStringBytes(str_bytes, hashcode=hashcode)
+                self._enum_str_set.add(enum_str)
+                enum_str_ptr = <PyObject*>enum_str
+                self._c_hash_to_enum_string_bytes[hashcode] = enum_str_ptr
         self._c_dynamic_id_to_enum_string_vec.push_back(enum_str_ptr)
-        return enum_str
+        return <MetaStringBytes>enum_str_ptr
 
     cpdef inline xwrite_class(self, Buffer buffer, cls):
         cdef PyObject* classinfo_ptr = 
self._c_classes_info[<uintptr_t><PyObject*>cls]
diff --git a/python/pyfury/_serializer.py b/python/pyfury/_serializer.py
index e3c8dbea..c6f39b16 100644
--- a/python/pyfury/_serializer.py
+++ b/python/pyfury/_serializer.py
@@ -59,6 +59,7 @@ NOT_NULL_PYINT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | 
(PYINT_CLASS_ID << 9)
 NOT_NULL_PYFLOAT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYFLOAT_CLASS_ID 
<< 9)
 NOT_NULL_PYBOOL_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYBOOL_CLASS_ID << 
9)
 NOT_NULL_STRING_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (STRING_CLASS_ID << 
9)
+SMALL_STRING_THRESHOLD = 16
 
 
 class _PickleStub:
diff --git a/python/pyfury/_util.pxd b/python/pyfury/_util.pxd
index 76ebe0fc..77f76ace 100644
--- a/python/pyfury/_util.pxd
+++ b/python/pyfury/_util.pxd
@@ -152,6 +152,8 @@ cdef class Buffer:
 
     cpdef inline bytes read_bytes(self, int32_t length)
 
+    cpdef inline int64_t read_bytes_as_int64(self, int32_t length)
+
     cpdef inline put_bytes(self, uint32_t offset, bytes value)
 
     cpdef inline bytes get_bytes(self, uint32_t offset, uint32_t nbytes)
diff --git a/python/pyfury/_util.pyx b/python/pyfury/_util.pyx
index 57a913e8..b7587056 100644
--- a/python/pyfury/_util.pyx
+++ b/python/pyfury/_util.pyx
@@ -237,6 +237,20 @@ cdef class Buffer:
         self.reader_index += length
         return value
 
+    cpdef inline int64_t read_bytes_as_int64(self, int32_t length):
+        """Read ``length`` bytes at the reader index as one little-endian integer.
+
+        The first byte read becomes the lowest 8 bits of the result. The reader
+        index is advanced by ``length``. ``length`` is expected to be at most 8;
+        a non-positive ``length`` reads nothing and returns 0.
+        """
+        cdef int32_t size_ = self.c_buffer.get().size()
+        # Must start at zero: the byte-wise path below ORs into it.
+        cdef int64_t result = 0
+        cdef int32_t i
+        if length <= 0:
+            # Nothing to read; also keeps the mask below from shifting by 64 bits,
+            # which is undefined in C.
+            return result
+        if size_ - (self.reader_index + 8) > 0:
+            # Fast path: more than 8 bytes remain, so load a full word and mask off
+            # the bytes beyond ``length``.
+            result = self.get_int64(self.reader_index)
+            result = result & (0xffffffffffffffffL >> ((8 - length) * 8))
+            self.reader_index += length
+        else:
+            # Slow path near the end of the buffer: assemble the value byte by byte.
+            # read_int8 is taken to advance reader_index itself (it is used elsewhere
+            # in this change to consume a byte), so no extra advance is applied here.
+            for i in range(length):
+                result = result | (<int64_t>(self.read_int8()) & 0xff) << (i * 8)
+        return result
+
     cpdef inline put_bytes(self, uint32_t offset, bytes value):
         cdef const unsigned char[:] data = value
         cdef int32_t length = data.nbytes


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to