This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git


The following commit(s) were added to refs/heads/main by this push:
     new 2199d4a78 feat(java/python): align java and python compatible mode 
serialization (#2602)
2199d4a78 is described below

commit 2199d4a78efcdb215bf7ff76c2cdccea94531d1e
Author: Shawn Yang <[email protected]>
AuthorDate: Thu Sep 11 00:38:40 2025 +0800

    feat(java/python): align java and python compatible mode serialization 
(#2602)
    
    ## Why?
    
    Align Java and Python serialization in compatible mode so that data
    written by one language can be read by the other.
    
    ## What does this PR do?
    
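    - Java: add `Fory.isCompatible()`; `XtypeResolver` registers structs as
      `NAMED_COMPATIBLE_STRUCT` in compatible mode and writes/reads shared
      class meta for `COMPATIBLE_STRUCT`/`NAMED_COMPATIBLE_STRUCT`.
    - Python: register structs as `COMPATIBLE_STRUCT`/
      `NAMED_COMPATIBLE_STRUCT` when meta share is enabled; `MetaContext`
      writes the type id first and shares meta only for types accepted by
      `IsTypeShareMeta`.
    - C++: add `IsTypeShareMeta` to `fory/type/type.h`.
    - Tests: run the cross-language struct tests in both schema-consistent
      and compatible mode.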
    
    ## Related issues
    
    https://github.com/apache/fory/pull/2509
    https://github.com/apache/fory/issues/1938
    https://github.com/apache/fory/issues/2160
    https://github.com/apache/fory/pull/2278
    #2593
    
    ## Does this PR introduce any user-facing change?
    
    <!--
    If any user-facing interface changes, please [open an
    issue](https://github.com/apache/fory/issues/new/choose) describing the
    need to do so and update the document if necessary.
    
    Delete section if not applicable.
    -->
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
    
    <!--
    When the PR has an impact on performance (if you don't know whether the
    PR will have an impact on performance, you can submit the PR first, and
    if it will have impact on performance, the code reviewer will explain
    it), be sure to attach a benchmark data here.
    
    Delete section if not applicable.
    -->
---
 cpp/fory/type/type.h                               | 12 ++++
 .../src/main/java/org/apache/fory/Fory.java        |  4 ++
 .../org/apache/fory/resolver/ClassResolver.java    | 13 ++--
 .../org/apache/fory/resolver/XtypeResolver.java    | 33 +++++++---
 .../java/org/apache/fory/CrossLanguageTest.java    | 15 +++--
 .../test/java/org/apache/fory/ForyTestBase.java    |  5 ++
 python/pyfory/_fory.py                             |  5 +-
 python/pyfory/_registry.py                         | 24 +++----
 python/pyfory/_serialization.pyx                   | 75 ++++++++++++++--------
 python/pyfory/_struct.py                           |  6 +-
 python/pyfory/includes/libserialization.pxd        |  1 +
 python/pyfory/meta/metastring.py                   |  4 +-
 python/pyfory/meta/typedef.py                      | 23 ++++---
 python/pyfory/meta/typedef_decoder.py              | 11 ++--
 python/pyfory/meta/typedef_encoder.py              | 13 ++--
 python/pyfory/tests/test_cross_language.py         |  8 ++-
 python/pyfory/type.py                              |  2 +-
 17 files changed, 165 insertions(+), 89 deletions(-)

diff --git a/cpp/fory/type/type.h b/cpp/fory/type/type.h
index a70987826..a2e8d8078 100644
--- a/cpp/fory/type/type.h
+++ b/cpp/fory/type/type.h
@@ -119,4 +119,16 @@ inline bool IsNamespacedType(int32_t type_id) {
   }
 }
 
+inline bool IsTypeShareMeta(int32_t type_id) {
+  switch (static_cast<TypeId>(type_id)) {
+  case TypeId::NAMED_ENUM:
+  case TypeId::NAMED_STRUCT:
+  case TypeId::NAMED_EXT:
+  case TypeId::COMPATIBLE_STRUCT:
+  case TypeId::NAMED_COMPATIBLE_STRUCT:
+    return true;
+  default:
+    return false;
+  }
+}
 } // namespace fory
diff --git a/java/fory-core/src/main/java/org/apache/fory/Fory.java 
b/java/fory-core/src/main/java/org/apache/fory/Fory.java
index e01a7994d..61073d13b 100644
--- a/java/fory-core/src/main/java/org/apache/fory/Fory.java
+++ b/java/fory-core/src/main/java/org/apache/fory/Fory.java
@@ -1703,6 +1703,10 @@ public final class Fory implements BaseFory {
     return crossLanguage;
   }
 
+  public boolean isCompatible() {
+    return config.getCompatibleMode() == CompatibleMode.COMPATIBLE;
+  }
+
   public boolean trackingRef() {
     return refTracking;
   }
diff --git 
a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java 
b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java
index 471a6069e..818b56c22 100644
--- a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java
+++ b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java
@@ -1725,16 +1725,11 @@ public class ClassResolver implements TypeResolver {
 
   private void writeClassDefs(
       MemoryBuffer buffer, ObjectArray<ClassDef> writingClassDefs, int size) {
-    int writerIndex = buffer.writerIndex();
     for (int i = 0; i < size; i++) {
-      byte[] encoded = writingClassDefs.get(i).getEncoded();
-      int bytesLen = encoded.length;
-      buffer.ensure(writerIndex + bytesLen);
-      final byte[] targetArray = buffer.getHeapMemory();
-      System.arraycopy(encoded, 0, targetArray, writerIndex, bytesLen);
-      writerIndex += bytesLen;
-    }
-    buffer.writerIndex(writerIndex);
+      buffer.writeBytes(writingClassDefs.get(i).getEncoded());
+      MemoryBuffer memoryBuffer = 
MemoryBuffer.fromByteArray(writingClassDefs.get(i).getEncoded());
+      ClassDef.readClassDef(fory, memoryBuffer, memoryBuffer.readInt64());
+    }
   }
 
   /**
diff --git 
a/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java 
b/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java
index ca7f83e2c..3c32204c9 100644
--- a/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java
+++ b/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java
@@ -38,11 +38,13 @@ import java.time.Instant;
 import java.time.LocalDate;
 import java.time.LocalDateTime;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -210,7 +212,8 @@ public class XtypeResolver implements TypeResolver {
     short xtypeId;
     if (serializer != null) {
       if (isStructType(serializer)) {
-        xtypeId = Types.NAMED_STRUCT;
+        xtypeId =
+            (short) (fory.isCompatible() ? Types.NAMED_COMPATIBLE_STRUCT : 
Types.NAMED_STRUCT);
       } else if (serializer instanceof EnumSerializer) {
         xtypeId = Types.NAMED_ENUM;
       } else {
@@ -220,7 +223,8 @@ public class XtypeResolver implements TypeResolver {
       if (type.isEnum()) {
         xtypeId = Types.NAMED_ENUM;
       } else {
-        xtypeId = Types.NAMED_STRUCT;
+        xtypeId =
+            (short) (fory.isCompatible() ? Types.NAMED_COMPATIBLE_STRUCT : 
Types.NAMED_STRUCT);
       }
     }
     register(type, serializer, namespace, typeName, xtypeId);
@@ -530,9 +534,9 @@ public class XtypeResolver implements TypeResolver {
     registerDefaultTypes(Types.INT64_ARRAY, long[].class);
     registerDefaultTypes(Types.FLOAT32_ARRAY, float[].class);
     registerDefaultTypes(Types.FLOAT64_ARRAY, double[].class);
-    registerDefaultTypes(Types.LIST, ArrayList.class, Object[].class);
-    registerDefaultTypes(Types.SET, HashSet.class, LinkedHashSet.class);
-    registerDefaultTypes(Types.MAP, HashMap.class, LinkedHashMap.class);
+    registerDefaultTypes(Types.LIST, ArrayList.class, Object[].class, 
List.class, Collection.class);
+    registerDefaultTypes(Types.SET, HashSet.class, LinkedHashSet.class, 
Set.class);
+    registerDefaultTypes(Types.MAP, HashMap.class, LinkedHashMap.class, 
Map.class);
     registerDefaultTypes(Types.LOCAL_DATE, LocalDate.class);
   }
 
@@ -542,8 +546,12 @@ public class XtypeResolver implements TypeResolver {
     classInfoMap.put(defaultType, classInfo);
     xtypeIdToClassMap.put(xtypeId, classInfo);
     for (Class<?> otherType : otherTypes) {
-      classInfo = newClassInfo(otherType, 
classResolver.getSerializer(otherType), (short) xtypeId);
-      classInfoMap.put(otherType, classInfo);
+      Serializer<?> serializer =
+          ReflectionUtils.isAbstract(otherType)
+              ? classInfo.serializer
+              : classResolver.getSerializer(otherType);
+      ClassInfo info = newClassInfo(otherType, serializer, (short) xtypeId);
+      classInfoMap.put(otherType, info);
     }
   }
 
@@ -561,7 +569,6 @@ public class XtypeResolver implements TypeResolver {
     switch (internalTypeId) {
       case Types.NAMED_ENUM:
       case Types.NAMED_STRUCT:
-      case Types.NAMED_COMPATIBLE_STRUCT:
       case Types.NAMED_EXT:
         if (shareMeta) {
           writeSharedClassMeta(buffer, classInfo);
@@ -572,6 +579,11 @@ public class XtypeResolver implements TypeResolver {
         assert classInfo.typeNameBytes != null;
         metaStringResolver.writeMetaStringBytes(buffer, 
classInfo.typeNameBytes);
         break;
+      case Types.NAMED_COMPATIBLE_STRUCT:
+      case Types.COMPATIBLE_STRUCT:
+        assert shareMeta : "Meta share must be enabled for compatible mode";
+        writeSharedClassMeta(buffer, classInfo);
+        break;
       default:
         break;
     }
@@ -634,7 +646,6 @@ public class XtypeResolver implements TypeResolver {
     switch (internalTypeId) {
       case Types.NAMED_ENUM:
       case Types.NAMED_STRUCT:
-      case Types.NAMED_COMPATIBLE_STRUCT:
       case Types.NAMED_EXT:
         if (shareMeta) {
           return readSharedClassMeta(buffer);
@@ -642,6 +653,10 @@ public class XtypeResolver implements TypeResolver {
         MetaStringBytes packageBytes = 
metaStringResolver.readMetaStringBytes(buffer);
         MetaStringBytes simpleClassNameBytes = 
metaStringResolver.readMetaStringBytes(buffer);
         return loadBytesToClassInfo(internalTypeId, packageBytes, 
simpleClassNameBytes);
+      case Types.NAMED_COMPATIBLE_STRUCT:
+      case Types.COMPATIBLE_STRUCT:
+        assert shareMeta : "Meta share must be enabled for compatible mode";
+        return readSharedClassMeta(buffer);
       case Types.LIST:
         return getListClassInfo();
       case Types.TIMESTAMP:
diff --git 
a/java/fory-core/src/test/java/org/apache/fory/CrossLanguageTest.java 
b/java/fory-core/src/test/java/org/apache/fory/CrossLanguageTest.java
index 21d49a386..bc74fef00 100644
--- a/java/fory-core/src/test/java/org/apache/fory/CrossLanguageTest.java
+++ b/java/fory-core/src/test/java/org/apache/fory/CrossLanguageTest.java
@@ -483,11 +483,13 @@ public class CrossLanguageTest extends ForyTestBase {
     roundBytes("test_struct_hash", buffer.getBytes(0, 4));
   }
 
-  @Test
-  public void testSerializeSimpleStruct() throws Exception {
+  @Test(dataProvider = "compatible")
+  public void testSerializeSimpleStruct(boolean compatible) throws Exception {
     Fory fory =
         Fory.builder()
             .withLanguage(Language.XLANG)
+            .withCompatibleMode(
+                compatible ? CompatibleMode.COMPATIBLE : 
CompatibleMode.SCHEMA_CONSISTENT)
             .withRefTracking(true)
             .requireClassRegistration(false)
             .build();
@@ -495,7 +497,7 @@ public class CrossLanguageTest extends ForyTestBase {
     ComplexObject2 obj2 = new ComplexObject2();
     obj2.f1 = true;
     obj2.f2 = new HashMap<>(ImmutableMap.of((byte) -1, 2));
-    structRoundBack(fory, obj2, "test_serialize_simple_struct");
+    structRoundBack(fory, obj2, "test_serialize_simple_struct" + (compatible ? 
"_compatible" : ""));
   }
 
   @Test
@@ -530,10 +532,13 @@ public class CrossLanguageTest extends ForyTestBase {
     Assert.assertEquals(fory.deserialize(serialized), obj);
   }
 
-  public void testSerializeComplexStruct() throws Exception {
+  @Test(dataProvider = "compatible")
+  public void testSerializeComplexStruct(boolean compatible) throws Exception {
     Fory fory =
         Fory.builder()
             .withLanguage(Language.XLANG)
+            .withCompatibleMode(
+                compatible ? CompatibleMode.COMPATIBLE : 
CompatibleMode.SCHEMA_CONSISTENT)
             .withRefTracking(true)
             .requireClassRegistration(false)
             .build();
@@ -556,7 +561,7 @@ public class CrossLanguageTest extends ForyTestBase {
     obj.f11 = new short[] {(short) 1, (short) 2};
     obj.f12 = ImmutableList.of((short) -1, (short) 4);
 
-    structRoundBack(fory, obj, "test_serialize_complex_struct");
+    structRoundBack(fory, obj, "test_serialize_complex_struct" + (compatible ? 
"_compatible" : ""));
   }
 
   private void structRoundBack(Fory fory, Object obj, String testName) throws 
IOException {
diff --git a/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java 
b/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java
index 2bb24606e..f2d595688 100644
--- a/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java
+++ b/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java
@@ -131,6 +131,11 @@ public abstract class ForyTestBase {
     return new Object[][] {{false}, {true}};
   }
 
+  @DataProvider
+  public static Object[][] compatible() {
+    return new Object[][] {{false}, {true}};
+  }
+
   @DataProvider
   public static Object[][] compressNumber() {
     return new Object[][] {{false}, {true}};
diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py
index b05b37ffc..0d7f6adbe 100644
--- a/python/pyfory/_fory.py
+++ b/python/pyfory/_fory.py
@@ -144,15 +144,14 @@ class Fory:
             self.ref_resolver = MapRefResolver()
         else:
             self.ref_resolver = NoRefResolver()
-        from pyfory._serialization import MetaStringResolver
+        from pyfory._serialization import MetaStringResolver, 
SerializationContext
         from pyfory._registry import TypeResolver
 
         self.metastring_resolver = MetaStringResolver()
         self.type_resolver = TypeResolver(self, meta_share=compatible)
+        self.serialization_context = SerializationContext(fory=self, 
scoped_meta_share_enabled=compatible)
         self.type_resolver.initialize()
-        from pyfory._serialization import SerializationContext
 
-        self.serialization_context = 
SerializationContext(scoped_meta_share_enabled=compatible)
         self.buffer = Buffer.allocate(32)
         if not require_type_registration:
             warnings.warn(
diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py
index 8b421f036..6f4ae76e9 100644
--- a/python/pyfory/_registry.py
+++ b/python/pyfory/_registry.py
@@ -169,6 +169,7 @@ class TypeResolver:
         "_type_id_to_typeinfo",
         "_meta_shared_typeinfo",
         "meta_share",
+        "serialization_context",
     )
 
     def __init__(self, fory, meta_share=False):
@@ -202,6 +203,7 @@ class TypeResolver:
         self._initialize_xlang()
         if self.fory.language == Language.PYTHON:
             self._initialize_py()
+        self.serialization_context = self.fory.serialization_context
 
     def _initialize_py(self):
         register = functools.partial(self._register_type, internal=True)
@@ -369,7 +371,10 @@ class TypeResolver:
                 type_id = TypeId.NAMED_EXT if type_id is None else ((type_id 
<< 8) + TypeId.EXT)
             else:
                 serializer = None
-                type_id = TypeId.NAMED_STRUCT if type_id is None else 
((type_id << 8) + TypeId.STRUCT)
+                if self.meta_share:
+                    type_id = TypeId.NAMED_COMPATIBLE_STRUCT if type_id is 
None else ((type_id << 8) + TypeId.COMPATIBLE_STRUCT)
+                else:
+                    type_id = TypeId.NAMED_STRUCT if type_id is None else 
((type_id << 8) + TypeId.STRUCT)
         elif not internal:
             type_id = TypeId.NAMED_EXT if type_id is None else ((type_id << 8) 
+ TypeId.EXT)
         return self.__register_type(
@@ -607,14 +612,12 @@ class TypeResolver:
     def write_typeinfo(self, buffer, typeinfo):
         if typeinfo.dynamic_type:
             return
-        type_id = typeinfo.type_id
-        internal_type_id = type_id & 0xFF
-
         # Check if meta share is enabled first
         if self.meta_share:
             self.write_shared_type_meta(buffer, typeinfo)
             return
-
+        type_id = typeinfo.type_id
+        internal_type_id = type_id & 0xFF
         buffer.write_varuint32(type_id)
         if TypeId.is_namespaced_type(internal_type_id):
             self.metastring_resolver.write_meta_string_bytes(buffer, 
typeinfo.namespace_bytes)
@@ -659,17 +662,14 @@ class TypeResolver:
 
     def write_shared_type_meta(self, buffer, typeinfo):
         """Write shared type meta information."""
-        assert typeinfo.type_def is not None, "Type info must be set when meta 
share is enabled"
         meta_context = self.fory.serialization_context.meta_context
-        meta_context.write_typeinfo(buffer, typeinfo)
+        meta_context.write_shared_typeinfo(buffer, typeinfo)
 
     def read_shared_type_meta(self, buffer):
         """Read shared type meta information."""
-        meta_context = self.fory.serialization_context.meta_context
+        meta_context = self.serialization_context.meta_context
         assert meta_context is not None, "Meta context must be set when meta 
share is enabled"
-        type_id = buffer.read_varuint32()
-        typeinfo = meta_context.get_read_type_info(type_id)
-        assert typeinfo is not None, f"Type info not found for ID {type_id}"
+        typeinfo = meta_context.read_shared_typeinfo(buffer)
         return typeinfo
 
     def write_type_defs(self, buffer):
@@ -704,7 +704,7 @@ class TypeResolver:
                 type_info = self._build_type_info_from_typedef(type_def)
                 # Cache the tuple for future use
                 self._meta_shared_typeinfo[header] = type_info
-            meta_context.add_read_type_info(type_info)
+            meta_context.add_read_typeinfo(type_info)
 
     def _build_type_info_from_typedef(self, type_def):
         """Build TypeInfo from TypeDef using TypeDef's create_serializer 
method."""
diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx
index 458eb640f..ad9ec5033 100644
--- a/python/pyfory/_serialization.pyx
+++ b/python/pyfory/_serialization.pyx
@@ -37,7 +37,7 @@ from pyfory.meta.metastring import Encoding
 from pyfory.type import is_primitive_type
 from pyfory.util import is_little_endian
 from pyfory.includes.libserialization cimport \
-    (TypeId, IsNamespacedType, Fory_PyBooleanSequenceWriteToBuffer, 
Fory_PyFloatSequenceWriteToBuffer)
+    (TypeId, IsNamespacedType, IsTypeShareMeta, 
Fory_PyBooleanSequenceWriteToBuffer, Fory_PyFloatSequenceWriteToBuffer)
 
 from libc.stdint cimport int8_t, int16_t, int32_t, int64_t, uint64_t
 from libc.stdint cimport *
@@ -447,7 +447,7 @@ cdef class TypeResolver:
         flat_hash_map[pair[int64_t, int64_t], PyObject *] 
_c_meta_hash_to_typeinfo
         MetaStringResolver meta_string_resolver
         c_bool meta_share
-        SerializationContext serialization_context
+        readonly SerializationContext serialization_context
 
     def __init__(self, fory, meta_share=False):
         self.fory = fory
@@ -455,12 +455,12 @@ cdef class TypeResolver:
         self.meta_share = meta_share
         from pyfory._registry import TypeResolver
         self._resolver = TypeResolver(fory, meta_share=meta_share)
-        self.serialization_context = fory.serialization_context
 
     def initialize(self):
         self._resolver.initialize()
         for typeinfo in self._resolver._types_info.values():
             self._populate_typeinfo(typeinfo)
+        self.serialization_context = self.fory.serialization_context
 
     def register(
         self,
@@ -583,7 +583,7 @@ cdef class TypeResolver:
             int32_t type_id = buffer.read_varuint32()
         if type_id < 0:
             type_id = -type_id
-        if type_id > self._c_registered_id_to_type_info.size():
+        if type_id >= self._c_registered_id_to_type_info.size():
             raise ValueError(f"Unexpected type_id {type_id}")
         cdef:
             int32_t internal_type_id = type_id & 0xFF
@@ -598,8 +598,14 @@ cdef class TypeResolver:
         typeinfo = <TypeInfo> typeinfo_ptr
         return typeinfo
     
-    def get_typeinfo_by_id(self, type_id):
-        return self._resolver.get_typeinfo_by_id(type_id=type_id)
+    cpdef inline TypeInfo get_typeinfo_by_id(self, int32_t type_id):
+        if type_id >= self._c_registered_id_to_type_info.size() or type_id < 0 
or IsNamespacedType(type_id & 0xFF):
+            raise ValueError(f"Unexpected type_id {type_id}")
+        typeinfo_ptr = self._c_registered_id_to_type_info[type_id]
+        if typeinfo_ptr == NULL:
+            raise ValueError(f"Unexpected type_id {type_id}")
+        typeinfo = <TypeInfo> typeinfo_ptr
+        return typeinfo        
 
     def get_typeinfo_by_name(self, namespace, typename):
         return self._resolver.get_typeinfo_by_name(namespace=namespace, 
typename=typename)
@@ -610,16 +616,12 @@ cdef class TypeResolver:
     cpdef write_shared_type_meta(self, Buffer buffer, TypeInfo typeinfo):
         """Write shared type meta information."""
         meta_context = self.serialization_context.meta_context
-        assert meta_context is not None, "Meta context must be set when meta 
share is enabled"
-        meta_context.write_typeinfo(buffer, typeinfo)
+        meta_context.write_shared_typeinfo(buffer, typeinfo)
 
     cpdef TypeInfo read_shared_type_meta(self, Buffer buffer):
         """Read shared type meta information."""
         meta_context = self.serialization_context.meta_context
-        assert meta_context is not None, "Meta context must be set when meta 
share is enabled"
-        type_id = buffer.read_varuint32()
-        typeinfo = meta_context.get_read_type_info(type_id)
-        assert typeinfo is not None, f"Type info not found for ID {type_id}"
+        typeinfo = meta_context.read_shared_typeinfo(buffer)
         return typeinfo
 
     cpdef write_type_defs(self, Buffer buffer):
@@ -646,7 +648,7 @@ cdef class MetaContext:
     Context for sharing type meta across multiple serialization. Type name, 
field name and field
     type will be shared between different serialization.
     
-    This is the Cython-optimized equivalent of Java's MetaContext class.
+    Note that this context is not thread-safe; use it with a single Fory instance.
     """
     cdef:
         # Types which have sent definitions to peer
@@ -656,14 +658,24 @@ cdef class MetaContext:
         # Counter for assigning new IDs
         list _writing_type_defs
         list _read_type_infos
+        object fory
+        object type_resolver
     
-    def __cinit__(self):
+    def __cinit__(self, object fory):
+        self.fory = fory
+        self.type_resolver = fory.type_resolver
         self._writing_type_defs = []
         self._read_type_infos = []
     
-    cpdef inline int32_t write_typeinfo(self, Buffer buffer, typeinfo):
+    cpdef inline void write_shared_typeinfo(self, Buffer buffer, typeinfo):
         """Add a type definition to the writing queue."""
         type_cls = typeinfo.cls
+        cdef int32_t type_id = typeinfo.type_id
+        cdef int32_t internal_type_id = type_id & 0xFF
+        buffer.write_varuint32(type_id)
+        if not IsTypeShareMeta(internal_type_id):
+            return
+
         cdef uint64_t type_addr = <uint64_t> <PyObject *> type_cls
         cdef flat_hash_map[uint64_t, int32_t].iterator it = 
self._c_type_map.find(type_addr)
         if it != self._c_type_map.end():
@@ -684,13 +696,16 @@ cdef class MetaContext:
         self._writing_type_defs.clear()
         self._c_type_map.clear()
     
-    cpdef inline add_read_type_info(self, type_info):
+    cpdef inline add_read_typeinfo(self, type_info):
         """Add a type info read from peer."""
         self._read_type_infos.append(type_info)
     
-    cpdef inline get_read_type_info(self, int32_t index):
-        """Get a type info by index."""
-        return self._read_type_infos[index]
+    cpdef inline read_shared_typeinfo(self, Buffer buffer):
+        """Read a type info from buffer."""
+        cdef type_id = buffer.read_varuint32()
+        if IsTypeShareMeta(type_id & 0xFF):
+            return self._read_type_infos[buffer.read_varuint32()]
+        return self.type_resolver.get_typeinfo_by_id(type_id)
     
     cpdef inline reset_read(self):
         """Reset read state."""
@@ -701,26 +716,34 @@ cdef class MetaContext:
         self.reset_write()
         self.reset_read()
     
+    def __str__(self):
+        return self.__repr__()
+    
     def __repr__(self):
         return (f"MetaContext("
-                f"read_defs={len(self._read_type_defs)}, "
-                f"read_infos={len(self._read_type_infos)}, "
-                f"writing_defs={len(self._writing_type_defs)})")
+                f"read_infos={self._read_type_infos}, "
+                f"writing_defs={self._writing_type_defs})")
 
 
 @cython.final
 cdef class SerializationContext:
+    """
+    Context for sharing data across multiple serialization.
+    Note that this context is not thread-safe; use it with a single Fory instance.
+    """
     cdef dict objects
     cdef readonly bint scoped_meta_share_enabled
-    cdef public object meta_context
+    cdef public MetaContext meta_context
+    cdef public object fory
 
-    def __init__(self, scoped_meta_share_enabled: bool = False):
+    def __init__(self, object fory, scoped_meta_share_enabled: bool = False):
         self.objects = dict()
         self.scoped_meta_share_enabled = scoped_meta_share_enabled
         if scoped_meta_share_enabled:
-            self.meta_context = MetaContext()
+            self.meta_context = MetaContext(fory)
         else:
             self.meta_context = None
+        self.fory = fory
 
     def add(self, key, obj):
         self.objects[id(key)] = obj
@@ -801,8 +824,8 @@ cdef class Fory:
         self.ref_resolver = MapRefResolver(ref_tracking)
         self.is_py = self.language == Language.PYTHON
         self.metastring_resolver = MetaStringResolver()
-        self.serialization_context = 
SerializationContext(scoped_meta_share_enabled=compatible)
         self.type_resolver = TypeResolver(self, meta_share=compatible)
+        self.serialization_context = SerializationContext(fory=self, 
scoped_meta_share_enabled=compatible)
         self.type_resolver.initialize()
         self.buffer = Buffer.allocate(32)
         if not require_type_registration:
diff --git a/python/pyfory/_struct.py b/python/pyfory/_struct.py
index ef1cdc68a..032740f7f 100644
--- a/python/pyfory/_struct.py
+++ b/python/pyfory/_struct.py
@@ -228,8 +228,10 @@ class StructTypeIdVisitor(TypeVisitor):
     def __init__(
         self,
         fory,
+        cls,
     ):
         self.fory = fory
+        self.cls = cls
 
     def visit_list(self, field_name, elem_type, types_path=None):
         # Infer type recursively for type such as List[Dict[str, str]]
@@ -243,7 +245,9 @@ class StructTypeIdVisitor(TypeVisitor):
         return TypeId.MAP, key_ids, value_ids
 
     def visit_customized(self, field_name, type_, types_path=None):
-        typeinfo = self.fory.type_resolver.get_typeinfo(type_)
+        typeinfo = self.fory.type_resolver.get_typeinfo(type_, create=False)
+        if typeinfo is None:
+            return [TypeId.UNKNOWN]
         return [typeinfo.type_id]
 
     def visit_other(self, field_name, type_, types_path=None):
diff --git a/python/pyfory/includes/libserialization.pxd 
b/python/pyfory/includes/libserialization.pxd
index d09bebf10..d25b37bf7 100644
--- a/python/pyfory/includes/libserialization.pxd
+++ b/python/pyfory/includes/libserialization.pxd
@@ -65,6 +65,7 @@ cdef extern from "fory/type/type.h" namespace "fory" nogil:
         BOUND = 64
 
     cdef c_bool IsNamespacedType(int32_t type_id)
+    cdef c_bool IsTypeShareMeta(int32_t type_id)
 
 cdef extern from "fory/python/pyfory.h" namespace "fory":
     int Fory_PyBooleanSequenceWriteToBuffer(object collection, CBuffer 
*buffer, Py_ssize_t start_index)
diff --git a/python/pyfory/meta/metastring.py b/python/pyfory/meta/metastring.py
index f0dc4eca2..a3832be9e 100644
--- a/python/pyfory/meta/metastring.py
+++ b/python/pyfory/meta/metastring.py
@@ -268,7 +268,7 @@ class MetaStringEncoder:
         self.special_char1 = special_char1
         self.special_char2 = special_char2
 
-    def encode(self, input_string: str) -> MetaString:
+    def encode(self, input_string: str, encodings: List[Encoding] = None) -> 
MetaString:
         """
         Encodes the input string into a MetaString object.
 
@@ -291,7 +291,7 @@ class MetaStringEncoder:
                 self.special_char2,
             )
 
-        encoding = self.compute_encoding(input_string)
+        encoding = self.compute_encoding(input_string, encodings)
         return self.encode_with_encoding(input_string, encoding)
 
     def encode_with_encoding(self, input_string: str, encoding: Encoding) -> 
MetaString:
diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py
index cdc43e686..00f2710aa 100644
--- a/python/pyfory/meta/typedef.py
+++ b/python/pyfory/meta/typedef.py
@@ -19,7 +19,7 @@ from typing import List
 import typing
 from pyfory.type import TypeId
 from pyfory._util import Buffer
-from pyfory.type import infer_field, is_primitive_type, is_polymorphic_type, 
is_struct_type
+from pyfory.type import infer_field, is_polymorphic_type
 from pyfory.meta.metastring import Encoding
 
 
@@ -32,6 +32,9 @@ HAS_FIELDS_META_FLAG = 0b1 << 12
 META_SIZE_MASKS = 0xFFF
 NUM_HASH_BITS = 50
 
+NAMESPACE_ENCODINGS = [Encoding.UTF_8, Encoding.ALL_TO_LOWER_SPECIAL, 
Encoding.LOWER_UPPER_DIGIT_SPECIAL]
+TYPE_NAME_ENCODINGS = [Encoding.UTF_8, Encoding.LOWER_UPPER_DIGIT_SPECIAL, 
Encoding.FIRST_TO_LOWER_SPECIAL, Encoding.ALL_TO_LOWER_SPECIAL]
+
 # Field name encoding constants
 FIELD_NAME_ENCODING_UTF8 = 0b00
 FIELD_NAME_ENCODING_ALL_TO_LOWER_SPECIAL = 0b01
@@ -219,7 +222,7 @@ def build_field_infos(type_resolver, cls):
     type_hints = typing.get_type_hints(cls)
 
     field_infos = []
-    visitor = StructTypeIdVisitor(type_resolver.fory)
+    visitor = StructTypeIdVisitor(type_resolver.fory, cls)
 
     for field_name in field_names:
         field_type_hint = type_hints.get(field_name, typing.Any)
@@ -241,14 +244,19 @@ def build_field_infos(type_resolver, cls):
 def build_field_type(type_resolver, field_name: str, type_hint, visitor):
     """Build field type from type hint."""
     type_ids = infer_field(field_name, type_hint, visitor)
-    return build_field_type_from_type_ids(type_resolver, field_name, type_ids, 
visitor)
+    try:
+        return build_field_type_from_type_ids(type_resolver, field_name, 
type_ids, visitor)
+    except Exception as e:
+        raise TypeError(f"Error building field type for field: {field_name} 
with type hint: {type_hint} in class: {visitor.cls}") from e
 
 
 def build_field_type_from_type_ids(type_resolver, field_name: str, type_ids, 
visitor):
     tracking_ref = type_resolver.fory.ref_tracking
     type_id = type_ids[0]
-    if type_id is not None and type_id >= 0:
-        type_id = type_id & 0xFF
+    if type_id is None:
+        type_id = TypeId.UNKNOWN
+    assert type_id >= 0, f"Unknown type: {type_id} for field: {field_name}"
+    type_id = type_id & 0xFF
     morphic = not is_polymorphic_type(type_id)
     if type_id in [TypeId.SET, TypeId.LIST]:
         elem_type = build_field_type_from_type_ids(type_resolver, field_name, type_ids[1], visitor)
@@ -260,7 +268,6 @@ def build_field_type_from_type_ids(type_resolver, field_name: str, type_ids, vis
     elif type_id in [TypeId.UNKNOWN, TypeId.EXT, TypeId.STRUCT, TypeId.NAMED_STRUCT, TypeId.COMPATIBLE_STRUCT, TypeId.NAMED_COMPATIBLE_STRUCT]:
         return DynamicFieldType(type_id, False, True, tracking_ref)
     else:
-        assert is_primitive_type(type_id) or type_id in [TypeId.STRING, TypeId.ENUM, TypeId.NAMED_ENUM] or is_struct_type(type_id), (
-            f"Unknown type: {type_id} for field: {field_name}"
-        )
+        if type_id <= 0 or type_id >= TypeId.BOUND:
+            raise TypeError(f"Unknown type: {type_id} for field: {field_name}")
         return FieldType(type_id, morphic, True, tracking_ref)
diff --git a/python/pyfory/meta/typedef_decoder.py b/python/pyfory/meta/typedef_decoder.py
index 2cf5633c7..9446e3c45 100644
--- a/python/pyfory/meta/typedef_decoder.py
+++ b/python/pyfory/meta/typedef_decoder.py
@@ -32,6 +32,8 @@ from pyfory.meta.typedef import (
     HAS_FIELDS_META_FLAG,
     META_SIZE_MASKS,
     FIELD_NAME_ENCODINGS,
+    NAMESPACE_ENCODINGS,
+    TYPE_NAME_ENCODINGS,
 )
 from pyfory.type import TypeId, record_class_factory
 from pyfory.meta.metastring import MetaStringDecoder, Encoding
@@ -110,6 +112,7 @@ def decode_typedef(buffer: Buffer, resolver, header=None) -> TypeDef:
         type_info = resolver.get_typeinfo_by_name(namespace, typename)
         if type_info:
             type_id = type_info.type_id
+            type_cls = type_info.cls
         else:
             # Fallback to COMPATIBLE_STRUCT if not found
             type_id = TypeId.COMPATIBLE_STRUCT
@@ -137,15 +140,15 @@ def decode_typedef(buffer: Buffer, resolver, header=None) -> TypeDef:
 
 def read_namespace(buffer: Buffer) -> str:
     """Read namespace from the buffer."""
-    return read_meta_string(buffer, NAMESPACE_DECODER)
+    return read_meta_string(buffer, NAMESPACE_DECODER, NAMESPACE_ENCODINGS)
 
 
 def read_typename(buffer: Buffer) -> str:
     """Read typename from the buffer."""
-    return read_meta_string(buffer, TYPENAME_DECODER)
+    return read_meta_string(buffer, TYPENAME_DECODER, TYPE_NAME_ENCODINGS)
 
 
-def read_meta_string(buffer: Buffer, decoder: MetaStringDecoder) -> str:
+def read_meta_string(buffer: Buffer, decoder: MetaStringDecoder, encodings: List[Encoding]) -> str:
     """Read a meta string from the buffer."""
     # Read encoding and length combined in first byte
     header = buffer.read_uint8()
@@ -154,7 +157,7 @@ def read_meta_string(buffer: Buffer, decoder: MetaStringDecoder) -> str:
     encoding_value = header & 0b11
     size_value = (header >> 2) & 0b111111
 
-    encoding = Encoding(encoding_value)
+    encoding = encodings[encoding_value]
 
     # Read length - same logic as encoder
     length = 0
diff --git a/python/pyfory/meta/typedef_encoder.py b/python/pyfory/meta/typedef_encoder.py
index 7d8b5fdb3..e3d19d22d 100644
--- a/python/pyfory/meta/typedef_encoder.py
+++ b/python/pyfory/meta/typedef_encoder.py
@@ -29,6 +29,8 @@ from pyfory.meta.typedef import (
     META_SIZE_MASKS,
     NUM_HASH_BITS,
     FIELD_NAME_ENCODINGS,
+    NAMESPACE_ENCODINGS,
+    TYPE_NAME_ENCODINGS,
 )
 from pyfory.meta.metastring import MetaStringEncoder
 
@@ -141,8 +143,8 @@ def write_namespace(buffer: Buffer, namespace: str):
     #    - Header: `6 bits size | 2 bits encoding flags`.
     #      The `6 bits size: 0~63`  will be used to indicate size `0~62`,
     #      the value `63` the size need more byte to read, the encoding will encode `size - 62` as a varint next.
-    meta_string = NAMESPACE_ENCODER.encode(namespace)
-    write_meta_string(buffer, meta_string)
+    meta_string = NAMESPACE_ENCODER.encode(namespace, NAMESPACE_ENCODINGS)
+    write_meta_string(buffer, meta_string, NAMESPACE_ENCODINGS.index(meta_string.encoding))
 
 
 def write_typename(buffer: Buffer, typename: str):
@@ -153,15 +155,14 @@ def write_typename(buffer: Buffer, typename: str):
     #     - header: `6 bits size | 2 bits encoding flags`.
     #       The `6 bits size: 0~63`  will be used to indicate size `1~64`,
     #       the value `63` the size need more byte to read, the encoding will encode `size - 63` as a varint next.
-    meta_string = TYPENAME_ENCODER.encode(typename)
-    write_meta_string(buffer, meta_string)
+    meta_string = TYPENAME_ENCODER.encode(typename, TYPE_NAME_ENCODINGS)
+    write_meta_string(buffer, meta_string, TYPE_NAME_ENCODINGS.index(meta_string.encoding))
 
 
-def write_meta_string(buffer: Buffer, meta_string):
+def write_meta_string(buffer: Buffer, meta_string, encoding_value: int):
     """Write a meta string to the buffer."""
     # Write encoding and length combined in first byte
     length = len(meta_string.encoded_data)
-    encoding_value = meta_string.encoding.value
 
     if length >= FIELD_NAME_SIZE_THRESHOLD:
         # Use threshold value and write additional length
diff --git a/python/pyfory/tests/test_cross_language.py b/python/pyfory/tests/test_cross_language.py
index 7b7e1b488..5740caeb4 100644
--- a/python/pyfory/tests/test_cross_language.py
+++ b/python/pyfory/tests/test_cross_language.py
@@ -456,7 +456,8 @@ def test_serialize_simple_struct_local():
 
 @cross_language_test
 def test_serialize_simple_struct(data_file_path):
-    fory = pyfory.Fory(language=pyfory.Language.XLANG, ref_tracking=True)
+    compatible = "compatible" in data_file_path
+    fory = pyfory.Fory(language=pyfory.Language.XLANG, ref_tracking=True, compatible=compatible)
     fory.register_type(ComplexObject2, namespace="test", typename="ComplexObject2")
     obj = ComplexObject2(f1=True, f2={-1: 2})
     struct_round_back(data_file_path, fory, obj)
@@ -537,7 +538,8 @@ def test_struct_hash(data_file_path):
 
 @cross_language_test
 def test_serialize_complex_struct(data_file_path):
-    fory = pyfory.Fory(language=pyfory.Language.XLANG, ref_tracking=True)
+    compatible = "compatible" in data_file_path
+    fory = pyfory.Fory(language=pyfory.Language.XLANG, ref_tracking=True, compatible=compatible)
     fory.register_type(ComplexObject1, namespace="test", typename="ComplexObject1")
     fory.register_type(ComplexObject2, namespace="test", typename="ComplexObject2")
 
@@ -687,7 +689,7 @@ if __name__ == "__main__":
     try:
         args = sys.argv[1:]
         assert len(args) > 0
-        func = getattr(sys.modules[__name__], args[0])
+        func = getattr(sys.modules[__name__], args[0].replace("_compatible", ""))
         if not func:
             raise Exception("Unknown args {}".format(args))
         func(*args[1:])
diff --git a/python/pyfory/type.py b/python/pyfory/type.py
index 1ff93e509..13be4dadc 100644
--- a/python/pyfory/type.py
+++ b/python/pyfory/type.py
@@ -130,7 +130,6 @@ class TypeId:
     See `org.apache.fory.types.Type`
     """
 
-    UNKNOWN = -1
     # null value
     NA = 0
     # a boolean value (true or false).
@@ -216,6 +215,7 @@ class TypeId:
     ARROW_RECORD_BATCH = 38
     # an arrow [table](https://arrow.apache.org/docs/cpp/tables.html#tables) object.
     ARROW_TABLE = 39
+    UNKNOWN = 63
 
     # BOUND id remains at 64
     BOUND = 64


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to