This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git
The following commit(s) were added to refs/heads/main by this push:
new 2199d4a78 feat(java/python): align java and python compatible mode
serialization (#2602)
2199d4a78 is described below
commit 2199d4a78efcdb215bf7ff76c2cdccea94531d1e
Author: Shawn Yang <[email protected]>
AuthorDate: Thu Sep 11 00:38:40 2025 +0800
feat(java/python): align java and python compatible mode serialization
(#2602)
## Why?
Align Java and Python compatible mode serialization.
## What does this PR do?
<!-- Describe the details of this PR. -->
## Related issues
https://github.com/apache/fory/pull/2509
https://github.com/apache/fory/issues/1938
https://github.com/apache/fory/issues/2160
https://github.com/apache/fory/pull/2278
#2593
## Does this PR introduce any user-facing change?
<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fory/issues/new/choose) describing the
need to do so and update the documentation if necessary.
Delete section if not applicable.
-->
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
<!--
When the PR has an impact on performance (if you don't know whether the
PR will have an impact on performance, you can submit the PR first, and
if it will have an impact on performance, the code reviewer will explain
it), be sure to attach benchmark data here.
Delete section if not applicable.
-->
---
cpp/fory/type/type.h | 12 ++++
.../src/main/java/org/apache/fory/Fory.java | 4 ++
.../org/apache/fory/resolver/ClassResolver.java | 13 ++--
.../org/apache/fory/resolver/XtypeResolver.java | 33 +++++++---
.../java/org/apache/fory/CrossLanguageTest.java | 15 +++--
.../test/java/org/apache/fory/ForyTestBase.java | 5 ++
python/pyfory/_fory.py | 5 +-
python/pyfory/_registry.py | 24 +++----
python/pyfory/_serialization.pyx | 75 ++++++++++++++--------
python/pyfory/_struct.py | 6 +-
python/pyfory/includes/libserialization.pxd | 1 +
python/pyfory/meta/metastring.py | 4 +-
python/pyfory/meta/typedef.py | 23 ++++---
python/pyfory/meta/typedef_decoder.py | 11 ++--
python/pyfory/meta/typedef_encoder.py | 13 ++--
python/pyfory/tests/test_cross_language.py | 8 ++-
python/pyfory/type.py | 2 +-
17 files changed, 165 insertions(+), 89 deletions(-)
diff --git a/cpp/fory/type/type.h b/cpp/fory/type/type.h
index a70987826..a2e8d8078 100644
--- a/cpp/fory/type/type.h
+++ b/cpp/fory/type/type.h
@@ -119,4 +119,16 @@ inline bool IsNamespacedType(int32_t type_id) {
}
}
+inline bool IsTypeShareMeta(int32_t type_id) {
+ switch (static_cast<TypeId>(type_id)) {
+ case TypeId::NAMED_ENUM:
+ case TypeId::NAMED_STRUCT:
+ case TypeId::NAMED_EXT:
+ case TypeId::COMPATIBLE_STRUCT:
+ case TypeId::NAMED_COMPATIBLE_STRUCT:
+ return true;
+ default:
+ return false;
+ }
+}
} // namespace fory
diff --git a/java/fory-core/src/main/java/org/apache/fory/Fory.java
b/java/fory-core/src/main/java/org/apache/fory/Fory.java
index e01a7994d..61073d13b 100644
--- a/java/fory-core/src/main/java/org/apache/fory/Fory.java
+++ b/java/fory-core/src/main/java/org/apache/fory/Fory.java
@@ -1703,6 +1703,10 @@ public final class Fory implements BaseFory {
return crossLanguage;
}
+ public boolean isCompatible() {
+ return config.getCompatibleMode() == CompatibleMode.COMPATIBLE;
+ }
+
public boolean trackingRef() {
return refTracking;
}
diff --git
a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java
b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java
index 471a6069e..818b56c22 100644
--- a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java
+++ b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java
@@ -1725,16 +1725,11 @@ public class ClassResolver implements TypeResolver {
private void writeClassDefs(
MemoryBuffer buffer, ObjectArray<ClassDef> writingClassDefs, int size) {
- int writerIndex = buffer.writerIndex();
for (int i = 0; i < size; i++) {
- byte[] encoded = writingClassDefs.get(i).getEncoded();
- int bytesLen = encoded.length;
- buffer.ensure(writerIndex + bytesLen);
- final byte[] targetArray = buffer.getHeapMemory();
- System.arraycopy(encoded, 0, targetArray, writerIndex, bytesLen);
- writerIndex += bytesLen;
- }
- buffer.writerIndex(writerIndex);
+ buffer.writeBytes(writingClassDefs.get(i).getEncoded());
+ MemoryBuffer memoryBuffer =
MemoryBuffer.fromByteArray(writingClassDefs.get(i).getEncoded());
+ ClassDef.readClassDef(fory, memoryBuffer, memoryBuffer.readInt64());
+ }
}
/**
diff --git
a/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java
b/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java
index ca7f83e2c..3c32204c9 100644
--- a/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java
+++ b/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java
@@ -38,11 +38,13 @@ import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
+import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -210,7 +212,8 @@ public class XtypeResolver implements TypeResolver {
short xtypeId;
if (serializer != null) {
if (isStructType(serializer)) {
- xtypeId = Types.NAMED_STRUCT;
+ xtypeId =
+ (short) (fory.isCompatible() ? Types.NAMED_COMPATIBLE_STRUCT :
Types.NAMED_STRUCT);
} else if (serializer instanceof EnumSerializer) {
xtypeId = Types.NAMED_ENUM;
} else {
@@ -220,7 +223,8 @@ public class XtypeResolver implements TypeResolver {
if (type.isEnum()) {
xtypeId = Types.NAMED_ENUM;
} else {
- xtypeId = Types.NAMED_STRUCT;
+ xtypeId =
+ (short) (fory.isCompatible() ? Types.NAMED_COMPATIBLE_STRUCT :
Types.NAMED_STRUCT);
}
}
register(type, serializer, namespace, typeName, xtypeId);
@@ -530,9 +534,9 @@ public class XtypeResolver implements TypeResolver {
registerDefaultTypes(Types.INT64_ARRAY, long[].class);
registerDefaultTypes(Types.FLOAT32_ARRAY, float[].class);
registerDefaultTypes(Types.FLOAT64_ARRAY, double[].class);
- registerDefaultTypes(Types.LIST, ArrayList.class, Object[].class);
- registerDefaultTypes(Types.SET, HashSet.class, LinkedHashSet.class);
- registerDefaultTypes(Types.MAP, HashMap.class, LinkedHashMap.class);
+ registerDefaultTypes(Types.LIST, ArrayList.class, Object[].class,
List.class, Collection.class);
+ registerDefaultTypes(Types.SET, HashSet.class, LinkedHashSet.class,
Set.class);
+ registerDefaultTypes(Types.MAP, HashMap.class, LinkedHashMap.class,
Map.class);
registerDefaultTypes(Types.LOCAL_DATE, LocalDate.class);
}
@@ -542,8 +546,12 @@ public class XtypeResolver implements TypeResolver {
classInfoMap.put(defaultType, classInfo);
xtypeIdToClassMap.put(xtypeId, classInfo);
for (Class<?> otherType : otherTypes) {
- classInfo = newClassInfo(otherType,
classResolver.getSerializer(otherType), (short) xtypeId);
- classInfoMap.put(otherType, classInfo);
+ Serializer<?> serializer =
+ ReflectionUtils.isAbstract(otherType)
+ ? classInfo.serializer
+ : classResolver.getSerializer(otherType);
+ ClassInfo info = newClassInfo(otherType, serializer, (short) xtypeId);
+ classInfoMap.put(otherType, info);
}
}
@@ -561,7 +569,6 @@ public class XtypeResolver implements TypeResolver {
switch (internalTypeId) {
case Types.NAMED_ENUM:
case Types.NAMED_STRUCT:
- case Types.NAMED_COMPATIBLE_STRUCT:
case Types.NAMED_EXT:
if (shareMeta) {
writeSharedClassMeta(buffer, classInfo);
@@ -572,6 +579,11 @@ public class XtypeResolver implements TypeResolver {
assert classInfo.typeNameBytes != null;
metaStringResolver.writeMetaStringBytes(buffer,
classInfo.typeNameBytes);
break;
+ case Types.NAMED_COMPATIBLE_STRUCT:
+ case Types.COMPATIBLE_STRUCT:
+ assert shareMeta : "Meta share must be enabled for compatible mode";
+ writeSharedClassMeta(buffer, classInfo);
+ break;
default:
break;
}
@@ -634,7 +646,6 @@ public class XtypeResolver implements TypeResolver {
switch (internalTypeId) {
case Types.NAMED_ENUM:
case Types.NAMED_STRUCT:
- case Types.NAMED_COMPATIBLE_STRUCT:
case Types.NAMED_EXT:
if (shareMeta) {
return readSharedClassMeta(buffer);
@@ -642,6 +653,10 @@ public class XtypeResolver implements TypeResolver {
MetaStringBytes packageBytes =
metaStringResolver.readMetaStringBytes(buffer);
MetaStringBytes simpleClassNameBytes =
metaStringResolver.readMetaStringBytes(buffer);
return loadBytesToClassInfo(internalTypeId, packageBytes,
simpleClassNameBytes);
+ case Types.NAMED_COMPATIBLE_STRUCT:
+ case Types.COMPATIBLE_STRUCT:
+ assert shareMeta : "Meta share must be enabled for compatible mode";
+ return readSharedClassMeta(buffer);
case Types.LIST:
return getListClassInfo();
case Types.TIMESTAMP:
diff --git
a/java/fory-core/src/test/java/org/apache/fory/CrossLanguageTest.java
b/java/fory-core/src/test/java/org/apache/fory/CrossLanguageTest.java
index 21d49a386..bc74fef00 100644
--- a/java/fory-core/src/test/java/org/apache/fory/CrossLanguageTest.java
+++ b/java/fory-core/src/test/java/org/apache/fory/CrossLanguageTest.java
@@ -483,11 +483,13 @@ public class CrossLanguageTest extends ForyTestBase {
roundBytes("test_struct_hash", buffer.getBytes(0, 4));
}
- @Test
- public void testSerializeSimpleStruct() throws Exception {
+ @Test(dataProvider = "compatible")
+ public void testSerializeSimpleStruct(boolean compatible) throws Exception {
Fory fory =
Fory.builder()
.withLanguage(Language.XLANG)
+ .withCompatibleMode(
+ compatible ? CompatibleMode.COMPATIBLE :
CompatibleMode.SCHEMA_CONSISTENT)
.withRefTracking(true)
.requireClassRegistration(false)
.build();
@@ -495,7 +497,7 @@ public class CrossLanguageTest extends ForyTestBase {
ComplexObject2 obj2 = new ComplexObject2();
obj2.f1 = true;
obj2.f2 = new HashMap<>(ImmutableMap.of((byte) -1, 2));
- structRoundBack(fory, obj2, "test_serialize_simple_struct");
+ structRoundBack(fory, obj2, "test_serialize_simple_struct" + (compatible ?
"_compatible" : ""));
}
@Test
@@ -530,10 +532,13 @@ public class CrossLanguageTest extends ForyTestBase {
Assert.assertEquals(fory.deserialize(serialized), obj);
}
- public void testSerializeComplexStruct() throws Exception {
+ @Test(dataProvider = "compatible")
+ public void testSerializeComplexStruct(boolean compatible) throws Exception {
Fory fory =
Fory.builder()
.withLanguage(Language.XLANG)
+ .withCompatibleMode(
+ compatible ? CompatibleMode.COMPATIBLE :
CompatibleMode.SCHEMA_CONSISTENT)
.withRefTracking(true)
.requireClassRegistration(false)
.build();
@@ -556,7 +561,7 @@ public class CrossLanguageTest extends ForyTestBase {
obj.f11 = new short[] {(short) 1, (short) 2};
obj.f12 = ImmutableList.of((short) -1, (short) 4);
- structRoundBack(fory, obj, "test_serialize_complex_struct");
+ structRoundBack(fory, obj, "test_serialize_complex_struct" + (compatible ?
"_compatible" : ""));
}
private void structRoundBack(Fory fory, Object obj, String testName) throws
IOException {
diff --git a/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java
b/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java
index 2bb24606e..f2d595688 100644
--- a/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java
+++ b/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java
@@ -131,6 +131,11 @@ public abstract class ForyTestBase {
return new Object[][] {{false}, {true}};
}
+ @DataProvider
+ public static Object[][] compatible() {
+ return new Object[][] {{false}, {true}};
+ }
+
@DataProvider
public static Object[][] compressNumber() {
return new Object[][] {{false}, {true}};
diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py
index b05b37ffc..0d7f6adbe 100644
--- a/python/pyfory/_fory.py
+++ b/python/pyfory/_fory.py
@@ -144,15 +144,14 @@ class Fory:
self.ref_resolver = MapRefResolver()
else:
self.ref_resolver = NoRefResolver()
- from pyfory._serialization import MetaStringResolver
+ from pyfory._serialization import MetaStringResolver,
SerializationContext
from pyfory._registry import TypeResolver
self.metastring_resolver = MetaStringResolver()
self.type_resolver = TypeResolver(self, meta_share=compatible)
+ self.serialization_context = SerializationContext(fory=self,
scoped_meta_share_enabled=compatible)
self.type_resolver.initialize()
- from pyfory._serialization import SerializationContext
- self.serialization_context =
SerializationContext(scoped_meta_share_enabled=compatible)
self.buffer = Buffer.allocate(32)
if not require_type_registration:
warnings.warn(
diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py
index 8b421f036..6f4ae76e9 100644
--- a/python/pyfory/_registry.py
+++ b/python/pyfory/_registry.py
@@ -169,6 +169,7 @@ class TypeResolver:
"_type_id_to_typeinfo",
"_meta_shared_typeinfo",
"meta_share",
+ "serialization_context",
)
def __init__(self, fory, meta_share=False):
@@ -202,6 +203,7 @@ class TypeResolver:
self._initialize_xlang()
if self.fory.language == Language.PYTHON:
self._initialize_py()
+ self.serialization_context = self.fory.serialization_context
def _initialize_py(self):
register = functools.partial(self._register_type, internal=True)
@@ -369,7 +371,10 @@ class TypeResolver:
type_id = TypeId.NAMED_EXT if type_id is None else ((type_id
<< 8) + TypeId.EXT)
else:
serializer = None
- type_id = TypeId.NAMED_STRUCT if type_id is None else
((type_id << 8) + TypeId.STRUCT)
+ if self.meta_share:
+ type_id = TypeId.NAMED_COMPATIBLE_STRUCT if type_id is
None else ((type_id << 8) + TypeId.COMPATIBLE_STRUCT)
+ else:
+ type_id = TypeId.NAMED_STRUCT if type_id is None else
((type_id << 8) + TypeId.STRUCT)
elif not internal:
type_id = TypeId.NAMED_EXT if type_id is None else ((type_id << 8)
+ TypeId.EXT)
return self.__register_type(
@@ -607,14 +612,12 @@ class TypeResolver:
def write_typeinfo(self, buffer, typeinfo):
if typeinfo.dynamic_type:
return
- type_id = typeinfo.type_id
- internal_type_id = type_id & 0xFF
-
# Check if meta share is enabled first
if self.meta_share:
self.write_shared_type_meta(buffer, typeinfo)
return
-
+ type_id = typeinfo.type_id
+ internal_type_id = type_id & 0xFF
buffer.write_varuint32(type_id)
if TypeId.is_namespaced_type(internal_type_id):
self.metastring_resolver.write_meta_string_bytes(buffer,
typeinfo.namespace_bytes)
@@ -659,17 +662,14 @@ class TypeResolver:
def write_shared_type_meta(self, buffer, typeinfo):
"""Write shared type meta information."""
- assert typeinfo.type_def is not None, "Type info must be set when meta
share is enabled"
meta_context = self.fory.serialization_context.meta_context
- meta_context.write_typeinfo(buffer, typeinfo)
+ meta_context.write_shared_typeinfo(buffer, typeinfo)
def read_shared_type_meta(self, buffer):
"""Read shared type meta information."""
- meta_context = self.fory.serialization_context.meta_context
+ meta_context = self.serialization_context.meta_context
assert meta_context is not None, "Meta context must be set when meta
share is enabled"
- type_id = buffer.read_varuint32()
- typeinfo = meta_context.get_read_type_info(type_id)
- assert typeinfo is not None, f"Type info not found for ID {type_id}"
+ typeinfo = meta_context.read_shared_typeinfo(buffer)
return typeinfo
def write_type_defs(self, buffer):
@@ -704,7 +704,7 @@ class TypeResolver:
type_info = self._build_type_info_from_typedef(type_def)
# Cache the tuple for future use
self._meta_shared_typeinfo[header] = type_info
- meta_context.add_read_type_info(type_info)
+ meta_context.add_read_typeinfo(type_info)
def _build_type_info_from_typedef(self, type_def):
"""Build TypeInfo from TypeDef using TypeDef's create_serializer
method."""
diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx
index 458eb640f..ad9ec5033 100644
--- a/python/pyfory/_serialization.pyx
+++ b/python/pyfory/_serialization.pyx
@@ -37,7 +37,7 @@ from pyfory.meta.metastring import Encoding
from pyfory.type import is_primitive_type
from pyfory.util import is_little_endian
from pyfory.includes.libserialization cimport \
- (TypeId, IsNamespacedType, Fory_PyBooleanSequenceWriteToBuffer,
Fory_PyFloatSequenceWriteToBuffer)
+ (TypeId, IsNamespacedType, IsTypeShareMeta,
Fory_PyBooleanSequenceWriteToBuffer, Fory_PyFloatSequenceWriteToBuffer)
from libc.stdint cimport int8_t, int16_t, int32_t, int64_t, uint64_t
from libc.stdint cimport *
@@ -447,7 +447,7 @@ cdef class TypeResolver:
flat_hash_map[pair[int64_t, int64_t], PyObject *]
_c_meta_hash_to_typeinfo
MetaStringResolver meta_string_resolver
c_bool meta_share
- SerializationContext serialization_context
+ readonly SerializationContext serialization_context
def __init__(self, fory, meta_share=False):
self.fory = fory
@@ -455,12 +455,12 @@ cdef class TypeResolver:
self.meta_share = meta_share
from pyfory._registry import TypeResolver
self._resolver = TypeResolver(fory, meta_share=meta_share)
- self.serialization_context = fory.serialization_context
def initialize(self):
self._resolver.initialize()
for typeinfo in self._resolver._types_info.values():
self._populate_typeinfo(typeinfo)
+ self.serialization_context = self.fory.serialization_context
def register(
self,
@@ -583,7 +583,7 @@ cdef class TypeResolver:
int32_t type_id = buffer.read_varuint32()
if type_id < 0:
type_id = -type_id
- if type_id > self._c_registered_id_to_type_info.size():
+ if type_id >= self._c_registered_id_to_type_info.size():
raise ValueError(f"Unexpected type_id {type_id}")
cdef:
int32_t internal_type_id = type_id & 0xFF
@@ -598,8 +598,14 @@ cdef class TypeResolver:
typeinfo = <TypeInfo> typeinfo_ptr
return typeinfo
- def get_typeinfo_by_id(self, type_id):
- return self._resolver.get_typeinfo_by_id(type_id=type_id)
+ cpdef inline TypeInfo get_typeinfo_by_id(self, int32_t type_id):
+ if type_id >= self._c_registered_id_to_type_info.size() or type_id < 0
or IsNamespacedType(type_id & 0xFF):
+ raise ValueError(f"Unexpected type_id {type_id}")
+ typeinfo_ptr = self._c_registered_id_to_type_info[type_id]
+ if typeinfo_ptr == NULL:
+ raise ValueError(f"Unexpected type_id {type_id}")
+ typeinfo = <TypeInfo> typeinfo_ptr
+ return typeinfo
def get_typeinfo_by_name(self, namespace, typename):
return self._resolver.get_typeinfo_by_name(namespace=namespace,
typename=typename)
@@ -610,16 +616,12 @@ cdef class TypeResolver:
cpdef write_shared_type_meta(self, Buffer buffer, TypeInfo typeinfo):
"""Write shared type meta information."""
meta_context = self.serialization_context.meta_context
- assert meta_context is not None, "Meta context must be set when meta
share is enabled"
- meta_context.write_typeinfo(buffer, typeinfo)
+ meta_context.write_shared_typeinfo(buffer, typeinfo)
cpdef TypeInfo read_shared_type_meta(self, Buffer buffer):
"""Read shared type meta information."""
meta_context = self.serialization_context.meta_context
- assert meta_context is not None, "Meta context must be set when meta
share is enabled"
- type_id = buffer.read_varuint32()
- typeinfo = meta_context.get_read_type_info(type_id)
- assert typeinfo is not None, f"Type info not found for ID {type_id}"
+ typeinfo = meta_context.read_shared_typeinfo(buffer)
return typeinfo
cpdef write_type_defs(self, Buffer buffer):
@@ -646,7 +648,7 @@ cdef class MetaContext:
Context for sharing type meta across multiple serialization. Type name,
field name and field
type will be shared between different serialization.
- This is the Cython-optimized equivalent of Java's MetaContext class.
+ Note that this context is not thread-safe, you should use it with one Fory
instance.
"""
cdef:
# Types which have sent definitions to peer
@@ -656,14 +658,24 @@ cdef class MetaContext:
# Counter for assigning new IDs
list _writing_type_defs
list _read_type_infos
+ object fory
+ object type_resolver
- def __cinit__(self):
+ def __cinit__(self, object fory):
+ self.fory = fory
+ self.type_resolver = fory.type_resolver
self._writing_type_defs = []
self._read_type_infos = []
- cpdef inline int32_t write_typeinfo(self, Buffer buffer, typeinfo):
+ cpdef inline void write_shared_typeinfo(self, Buffer buffer, typeinfo):
"""Add a type definition to the writing queue."""
type_cls = typeinfo.cls
+ cdef int32_t type_id = typeinfo.type_id
+ cdef int32_t internal_type_id = type_id & 0xFF
+ buffer.write_varuint32(type_id)
+ if not IsTypeShareMeta(internal_type_id):
+ return
+
cdef uint64_t type_addr = <uint64_t> <PyObject *> type_cls
cdef flat_hash_map[uint64_t, int32_t].iterator it =
self._c_type_map.find(type_addr)
if it != self._c_type_map.end():
@@ -684,13 +696,16 @@ cdef class MetaContext:
self._writing_type_defs.clear()
self._c_type_map.clear()
- cpdef inline add_read_type_info(self, type_info):
+ cpdef inline add_read_typeinfo(self, type_info):
"""Add a type info read from peer."""
self._read_type_infos.append(type_info)
- cpdef inline get_read_type_info(self, int32_t index):
- """Get a type info by index."""
- return self._read_type_infos[index]
+ cpdef inline read_shared_typeinfo(self, Buffer buffer):
+ """Read a type info from buffer."""
+ cdef type_id = buffer.read_varuint32()
+ if IsTypeShareMeta(type_id & 0xFF):
+ return self._read_type_infos[buffer.read_varuint32()]
+ return self.type_resolver.get_typeinfo_by_id(type_id)
cpdef inline reset_read(self):
"""Reset read state."""
@@ -701,26 +716,34 @@ cdef class MetaContext:
self.reset_write()
self.reset_read()
+ def __str__(self):
+ return self.__repr__()
+
def __repr__(self):
return (f"MetaContext("
- f"read_defs={len(self._read_type_defs)}, "
- f"read_infos={len(self._read_type_infos)}, "
- f"writing_defs={len(self._writing_type_defs)})")
+ f"read_infos={self._read_type_infos}, "
+ f"writing_defs={self._writing_type_defs})")
@cython.final
cdef class SerializationContext:
+ """
+ Context for sharing data across multiple serialization.
+ Note that this context is not thread-safe, you should use it with one Fory
instance.
+ """
cdef dict objects
cdef readonly bint scoped_meta_share_enabled
- cdef public object meta_context
+ cdef public MetaContext meta_context
+ cdef public object fory
- def __init__(self, scoped_meta_share_enabled: bool = False):
+ def __init__(self, object fory, scoped_meta_share_enabled: bool = False):
self.objects = dict()
self.scoped_meta_share_enabled = scoped_meta_share_enabled
if scoped_meta_share_enabled:
- self.meta_context = MetaContext()
+ self.meta_context = MetaContext(fory)
else:
self.meta_context = None
+ self.fory = fory
def add(self, key, obj):
self.objects[id(key)] = obj
@@ -801,8 +824,8 @@ cdef class Fory:
self.ref_resolver = MapRefResolver(ref_tracking)
self.is_py = self.language == Language.PYTHON
self.metastring_resolver = MetaStringResolver()
- self.serialization_context =
SerializationContext(scoped_meta_share_enabled=compatible)
self.type_resolver = TypeResolver(self, meta_share=compatible)
+ self.serialization_context = SerializationContext(fory=self,
scoped_meta_share_enabled=compatible)
self.type_resolver.initialize()
self.buffer = Buffer.allocate(32)
if not require_type_registration:
diff --git a/python/pyfory/_struct.py b/python/pyfory/_struct.py
index ef1cdc68a..032740f7f 100644
--- a/python/pyfory/_struct.py
+++ b/python/pyfory/_struct.py
@@ -228,8 +228,10 @@ class StructTypeIdVisitor(TypeVisitor):
def __init__(
self,
fory,
+ cls,
):
self.fory = fory
+ self.cls = cls
def visit_list(self, field_name, elem_type, types_path=None):
# Infer type recursively for type such as List[Dict[str, str]]
@@ -243,7 +245,9 @@ class StructTypeIdVisitor(TypeVisitor):
return TypeId.MAP, key_ids, value_ids
def visit_customized(self, field_name, type_, types_path=None):
- typeinfo = self.fory.type_resolver.get_typeinfo(type_)
+ typeinfo = self.fory.type_resolver.get_typeinfo(type_, create=False)
+ if typeinfo is None:
+ return [TypeId.UNKNOWN]
return [typeinfo.type_id]
def visit_other(self, field_name, type_, types_path=None):
diff --git a/python/pyfory/includes/libserialization.pxd
b/python/pyfory/includes/libserialization.pxd
index d09bebf10..d25b37bf7 100644
--- a/python/pyfory/includes/libserialization.pxd
+++ b/python/pyfory/includes/libserialization.pxd
@@ -65,6 +65,7 @@ cdef extern from "fory/type/type.h" namespace "fory" nogil:
BOUND = 64
cdef c_bool IsNamespacedType(int32_t type_id)
+ cdef c_bool IsTypeShareMeta(int32_t type_id)
cdef extern from "fory/python/pyfory.h" namespace "fory":
int Fory_PyBooleanSequenceWriteToBuffer(object collection, CBuffer
*buffer, Py_ssize_t start_index)
diff --git a/python/pyfory/meta/metastring.py b/python/pyfory/meta/metastring.py
index f0dc4eca2..a3832be9e 100644
--- a/python/pyfory/meta/metastring.py
+++ b/python/pyfory/meta/metastring.py
@@ -268,7 +268,7 @@ class MetaStringEncoder:
self.special_char1 = special_char1
self.special_char2 = special_char2
- def encode(self, input_string: str) -> MetaString:
+ def encode(self, input_string: str, encodings: List[Encoding] = None) ->
MetaString:
"""
Encodes the input string into a MetaString object.
@@ -291,7 +291,7 @@ class MetaStringEncoder:
self.special_char2,
)
- encoding = self.compute_encoding(input_string)
+ encoding = self.compute_encoding(input_string, encodings)
return self.encode_with_encoding(input_string, encoding)
def encode_with_encoding(self, input_string: str, encoding: Encoding) ->
MetaString:
diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py
index cdc43e686..00f2710aa 100644
--- a/python/pyfory/meta/typedef.py
+++ b/python/pyfory/meta/typedef.py
@@ -19,7 +19,7 @@ from typing import List
import typing
from pyfory.type import TypeId
from pyfory._util import Buffer
-from pyfory.type import infer_field, is_primitive_type, is_polymorphic_type,
is_struct_type
+from pyfory.type import infer_field, is_polymorphic_type
from pyfory.meta.metastring import Encoding
@@ -32,6 +32,9 @@ HAS_FIELDS_META_FLAG = 0b1 << 12
META_SIZE_MASKS = 0xFFF
NUM_HASH_BITS = 50
+NAMESPACE_ENCODINGS = [Encoding.UTF_8, Encoding.ALL_TO_LOWER_SPECIAL,
Encoding.LOWER_UPPER_DIGIT_SPECIAL]
+TYPE_NAME_ENCODINGS = [Encoding.UTF_8, Encoding.LOWER_UPPER_DIGIT_SPECIAL,
Encoding.FIRST_TO_LOWER_SPECIAL, Encoding.ALL_TO_LOWER_SPECIAL]
+
# Field name encoding constants
FIELD_NAME_ENCODING_UTF8 = 0b00
FIELD_NAME_ENCODING_ALL_TO_LOWER_SPECIAL = 0b01
@@ -219,7 +222,7 @@ def build_field_infos(type_resolver, cls):
type_hints = typing.get_type_hints(cls)
field_infos = []
- visitor = StructTypeIdVisitor(type_resolver.fory)
+ visitor = StructTypeIdVisitor(type_resolver.fory, cls)
for field_name in field_names:
field_type_hint = type_hints.get(field_name, typing.Any)
@@ -241,14 +244,19 @@ def build_field_infos(type_resolver, cls):
def build_field_type(type_resolver, field_name: str, type_hint, visitor):
"""Build field type from type hint."""
type_ids = infer_field(field_name, type_hint, visitor)
- return build_field_type_from_type_ids(type_resolver, field_name, type_ids,
visitor)
+ try:
+ return build_field_type_from_type_ids(type_resolver, field_name,
type_ids, visitor)
+ except Exception as e:
+ raise TypeError(f"Error building field type for field: {field_name}
with type hint: {type_hint} in class: {visitor.cls}") from e
def build_field_type_from_type_ids(type_resolver, field_name: str, type_ids,
visitor):
tracking_ref = type_resolver.fory.ref_tracking
type_id = type_ids[0]
- if type_id is not None and type_id >= 0:
- type_id = type_id & 0xFF
+ if type_id is None:
+ type_id = TypeId.UNKNOWN
+ assert type_id >= 0, f"Unknown type: {type_id} for field: {field_name}"
+ type_id = type_id & 0xFF
morphic = not is_polymorphic_type(type_id)
if type_id in [TypeId.SET, TypeId.LIST]:
elem_type = build_field_type_from_type_ids(type_resolver, field_name,
type_ids[1], visitor)
@@ -260,7 +268,6 @@ def build_field_type_from_type_ids(type_resolver,
field_name: str, type_ids, vis
elif type_id in [TypeId.UNKNOWN, TypeId.EXT, TypeId.STRUCT,
TypeId.NAMED_STRUCT, TypeId.COMPATIBLE_STRUCT, TypeId.NAMED_COMPATIBLE_STRUCT]:
return DynamicFieldType(type_id, False, True, tracking_ref)
else:
- assert is_primitive_type(type_id) or type_id in [TypeId.STRING,
TypeId.ENUM, TypeId.NAMED_ENUM] or is_struct_type(type_id), (
- f"Unknown type: {type_id} for field: {field_name}"
- )
+ if type_id <= 0 or type_id >= TypeId.BOUND:
+ raise TypeError(f"Unknown type: {type_id} for field: {field_name}")
return FieldType(type_id, morphic, True, tracking_ref)
diff --git a/python/pyfory/meta/typedef_decoder.py
b/python/pyfory/meta/typedef_decoder.py
index 2cf5633c7..9446e3c45 100644
--- a/python/pyfory/meta/typedef_decoder.py
+++ b/python/pyfory/meta/typedef_decoder.py
@@ -32,6 +32,8 @@ from pyfory.meta.typedef import (
HAS_FIELDS_META_FLAG,
META_SIZE_MASKS,
FIELD_NAME_ENCODINGS,
+ NAMESPACE_ENCODINGS,
+ TYPE_NAME_ENCODINGS,
)
from pyfory.type import TypeId, record_class_factory
from pyfory.meta.metastring import MetaStringDecoder, Encoding
@@ -110,6 +112,7 @@ def decode_typedef(buffer: Buffer, resolver, header=None)
-> TypeDef:
type_info = resolver.get_typeinfo_by_name(namespace, typename)
if type_info:
type_id = type_info.type_id
+ type_cls = type_info.cls
else:
# Fallback to COMPATIBLE_STRUCT if not found
type_id = TypeId.COMPATIBLE_STRUCT
@@ -137,15 +140,15 @@ def decode_typedef(buffer: Buffer, resolver, header=None)
-> TypeDef:
def read_namespace(buffer: Buffer) -> str:
"""Read namespace from the buffer."""
- return read_meta_string(buffer, NAMESPACE_DECODER)
+ return read_meta_string(buffer, NAMESPACE_DECODER, NAMESPACE_ENCODINGS)
def read_typename(buffer: Buffer) -> str:
"""Read typename from the buffer."""
- return read_meta_string(buffer, TYPENAME_DECODER)
+ return read_meta_string(buffer, TYPENAME_DECODER, TYPE_NAME_ENCODINGS)
-def read_meta_string(buffer: Buffer, decoder: MetaStringDecoder) -> str:
+def read_meta_string(buffer: Buffer, decoder: MetaStringDecoder, encodings:
List[Encoding]) -> str:
"""Read a meta string from the buffer."""
# Read encoding and length combined in first byte
header = buffer.read_uint8()
@@ -154,7 +157,7 @@ def read_meta_string(buffer: Buffer, decoder:
MetaStringDecoder) -> str:
encoding_value = header & 0b11
size_value = (header >> 2) & 0b111111
- encoding = Encoding(encoding_value)
+ encoding = encodings[encoding_value]
# Read length - same logic as encoder
length = 0
diff --git a/python/pyfory/meta/typedef_encoder.py
b/python/pyfory/meta/typedef_encoder.py
index 7d8b5fdb3..e3d19d22d 100644
--- a/python/pyfory/meta/typedef_encoder.py
+++ b/python/pyfory/meta/typedef_encoder.py
@@ -29,6 +29,8 @@ from pyfory.meta.typedef import (
META_SIZE_MASKS,
NUM_HASH_BITS,
FIELD_NAME_ENCODINGS,
+ NAMESPACE_ENCODINGS,
+ TYPE_NAME_ENCODINGS,
)
from pyfory.meta.metastring import MetaStringEncoder
@@ -141,8 +143,8 @@ def write_namespace(buffer: Buffer, namespace: str):
# - Header: `6 bits size | 2 bits encoding flags`.
# The `6 bits size: 0~63` will be used to indicate size `0~62`,
# the value `63` the size need more byte to read, the encoding will
encode `size - 62` as a varint next.
- meta_string = NAMESPACE_ENCODER.encode(namespace)
- write_meta_string(buffer, meta_string)
+ meta_string = NAMESPACE_ENCODER.encode(namespace, NAMESPACE_ENCODINGS)
+ write_meta_string(buffer, meta_string,
NAMESPACE_ENCODINGS.index(meta_string.encoding))
def write_typename(buffer: Buffer, typename: str):
@@ -153,15 +155,14 @@ def write_typename(buffer: Buffer, typename: str):
# - header: `6 bits size | 2 bits encoding flags`.
# The `6 bits size: 0~63` will be used to indicate size `1~64`,
# the value `63` the size need more byte to read, the encoding will
encode `size - 63` as a varint next.
- meta_string = TYPENAME_ENCODER.encode(typename)
- write_meta_string(buffer, meta_string)
+ meta_string = TYPENAME_ENCODER.encode(typename, TYPE_NAME_ENCODINGS)
+ write_meta_string(buffer, meta_string,
TYPE_NAME_ENCODINGS.index(meta_string.encoding))
-def write_meta_string(buffer: Buffer, meta_string):
+def write_meta_string(buffer: Buffer, meta_string, encoding_value: int):
"""Write a meta string to the buffer."""
# Write encoding and length combined in first byte
length = len(meta_string.encoded_data)
- encoding_value = meta_string.encoding.value
if length >= FIELD_NAME_SIZE_THRESHOLD:
# Use threshold value and write additional length
diff --git a/python/pyfory/tests/test_cross_language.py
b/python/pyfory/tests/test_cross_language.py
index 7b7e1b488..5740caeb4 100644
--- a/python/pyfory/tests/test_cross_language.py
+++ b/python/pyfory/tests/test_cross_language.py
@@ -456,7 +456,8 @@ def test_serialize_simple_struct_local():
@cross_language_test
def test_serialize_simple_struct(data_file_path):
- fory = pyfory.Fory(language=pyfory.Language.XLANG, ref_tracking=True)
+ compatible = "compatible" in data_file_path
+ fory = pyfory.Fory(language=pyfory.Language.XLANG, ref_tracking=True,
compatible=compatible)
fory.register_type(ComplexObject2, namespace="test",
typename="ComplexObject2")
obj = ComplexObject2(f1=True, f2={-1: 2})
struct_round_back(data_file_path, fory, obj)
@@ -537,7 +538,8 @@ def test_struct_hash(data_file_path):
@cross_language_test
def test_serialize_complex_struct(data_file_path):
- fory = pyfory.Fory(language=pyfory.Language.XLANG, ref_tracking=True)
+ compatible = "compatible" in data_file_path
+ fory = pyfory.Fory(language=pyfory.Language.XLANG, ref_tracking=True,
compatible=compatible)
fory.register_type(ComplexObject1, namespace="test",
typename="ComplexObject1")
fory.register_type(ComplexObject2, namespace="test",
typename="ComplexObject2")
@@ -687,7 +689,7 @@ if __name__ == "__main__":
try:
args = sys.argv[1:]
assert len(args) > 0
- func = getattr(sys.modules[__name__], args[0])
+ func = getattr(sys.modules[__name__], args[0].replace("_compatible",
""))
if not func:
raise Exception("Unknown args {}".format(args))
func(*args[1:])
diff --git a/python/pyfory/type.py b/python/pyfory/type.py
index 1ff93e509..13be4dadc 100644
--- a/python/pyfory/type.py
+++ b/python/pyfory/type.py
@@ -130,7 +130,6 @@ class TypeId:
See `org.apache.fory.types.Type`
"""
- UNKNOWN = -1
# null value
NA = 0
# a boolean value (true or false).
@@ -216,6 +215,7 @@ class TypeId:
ARROW_RECORD_BATCH = 38
# an arrow [table](https://arrow.apache.org/docs/cpp/tables.html#tables)
object.
ARROW_TABLE = 39
+ UNKNOWN = 63
# BOUND id remains at 64
BOUND = 64
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]