This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git
The following commit(s) were added to refs/heads/main by this push:
new d2a155d5e feat(python): add java python xlang tests and align protocol (#3077)
d2a155d5e is described below
commit d2a155d5e0e77870a06907851d07f2dd513d824b
Author: Shawn Yang <[email protected]>
AuthorDate: Wed Dec 24 02:48:05 2025 +0800
feat(python): add java python xlang tests and align protocol (#3077)
## Why?
## What does this PR do?
## Related issues
## Does this PR introduce any user-facing change?
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
---
AGENTS.md | 12 +
.../test/java/org/apache/fory/PythonXlangTest.java | 194 +++++++++
python/pyfory/_fory.py | 9 +
python/pyfory/_registry.py | 64 ++-
python/pyfory/_serializer.py | 78 +++-
python/pyfory/_struct.py | 6 +-
python/pyfory/meta/typedef.py | 56 ++-
python/pyfory/meta/typedef_decoder.py | 38 +-
python/pyfory/meta/typedef_encoder.py | 2 +-
python/pyfory/serialization.pyx | 251 ++++++++---
python/pyfory/serializer.py | 11 +-
python/pyfory/tests/xlang_test_main.py | 469 +++++++++++++++++++++
python/pyfory/type.py | 20 +-
13 files changed, 1109 insertions(+), 101 deletions(-)
diff --git a/AGENTS.md b/AGENTS.md
index 9e2861065..ba5ff2756 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -117,6 +117,18 @@ ENABLE_FORY_CYTHON_SERIALIZATION=0 pytest -v -s .
ENABLE_FORY_CYTHON_SERIALIZATION=1 pytest -v -s .
```
+Run Python xlang tests:
+
+```bash
+cd java
+mvn -T16 install -DskipTests
+cd fory-core
+# disable fory cython for faster debugging
+FORY_PYTHON_JAVA_CI=1 ENABLE_FORY_CYTHON_SERIALIZATION=0 mvn -T16 test -Dtest=org.apache.fory.PythonXlangTest
+# enable fory cython
+FORY_PYTHON_JAVA_CI=1 ENABLE_FORY_CYTHON_SERIALIZATION=1 mvn -T16 test -Dtest=org.apache.fory.PythonXlangTest
+```
+
### Golang Development
- All commands must be executed within the `go/fory` directory.
diff --git a/java/fory-core/src/test/java/org/apache/fory/PythonXlangTest.java b/java/fory-core/src/test/java/org/apache/fory/PythonXlangTest.java
new file mode 100644
index 000000000..748ecf3e2
--- /dev/null
+++ b/java/fory-core/src/test/java/org/apache/fory/PythonXlangTest.java
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.fory;
+
+import com.google.common.collect.ImmutableMap;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.fory.test.TestUtils;
+import org.testng.SkipException;
+import org.testng.annotations.Test;
+
+/** Executes cross-language tests against the Python implementation. */
+@Test
+public class PythonXlangTest extends XlangTestBase {
+ private static final String PYTHON_EXECUTABLE = "python";
+ private static final String PYTHON_MODULE = "pyfory.tests.xlang_test_main";
+
+ private static final List<String> PYTHON_BASE_COMMAND =
+ Arrays.asList(PYTHON_EXECUTABLE, "-m", PYTHON_MODULE, "<PYTHON_TESTCASE>");
+
+ private static final int PYTHON_TESTCASE_INDEX = 3;
+
+ @Override
+ protected void ensurePeerReady() {
+ String enabled = System.getenv("FORY_PYTHON_JAVA_CI");
+ if (!"1".equals(enabled)) {
+ throw new SkipException("Skipping PythonXlangTest: FORY_PYTHON_JAVA_CI not set to 1");
+ }
+ TestUtils.verifyPyforyInstalled();
+ }
+
+ @Override
+ protected CommandContext buildCommandContext(String caseName, Path dataFile) {
+ List<String> command = new ArrayList<>(PYTHON_BASE_COMMAND);
+ command.set(PYTHON_TESTCASE_INDEX, caseName);
+ ImmutableMap<String, String> env = envBuilder(dataFile).build();
+ return new CommandContext(command, env, new File("../../python"));
+ }
+
+ // ============================================================================
+ // Skip tests that are similar to CrossLanguageTest.java
+ // These tests are already covered in CrossLanguageTest which tests Java-Python
+ // ============================================================================
+
+ @Override
+ @Test
+ public void testBuffer() throws IOException {
+ throw new SkipException("Skipping: similar test already covered in CrossLanguageTest");
+ }
+
+ @Override
+ @Test
+ public void testMurmurHash3() throws IOException {
+ throw new SkipException("Skipping: similar test already covered in CrossLanguageTest");
+ }
+
+ @Override
+ @Test
+ public void testCrossLanguageSerializer() throws Exception {
+ throw new SkipException("Skipping: similar test already covered in CrossLanguageTest");
+ }
+
+ @Override
+ @Test
+ public void testList() throws IOException {
+ throw new SkipException("Skipping: similar test already covered in CrossLanguageTest");
+ }
+
+ @Override
+ @Test
+ public void testMap() throws IOException {
+ throw new SkipException("Skipping: similar test already covered in CrossLanguageTest");
+ }
+
+ @Override
+ @Test
+ public void testItem() throws IOException {
+ throw new SkipException("Skipping: simple struct tests covered in CrossLanguageTest");
+ }
+
+ @Override
+ @Test
+ public void testColor() throws IOException {
+ throw new SkipException("Skipping: enum tests covered in CrossLanguageTest");
+ }
+
+ @Override
+ @Test
+ public void testStructWithList() throws IOException {
+ throw new SkipException("Skipping: struct with list covered in CrossLanguageTest");
+ }
+
+ @Override
+ @Test
+ public void testStructWithMap() throws IOException {
+ throw new SkipException("Skipping: struct with map covered in CrossLanguageTest");
+ }
+
+ @Override
+ @Test
+ public void testBufferVar() throws IOException {
+ throw new SkipException("Skipping: similar test already covered in CrossLanguageTest");
+ }
+
+ @Override
+ @Test
+ public void testInteger() throws IOException {
+ throw new SkipException("Skipping: similar test already covered in CrossLanguageTest");
+ }
+
+ // ============================================================================
+ // Explicitly re-declare inherited test methods to enable running individual
+ // tests via Maven: mvn test -Dtest=org.apache.fory.PythonXlangTest#testXxx
+ //
+ // Maven Surefire cannot find inherited test methods when using the #methodName
+ // syntax for test selection. By overriding and forwarding to the parent class,
+ // we make each test directly addressable while preserving the shared test logic.
+ // ============================================================================
+
+ @Override
+ @Test
+ public void testStringSerializer() throws Exception {
+ super.testStringSerializer();
+ }
+
+ @Override
+ @Test
+ public void testSimpleStruct() throws IOException {
+ super.testSimpleStruct();
+ }
+
+ @Override
+ @Test
+ public void testSimpleNamedStruct() throws IOException {
+ super.testSimpleNamedStruct();
+ }
+
+ @Override
+ @Test
+ public void testSkipIdCustom() throws IOException {
+ super.testSkipIdCustom();
+ }
+
+ @Override
+ @Test
+ public void testSkipNameCustom() throws IOException {
+ super.testSkipNameCustom();
+ }
+
+ @Override
+ @Test
+ public void testConsistentNamed() throws IOException {
+ super.testConsistentNamed();
+ }
+
+ @Override
+ @Test
+ public void testStructVersionCheck() throws IOException {
+ super.testStructVersionCheck();
+ }
+
+ @Override
+ @Test
+ public void testPolymorphicList() throws IOException {
+ super.testPolymorphicList();
+ }
+
+ @Override
+ @Test
+ public void testPolymorphicMap() throws IOException {
+ super.testPolymorphicMap();
+ }
+}
diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py
index 7d5d406d1..b6d05e37a 100644
--- a/python/pyfory/_fory.py
+++ b/python/pyfory/_fory.py
@@ -634,6 +634,7 @@ class Fory:
assert buffers is None, "buffers should be null when the serialized stream is produced with buffer_callback null."
# Read type definitions at the start, similar to Java implementation
+ end_reader_index = None
if self.serialization_context.scoped_meta_share_enabled:
relative_type_defs_offset = buffer.read_int32()
if relative_type_defs_offset != -1:
@@ -643,6 +644,8 @@ class Fory:
buffer.reader_index = current_reader_index + relative_type_defs_offset
# Read type definitions
self.type_resolver.read_type_defs(buffer)
+ # Save the end position (after type defs) - this is the true end of serialized data
+ end_reader_index = buffer.reader_index
# Jump back to continue with object deserialization
buffer.reader_index = current_reader_index
@@ -650,6 +653,12 @@ class Fory:
obj = self.xread_ref(buffer)
else:
obj = self.read_ref(buffer)
+
+ # After reading the object, position buffer at the end of serialized data
+ # (which is after the type definitions, not after the object data)
+ if end_reader_index is not None:
+ buffer.reader_index = end_reader_index
+
return obj
def read_ref(self, buffer):
diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py
index 5efa3a58b..a22abb189 100644
--- a/python/pyfory/_registry.py
+++ b/python/pyfory/_registry.py
@@ -53,6 +53,7 @@ from pyfory.serializer import (
TupleSerializer,
MapSerializer,
SetSerializer,
+ NonExistEnum,
EnumSerializer,
SliceSerializer,
DataClassSerializer,
@@ -429,10 +430,9 @@ class TypeResolver:
dynamic_type = type_id is not None and type_id < 0
# In metashare mode, for struct types, we want to keep serializer=None
# so that _set_typeinfo will be called to create the TypeDef-based serializer
+ # This applies to both types registered by name and by ID
should_create_serializer = (
- not internal
- and serializer is None
- and not (self.meta_share and typename is not None and type_id is not None and is_struct_type(type_id & 0xFF))
+ not internal and serializer is None and not (self.meta_share and type_id is not None and is_struct_type(type_id & 0xFF))
)
if should_create_serializer:
@@ -445,6 +445,8 @@ class TypeResolver:
splits = typename.rsplit(".", 1)
if len(splits) == 2:
namespace, typename = splits
+ else:
+ namespace = "" # Use empty string for consistency with lookup
ns_metastr = self.namespace_encoder.encode(namespace or "")
ns_meta_bytes = self.metastring_resolver.get_metastr_bytes(ns_metastr)
type_metastr = self.typename_encoder.encode(typename)
@@ -457,6 +459,13 @@ class TypeResolver:
if type_id not in self._type_id_to_typeinfo or not internal:
self._type_id_to_typeinfo[type_id] = typeinfo
self._types_info[cls] = typeinfo
+ # Create TypeDef for NAMED_ENUM and NAMED_EXT when meta_share is enabled
+ if self.meta_share and type_id is not None:
+ base_type_id = type_id & 0xFF
+ if base_type_id in (TypeId.NAMED_ENUM, TypeId.NAMED_EXT):
+ type_def = encode_typedef(self, cls)
+ if type_def is not None:
+ typeinfo.type_def = type_def
return typeinfo
def _next_type_id(self):
@@ -498,6 +507,8 @@ class TypeResolver:
return type_info
elif not create:
return None
+ if cls is NonExistEnum:
+ return self._get_nonexist_enum_typeinfo()
if self.require_registration and not issubclass(cls, Enum):
raise TypeUnregisteredError(f"{cls} not registered")
logger.info("Type %s not registered", cls)
@@ -608,11 +619,14 @@ class TypeResolver:
return False
return TypeId.is_namespaced_type(typeinfo.type_id & 0xFF)
- def is_registered_by_id(self, cls):
- typeinfo = self._types_info.get(cls)
- if typeinfo is None:
- return False
- return not TypeId.is_namespaced_type(typeinfo.type_id & 0xFF)
+ def is_registered_by_id(self, cls=None, type_id=None):
+ if cls is not None:
+ typeinfo = self._types_info.get(cls)
+ if typeinfo is None:
+ return False
+ return not TypeId.is_namespaced_type(typeinfo.type_id & 0xFF)
+ else:
+ return type_id in self._type_id_to_typeinfo
def get_registered_name(self, cls):
typeinfo = self._types_info.get(cls)
@@ -672,17 +686,36 @@ class TypeResolver:
if typeinfo is not None:
self._ns_type_to_typeinfo[(ns_metabytes, type_metabytes)] = typeinfo
return typeinfo
- # TODO(chaokunyang) generate a dynamic type and serializer
- # when meta share is enabled.
name = ns + "." + typename if ns else typename
raise TypeUnregisteredError(f"{name} not registered")
return typeinfo
else:
- return self._type_id_to_typeinfo[type_id]
+ return self._type_id_to_typeinfo.get(type_id)
def get_typeinfo_by_id(self, type_id):
- """Get typeinfo by type_id."""
- return self._type_id_to_typeinfo[type_id]
+ """Get typeinfo by type_id. Never returns None.
+
+ For unknown ENUM types, returns NonExistEnum typeinfo.
+ For other unknown types, raises TypeUnregisteredError.
+ """
+ typeinfo = self._type_id_to_typeinfo.get(type_id)
+ if typeinfo is not None:
+ return typeinfo
+ base_type_id = type_id & 0xFF
+ if base_type_id == TypeId.ENUM:
+ return self._get_nonexist_enum_typeinfo()
+ raise TypeUnregisteredError(f"type id {type_id} (base {base_type_id}) not registered")
+
+ def _get_nonexist_enum_typeinfo(self):
+ """Get or create TypeInfo for NonExistEnum to handle unknown enum types."""
+ from pyfory.serializer import NonExistEnum, NonExistEnumSerializer
+
+ typeinfo = self._types_info.get(NonExistEnum)
+ if typeinfo is None:
+ serializer = NonExistEnumSerializer(self.fory)
+ typeinfo = TypeInfo(NonExistEnum, TypeId.ENUM, serializer, None, None, False)
+ self._types_info[NonExistEnum] = typeinfo
+ return typeinfo
def get_typeinfo_by_name(self, namespace, typename):
"""Get typeinfo by namespace and typename."""
@@ -700,8 +733,7 @@ class TypeResolver:
"""Read shared type meta information."""
meta_context = self.serialization_context.meta_context
assert meta_context is not None, "Meta context must be set when meta share is enabled"
- typeinfo = meta_context.read_shared_typeinfo(buffer)
- return typeinfo
+ return meta_context.read_shared_typeinfo(buffer)
def write_type_defs(self, buffer):
"""Write all type definitions that need to be sent."""
@@ -721,7 +753,7 @@ class TypeResolver:
return
num_type_defs = buffer.read_varuint32()
- for i in range(num_type_defs):
+ for _ in range(num_type_defs):
# Read the header (first 8 bytes) to get the type ID
header = buffer.read_int64()
# Check if we already have this TypeDef cached
diff --git a/python/pyfory/_serializer.py b/python/pyfory/_serializer.py
index 2ce347595..d98b30cc9 100644
--- a/python/pyfory/_serializer.py
+++ b/python/pyfory/_serializer.py
@@ -296,7 +296,7 @@ class CollectionSerializer(Serializer):
else:
self._write_same_type_ref(buffer, value, typeinfo)
else:
- self._write_different_types(buffer, value)
+ self._write_different_types(buffer, value, collect_flag)
def _write_same_type_no_ref(self, buffer, value, typeinfo):
if self.is_py:
@@ -316,15 +316,41 @@ class CollectionSerializer(Serializer):
if not self.ref_resolver.write_ref_or_null(buffer, s):
typeinfo.serializer.xwrite(buffer, s)
- def _write_different_types(self, buffer, value):
- for s in value:
- if not self.ref_resolver.write_ref_or_null(buffer, s):
+ def _write_different_types(self, buffer, value, collect_flag=0):
+ tracking_ref = (collect_flag & COLL_TRACKING_REF) != 0
+ has_null = (collect_flag & COLL_HAS_NULL) != 0
+ if tracking_ref:
+ # When ref tracking is enabled, write with ref handling
+ for s in value:
+ if not self.ref_resolver.write_ref_or_null(buffer, s):
+ typeinfo = self.type_resolver.get_typeinfo(type(s))
+ self.type_resolver.write_typeinfo(buffer, typeinfo)
+ if self.is_py:
+ typeinfo.serializer.write(buffer, s)
+ else:
+ typeinfo.serializer.xwrite(buffer, s)
+ elif not has_null:
+ # When ref tracking is disabled and no nulls, write type info directly
+ for s in value:
typeinfo = self.type_resolver.get_typeinfo(type(s))
self.type_resolver.write_typeinfo(buffer, typeinfo)
if self.is_py:
typeinfo.serializer.write(buffer, s)
else:
typeinfo.serializer.xwrite(buffer, s)
+ else:
+ # When ref tracking is disabled but has nulls, write null flag first
+ for s in value:
+ if s is None:
+ buffer.write_int8(NULL_FLAG)
+ else:
+ buffer.write_int8(NOT_NULL_VALUE_FLAG)
+ typeinfo = self.type_resolver.get_typeinfo(type(s))
+ self.type_resolver.write_typeinfo(buffer, typeinfo)
+ if self.is_py:
+ typeinfo.serializer.write(buffer, s)
+ else:
+ typeinfo.serializer.xwrite(buffer, s)
def read(self, buffer):
len_ = buffer.read_varuint32()
@@ -342,7 +368,7 @@ class CollectionSerializer(Serializer):
else:
self._read_same_type_ref(buffer, len_, collection_, typeinfo)
else:
- self._read_different_types(buffer, len_, collection_)
+ self._read_different_types(buffer, len_, collection_, collect_flag)
return collection_
def new_instance(self, type_):
@@ -376,13 +402,41 @@ class CollectionSerializer(Serializer):
self._add_element(collection_, obj)
self.fory.dec_depth()
- def _read_different_types(self, buffer, len_, collection_):
+ def _read_different_types(self, buffer, len_, collection_, collect_flag):
self.fory.inc_depth()
- for _ in range(len_):
- self._add_element(
- collection_,
- get_next_element(buffer, self.ref_resolver, self.type_resolver, self.is_py),
- )
+ tracking_ref = (collect_flag & COLL_TRACKING_REF) != 0
+ has_null = (collect_flag & COLL_HAS_NULL) != 0
+ if tracking_ref:
+ # When ref tracking is enabled, read with ref handling
+ for i in range(len_):
+ elem = get_next_element(buffer, self.ref_resolver, self.type_resolver, self.is_py)
+ self._add_element(collection_, elem)
+ elif not has_null:
+ # When ref tracking is disabled and no nulls, read type info directly
+ for i in range(len_):
+ typeinfo = self.type_resolver.read_typeinfo(buffer)
+ if typeinfo is None:
+ elem = None
+ elif self.is_py:
+ elem = typeinfo.serializer.read(buffer)
+ else:
+ elem = typeinfo.serializer.xread(buffer)
+ self._add_element(collection_, elem)
+ else:
+ # When ref tracking is disabled but has nulls, read null flag first
+ for i in range(len_):
+ head_flag = buffer.read_int8()
+ if head_flag == NULL_FLAG:
+ elem = None
+ else:
+ typeinfo = self.type_resolver.read_typeinfo(buffer)
+ if typeinfo is None:
+ elem = None
+ elif self.is_py:
+ elem = typeinfo.serializer.read(buffer)
+ else:
+ elem = typeinfo.serializer.xread(buffer)
+ self._add_element(collection_, elem)
self.fory.dec_depth()
def xwrite(self, buffer, value):
@@ -543,7 +597,7 @@ class MapSerializer(Serializer):
if not key_write_ref or not ref_resolver.write_ref_or_null(buffer, key):
self._write_obj(key_serializer, buffer, key)
if not value_write_ref or not ref_resolver.write_ref_or_null(buffer, value):
- value_serializer.write(buffer, value)
+ self._write_obj(value_serializer, buffer, value)
chunk_size += 1
try:
diff --git a/python/pyfory/_struct.py b/python/pyfory/_struct.py
index 5c81ba2e0..c9a0c6473 100644
--- a/python/pyfory/_struct.py
+++ b/python/pyfory/_struct.py
@@ -221,7 +221,11 @@ def compute_struct_meta(type_resolver, field_names, serializers, nullable_map=No
hash_str = "".join(hash_parts)
hash_bytes = hash_str.encode("utf-8")
- full_hash = hash_buffer(hash_bytes, seed=47)[0]
+ # Handle empty hash_bytes (no fields or all fields are unknown/dynamic)
+ if len(hash_bytes) == 0:
+ full_hash = 47 # Use seed as default hash for empty structs
+ else:
+ full_hash = hash_buffer(hash_bytes, seed=47)[0]
type_hash_32 = full_hash & 0xFFFFFFFF
if full_hash & 0x80000000:
# If the sign bit is set, it's a negative number in 2's complement
diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py
index b3108f95b..b13427351 100644
--- a/python/pyfory/meta/typedef.py
+++ b/python/pyfory/meta/typedef.py
@@ -68,6 +68,16 @@ class TypeDef:
return [field_info.name for field_info in self.fields]
def create_serializer(self, resolver):
+ if self.type_id & 0xFF == TypeId.NAMED_EXT:
+ return resolver.get_typeinfo_by_name(self.namespace, self.typename).serializer
+ if self.type_id & 0xFF == TypeId.NAMED_ENUM:
+ try:
+ return resolver.get_typeinfo_by_name(self.namespace, self.typename).serializer
+ except Exception:
+ from pyfory.serializer import NonExistEnumSerializer
+
+ return NonExistEnumSerializer(resolver.fory)
+
from pyfory.serializer import DataClassSerializer
fory = resolver.fory
@@ -155,22 +165,41 @@ class FieldType:
return FieldType(xtype_id, is_monomorphic, is_nullable, is_tracking_ref)
def create_serializer(self, resolver, type_):
- if self.type_id in [TypeId.EXT, TypeId.STRUCT, TypeId.NAMED_STRUCT, TypeId.COMPATIBLE_STRUCT, TypeId.NAMED_COMPATIBLE_STRUCT, TypeId.UNKNOWN]:
- return None
+ # Handle list wrapper
if isinstance(type_, list):
type_ = type_[0]
- if isinstance(type_, type) and issubclass(type_, enum.Enum):
- typeinfo = resolver.get_typeinfo(type_, create=False)
- if typeinfo is not None and typeinfo.serializer is not None:
- return typeinfo.serializer
- else:
- from pyfory.serializer import NonExistEnumSerializer
-
- return NonExistEnumSerializer(resolver.fory)
- return resolver.get_typeinfo_by_id(self.type_id).serializer
+ # Types that need to be handled dynamically during deserialization
+ # For these types, we don't know the concrete type at compile time
+ if self.type_id & 0xFF in [
+ TypeId.EXT,
+ TypeId.NAMED_EXT,
+ TypeId.STRUCT,
+ TypeId.NAMED_STRUCT,
+ TypeId.COMPATIBLE_STRUCT,
+ TypeId.NAMED_COMPATIBLE_STRUCT,
+ TypeId.UNKNOWN,
+ ]:
+ return None
+ if self.type_id & 0xFF in [TypeId.ENUM, TypeId.NAMED_ENUM]:
+ try:
+ if issubclass(type_, enum.Enum):
+ return resolver.get_typeinfo(cls=type_).serializer
+ except Exception:
+ pass
+ from pyfory.serializer import NonExistEnumSerializer
+
+ return NonExistEnumSerializer(resolver.fory)
+ typeinfo = resolver.get_typeinfo_by_id(self.type_id)
+ return typeinfo.serializer
def __repr__(self):
- return f"FieldType(type_id={self.type_id}, is_monomorphic={self.is_monomorphic}, is_nullable={self.is_nullable}, is_tracking_ref={self.is_tracking_ref})"
+ type_id = self.type_id
+ if type_id > 128:
+ type_id = f"{type_id}, fory_id={type_id & 0xFF}, user_id={type_id >> 8}"
+ return (
+ f"FieldType(type_id={type_id}, is_monomorphic={self.is_monomorphic}, "
+ f"is_nullable={self.is_nullable}, is_tracking_ref={self.is_tracking_ref})"
+ )
class CollectionFieldType(FieldType):
@@ -236,6 +265,9 @@ class DynamicFieldType(FieldType):
super().__init__(type_id, is_monomorphic, is_nullable, is_tracking_ref)
def create_serializer(self, resolver, type_):
+ # For dynamic field types (UNKNOWN, STRUCT, etc.), always return None
+ # This ensures type info is written/read at runtime, which is required
+ # for cross-language compatibility (Java always writes type info for struct fields)
return None
def __repr__(self):
diff --git a/python/pyfory/meta/typedef_decoder.py b/python/pyfory/meta/typedef_decoder.py
index 8b2f4b794..880349441 100644
--- a/python/pyfory/meta/typedef_decoder.py
+++ b/python/pyfory/meta/typedef_decoder.py
@@ -21,7 +21,8 @@ TypeDef decoder for xlang serialization.
This module implements the decoding of TypeDef objects according to the xlang
serialization specification.
"""
-from typing import List
+from dataclasses import make_dataclass
+from typing import List, Any
from pyfory._util import Buffer
from pyfory.meta.typedef import TypeDef, FieldInfo, FieldType
from pyfory.meta.typedef import (
@@ -36,10 +37,15 @@ from pyfory.meta.typedef import (
NAMESPACE_ENCODINGS,
TYPE_NAME_ENCODINGS,
)
-from pyfory.type import TypeId, record_class_factory
+from pyfory.type import TypeId
from pyfory.meta.metastring import MetaStringDecoder, Encoding
+MAX_GENERATED_CLASSES = 1000
+MAX_FIELDS_PER_CLASS = 256
+_generated_class_count = 0
+
+
# Meta string decoders
NAMESPACE_DECODER = MetaStringDecoder(".", "_")
TYPENAME_DECODER = MetaStringDecoder("$", "_")
@@ -70,6 +76,8 @@ def decode_typedef(buffer: Buffer, resolver, header=None) -> TypeDef:
Returns:
The decoded TypeDef.
"""
+ global _generated_class_count
+
# Read global binary header
if header is None:
header = buffer.read_int64()
@@ -101,6 +109,12 @@ def decode_typedef(buffer: Buffer, resolver, header=None) -> TypeDef:
if num_fields == SMALL_NUM_FIELDS_THRESHOLD:
num_fields += meta_buffer.read_varuint32()
+ # Check field count limit
+ if num_fields > MAX_FIELDS_PER_CLASS:
+ raise ValueError(
+ f"Class has {num_fields} fields, exceeding the maximum allowed {MAX_FIELDS_PER_CLASS} fields. This may indicate malicious data."
+ )
+
# Check if registered by name
is_registered_by_name = (meta_header & REGISTER_BY_NAME_FLAG) != 0
@@ -119,8 +133,8 @@ def decode_typedef(buffer: Buffer, resolver, header=None) -> TypeDef:
type_id = TypeId.COMPATIBLE_STRUCT
else:
type_id = meta_buffer.read_varuint32()
- type_info = resolver.get_typeinfo_by_id(type_id)
- if type_info is not None:
+ if resolver.is_registered_by_id(type_id=type_id):
+ type_info = resolver.get_typeinfo_by_id(type_id)
type_cls = type_info.cls
namespace = type_info.decode_namespace()
typename = type_info.decode_typename()
@@ -133,10 +147,22 @@ def decode_typedef(buffer: Buffer, resolver, header=None) -> TypeDef:
if has_fields_meta:
field_infos = read_fields_info(meta_buffer, resolver, name, num_fields)
if type_cls is None:
- type_cls = record_class_factory(name, [field_info.name for field_info in field_infos])
+ # Check generated class count limit
+ if _generated_class_count >= MAX_GENERATED_CLASSES:
+ raise ValueError(
+ f"Exceeded maximum number of dynamically generated classes ({MAX_GENERATED_CLASSES}). "
+ "This may indicate malicious data causing memory issues."
+ )
+ _generated_class_count += 1
+ # Generate dynamic dataclass from field definitions
+ field_definitions = [(field_info.name, Any) for field_info in field_infos]
+ # Use a valid Python identifier for class name
+ class_name = typename.replace(".", "_").replace("$", "_")
+ type_cls = make_dataclass(class_name, field_definitions)
# Create TypeDef object
- return TypeDef(namespace, typename, type_cls, type_id, field_infos, meta_data, is_compressed)
+ type_def = TypeDef(namespace, typename, type_cls, type_id, field_infos, meta_data, is_compressed)
+ return type_def
def read_namespace(buffer: Buffer) -> str:
diff --git a/python/pyfory/meta/typedef_encoder.py b/python/pyfory/meta/typedef_encoder.py
index 954c389c8..90a350e39 100644
--- a/python/pyfory/meta/typedef_encoder.py
+++ b/python/pyfory/meta/typedef_encoder.py
@@ -88,7 +88,7 @@ def encode_typedef(type_resolver, cls):
# Use the actual type_id from the resolver, not a generic one
type_id = type_resolver.get_registered_id(cls)
else:
- assert type_resolver.is_registered_by_id(cls), "Class must be registered by name or id"
+ assert type_resolver.is_registered_by_id(cls=cls), "Class must be registered by name or id"
type_id = type_resolver.get_registered_id(cls)
buffer.write_varuint32(type_id)
diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx
index 47161509e..d4e6e7e76 100644
--- a/python/pyfory/serialization.pyx
+++ b/python/pyfory/serialization.pyx
@@ -756,6 +756,7 @@ cdef class MetaContext:
cdef flat_hash_map[uint64_t, int32_t].iterator it = self._c_type_map.find(type_addr)
if it != self._c_type_map.end():
buffer.write_varuint32(deref(it).second)
+ return
cdef index = self._c_type_map.size()
buffer.write_varuint32(index)
@@ -1389,6 +1390,7 @@ cdef class Fory:
)
# Read type definitions at the start, similar to Java implementation
+ cdef int32_t end_reader_index = -1
if self.serialization_context.scoped_meta_share_enabled:
relative_type_defs_offset = buffer.read_int32()
if relative_type_defs_offset != -1:
@@ -1398,12 +1400,22 @@ cdef class Fory:
buffer.reader_index = current_reader_index + relative_type_defs_offset
# Read type definitions
self.type_resolver.read_type_defs(buffer)
+ # Save the end position (after type defs) - this is the true end of serialized data
+ end_reader_index = buffer.reader_index
# Jump back to continue with object deserialization
buffer.reader_index = current_reader_index
if not is_target_x_lang:
- return self.read_ref(buffer)
- return self.xread_ref(buffer)
+ obj = self.read_ref(buffer)
+ else:
+ obj = self.xread_ref(buffer)
+
+ # After reading the object, position buffer at the end of serialized data
+ # (which is after the type definitions, not after the object data)
+ if end_reader_index != -1:
+ buffer.reader_index = end_reader_index
+
+ return obj
cpdef inline read_ref(self, Buffer buffer):
cdef MapRefResolver ref_resolver = self.ref_resolver
@@ -1933,6 +1945,8 @@ cdef class CollectionSerializer(Serializer):
cdef TypeResolver type_resolver = self.type_resolver
cdef c_bool is_py = self.is_py
cdef serializer = type(elem_typeinfo.serializer)
+ cdef c_bool tracking_ref
+ cdef c_bool has_null
if (collect_flag & COLL_IS_SAME_TYPE) != 0:
if elem_type is str:
self._write_string(buffer, value)
@@ -1948,22 +1962,51 @@ cdef class CollectionSerializer(Serializer):
else:
self._write_same_type_ref(buffer, value, elem_typeinfo)
else:
- for s in value:
- cls = type(s)
- if cls is str:
- buffer.write_int16(NOT_NULL_STRING_FLAG)
- buffer.write_string(s)
- elif cls is int:
- buffer.write_int16(NOT_NULL_INT64_FLAG)
- buffer.write_varint64(s)
- elif cls is bool:
- buffer.write_int16(NOT_NULL_BOOL_FLAG)
- buffer.write_bool(s)
- elif cls is float:
- buffer.write_int16(NOT_NULL_FLOAT64_FLAG)
- buffer.write_double(s)
- else:
- if not ref_resolver.write_ref_or_null(buffer, s):
+ # Check tracking_ref and has_null flags for different types writing
+ tracking_ref = (collect_flag & COLL_TRACKING_REF) != 0
+ has_null = (collect_flag & COLL_HAS_NULL) != 0
+ if tracking_ref:
+ # When ref tracking is enabled, write with ref handling
+ for s in value:
+ cls = type(s)
+ if cls is str:
+ buffer.write_int16(NOT_NULL_STRING_FLAG)
+ buffer.write_string(s)
+ elif cls is int:
+ buffer.write_int16(NOT_NULL_INT64_FLAG)
+ buffer.write_varint64(s)
+ elif cls is bool:
+ buffer.write_int16(NOT_NULL_BOOL_FLAG)
+ buffer.write_bool(s)
+ elif cls is float:
+ buffer.write_int16(NOT_NULL_FLOAT64_FLAG)
+ buffer.write_double(s)
+ else:
+ if not ref_resolver.write_ref_or_null(buffer, s):
+ typeinfo = type_resolver.get_typeinfo(cls)
+ type_resolver.write_typeinfo(buffer, typeinfo)
+ if is_py:
+ typeinfo.serializer.write(buffer, s)
+ else:
+ typeinfo.serializer.xwrite(buffer, s)
+ elif not has_null:
+ # When ref tracking is disabled and no nulls, write type info directly
+ for s in value:
+ cls = type(s)
+ typeinfo = type_resolver.get_typeinfo(cls)
+ type_resolver.write_typeinfo(buffer, typeinfo)
+ if is_py:
+ typeinfo.serializer.write(buffer, s)
+ else:
+ typeinfo.serializer.xwrite(buffer, s)
+ else:
+ # When ref tracking is disabled but has nulls, write null flag first
+ for s in value:
+ if s is None:
+ buffer.write_int8(NULL_FLAG)
+ else:
+ buffer.write_int8(NOT_NULL_VALUE_FLAG)
+ cls = type(s)
typeinfo = type_resolver.get_typeinfo(cls)
type_resolver.write_typeinfo(buffer, typeinfo)
if is_py:
@@ -2090,6 +2133,9 @@ cdef class ListSerializer(CollectionSerializer):
cdef c_bool is_py = self.is_py
cdef TypeInfo typeinfo
cdef int32_t type_id = -1
+ cdef c_bool tracking_ref
+ cdef c_bool has_null
+ cdef int8_t head_flag
if (collect_flag & COLL_IS_SAME_TYPE) != 0:
if collect_flag & COLL_IS_DECL_ELEMENT_TYPE == 0:
typeinfo = self.type_resolver.read_typeinfo(buffer)
@@ -2115,10 +2161,39 @@ cdef class ListSerializer(CollectionSerializer):
self._read_same_type_ref(buffer, len_, list_, typeinfo)
else:
self.fory.inc_depth()
- for i in range(len_):
- elem = get_next_element(buffer, ref_resolver, type_resolver, is_py)
- Py_INCREF(elem)
- PyList_SET_ITEM(list_, i, elem)
+ # Check tracking_ref and has_null flags for different types handling
+ tracking_ref = (collect_flag & COLL_TRACKING_REF) != 0
+ has_null = (collect_flag & COLL_HAS_NULL) != 0
+ if tracking_ref:
+ # When ref tracking is enabled, read with ref handling
+ for i in range(len_):
+ elem = get_next_element(buffer, ref_resolver, type_resolver, is_py)
+ Py_INCREF(elem)
+ PyList_SET_ITEM(list_, i, elem)
+ elif not has_null:
+ # When ref tracking is disabled and no nulls, read type info directly
+ for i in range(len_):
+ typeinfo = type_resolver.read_typeinfo(buffer)
+ if is_py:
+ elem = typeinfo.serializer.read(buffer)
+ else:
+ elem = typeinfo.serializer.xread(buffer)
+ Py_INCREF(elem)
+ PyList_SET_ITEM(list_, i, elem)
+ else:
+ # When ref tracking is disabled but has nulls, read null flag first
+ for i in range(len_):
+ head_flag = buffer.read_int8()
+ if head_flag == NULL_FLAG:
+ elem = None
+ else:
+ typeinfo = type_resolver.read_typeinfo(buffer)
+ if is_py:
+ elem = typeinfo.serializer.read(buffer)
+ else:
+ elem = typeinfo.serializer.xread(buffer)
+ Py_INCREF(elem)
+ PyList_SET_ITEM(list_, i, elem)
self.fory.dec_depth()
return list_
@@ -2176,6 +2251,9 @@ cdef class TupleSerializer(CollectionSerializer):
cdef c_bool is_py = self.is_py
cdef TypeInfo typeinfo
cdef int32_t type_id = -1
+ cdef c_bool tracking_ref
+ cdef c_bool has_null
+ cdef int8_t head_flag
if (collect_flag & COLL_IS_SAME_TYPE) != 0:
if collect_flag & COLL_IS_DECL_ELEMENT_TYPE == 0:
typeinfo = self.type_resolver.read_typeinfo(buffer)
@@ -2201,10 +2279,39 @@ cdef class TupleSerializer(CollectionSerializer):
self._read_same_type_ref(buffer, len_, tuple_, typeinfo)
else:
self.fory.inc_depth()
- for i in range(len_):
- elem = get_next_element(buffer, ref_resolver, type_resolver, is_py)
- Py_INCREF(elem)
- PyTuple_SET_ITEM(tuple_, i, elem)
+ # Check tracking_ref and has_null flags for different types handling
+ tracking_ref = (collect_flag & COLL_TRACKING_REF) != 0
+ has_null = (collect_flag & COLL_HAS_NULL) != 0
+ if tracking_ref:
+ # When ref tracking is enabled, read with ref handling
+ for i in range(len_):
+ elem = get_next_element(buffer, ref_resolver, type_resolver, is_py)
+ Py_INCREF(elem)
+ PyTuple_SET_ITEM(tuple_, i, elem)
+ elif not has_null:
+ # When ref tracking is disabled and no nulls, read type info directly
+ for i in range(len_):
+ typeinfo = type_resolver.read_typeinfo(buffer)
+ if is_py:
+ elem = typeinfo.serializer.read(buffer)
+ else:
+ elem = typeinfo.serializer.xread(buffer)
+ Py_INCREF(elem)
+ PyTuple_SET_ITEM(tuple_, i, elem)
+ else:
+ # When ref tracking is disabled but has nulls, read null flag first
+ for i in range(len_):
+ head_flag = buffer.read_int8()
+ if head_flag == NULL_FLAG:
+ elem = None
+ else:
+ typeinfo = type_resolver.read_typeinfo(buffer)
+ if is_py:
+ elem = typeinfo.serializer.read(buffer)
+ else:
+ elem = typeinfo.serializer.xread(buffer)
+ Py_INCREF(elem)
+ PyTuple_SET_ITEM(tuple_, i, elem)
self.fory.dec_depth()
return tuple_
@@ -2237,6 +2344,9 @@ cdef class SetSerializer(CollectionSerializer):
cdef TypeInfo typeinfo
cdef int32_t type_id = -1
cdef c_bool is_py = self.is_py
+ cdef c_bool tracking_ref
+ cdef c_bool has_null
+ cdef int8_t head_flag
if (collect_flag & COLL_IS_SAME_TYPE) != 0:
if collect_flag & COLL_IS_DECL_ELEMENT_TYPE == 0:
typeinfo = self.type_resolver.read_typeinfo(buffer)
@@ -2262,29 +2372,74 @@ cdef class SetSerializer(CollectionSerializer):
self._read_same_type_ref(buffer, len_, instance, typeinfo)
else:
self.fory.inc_depth()
- for i in range(len_):
- ref_id = ref_resolver.try_preserve_ref_id(buffer)
- if ref_id < NOT_NULL_VALUE_FLAG:
- instance.add(ref_resolver.get_read_object())
- continue
- # indicates that the object is first read.
- typeinfo = type_resolver.read_typeinfo(buffer)
- type_id = typeinfo.type_id
- if type_id == <int32_t>TypeId.STRING:
- instance.add(buffer.read_string())
- elif type_id == <int32_t>TypeId.VAR_INT64:
- instance.add(buffer.read_varint64())
- elif type_id == <int32_t>TypeId.BOOL:
- instance.add(buffer.read_bool())
- elif type_id == <int32_t>TypeId.FLOAT64:
- instance.add(buffer.read_double())
- else:
- if is_py:
- o = typeinfo.serializer.read(buffer)
+ # Check tracking_ref and has_null flags for different types handling
+ tracking_ref = (collect_flag & COLL_TRACKING_REF) != 0
+ has_null = (collect_flag & COLL_HAS_NULL) != 0
+ if tracking_ref:
+ # When ref tracking is enabled, read with ref handling
+ for i in range(len_):
+ ref_id = ref_resolver.try_preserve_ref_id(buffer)
+ if ref_id < NOT_NULL_VALUE_FLAG:
+ instance.add(ref_resolver.get_read_object())
+ continue
+ # indicates that the object is first read.
+ typeinfo = type_resolver.read_typeinfo(buffer)
+ type_id = typeinfo.type_id
+ if type_id == <int32_t>TypeId.STRING:
+ instance.add(buffer.read_string())
+ elif type_id == <int32_t>TypeId.VAR_INT64:
+ instance.add(buffer.read_varint64())
+ elif type_id == <int32_t>TypeId.BOOL:
+ instance.add(buffer.read_bool())
+ elif type_id == <int32_t>TypeId.FLOAT64:
+ instance.add(buffer.read_double())
+ else:
+ if is_py:
+ o = typeinfo.serializer.read(buffer)
+ else:
+ o = typeinfo.serializer.xread(buffer)
+ ref_resolver.set_read_object(ref_id, o)
+ instance.add(o)
+ elif not has_null:
+ # When ref tracking is disabled and no nulls, read type info directly
+ for i in range(len_):
+ typeinfo = type_resolver.read_typeinfo(buffer)
+ type_id = typeinfo.type_id
+ if type_id == <int32_t>TypeId.STRING:
+ instance.add(buffer.read_string())
+ elif type_id == <int32_t>TypeId.VAR_INT64:
+ instance.add(buffer.read_varint64())
+ elif type_id == <int32_t>TypeId.BOOL:
+ instance.add(buffer.read_bool())
+ elif type_id == <int32_t>TypeId.FLOAT64:
+ instance.add(buffer.read_double())
+ else:
+ if is_py:
+ instance.add(typeinfo.serializer.read(buffer))
+ else:
+ instance.add(typeinfo.serializer.xread(buffer))
+ else:
+ # When ref tracking is disabled but has nulls, read null flag first
+ for i in range(len_):
+ head_flag = buffer.read_int8()
+ if head_flag == NULL_FLAG:
+ instance.add(None)
else:
- o = typeinfo.serializer.xread(buffer)
- ref_resolver.set_read_object(ref_id, o)
- instance.add(o)
+ typeinfo = type_resolver.read_typeinfo(buffer)
+ type_id = typeinfo.type_id
+ if type_id == <int32_t>TypeId.STRING:
+ instance.add(buffer.read_string())
+ elif type_id == <int32_t>TypeId.VAR_INT64:
+ instance.add(buffer.read_varint64())
+ elif type_id == <int32_t>TypeId.BOOL:
+ instance.add(buffer.read_bool())
+ elif type_id == <int32_t>TypeId.FLOAT64:
+ instance.add(buffer.read_double())
+ else:
+ if is_py:
+ instance.add(typeinfo.serializer.read(buffer))
+ else:
+ instance.add(typeinfo.serializer.xread(buffer))
self.fory.dec_depth()
return instance
diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py
index 1cbf9fc26..8bd7c2f6e 100644
--- a/python/pyfory/serializer.py
+++ b/python/pyfory/serializer.py
@@ -787,10 +787,13 @@ class DataClassSerializer(Serializer):
field_value = getattr(value, field_name)
serializer = self._serializers[index]
is_nullable = self._nullable_fields.get(field_name, False)
- if is_nullable and field_value is None:
- buffer.write_int8(-3)
+ if is_nullable:
+ if field_value is None:
+ buffer.write_int8(-3)
+ else:
+ self.fory.xwrite_ref(buffer, field_value, serializer=serializer)
else:
- self.fory.xwrite_ref(buffer, field_value, serializer=serializer)
+ serializer.xwrite(buffer, field_value)
def xread(self, buffer):
"""Read dataclass instance from buffer in cross-language format."""
@@ -816,7 +819,7 @@ class DataClassSerializer(Serializer):
buffer.reader_index -= 1
field_value = self.fory.xread_ref(buffer, serializer=serializer)
else:
- field_value = self.fory.xread_ref(buffer, serializer=serializer)
+ field_value = serializer.xread(buffer)
if field_name in current_class_field_names:
setattr(obj, field_name, field_value)
return obj
diff --git a/python/pyfory/tests/xlang_test_main.py b/python/pyfory/tests/xlang_test_main.py
new file mode 100644
index 000000000..d2782c552
--- /dev/null
+++ b/python/pyfory/tests/xlang_test_main.py
@@ -0,0 +1,469 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Cross-language xlang tests for Python <-> Java/Rust/Go/etc.
+
+This module is invoked by PythonXlangTest.java and other language xlang tests.
+The test cases follow the same pattern as test_cross_language.rs (Rust).
+Data file path is passed via DATA_FILE environment variable.
+"""
+
+import enum
+import logging
+import os
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+import pyfory
+
+
+def debug_print(*params):
+    """Forward ``params`` to ``print`` only when ENABLE_FORY_DEBUG_OUTPUT is "1"."""
+    debug_enabled = os.environ.get("ENABLE_FORY_DEBUG_OUTPUT") == "1"
+    if not debug_enabled:
+        return
+    print(*params)
+
+
+def get_data_file() -> str:
+    """Return the value of the DATA_FILE environment variable.
+
+    Raises:
+        KeyError: if DATA_FILE is not set in the environment.
+    """
+    data_file_path = os.environ["DATA_FILE"]
+    return data_file_path
+
+
+# ============================================================================
+# Test Data Classes - Must match XlangTestBase.java definitions
+# ============================================================================
+
+
+# Enum with four members valued 0-3; it is the type of ``SimpleStruct.f5`` and
+# ``MyWrapper.color`` in this file.
+class Color(enum.Enum):
+ Green = 0
+ Red = 1
+ Blue = 2
+ White = 3
+
+
+# Dataclass with a single optional string field; used as the type of
+# ``SimpleStruct.f3``.
+@dataclass
+class Item:
+ name: Optional[str] = None
+
+
+# Struct mixing a map, 32-bit ints, a nested ``Item``, an optional string, a
+# ``Color`` enum and a list of optional strings. Per the section banner these
+# definitions must match XlangTestBase.java, so the field names, order and
+# annotations presumably must not be changed independently — confirm against
+# the Java side before editing.
+@dataclass
+class SimpleStruct:
+ f1: Dict[pyfory.int32, pyfory.float64] = None
+ f2: pyfory.int32 = 0
+ f3: Item = None
+ f4: Optional[str] = None
+ f5: Color = None
+ f6: List[Optional[str]] = None
+ f7: pyfory.int32 = 0
+ f8: pyfory.int32 = 0
+ last: pyfory.int32 = 0
+
+
+# Three-field struct (int32, optional string, float64) exercised by
+# ``test_struct_version_check``.
+@dataclass
+class VersionCheckStruct:
+ f1: pyfory.int32 = 0
+ f2: Optional[str] = None
+ f3: pyfory.float64 = 0.0
+
+
+# One of the two element types used by the polymorphic list/map tests:
+# an int32 ``age`` plus an optional ``name``.
+@dataclass
+class Dog:
+ age: pyfory.int32 = 0
+ name: Optional[str] = None
+
+
+# The other element type used by the polymorphic list/map tests:
+# int32 ``age`` and int32 ``lives``.
+@dataclass
+class Cat:
+ age: pyfory.int32 = 0
+ lives: pyfory.int32 = 0
+
+
+# Wrapper whose only field is a list typed ``List[Any]``; the polymorphic list
+# test expects it to hold ``Dog`` and ``Cat`` instances.
+@dataclass
+class AnimalListHolder:
+ animals: List[Any] = None
+
+
+# Wrapper whose only field is a map from optional string keys to ``Any``; the
+# polymorphic map test expects ``Dog`` and ``Cat`` values.
+@dataclass
+class AnimalMapHolder:
+ animal_map: Dict[Optional[str], Any] = None
+
+
+# Minimal struct with a single int32 ``id``; used as ``MyWrapper.my_struct``.
+@dataclass
+class MyStruct:
+ id: pyfory.int32 = 0
+
+
+# Single-field type (int32 ``id``) that the tests register together with the
+# custom ``MyExtSerializer`` rather than relying on a default serializer.
+@dataclass
+class MyExt:
+ id: pyfory.int32 = 0
+
+
+class MyExtSerializer(pyfory.serializer.Serializer):
+    """Custom serializer for ``MyExt`` that encodes only ``id`` as a varint32.
+
+    ``write``/``read`` delegate to ``xwrite``/``xread``, so both entry-point
+    pairs produce and consume the same bytes.
+    """
+
+    def xwrite(self, buffer, value):
+        """Append ``value.id`` to ``buffer`` as a varint32."""
+        buffer.write_varint32(value.id)
+
+    def xread(self, buffer):
+        """Read one varint32 from ``buffer`` and return a ``MyExt`` carrying it."""
+        return MyExt(id=buffer.read_varint32())
+
+    def write(self, buffer, value):
+        """Delegate to ``xwrite``."""
+        self.xwrite(buffer, value)
+
+    def read(self, buffer):
+        """Delegate to ``xread``."""
+        return self.xread(buffer)
+
+
+# Wrapper holding one enum, one struct and one ext-typed field; the skip tests
+# serialize an instance of it with the writer-side Fory.
+@dataclass
+class MyWrapper:
+ color: Color = None
+ my_struct: MyStruct = None
+ my_ext: MyExt = None
+
+
+# Field-less dataclass. The skip tests register it on the reader side under the
+# same id/name that the writer side uses for ``MyWrapper``.
+@dataclass
+class EmptyWrapper:
+ pass
+
+
+# ============================================================================
+# Test Functions - Each function handles read -> verify -> write back
+# ============================================================================
+
+
+def test_string_serializer():
+    """Read a fixed sequence of strings from the data file, verify, and write them back.
+
+    The expected strings are grouped by the encoding labels used in this test
+    (Latin1, UTF16, UTF8) and are consumed from one buffer in that order.
+    """
+    path = get_data_file()
+    with open(path, "rb") as src:
+        raw = src.read()
+    incoming = pyfory.Buffer(raw)
+
+    fory = pyfory.Fory(xlang=True, compatible=True)
+    latin1_cases = ["ab", "Rust123", "Çüéâäàåçêëèïî"]
+    utf16_cases = ["こんにちは", "Привет", "𝄞🎵🎶"]
+    utf8_cases = ["Hello, 世界"]
+    test_strings = latin1_cases + utf16_cases + utf8_cases
+
+    # Verify every string arrives in the expected order.
+    for expected in test_strings:
+        value = fory.deserialize(incoming)
+        assert value == expected, f"string mismatch: {value} != {expected}"
+
+    # Re-encode the same strings for the caller to read back.
+    outgoing = pyfory.Buffer.allocate(512)
+    for text in test_strings:
+        fory.serialize(text, buffer=outgoing)
+
+    with open(path, "wb") as dst:
+        dst.write(outgoing.get_bytes(0, outgoing.writer_index))
+
+
+def test_simple_struct():
+    """Deserialize a ``SimpleStruct`` registered by numeric id, verify it, and write it back.
+
+    The Python re-encoding is itself round-tripped before being written to the
+    data file.
+    """
+    path = get_data_file()
+    with open(path, "rb") as src:
+        data_bytes = src.read()
+
+    fory = pyfory.Fory(xlang=True, compatible=True)
+    for cls, type_id in ((Color, 101), (Item, 102), (SimpleStruct, 103)):
+        fory.register_type(cls, type_id=type_id)
+
+    expected_fields = {
+        "f1": {1: 1.0, 2: 2.0},
+        "f2": 39,
+        "f3": Item(name="item"),
+        "f4": "f4",
+        "f5": Color.White,
+        "f6": ["f6"],
+        "f7": 40,
+        "f8": 41,
+        "last": 42,
+    }
+    expected = SimpleStruct(**expected_fields)
+
+    debug_print(f"Java bytes length: {len(data_bytes)}")
+    debug_print(f"Java bytes (first 50): {data_bytes[:50].hex()}")
+
+    obj = fory.deserialize(data_bytes)
+    debug_print(f"Deserialized: {obj}")
+    assert obj == expected, f"Mismatch: {obj} != {expected}"
+
+    new_bytes = fory.serialize(obj)
+    debug_print(f"Python bytes length: {len(new_bytes)}")
+    debug_print(f"Python bytes (first 50): {new_bytes[:50].hex()}")
+    debug_print(f"Bytes match: {data_bytes == new_bytes}")
+
+    # Sanity-check that Python can read its own encoding before handing it back.
+    new_value = fory.deserialize(new_bytes)
+    assert new_value == expected, f"new_value: {new_value},\n expected: {expected}"
+
+    with open(path, "wb") as dst:
+        dst.write(new_bytes)
+
+
+def test_named_simple_struct():
+    """Same round trip as the id-based struct test, with types registered by namespace and name."""
+    path = get_data_file()
+    with open(path, "rb") as src:
+        payload = src.read()
+
+    fory = pyfory.Fory(xlang=True, compatible=True)
+    named_types = ((Color, "color"), (Item, "item"), (SimpleStruct, "simple_struct"))
+    for cls, typename in named_types:
+        fory.register_type(cls, namespace="demo", typename=typename)
+
+    expected_fields = {
+        "f1": {1: 1.0, 2: 2.0},
+        "f2": 39,
+        "f3": Item(name="item"),
+        "f4": "f4",
+        "f5": Color.White,
+        "f6": ["f6"],
+        "f7": 40,
+        "f8": 41,
+        "last": 42,
+    }
+    expected = SimpleStruct(**expected_fields)
+
+    obj = fory.deserialize(payload)
+    debug_print(f"Deserialized: {obj}")
+    assert obj == expected, f"Mismatch: {obj} != {expected}"
+
+    reencoded = fory.serialize(obj)
+    round_tripped = fory.deserialize(reencoded)
+    assert round_tripped == expected
+
+    with open(path, "wb") as dst:
+        dst.write(reencoded)
+
+
+def _test_skip_custom(fory1, fory2):
+    """Shared body of the skip tests.
+
+    ``fory1`` deserializes the data file and must produce an ``EmptyWrapper``;
+    ``fory2`` then serializes a fully populated ``MyWrapper`` into the same file.
+    """
+    path = get_data_file()
+    with open(path, "rb") as src:
+        incoming = src.read()
+
+    obj = fory1.deserialize(incoming)
+    assert obj == EmptyWrapper(), f"Expected EmptyWrapper, got {obj}"
+
+    wrapper = MyWrapper(
+        color=Color.White,
+        my_struct=MyStruct(id=42),
+        my_ext=MyExt(id=43),
+    )
+    outgoing = fory2.serialize(wrapper)
+
+    with open(path, "wb") as dst:
+        dst.write(outgoing)
+
+
+def test_skip_id_custom():
+    """Skip test with id-based registration.
+
+    The reader registers only ``MyExt`` (103) and ``EmptyWrapper`` (104); the
+    writer registers the full type set, with ``MyWrapper`` under the same id 104.
+    """
+    reader = pyfory.Fory(xlang=True, compatible=True)
+    reader.register_type(MyExt, type_id=103, serializer=MyExtSerializer(reader, MyExt))
+    reader.register_type(EmptyWrapper, type_id=104)
+
+    writer = pyfory.Fory(xlang=True, compatible=True)
+    writer.register_type(Color, type_id=101)
+    writer.register_type(MyStruct, type_id=102)
+    writer.register_type(MyExt, type_id=103, serializer=MyExtSerializer(writer, MyExt))
+    writer.register_type(MyWrapper, type_id=104)
+
+    _test_skip_custom(reader, writer)
+
+
+def test_skip_name_custom():
+    """Skip test with name-based registration.
+
+    The reader registers only ``MyExt`` and ``EmptyWrapper``; the writer
+    registers the full type set, with ``MyWrapper`` under the same
+    ``"my_wrapper"`` name the reader uses for ``EmptyWrapper``.
+    """
+    reader = pyfory.Fory(xlang=True, compatible=True)
+    reader.register_type(MyExt, typename="my_ext", serializer=MyExtSerializer(reader, MyExt))
+    reader.register_type(EmptyWrapper, typename="my_wrapper")
+
+    writer = pyfory.Fory(xlang=True, compatible=True)
+    writer.register_type(Color, typename="color")
+    writer.register_type(MyStruct, typename="my_struct")
+    writer.register_type(MyExt, typename="my_ext", serializer=MyExtSerializer(writer, MyExt))
+    writer.register_type(MyWrapper, typename="my_wrapper")
+
+    _test_skip_custom(reader, writer)
+
+
+def test_consistent_named():
+    """Round-trip name-registered values with ``compatible=False``.
+
+    The data file is expected to hold three copies each of a ``Color``, a
+    ``MyStruct`` and a ``MyExt``, in that order; the same sequence is written back.
+    """
+    repeat = 3
+    path = get_data_file()
+    with open(path, "rb") as src:
+        raw = src.read()
+    incoming = pyfory.Buffer(raw)
+
+    fory = pyfory.Fory(xlang=True, compatible=False)
+    fory.register_type(Color, typename="color")
+    fory.register_type(MyStruct, typename="my_struct")
+    fory.register_type(MyExt, typename="my_ext", serializer=MyExtSerializer(fory, MyExt))
+
+    samples = (Color.White, MyStruct(id=42), MyExt(id=43))
+
+    for expected in samples:
+        for _ in range(repeat):
+            assert fory.deserialize(incoming) == expected
+
+    outgoing = pyfory.Buffer.allocate(256)
+    for sample in samples:
+        for _ in range(repeat):
+            fory.serialize(sample, buffer=outgoing)
+
+    with open(path, "wb") as dst:
+        dst.write(outgoing.get_bytes(0, outgoing.writer_index))
+
+
+def test_struct_version_check():
+    """Round-trip a ``VersionCheckStruct`` (type id 201) with ``compatible=False``."""
+    path = get_data_file()
+    with open(path, "rb") as src:
+        payload = src.read()
+
+    fory = pyfory.Fory(xlang=True, compatible=False)
+    fory.register_type(VersionCheckStruct, type_id=201)
+
+    expected = VersionCheckStruct(f1=10, f2="test", f3=3.2)
+
+    obj = fory.deserialize(payload)
+    assert obj == expected, f"Mismatch: {obj} != {expected}"
+
+    reencoded = fory.serialize(obj)
+    round_tripped = fory.deserialize(reencoded)
+    assert round_tripped == expected
+
+    with open(path, "wb") as dst:
+        dst.write(reencoded)
+
+
+def test_polymorphic_list():
+    """Round-trip a bare polymorphic list and an ``AnimalListHolder`` wrapping one.
+
+    Both values are read from a single buffer in that order and written back
+    in the same order.
+    """
+    path = get_data_file()
+    with open(path, "rb") as src:
+        raw = src.read()
+    incoming = pyfory.Buffer(raw)
+
+    fory = pyfory.Fory(xlang=True, compatible=True)
+    for cls, type_id in ((Dog, 302), (Cat, 303), (AnimalListHolder, 304)):
+        fory.register_type(cls, type_id=type_id)
+
+    # Part 1: top-level list whose elements have different concrete types.
+    animals = fory.deserialize(incoming)
+    assert len(animals) == 2
+
+    first = animals[0]
+    assert isinstance(first, Dog)
+    assert first.age == 3
+    assert first.name == "Buddy"
+
+    second = animals[1]
+    assert isinstance(second, Cat)
+    assert second.age == 5
+    assert second.lives == 9
+
+    # Part 2: the same kind of list nested inside a holder struct.
+    holder = fory.deserialize(incoming)
+    assert len(holder.animals) == 2
+
+    held_dog = holder.animals[0]
+    assert isinstance(held_dog, Dog)
+    assert held_dog.name == "Rex"
+
+    held_cat = holder.animals[1]
+    assert isinstance(held_cat, Cat)
+    assert held_cat.lives == 7
+
+    # Write both values back in the order they were read.
+    outgoing = pyfory.Buffer.allocate(256)
+    for value in (animals, holder):
+        fory.serialize(value, buffer=outgoing)
+
+    with open(path, "wb") as dst:
+        dst.write(outgoing.get_bytes(0, outgoing.writer_index))
+
+
+def test_polymorphic_map():
+    """Round-trip a bare polymorphic map and an ``AnimalMapHolder`` wrapping one.
+
+    Both values are read from a single buffer in that order and written back
+    in the same order.
+    """
+    path = get_data_file()
+    with open(path, "rb") as src:
+        raw = src.read()
+    incoming = pyfory.Buffer(raw)
+
+    fory = pyfory.Fory(xlang=True, compatible=True)
+    for cls, type_id in ((Dog, 302), (Cat, 303), (AnimalMapHolder, 305)):
+        fory.register_type(cls, type_id=type_id)
+
+    # Part 1: top-level map whose values have different concrete types.
+    animal_map = fory.deserialize(incoming)
+    assert len(animal_map) == 2
+
+    rex = animal_map.get("dog1")
+    assert isinstance(rex, Dog)
+    assert rex.name == "Rex"
+    assert rex.age == 2
+
+    first_cat = animal_map.get("cat1")
+    assert isinstance(first_cat, Cat)
+    assert first_cat.lives == 9
+    assert first_cat.age == 4
+
+    # Part 2: the same kind of map nested inside a holder struct.
+    holder = fory.deserialize(incoming)
+    assert len(holder.animal_map) == 2
+
+    held_dog = holder.animal_map.get("myDog")
+    assert isinstance(held_dog, Dog)
+    assert held_dog.name == "Fido"
+
+    held_cat = holder.animal_map.get("myCat")
+    assert isinstance(held_cat, Cat)
+    assert held_cat.lives == 8
+
+    # Write both values back in the order they were read.
+    outgoing = pyfory.Buffer.allocate(256)
+    for value in (animal_map, holder):
+        fory.serialize(value, buffer=outgoing)
+
+    with open(path, "wb") as dst:
+        dst.write(outgoing.get_bytes(0, outgoing.writer_index))
+
+
+if __name__ == "__main__":
+    """
+    This file is executed by PythonXlangTest.java and other cross-language tests.
+    The test case name is passed as the first argument.
+    """
+    import sys
+
+    print(f"Execute {sys.argv}")
+    # Bind args before the try block so the exception handler can always log it.
+    args = sys.argv[1:]
+    try:
+        assert len(args) > 0, "Test case name required"
+        test_name = args[0]
+        # A default of None lets an unknown name reach the explicit error below;
+        # without it getattr raises AttributeError and the check is unreachable.
+        func = getattr(sys.modules[__name__], test_name, None)
+        # callable() also rejects names that resolve to non-function module
+        # attributes (for example an imported module).
+        if not callable(func):
+            raise Exception(f"Unknown test case: {test_name}")
+        # Any remaining command-line arguments are passed to the test function.
+        func(*args[1:])
+    except BaseException as e:
+        # Log with traceback, then re-raise so the failure still propagates.
+        logging.exception("Execute %s failed with %s", args, e)
+        raise
diff --git a/python/pyfory/type.py b/python/pyfory/type.py
index 6faf62c15..9f72a382e 100644
--- a/python/pyfory/type.py
+++ b/python/pyfory/type.py
@@ -235,6 +235,10 @@ class TypeId:
def is_namespaced_type(type_id: int) -> bool:
return type_id in __NAMESPACED_TYPES__
+ @staticmethod
+ def is_type_share_meta(type_id: int) -> bool:
+ """Return True when ``type_id`` is a member of ``__TYPE_SHARE_META__``."""
+ return type_id in __TYPE_SHARE_META__
+
__NAMESPACED_TYPES__ = {
TypeId.NAMED_EXT,
@@ -242,6 +246,14 @@ __NAMESPACED_TYPES__ = {
TypeId.NAMED_STRUCT,
TypeId.NAMED_COMPATIBLE_STRUCT,
}
+
+# Type ids for which ``TypeId.is_type_share_meta`` returns True.
+__TYPE_SHARE_META__ = {
+ TypeId.NAMED_ENUM,
+ TypeId.NAMED_STRUCT,
+ TypeId.NAMED_EXT,
+ TypeId.COMPATIBLE_STRUCT,
+ TypeId.NAMED_COMPATIBLE_STRUCT,
+}
int8 = TypeVar("int8", bound=int)
int16 = TypeVar("int16", bound=int)
int32 = TypeVar("int32", bound=int)
@@ -472,7 +484,13 @@ def infer_field(field_name, type_, visitor: TypeVisitor, types_path=None):
key_type, value_type = args
return visitor.visit_dict(field_name, key_type, value_type, types_path=types_path)
elif origin is typing.Union:
- # Union types are treated as "other" types and handled by UnionSerializer
+ # For Optional types (Union[X, None]), unwrap to get the inner type
+ # This allows proper type inference for element types in collections
+ unwrapped, is_optional = unwrap_optional(type_)
+ if is_optional and unwrapped is not type_:
+ # Recursively infer the unwrapped type
+ return infer_field(field_name, unwrapped, visitor, types_path)
+ # Non-Optional Union types are treated as "other" types and handled by UnionSerializer
return visitor.visit_other(field_name, type_, types_path=types_path)
else:
raise TypeError(f"Collection types should be {list, dict} instead of {type_}")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]