This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git


The following commit(s) were added to refs/heads/main by this push:
     new f4c927a79 perf(python/java): Fix & optimize cross-language meta-share 
mode (#2601)
f4c927a79 is described below

commit f4c927a792f3097aac8879a0e82ccc49c5e7b041
Author: PAN <[email protected]>
AuthorDate: Sat Sep 13 11:00:12 2025 +0800

    perf(python/java): Fix & optimize cross-language meta-share mode (#2601)
    
    <!--
    **Thanks for contributing to Apache Fory™.**
    
    **If this is your first time opening a PR on fory, you can refer to
    [CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).**
    
    Contribution Checklist
    
    - The **Apache Fory™** community has requirements on the naming of PR
    titles. You can also find instructions in
    [CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).
    
    - Apache Fory™ has a strong focus on performance. If the PR you submit
    will have an impact on performance, please benchmark it first and
    provide the benchmark result here.
    -->
    
    ## Why?
    Fix & optimize cross-language meta-share mode
    <!-- Describe the purpose of this PR. -->
    
    ## What does this PR do?
    
    <!-- Describe the details of this PR. -->
    
    ## Related issues
    
    <!--
    Is there any related issue? If this PR closes them, you can say
    fix/closes:
    
    - #xxxx0
    - #xxxx1
    - Fixes #xxxx2
    -->
    
    ## Does this PR introduce any user-facing change?
    
    <!--
    If any user-facing interface changes, please [open an
    issue](https://github.com/apache/fory/issues/new/choose) describing the
    need to do so and update the document if necessary.
    
    Delete section if not applicable.
    -->
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
    
    <!--
    When the PR has an impact on performance (if you don't know whether the
    PR will have an impact on performance, you can submit the PR first, and
    if it does have an impact on performance, the code reviewer will explain
    it), be sure to attach benchmark data here.
    
    Delete section if not applicable.
    -->
---
 .../java/org/apache/fory/CrossLanguageTest.java    | 210 +++++++++++++++++
 python/pyfory/_registry.py                         |  20 +-
 python/pyfory/meta/typedef_encoder.py              |   4 +-
 python/pyfory/tests/test_cross_language.py         | 260 +++++++++++++++++++++
 4 files changed, 490 insertions(+), 4 deletions(-)

diff --git 
a/java/fory-core/src/test/java/org/apache/fory/CrossLanguageTest.java 
b/java/fory-core/src/test/java/org/apache/fory/CrossLanguageTest.java
index b47b1de89..22ac18d67 100644
--- a/java/fory-core/src/test/java/org/apache/fory/CrossLanguageTest.java
+++ b/java/fory-core/src/test/java/org/apache/fory/CrossLanguageTest.java
@@ -579,6 +579,22 @@ public class CrossLanguageTest extends ForyTestBase {
     Assert.assertEquals(fory.deserialize(Files.readAllBytes(dataFile)), obj);
   }
 
+  private void structBackwardCompatibility(Fory fory, Object obj, String 
testName)
+      throws IOException {
+    byte[] serialized = fory.serialize(obj);
+    Assert.assertEquals(fory.deserialize(serialized), obj);
+    Path dataFile = Paths.get(testName);
+    System.out.println(dataFile.toAbsolutePath());
+    Files.deleteIfExists(dataFile);
+    Files.write(dataFile, serialized);
+    dataFile.toFile().deleteOnExit();
+    ImmutableList<String> command =
+        ImmutableList.of(
+            PYTHON_EXECUTABLE, "-m", PYTHON_MODULE, testName, 
dataFile.toAbsolutePath().toString());
+    // Just test that Python can read the data - don't check round-trip
+    Assert.assertTrue(executeCommand(command, 30));
+  }
+
   private static class ComplexObject1Serializer extends 
Serializer<ComplexObject1> {
 
     public ComplexObject1Serializer(Fory fory, Class<ComplexObject1> cls) {
@@ -842,4 +858,198 @@ public class CrossLanguageTest extends ForyTestBase {
     Assert.assertEquals(xserDe(fory, a), a);
     structRoundBack(fory, a, "test_enum_field" + (compatible ? "_compatible" : 
""));
   }
+
+  @Test
+  public void testCrossLanguageMetaShare() throws Exception {
+    Fory fory =
+        Fory.builder()
+            .withLanguage(Language.XLANG)
+            .withRefTracking(true)
+            .withCompatibleMode(CompatibleMode.COMPATIBLE)
+            .requireClassRegistration(false)
+            .build();
+    fory.register(ComplexObject2.class, "test.ComplexObject2");
+
+    ComplexObject2 obj = new ComplexObject2();
+    obj.f1 = true;
+    obj.f2 = new HashMap<>(ImmutableMap.of((byte) -1, 2));
+
+    // Test with meta share enabled
+    byte[] serialized = fory.serialize(obj);
+    Assert.assertEquals(fory.deserialize(serialized), obj);
+
+    structRoundBack(fory, obj, "test_cross_language_meta_share");
+  }
+
+  @Test
+  public void testCrossLanguageMetaShareComplex() throws Exception {
+    Fory fory =
+        Fory.builder()
+            .withLanguage(Language.XLANG)
+            .withRefTracking(true)
+            .withCompatibleMode(CompatibleMode.COMPATIBLE)
+            .requireClassRegistration(false)
+            .build();
+    fory.register(ComplexObject1.class, "test.ComplexObject1");
+    fory.register(ComplexObject2.class, "test.ComplexObject2");
+
+    ComplexObject2 obj2 = new ComplexObject2();
+    obj2.f1 = true;
+    obj2.f2 = ImmutableMap.of((byte) -1, 2);
+
+    ComplexObject1 obj = new ComplexObject1();
+    obj.f1 = obj2;
+    obj.f2 = "meta_share_test";
+    obj.f3 = Arrays.asList("compatible", "mode");
+    obj.f4 = ImmutableMap.of((byte) 1, 2);
+    obj.f5 = Byte.MAX_VALUE;
+    obj.f6 = Short.MAX_VALUE;
+    obj.f7 = Integer.MAX_VALUE;
+    obj.f8 = Long.MAX_VALUE;
+    obj.f9 = 1.0f / 2;
+    obj.f10 = 1 / 3.0;
+    obj.f11 = new short[] {(short) 1, (short) 2};
+    obj.f12 = ImmutableList.of((short) -1, (short) 4);
+
+    // Test with meta share enabled
+    byte[] serialized = fory.serialize(obj);
+    Assert.assertEquals(fory.deserialize(serialized), obj);
+
+    structRoundBack(fory, obj, "test_cross_language_meta_share_complex");
+  }
+
+  // Compatibility test classes - Version 1 (original)
+  @Data
+  public static class CompatTestV1 {
+    String name;
+    Integer age;
+  }
+
+  // Compatibility test classes - Version 2 (with additional field)
+  @Data
+  public static class CompatTestV2 {
+    String name;
+    Integer age;
+    String email; // New field added
+  }
+
+  // Compatibility test classes - Version 3 (with reordered fields)
+  @Data
+  public static class CompatTestV3 {
+    Integer age; // Reordered
+    String name; // Reordered
+    String email;
+    Boolean active; // Another new field
+  }
+
+  @Test
+  public void testSchemaEvolution() throws Exception {
+    // Test simple schema evolution compatibility
+    Fory fory =
+        Fory.builder()
+            .withLanguage(Language.XLANG)
+            .withRefTracking(true)
+            .withCompatibleMode(CompatibleMode.COMPATIBLE)
+            .requireClassRegistration(false)
+            .build();
+
+    fory.register(CompatTestV1.class, "test.CompatTest");
+
+    CompatTestV1 objV1 = new CompatTestV1();
+    objV1.name = "Schema Evolution Test";
+    objV1.age = 42;
+
+    // Serialize with V1 schema
+    Assert.assertEquals(fory.deserialize(fory.serialize(objV1)), objV1);
+
+    structRoundBack(fory, objV1, "test_schema_evolution");
+  }
+
+  @Test
+  public void testBackwardCompatibility() throws Exception {
+    // Test that old version can read new data (ignoring unknown fields)
+    Fory fory =
+        Fory.builder()
+            .withLanguage(Language.XLANG)
+            .withRefTracking(true)
+            .withCompatibleMode(CompatibleMode.COMPATIBLE)
+            .requireClassRegistration(false)
+            .build();
+
+    fory.register(CompatTestV2.class, "test.CompatTest");
+
+    CompatTestV2 objV2 = new CompatTestV2();
+    objV2.name = "Bob";
+    objV2.age = 30;
+    objV2.email = "[email protected]";
+
+    // Serialize with V2 schema
+    Assert.assertEquals(fory.deserialize(fory.serialize(objV2)), objV2);
+
+    // Test: old version (V1) reads new version (V2) data
+    // Expected: V1 should successfully read name and age, ignoring email
+    structBackwardCompatibility(fory, objV2, "test_backward_compatibility");
+  }
+
+  @Test
+  public void testFieldReorderingCompatibility() throws Exception {
+    // Test that field reordering doesn't break compatibility
+    Fory fory =
+        Fory.builder()
+            .withLanguage(Language.XLANG)
+            .withRefTracking(true)
+            .withCompatibleMode(CompatibleMode.COMPATIBLE)
+            .requireClassRegistration(false)
+            .build();
+
+    fory.register(CompatTestV3.class, "test.CompatTest");
+
+    CompatTestV3 objV3 = new CompatTestV3();
+    objV3.name = "Charlie";
+    objV3.age = 35;
+    objV3.email = "[email protected]";
+    objV3.active = true;
+
+    // Serialize with V3 schema (reordered fields)
+    Assert.assertEquals(fory.deserialize(fory.serialize(objV3)), objV3);
+
+    structRoundBack(fory, objV3, "test_field_reordering_compatibility");
+  }
+
+  @Data
+  public static class CompatContainer {
+    CompatTestV1 oldObject;
+    CompatTestV2 newObject;
+  }
+
+  @Test
+  public void testCrossVersionCompatibility() throws Exception {
+    // Test mixed version compatibility in one test
+    Fory fory =
+        Fory.builder()
+            .withLanguage(Language.XLANG)
+            .withRefTracking(true)
+            .withCompatibleMode(CompatibleMode.COMPATIBLE)
+            .requireClassRegistration(false)
+            .build();
+
+    fory.register(CompatContainer.class, "test.CompatContainer");
+    fory.register(CompatTestV1.class, "test.CompatTestV1");
+    fory.register(CompatTestV2.class, "test.CompatTestV2");
+
+    CompatTestV1 v1 = new CompatTestV1();
+    v1.name = "Old Format";
+    v1.age = 20;
+
+    CompatTestV2 v2 = new CompatTestV2();
+    v2.name = "New Format";
+    v2.age = 25;
+    v2.email = "[email protected]";
+
+    CompatContainer container = new CompatContainer();
+    container.oldObject = v1;
+    container.newObject = v2;
+
+    structRoundBack(fory, container, "test_cross_version_compatibility");
+  }
 }
diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py
index da42f43c2..984a6a528 100644
--- a/python/pyfory/_registry.py
+++ b/python/pyfory/_registry.py
@@ -378,6 +378,7 @@ class TypeResolver:
                     type_id = TypeId.NAMED_STRUCT if type_id is None else 
((type_id << 8) + TypeId.STRUCT)
         elif not internal:
             type_id = TypeId.NAMED_EXT if type_id is None else ((type_id << 8) 
+ TypeId.EXT)
+
         return self.__register_type(
             cls,
             type_id=type_id,
@@ -420,8 +421,17 @@ class TypeResolver:
         internal: bool = False,
     ):
         dynamic_type = type_id is not None and type_id < 0
-        if not internal and serializer is None:
+        # In metashare mode, for struct types, we want to keep serializer=None
+        # so that _set_typeinfo will be called to create the TypeDef-based 
serializer
+        should_create_serializer = (
+            not internal
+            and serializer is None
+            and not (self.meta_share and typename is not None and type_id is 
not None and is_struct_type(type_id & 0xFF))
+        )
+
+        if should_create_serializer:
             serializer = self._create_serializer(cls)
+
         if typename is None:
             typeinfo = TypeInfo(cls, type_id, serializer, None, None, 
dynamic_type)
         else:
@@ -514,8 +524,12 @@ class TypeResolver:
         if is_struct_type(type_id):
             if self.meta_share:
                 type_def = encode_typedef(self, typeinfo.cls)
-                typeinfo.serializer = type_def.create_serializer(self)
-                typeinfo.type_def = type_def
+                if type_def is not None:
+                    typeinfo.serializer = type_def.create_serializer(self)
+                    typeinfo.type_def = type_def
+                else:
+                    # Fallback to regular serializer
+                    typeinfo.serializer = DataClassSerializer(self.fory, 
typeinfo.cls, xlang=not self.fory.is_py)
             else:
                 typeinfo.serializer = DataClassSerializer(self.fory, 
typeinfo.cls, xlang=not self.fory.is_py)
         else:
diff --git a/python/pyfory/meta/typedef_encoder.py 
b/python/pyfory/meta/typedef_encoder.py
index e3d19d22d..29b5cc843 100644
--- a/python/pyfory/meta/typedef_encoder.py
+++ b/python/pyfory/meta/typedef_encoder.py
@@ -111,7 +111,9 @@ def encode_typedef(type_resolver, cls):
         if len(splits) == 1:
             splits.insert(0, "")
         namespace, typename = splits
-    return TypeDef(namespace, typename, cls, type_id, field_infos, binary, 
is_compressed)
+
+    result = TypeDef(namespace, typename, cls, type_id, field_infos, binary, 
is_compressed)
+    return result
 
 
 def prepend_header(buffer: bytes, is_compressed: bool, has_fields_meta: bool):
diff --git a/python/pyfory/tests/test_cross_language.py 
b/python/pyfory/tests/test_cross_language.py
index 673fd47da..3c757a576 100644
--- a/python/pyfory/tests/test_cross_language.py
+++ b/python/pyfory/tests/test_cross_language.py
@@ -677,6 +677,266 @@ def test_oob_buffer(in_band_file_path, 
out_of_band_file_path):
         f.write(out_of_band_buffer.to_bytes(0, 
out_of_band_buffer.writer_index))
 
 
+@cross_language_test
+def test_cross_language_meta_share(data_file_path):
+    """Test cross-language meta sharing with ComplexObject2."""
+    fory = pyfory.Fory(language=pyfory.Language.XLANG, compatible=True, 
ref_tracking=True)
+
+    @dataclass
+    class ComplexObject2:
+        f1: Any
+        f2: Dict[pyfory.Int8Type, pyfory.Int32Type]
+
+    fory.register_type(ComplexObject2, namespace="test", 
typename="ComplexObject2")
+
+    with open(data_file_path, "rb") as f:
+        data_bytes = f.read()
+
+    debug_print(f"Reading data of length {len(data_bytes)} from 
{data_file_path}")
+
+    # Deserialize Java-generated data with meta share
+    obj = fory.deserialize(data_bytes)
+    debug_print(f"Deserialized object: {obj}")
+
+    # Verify the object structure
+    assert obj.f1 is True  # Boolean value
+    assert isinstance(obj.f2, dict)
+    assert obj.f2[-1] == 2  # Dict[Int8, Int32] with key=-1, value=2
+
+    # Serialize back with meta share
+    new_serialized = fory.serialize(obj)
+    debug_print(f"Re-serialized data length: {len(new_serialized)}")
+
+    # Verify round-trip
+    round_trip_obj = fory.deserialize(new_serialized)
+    assert round_trip_obj == obj
+
+    # Write back for Java to verify
+    with open(data_file_path, "wb") as f:
+        f.write(new_serialized)
+
+
+@cross_language_test
+def test_cross_language_meta_share_complex(data_file_path):
+    """Test cross-language meta sharing with complex nested objects."""
+    fory = pyfory.Fory(language=pyfory.Language.XLANG, compatible=True, 
ref_tracking=True)
+
+    @dataclass
+    class ComplexObject2:
+        f1: Any
+        f2: Dict[pyfory.Int8Type, pyfory.Int32Type]
+
+    @dataclass
+    class ComplexObject1:
+        f1: Any
+        f2: str
+        f3: List[str]
+        f4: Dict[pyfory.Int8Type, pyfory.Int32Type]
+        f5: pyfory.Int8Type
+        f6: pyfory.Int16Type
+        f7: pyfory.Int32Type
+        f8: pyfory.Int64Type
+        f9: pyfory.Float32Type
+        f10: pyfory.Float64Type
+        f11: pyfory.Int16ArrayType
+        f12: List[pyfory.Int16Type]
+
+    fory.register_type(ComplexObject1, namespace="test", 
typename="ComplexObject1")
+    fory.register_type(ComplexObject2, namespace="test", 
typename="ComplexObject2")
+
+    with open(data_file_path, "rb") as f:
+        data_bytes = f.read()
+
+    debug_print(f"Reading complex data of length {len(data_bytes)} from 
{data_file_path}")
+
+    # Deserialize Java-generated complex object with meta share
+    obj = fory.deserialize(data_bytes)
+    debug_print(f"Deserialized complex object: {obj}")
+
+    # Verify the nested object structure
+    assert hasattr(obj, "f1") and hasattr(obj.f1, "f1") and hasattr(obj.f1, 
"f2")
+    assert obj.f1.f1 is True
+    assert isinstance(obj.f1.f2, dict)
+    assert obj.f2 == "meta_share_test"
+    assert obj.f3 == ["compatible", "mode"]
+
+    # Serialize back with meta share
+    new_serialized = fory.serialize(obj)
+    debug_print(f"Re-serialized complex data length: {len(new_serialized)}")
+
+    # Verify round-trip
+    round_trip_obj = fory.deserialize(new_serialized)
+    assert round_trip_obj == obj
+
+    # Write back for Java to verify
+    with open(data_file_path, "wb") as f:
+        f.write(new_serialized)
+
+
+@cross_language_test
+def test_schema_evolution(data_file_path):
+    """Test schema evolution compatibility."""
+    fory = pyfory.Fory(language=pyfory.Language.XLANG, compatible=True, 
ref_tracking=True)
+
+    # Same V1 class reading V1 data - should work perfectly
+    @dataclass
+    class CompatTestV1:
+        name: str
+        age: pyfory.Int32Type  # Use specific fory type to match Java Integer
+
+    fory.register_type(CompatTestV1, namespace="test", typename="CompatTest")
+
+    with open(data_file_path, "rb") as f:
+        data_bytes = f.read()
+
+    debug_print(f"Reading schema evolution data of length {len(data_bytes)} 
from {data_file_path}")
+
+    # Deserialize V1 data into V1 object
+    obj = fory.deserialize(data_bytes)
+    debug_print(f"Deserialized V1 object: {obj}")
+
+    # Verify the object structure
+    assert obj.name == "Schema Evolution Test"
+    assert obj.age == 42
+
+    # Test round trip
+    round_trip_obj = fory.deserialize(fory.serialize(obj))
+    assert round_trip_obj == obj
+
+    # Serialize back
+    new_serialized = fory.serialize(obj)
+    debug_print(f"Re-serialized data length: {len(new_serialized)}")
+
+    # Write back for Java to verify
+    with open(data_file_path, "wb") as f:
+        f.write(new_serialized)
+
+
+@cross_language_test
+def test_backward_compatibility(data_file_path):
+    """Test backward compatibility - old version reading new data."""
+    fory = pyfory.Fory(language=pyfory.Language.XLANG, compatible=True, 
ref_tracking=True)
+
+    # Version 1 class (original) reading Version 2 data (should ignore unknown 
fields)
+    @dataclass
+    class CompatTestV1:
+        name: str
+        age: pyfory.Int32Type
+
+    fory.register_type(CompatTestV1, namespace="test", typename="CompatTest")
+
+    with open(data_file_path, "rb") as f:
+        data_bytes = f.read()
+
+    debug_print(f"Reading V2 data of length {len(data_bytes)} from 
{data_file_path}")
+
+    # Deserialize V2 data into V1 object (ignoring email field)
+    obj = fory.deserialize(data_bytes)
+    debug_print(f"Deserialized V1 object from V2 data: {obj}")
+
+    # Verify the object structure - only V1 fields should be present
+    assert obj.name == "Bob"
+    assert obj.age == 30
+    # email field should be ignored/not present
+
+    # Serialize back with V1 schema
+    new_serialized = fory.serialize(obj)
+    debug_print(f"Re-serialized V1 data length: {len(new_serialized)}")
+
+    # Write back for Java to verify
+    with open(data_file_path, "wb") as f:
+        f.write(new_serialized)
+
+
+@cross_language_test
+def test_field_reordering_compatibility(data_file_path):
+    """Test field reordering compatibility in metashare mode."""
+    fory = pyfory.Fory(language=pyfory.Language.XLANG, ref_tracking=True, 
compatible=True)
+
+    # Version 3 class with reordered fields matching Java CompatTestV3
+    @dataclass
+    class CompatTestV3:
+        age: pyfory.Int32Type  # Reordered (was second in V1)
+        name: str  # Reordered (was first in V1)
+        email: str
+        active: bool  # New field
+
+    fory.register_type(CompatTestV3, namespace="test", typename="CompatTest")
+
+    with open(data_file_path, "rb") as f:
+        data_bytes = f.read()
+
+    debug_print(f"Reading V3 reordered data of length {len(data_bytes)} from 
{data_file_path}")
+
+    # Deserialize V3 data into V3 object
+    obj = fory.deserialize(data_bytes)
+    debug_print(f"Deserialized V3 object from V3 reordered data: {obj}")
+
+    # Verify the object structure
+    assert obj.age == 35
+    assert obj.name == "Charlie"
+    assert obj.email == "[email protected]"
+    assert obj.active
+
+    # Serialize back with V3 schema
+    new_serialized = fory.serialize(obj)
+    debug_print(f"Re-serialized V3 data length: {len(new_serialized)}")
+
+    # Write back for Java to verify
+    with open(data_file_path, "wb") as f:
+        f.write(new_serialized)
+
+
+@cross_language_test
+def test_cross_version_compatibility(data_file_path):
+    """Test mixed version compatibility."""
+    fory = pyfory.Fory(language=pyfory.Language.XLANG, compatible=True, 
ref_tracking=True)
+
+    @dataclass
+    class CompatTestV1:
+        name: str
+        age: pyfory.Int32Type
+
+    @dataclass
+    class CompatTestV2:
+        name: str
+        age: pyfory.Int32Type
+        email: str = "[email protected]"
+
+    @dataclass
+    class CompatContainer:
+        oldObject: CompatTestV1
+        newObject: CompatTestV2
+
+    fory.register_type(CompatContainer, namespace="test", 
typename="CompatContainer")
+    fory.register_type(CompatTestV1, namespace="test", typename="CompatTestV1")
+    fory.register_type(CompatTestV2, namespace="test", typename="CompatTestV2")
+
+    with open(data_file_path, "rb") as f:
+        data_bytes = f.read()
+
+    debug_print(f"Reading mixed version data of length {len(data_bytes)} from 
{data_file_path}")
+
+    # Deserialize mixed version container
+    obj = fory.deserialize(data_bytes)
+    debug_print(f"Deserialized mixed version container: {obj}")
+
+    # Verify the nested objects
+    assert obj.oldObject.name == "Old Format"
+    assert obj.oldObject.age == 20
+    assert obj.newObject.name == "New Format"
+    assert obj.newObject.age == 25
+    assert obj.newObject.email == "[email protected]"
+
+    # Serialize back
+    new_serialized = fory.serialize(obj)
+    debug_print(f"Re-serialized mixed version data length: 
{len(new_serialized)}")
+
+    # Write back for Java to verify
+    with open(data_file_path, "wb") as f:
+        f.write(new_serialized)
+
+
 if __name__ == "__main__":
     """This file will be executed by CrossLanguageTest.java in 
fory-core/fory-format module and
     fory_xlang_test.go in go/fory module


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to