This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git


The following commit(s) were added to refs/heads/main by this push:
     new 3f16c30c fix(python): correct compute_string_hash usage in StructHashVisitor.visit_customized (#2335)
3f16c30c is described below

commit 3f16c30c8c5f45693e772af4ccad53005b1837a2
Author: lzaeh <[email protected]>
AuthorDate: Fri Jun 13 17:33:29 2025 +0800

    fix(python): correct compute_string_hash usage in StructHashVisitor.visit_customized (#2335)
    
    <!--
    **Thanks for contributing to Fory.**
    
    **If this is your first time opening a PR on fory, you can refer to
    [CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).**
    
    Contribution Checklist
    
    - The **Apache Fory (incubating)** community has restrictions on the
    naming of pr titles. You can also find instructions in
    [CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).
    
    - Fory has a strong focus on performance. If the PR you submit will have
    an impact on performance, please benchmark it first and provide the
    benchmark result here.
    -->
    
    ## What does this PR do?
    
    Fixed the incorrect arguments passed to compute_string_hash in
    StructHashVisitor.visit_customized when calculating the hash value.
    - Aligned this change with the Java implementation after consulting the
    project founder.
    - Exposed Cython’s TypeResolver in Python so its internal _resolver
    could provide the namespace and typename decoders.
    - Marked the _resolver field as readonly to allow external read access
    without permitting modification.
    - Added the issue author’s example to the test suite for validation.
    - Noted in the test file comments that objects with self-references
    cannot be compared using ==; instead, comparison of serialized data is
    recommended.
    
    <!-- Describe the purpose of this PR. -->
    
    ## Related issues
    close #2332
    <!--
    Is there any related issue? Please attach here.
    
    - #xxxx0
    - #xxxx1
    - #xxxx2
    -->
    
    ## Does this PR introduce any user-facing change?
    
    <!--
    If any user-facing interface changes, please [open an
    issue](https://github.com/apache/fory/issues/new/choose) describing the
    need to do so and update the document if necessary.
    -->
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
    
    <!--
    When the PR has an impact on performance (if you don't know whether the
    PR will have an impact on performance, you can submit the PR first, and
    if it will have an impact on performance, the code reviewer will explain
    it), be sure to attach benchmark data here.
    -->
---
 python/pyfory/_registry.py                 | 13 ++++++++++++-
 python/pyfory/_serialization.pyx           | 15 +++++++++++++++
 python/pyfory/_struct.py                   |  4 +++-
 python/pyfory/tests/test_cross_language.py | 29 +++++++++++++++++++++++++++++
 4 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py
index 6c3790c8..e19010be 100644
--- a/python/pyfory/_registry.py
+++ b/python/pyfory/_registry.py
@@ -84,7 +84,8 @@ except ImportError:
     np = None
 
 logger = logging.getLogger(__name__)
-
+namespace_decoder = MetaStringDecoder(".", "_")
+typename_decoder = MetaStringDecoder("$", "_")
 
 if ENABLE_FORY_CYTHON_SERIALIZATION:
     from pyfory._serialization import TypeInfo
@@ -122,6 +123,16 @@ else:
                 f"serializer={self.serializer})"
             )
 
+        def decode_namespace(self) -> str:
+            if self.namespace_bytes is None:
+                return ""
+            return self.namespace_bytes.decode(namespace_decoder)
+
+        def decode_typename(self) -> str:
+            if self.typename_bytes is None:
+                return ""
+            return self.typename_bytes.decode(typename_decoder)
+
 
 class TypeResolver:
     __slots__ = (
diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx
index f635dcc9..be6f7ada 100644
--- a/python/pyfory/_serialization.pyx
+++ b/python/pyfory/_serialization.pyx
@@ -52,6 +52,7 @@ from libcpp.utility cimport pair
 from cython.operator cimport dereference as deref
 from pyfory._util cimport Buffer
 from pyfory.includes.libabsl cimport flat_hash_map
+from pyfory.meta.metastring import MetaStringDecoder
 
 try:
     import numpy as np
@@ -83,6 +84,10 @@ cdef int8_t REF_FLAG = -2
 cdef int8_t NOT_NULL_VALUE_FLAG = -1
 # this flag indicates that the object is a referencable and first read.
 cdef int8_t REF_VALUE_FLAG = 0
+# Global MetaString decoder for namespace bytes to str
+namespace_decoder = MetaStringDecoder(".", "_")
+# Global MetaString decoder for typename bytes to str
+typename_decoder = MetaStringDecoder("$", "_")
 
 
 @cython.final
@@ -415,6 +420,16 @@ cdef class TypeInfo:
         return f"TypeInfo(cls={self.cls}, type_id={self.type_id}, " \
                f"serializer={self.serializer})"
 
+    cpdef str decode_namespace(self):
+        if self.namespace_bytes is None:
+            return ""
+        return self.namespace_bytes.decode(namespace_decoder)
+
+    cpdef str decode_typename(self):
+        if self.typename_bytes is None:
+            return ""
+        return self.typename_bytes.decode(typename_decoder)
+
 
 @cython.final
 cdef class TypeResolver:
diff --git a/python/pyfory/_struct.py b/python/pyfory/_struct.py
index e71be2ab..92e33b53 100644
--- a/python/pyfory/_struct.py
+++ b/python/pyfory/_struct.py
@@ -259,7 +259,9 @@ class StructHashVisitor(TypeVisitor):
         if typeinfo is not None:
             hash_value = typeinfo.type_id
             if TypeId.is_namespaced_type(typeinfo.type_id):
-                hash_value = compute_string_hash(typeinfo.namespace + typeinfo.typename)
+                namespace_str = typeinfo.decode_namespace()
+                typename_str = typeinfo.decode_typename()
+                hash_value = compute_string_hash(namespace_str + typename_str)
         self._hash = self._compute_field_hash(self._hash, hash_value)
 
     def visit_other(self, field_name, type_, types_path=None):
diff --git a/python/pyfory/tests/test_cross_language.py b/python/pyfory/tests/test_cross_language.py
index f04627ce..d5a066c6 100644
--- a/python/pyfory/tests/test_cross_language.py
+++ b/python/pyfory/tests/test_cross_language.py
@@ -466,6 +466,35 @@ def test_serialize_simple_struct(data_file_path):
     struct_round_back(data_file_path, fory, obj)
 
 
+class SomeClass:
+    f1: "SomeClass"
+    f2: Dict[str, str]
+    f3: Dict[str, str]
+
+
+def test_custom_class_roundtrip():
+    fory = pyfory.Fory(ref_tracking=True)
+    fory.register_type(SomeClass, typename="example.SomeClass")
+    obj1 = SomeClass()
+    obj1.f2 = {"k1": "v1", "k2": "v2"}
+    obj1.f1, obj1.f3 = obj1, obj1.f2
+    data1 = fory.serialize(obj1)
+    obj2 = fory.deserialize(data1)
+    data2 = fory.serialize(obj2)
+    assert data1 == data2
+    # bytes can be data serialized by other languages.
+    # due to the self-referential nature of this object,
+    # direct `==` comparison will fail.
+    # 1. Serialize `obj1` to `data1`
+    # 2. Deserialize `data1` to `obj2`
+    # 3. Serialize `obj2` to `data2`
+    # If `data1 == data2`, the round-trip preserves value equivalence.
+    # print(data1)
+    # print(data2)
+    # print(obj1)
+    # print(obj2)
+
+
 class EnumTestClass(enum.Enum):
     FOO = 0
     BAR = 1


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to