This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git
The following commit(s) were added to refs/heads/main by this push:
new 3f16c30c fix(python): correct compute_string_hash usage in
StructHashVisitor.visit_customized (#2335)
3f16c30c is described below
commit 3f16c30c8c5f45693e772af4ccad53005b1837a2
Author: lzaeh <[email protected]>
AuthorDate: Fri Jun 13 17:33:29 2025 +0800
fix(python): correct compute_string_hash usage in
StructHashVisitor.visit_customized (#2335)
<!--
**Thanks for contributing to Fory.**
**If this is your first time opening a PR on fory, you can refer to
[CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).**
Contribution Checklist
- The **Apache Fory (incubating)** community has restrictions on the
naming of PR titles. You can also find instructions in
[CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).
- Fory has a strong focus on performance. If the PR you submit will have
an impact on performance, please benchmark it first and provide the
benchmark result here.
-->
## What does this PR do?
Fixed the incorrect arguments passed to compute_string_hash in
StructHashVisitor.visit_customized when calculating the hash value.
- Aligned this change with the Java implementation after consulting the
project founder.
- Exposed Cython’s TypeResolver in Python so its internal _resolver
could provide the namespace and typename decoders.
- Marked the _resolver field as readonly to allow external read access
without permitting modification.
- Added the issue author’s example to the test suite for validation.
- Noted in the test file comments that objects with self-references
cannot be compared using ==; instead, comparison of serialized data is
recommended.
<!-- Describe the purpose of this PR. -->
## Related issues
close #2332
<!--
Is there any related issue? Please attach here.
- #xxxx0
- #xxxx1
- #xxxx2
-->
## Does this PR introduce any user-facing change?
<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fory/issues/new/choose) describing the
need to do so and update the documentation if necessary.
-->
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
<!--
When the PR has an impact on performance (if you don't know whether the
PR will have an impact on performance, you can submit the PR first, and
if it will have an impact on performance, the code reviewer will explain
it), be sure to attach benchmark data here.
-->
---
python/pyfory/_registry.py | 13 ++++++++++++-
python/pyfory/_serialization.pyx | 15 +++++++++++++++
python/pyfory/_struct.py | 4 +++-
python/pyfory/tests/test_cross_language.py | 29 +++++++++++++++++++++++++++++
4 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py
index 6c3790c8..e19010be 100644
--- a/python/pyfory/_registry.py
+++ b/python/pyfory/_registry.py
@@ -84,7 +84,8 @@ except ImportError:
np = None
logger = logging.getLogger(__name__)
-
+namespace_decoder = MetaStringDecoder(".", "_")
+typename_decoder = MetaStringDecoder("$", "_")
if ENABLE_FORY_CYTHON_SERIALIZATION:
from pyfory._serialization import TypeInfo
@@ -122,6 +123,16 @@ else:
f"serializer={self.serializer})"
)
+ def decode_namespace(self) -> str:
+ if self.namespace_bytes is None:
+ return ""
+ return self.namespace_bytes.decode(namespace_decoder)
+
+ def decode_typename(self) -> str:
+ if self.typename_bytes is None:
+ return ""
+ return self.typename_bytes.decode(typename_decoder)
+
class TypeResolver:
__slots__ = (
diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx
index f635dcc9..be6f7ada 100644
--- a/python/pyfory/_serialization.pyx
+++ b/python/pyfory/_serialization.pyx
@@ -52,6 +52,7 @@ from libcpp.utility cimport pair
from cython.operator cimport dereference as deref
from pyfory._util cimport Buffer
from pyfory.includes.libabsl cimport flat_hash_map
+from pyfory.meta.metastring import MetaStringDecoder
try:
import numpy as np
@@ -83,6 +84,10 @@ cdef int8_t REF_FLAG = -2
cdef int8_t NOT_NULL_VALUE_FLAG = -1
# this flag indicates that the object is a referencable and first read.
cdef int8_t REF_VALUE_FLAG = 0
+# Global MetaString decoder for namespace bytes to str
+namespace_decoder = MetaStringDecoder(".", "_")
+# Global MetaString decoder for typename bytes to str
+typename_decoder = MetaStringDecoder("$", "_")
@cython.final
@@ -415,6 +420,16 @@ cdef class TypeInfo:
return f"TypeInfo(cls={self.cls}, type_id={self.type_id}, " \
f"serializer={self.serializer})"
+ cpdef str decode_namespace(self):
+ if self.namespace_bytes is None:
+ return ""
+ return self.namespace_bytes.decode(namespace_decoder)
+
+ cpdef str decode_typename(self):
+ if self.typename_bytes is None:
+ return ""
+ return self.typename_bytes.decode(typename_decoder)
+
@cython.final
cdef class TypeResolver:
diff --git a/python/pyfory/_struct.py b/python/pyfory/_struct.py
index e71be2ab..92e33b53 100644
--- a/python/pyfory/_struct.py
+++ b/python/pyfory/_struct.py
@@ -259,7 +259,9 @@ class StructHashVisitor(TypeVisitor):
if typeinfo is not None:
hash_value = typeinfo.type_id
if TypeId.is_namespaced_type(typeinfo.type_id):
- hash_value = compute_string_hash(typeinfo.namespace + typeinfo.typename)
+ namespace_str = typeinfo.decode_namespace()
+ typename_str = typeinfo.decode_typename()
+ hash_value = compute_string_hash(namespace_str + typename_str)
self._hash = self._compute_field_hash(self._hash, hash_value)
def visit_other(self, field_name, type_, types_path=None):
diff --git a/python/pyfory/tests/test_cross_language.py b/python/pyfory/tests/test_cross_language.py
index f04627ce..d5a066c6 100644
--- a/python/pyfory/tests/test_cross_language.py
+++ b/python/pyfory/tests/test_cross_language.py
@@ -466,6 +466,35 @@ def test_serialize_simple_struct(data_file_path):
struct_round_back(data_file_path, fory, obj)
+class SomeClass:
+ f1: "SomeClass"
+ f2: Dict[str, str]
+ f3: Dict[str, str]
+
+
+def test_custom_class_roundtrip():
+ fory = pyfory.Fory(ref_tracking=True)
+ fory.register_type(SomeClass, typename="example.SomeClass")
+ obj1 = SomeClass()
+ obj1.f2 = {"k1": "v1", "k2": "v2"}
+ obj1.f1, obj1.f3 = obj1, obj1.f2
+ data1 = fory.serialize(obj1)
+ obj2 = fory.deserialize(data1)
+ data2 = fory.serialize(obj2)
+ assert data1 == data2
+ # bytes can be data serialized by other languages.
+ # due to the self-referential nature of this object,
+ # direct `==` comparison will fail.
+ # 1. Serialize `obj1` to `data1`
+ # 2. Deserialize `data1` to `obj2`
+ # 3. Serialize `obj2` to `data2`
+ # If `data1 == data2`, the round-trip preserves value equivalence.
+ # print(data1)
+ # print(data2)
+ # print(obj1)
+ # print(obj2)
+
+
class EnumTestClass(enum.Enum):
FOO = 0
BAR = 1
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]