This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git


The following commit(s) were added to refs/heads/main by this push:
     new b01b7b4b perf(python): Enhance the Python benchmark script (#1926)
b01b7b4b is described below

commit b01b7b4b99cf631c2008bc5e5f494304e4475ef0
Author: penguin_wwy <[email protected]>
AuthorDate: Tue Nov 5 19:13:45 2024 +0800

    perf(python): Enhance the Python benchmark script (#1926)
    
    ## What does this PR do?
    
    Enhance the Python benchmark script to support more parameter options
    
    ## Related issues
    
    ## Does this PR introduce any user-facing change?
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
---
 integration_tests/cpython_benchmark/README.md      | 10 +++
 .../cpython_benchmark/fury_benchmark.py            | 82 ++++++++++++++++++++--
 python/pyfury/_fury.py                             |  2 +-
 3 files changed, 86 insertions(+), 8 deletions(-)

diff --git a/integration_tests/cpython_benchmark/README.md b/integration_tests/cpython_benchmark/README.md
index 3d482816..1fb37f08 100644
--- a/integration_tests/cpython_benchmark/README.md
+++ b/integration_tests/cpython_benchmark/README.md
@@ -18,6 +18,16 @@ Step 3: Execute the benchmark script
 python fury_benchmark.py
 ```
 
+### fury options
+
+`--xlang` use cross-language (xlang) mode; without it, Python mode is used
+
+`--no-ref` disable reference tracking (reference tracking is enabled unless this flag is given)
+
+`--disable-cython` disable cython serialization
+
+### pyperf options
+
 `--affinity CPU_LIST` specify CPU affinity for worker processes
 
 `-o FILENAME, --output FILENAME` write results encoded to JSON into FILENAME
diff --git a/integration_tests/cpython_benchmark/fury_benchmark.py b/integration_tests/cpython_benchmark/fury_benchmark.py
index fb8bec59..e83f64e6 100644
--- a/integration_tests/cpython_benchmark/fury_benchmark.py
+++ b/integration_tests/cpython_benchmark/fury_benchmark.py
@@ -15,10 +15,15 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import argparse
+import array
+from dataclasses import dataclass
 import datetime
+import os
 import random
 import sys
-from pyfury import Fury, Language
+from typing import Any, Dict, List
+import pyfury
 import pyperf
 
 
@@ -103,18 +108,81 @@ random_source = random.Random(5)
 DICT_GROUP = [mutate_dict(DICT, random_source) for _ in range(3)]
 
 
-def fury_python_object(obj):
-    fury = Fury(language=Language.PYTHON, ref_tracking=True)
+@dataclass
+class ComplexObject1:
+    f1: Any = None
+    f2: str = None
+    f3: List[str] = None
+    f4: Dict[pyfury.Int8Type, pyfury.Int32Type] = None
+    f5: pyfury.Int8Type = None
+    f6: pyfury.Int16Type = None
+    f7: pyfury.Int32Type = None
+    f8: pyfury.Int64Type = None
+    f9: pyfury.Float32Type = None
+    f10: pyfury.Float64Type = None
+    f11: pyfury.Int16ArrayType = None
+    f12: List[pyfury.Int16Type] = None
+
+
+@dataclass
+class ComplexObject2:
+    f1: Any
+    f2: Dict[pyfury.Int8Type, pyfury.Int32Type]
+
+
+COMPLEX_OBJECT = ComplexObject1(
+    f1=ComplexObject2(f1=True, f2={-1: 2}),
+    f2="abc",
+    f3=["abc", "abc"],
+    f4={1: 2},
+    f5=2**7 - 1,
+    f6=2**15 - 1,
+    f7=2**31 - 1,
+    f8=2**63 - 1,
+    f9=1.0 / 2,
+    f10=1 / 3.0,
+    f11=array.array("h", [1, 2]),
+    f12=[-1, 4],
+)
+
+
+def fury_object(language, ref_tracking, obj):
+    fury = pyfury.Fury(language=language, ref_tracking=ref_tracking)
     binary = fury.serialize(obj)
     fury.deserialize(binary)
 
 
+def benchmark_args():
+    parser = argparse.ArgumentParser(description="Fury Benchmark")
+    parser.add_argument("--xlang", action="store_true", default=False)
+    parser.add_argument("--no-ref", action="store_true", default=False)
+    parser.add_argument("--disable-cython", action="store_true", default=False)
+
+    if "--help" in sys.argv:
+        parser.print_help()
+        return None
+    args, unknown_args = parser.parse_known_args()
+    sys.argv = sys.argv[:1] + unknown_args
+    return args
+
+
 def micro_benchmark():
+    args = benchmark_args()
     runner = pyperf.Runner()
-    runner.bench_func("fury_dict", fury_python_object, DICT)
-    runner.bench_func("fury_dict_group", fury_python_object, DICT_GROUP)
-    runner.bench_func("fury_tuple", fury_python_object, TUPLE)
-    runner.bench_func("fury_list", fury_python_object, LIST)
+    if args.disable_cython:
+        os.environ["ENABLE_FURY_CYTHON_SERIALIZATION"] = "0"
+        sys.argv += ["--inherit-environ", "ENABLE_FURY_CYTHON_SERIALIZATION"]
+    runner.parse_args()
+    language = pyfury.Language.XLANG if args.xlang else pyfury.Language.PYTHON
+    runner.bench_func("fury_dict", fury_object, language, not args.no_ref, DICT)
+    runner.bench_func(
+        "fury_dict_group", fury_object, language, not args.no_ref, DICT_GROUP
+    )
+    runner.bench_func("fury_tuple", fury_object, language, not args.no_ref, TUPLE)
+    runner.bench_func("fury_list", fury_object, language, not args.no_ref, LIST)
+    runner.bench_func(
+        "fury_complex", fury_object, language, not args.no_ref, COMPLEX_OBJECT
+    )
 
 
 if __name__ == "__main__":
diff --git a/python/pyfury/_fury.py b/python/pyfury/_fury.py
index 19a624ed..13044864 100644
--- a/python/pyfury/_fury.py
+++ b/python/pyfury/_fury.py
@@ -106,7 +106,7 @@ class MetaStringBytes:
         self.data = data
         self.length = len(data)
         if hashcode is None:
-            hashcode = mmh3.hash_buffer(data, 47)[0] & 0xFFFFFFFFFFFFFF00
+            hashcode = (mmh3.hash_buffer(data, 47)[0] >> 8) << 8
         self.hashcode = hashcode
         self.dynamic_write_string_id = DEFAULT_DYNAMIC_WRITE_STRING_ID
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to