This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push:
new b01b7b4b perf(python): Enhance the Python benchmark script (#1926)
b01b7b4b is described below
commit b01b7b4b99cf631c2008bc5e5f494304e4475ef0
Author: penguin_wwy <[email protected]>
AuthorDate: Tue Nov 5 19:13:45 2024 +0800
perf(python): Enhance the Python benchmark script (#1926)
## What does this PR do?
Enhance the Python benchmark script to support more parameter options
## Related issues
## Does this PR introduce any user-facing change?
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
---
integration_tests/cpython_benchmark/README.md | 10 +++
.../cpython_benchmark/fury_benchmark.py | 82 ++++++++++++++++++++--
python/pyfury/_fury.py | 2 +-
3 files changed, 86 insertions(+), 8 deletions(-)
diff --git a/integration_tests/cpython_benchmark/README.md
b/integration_tests/cpython_benchmark/README.md
index 3d482816..1fb37f08 100644
--- a/integration_tests/cpython_benchmark/README.md
+++ b/integration_tests/cpython_benchmark/README.md
@@ -18,6 +18,16 @@ Step 3: Execute the benchmark script
python fury_benchmark.py
```
+### fury options
+
+`--xlang` use cross-language (xlang) mode; without this flag, Python mode is used
+
+`--no-ref` disable reference tracking; without this flag, ref tracking is enabled
+
+`--disable-cython` disable cython serialization
+
+### pyperf options
+
`--affinity CPU_LIST` specify CPU affinity for worker processes
`-o FILENAME, --output FILENAME` write results encoded to JSON into FILENAME
diff --git a/integration_tests/cpython_benchmark/fury_benchmark.py
b/integration_tests/cpython_benchmark/fury_benchmark.py
index fb8bec59..e83f64e6 100644
--- a/integration_tests/cpython_benchmark/fury_benchmark.py
+++ b/integration_tests/cpython_benchmark/fury_benchmark.py
@@ -15,10 +15,15 @@
# specific language governing permissions and limitations
# under the License.
+import argparse
+import array
+from dataclasses import dataclass
import datetime
+import os
import random
import sys
-from pyfury import Fury, Language
+from typing import Any, Dict, List
+import pyfury
import pyperf
@@ -103,18 +108,81 @@ random_source = random.Random(5)
DICT_GROUP = [mutate_dict(DICT, random_source) for _ in range(3)]
-def fury_python_object(obj):
- fury = Fury(language=Language.PYTHON, ref_tracking=True)
+@dataclass
+class ComplexObject1:
+ f1: Any = None
+ f2: str = None
+ f3: List[str] = None
+ f4: Dict[pyfury.Int8Type, pyfury.Int32Type] = None
+ f5: pyfury.Int8Type = None
+ f6: pyfury.Int16Type = None
+ f7: pyfury.Int32Type = None
+ f8: pyfury.Int64Type = None
+ f9: pyfury.Float32Type = None
+ f10: pyfury.Float64Type = None
+ f11: pyfury.Int16ArrayType = None
+ f12: List[pyfury.Int16Type] = None
+
+
+@dataclass
+class ComplexObject2:
+ f1: Any
+ f2: Dict[pyfury.Int8Type, pyfury.Int32Type]
+
+
+COMPLEX_OBJECT = ComplexObject1(
+ f1=ComplexObject2(f1=True, f2={-1: 2}),
+ f2="abc",
+ f3=["abc", "abc"],
+ f4={1: 2},
+ f5=2**7 - 1,
+ f6=2**15 - 1,
+ f7=2**31 - 1,
+ f8=2**63 - 1,
+ f9=1.0 / 2,
+ f10=1 / 3.0,
+ f11=array.array("h", [1, 2]),
+ f12=[-1, 4],
+)
+
+
+def fury_object(language, ref_tracking, obj):
+ fury = pyfury.Fury(language=language, ref_tracking=ref_tracking)
binary = fury.serialize(obj)
fury.deserialize(binary)
+def benchmark_args():
+ parser = argparse.ArgumentParser(description="Fury Benchmark")
+ parser.add_argument("--xlang", action="store_true", default=False)
+ parser.add_argument("--no-ref", action="store_true", default=False)
+ parser.add_argument("--disable-cython", action="store_true", default=False)
+
+ if "--help" in sys.argv:
+ parser.print_help()
+ return None
+ args, unknown_args = parser.parse_known_args()
+ sys.argv = sys.argv[:1] + unknown_args
+ return args
+
+
def micro_benchmark():
+ args = benchmark_args()
runner = pyperf.Runner()
- runner.bench_func("fury_dict", fury_python_object, DICT)
- runner.bench_func("fury_dict_group", fury_python_object, DICT_GROUP)
- runner.bench_func("fury_tuple", fury_python_object, TUPLE)
- runner.bench_func("fury_list", fury_python_object, LIST)
+ if args.disable_cython:
+ os.environ["ENABLE_FURY_CYTHON_SERIALIZATION"] = "0"
+ sys.argv += ["--inherit-environ", "ENABLE_FURY_CYTHON_SERIALIZATION"]
+ runner.parse_args()
+ language = pyfury.Language.XLANG if args.xlang else pyfury.Language.PYTHON
+ runner.bench_func("fury_dict", fury_object, language, not args.no_ref,
DICT)
+ runner.bench_func(
+ "fury_dict_group", fury_object, language, not args.no_ref, DICT_GROUP
+ )
+ runner.bench_func("fury_tuple", fury_object, language, not args.no_ref,
TUPLE)
+ runner.bench_func("fury_list", fury_object, language, not args.no_ref,
LIST)
+ runner.bench_func(
+ "fury_complex", fury_object, language, not args.no_ref, COMPLEX_OBJECT
+ )
if __name__ == "__main__":
diff --git a/python/pyfury/_fury.py b/python/pyfury/_fury.py
index 19a624ed..13044864 100644
--- a/python/pyfury/_fury.py
+++ b/python/pyfury/_fury.py
@@ -106,7 +106,7 @@ class MetaStringBytes:
self.data = data
self.length = len(data)
if hashcode is None:
- hashcode = mmh3.hash_buffer(data, 47)[0] & 0xFFFFFFFFFFFFFF00
+ hashcode = (mmh3.hash_buffer(data, 47)[0] >> 8) << 8
self.hashcode = hashcode
self.dynamic_write_string_id = DEFAULT_DYNAMIC_WRITE_STRING_ID
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]