This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push:
new 40a7b259 perf(python): Optimize sequence buffer writing via the c-api
(#2078)
40a7b259 is described below
commit 40a7b2595c268f813a965b2fcc44fa1c989bc1a5
Author: penguin_wwy <[email protected]>
AuthorDate: Fri Feb 21 18:40:33 2025 +0800
perf(python): Optimize sequence buffer writing via the c-api (#2078)
## What does this PR do?
Optimize serialization of float and bool sequences (exact `list`/`tuple`) by writing their elements into the buffer directly through the CPython C API.
## Related issues
## Does this PR introduce any user-facing change?
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
```
fury_large_float_tuple: Mean +- std dev: [base] 32.8 ms +- 0.9 ms -> [opt]
28.4 ms +- 0.8 ms: 1.16x faster
fury_large_boolean_tuple: Mean +- std dev: [base] 22.1 ms +- 0.2 ms ->
[opt] 13.6 ms +- 0.6 ms: 1.63x faster
Geometric mean: 1.37x faster
```
---
BUILD | 1 +
cpp/fury/python/BUILD | 33 ++++++++++++
cpp/fury/python/pyfury.cc | 61 ++++++++++++++++++++++
cpp/fury/python/pyfury.h | 29 ++++++++++
.../cpython_benchmark/fury_benchmark.py | 17 ++++++
python/pyfury/_serialization.pyx | 26 +++++++--
python/pyfury/includes/libserialization.pxd | 5 ++
7 files changed, 167 insertions(+), 5 deletions(-)
diff --git a/BUILD b/BUILD
index 37fa36df..d50feb9f 100644
--- a/BUILD
+++ b/BUILD
@@ -64,6 +64,7 @@ pyx_library(
deps = [
"//cpp/fury/util:fury_util",
"//cpp/fury/type:fury_type",
+ "//cpp/fury/python:_pyfury",
"@com_google_absl//absl/container:flat_hash_map",
],
)
diff --git a/cpp/fury/python/BUILD b/cpp/fury/python/BUILD
new file mode 100644
index 00000000..36a01038
--- /dev/null
+++ b/cpp/fury/python/BUILD
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
+load("@com_github_grpc_grpc//bazel:cython_library.bzl", "pyx_library")
+
+# C++ helpers that copy Python list/tuple contents into a fury buffer through
+# the CPython C API.
+cc_library(
+    name = "_pyfury",
+    srcs = ["pyfury.cc"],
+    hdrs = ["pyfury.h"],
+    # Headers are included as "fury/python/pyfury.h", i.e. without the
+    # leading "cpp/" directory.
+    strip_include_prefix = "/cpp",
+    visibility = ["//visibility:public"],
+    alwayslink = True,
+    linkstatic = True,
+    deps = [
+        "//cpp/fury/util:fury_util",
+        "@local_config_python//:python_headers",
+    ],
+)
\ No newline at end of file
diff --git a/cpp/fury/python/pyfury.cc b/cpp/fury/python/pyfury.cc
new file mode 100644
index 00000000..a9ca00a6
--- /dev/null
+++ b/cpp/fury/python/pyfury.cc
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "fury/python/pyfury.h"
+
+// Returns the item array of `collection` when it is exactly a list or exactly
+// a tuple, and nullptr for every other object. The returned pointer refers to
+// storage owned by `collection`; nothing is allocated or copied here.
+static PyObject **PySequenceGetItems(PyObject *collection) {
+  if (PyTuple_CheckExact(collection)) {
+    return reinterpret_cast<PyTupleObject *>(collection)->ob_item;
+  }
+  if (PyList_CheckExact(collection)) {
+    return reinterpret_cast<PyListObject *>(collection)->ob_item;
+  }
+  return nullptr;
+}
+
+namespace fury {
+
+/// Writes the elements of an exact list/tuple as consecutive `bool` values
+/// into `buffer`, starting at byte offset `start_index`.
+///
+/// An element is stored as `true` only when it is the `Py_True` singleton;
+/// every other object is stored as `false`. Stores go through
+/// `Buffer::UnsafePut`, so the caller is expected to have reserved
+/// `Py_SIZE(collection) * sizeof(bool)` bytes beforehand.
+///
+/// Returns 0 on success, or -1 when `collection` or `buffer` is null,
+/// `start_index` is negative, or `collection` is not an exact list or tuple.
+/// No Python exception is set on failure.
+int Fury_PyBooleanSequenceWriteToBuffer(PyObject *collection, Buffer *buffer,
+                                        Py_ssize_t start_index) {
+  if (collection == nullptr || buffer == nullptr || start_index < 0) {
+    return -1;
+  }
+  PyObject **items = PySequenceGetItems(collection);
+  if (items == nullptr) {
+    return -1;
+  }
+  Py_ssize_t size = Py_SIZE(collection);
+  for (Py_ssize_t i = 0; i < size; i++) {
+    bool b = items[i] == Py_True;
+    buffer->UnsafePut(start_index, b);
+    start_index += sizeof(bool);
+  }
+  return 0;
+}
+
+/// Writes the elements of an exact list/tuple of Python floats as consecutive
+/// `double` values into `buffer`, starting at byte offset `start_index`.
+///
+/// Stores go through `Buffer::UnsafePut`, so the caller is expected to have
+/// reserved `Py_SIZE(collection) * sizeof(double)` bytes beforehand.
+///
+/// Returns 0 on success, or -1 when `collection` or `buffer` is null,
+/// `start_index` is negative, `collection` is not an exact list or tuple, or
+/// an element is not a float object. When a non-float element is found, the
+/// elements before it have already been written. No Python exception is set
+/// on failure.
+int Fury_PyFloatSequenceWriteToBuffer(PyObject *collection, Buffer *buffer,
+                                      Py_ssize_t start_index) {
+  if (collection == nullptr || buffer == nullptr || start_index < 0) {
+    return -1;
+  }
+  PyObject **items = PySequenceGetItems(collection);
+  if (items == nullptr) {
+    return -1;
+  }
+  Py_ssize_t size = Py_SIZE(collection);
+  for (Py_ssize_t i = 0; i < size; i++) {
+    PyObject *item = items[i];
+    // Reading the double out of an object that is not a float would
+    // reinterpret unrelated memory, so reject such elements explicitly.
+    if (!PyFloat_Check(item)) {
+      return -1;
+    }
+    double value = PyFloat_AS_DOUBLE(item);
+    buffer->UnsafePut(start_index, value);
+    start_index += sizeof(double);
+  }
+  return 0;
+}
+} // namespace fury
diff --git a/cpp/fury/python/pyfury.h b/cpp/fury/python/pyfury.h
new file mode 100644
index 00000000..721bd61a
--- /dev/null
+++ b/cpp/fury/python/pyfury.h
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+#include "Python.h"
+#include "fury/util/buffer.h"
+
+namespace fury {
+
+/// Writes the elements of `collection` as consecutive `bool` values into
+/// `buffer`, starting at byte offset `start_index`.
+/// Returns 0 on success and -1 on failure (for example when `collection` is
+/// not an exact list or tuple).
+int Fury_PyBooleanSequenceWriteToBuffer(PyObject *collection,
+                                        Buffer *buffer,
+                                        Py_ssize_t start_index);
+
+/// Writes the elements of `collection`, which are expected to be Python
+/// floats, as consecutive `double` values into `buffer`, starting at byte
+/// offset `start_index`.
+/// Returns 0 on success and -1 on failure (for example when `collection` is
+/// not an exact list or tuple).
+int Fury_PyFloatSequenceWriteToBuffer(PyObject *collection,
+                                      Buffer *buffer,
+                                      Py_ssize_t start_index);
+
+} // namespace fury
\ No newline at end of file
diff --git a/integration_tests/cpython_benchmark/fury_benchmark.py
b/integration_tests/cpython_benchmark/fury_benchmark.py
index 01be1d49..5039f528 100644
--- a/integration_tests/cpython_benchmark/fury_benchmark.py
+++ b/integration_tests/cpython_benchmark/fury_benchmark.py
@@ -92,6 +92,8 @@ TUPLE = (
60,
)
LARGE_TUPLE = tuple(range(2**20 + 1))
+LARGE_FLOAT_TUPLE = tuple([random.random() * 10000 for _ in range(2**20 + 1)])
+LARGE_BOOLEAN_TUPLE = tuple([bool(random.random() > 0.5) for _ in range(2**20
+ 1)])
LIST = [[list(range(10)), list(range(10))] for _ in range(10)]
@@ -189,6 +191,21 @@ def micro_benchmark():
"fury_large_tuple", fury_object, language, not args.no_ref, LARGE_TUPLE
)
runner.bench_func("fury_list", fury_object, language, not args.no_ref,
LIST)
+ runner.bench_func(
+ "fury_large_float_tuple",
+ fury_object,
+ language,
+ not args.no_ref,
+ LARGE_FLOAT_TUPLE,
+ )
+ runner.bench_func(
+ "fury_large_boolean_tuple",
+ fury_object,
+ language,
+ not args.no_ref,
+ LARGE_BOOLEAN_TUPLE,
+ )
+ runner.bench_func("fury_list", fury_object, language, not args.no_ref,
LIST)
runner.bench_func(
"fury_large_list", fury_object, language, not args.no_ref, LARGE_LIST
)
diff --git a/python/pyfury/_serialization.pyx b/python/pyfury/_serialization.pyx
index 3bf3ade4..9b2a5e1a 100644
--- a/python/pyfury/_serialization.pyx
+++ b/python/pyfury/_serialization.pyx
@@ -34,7 +34,8 @@ from pyfury.lib import mmh3
from pyfury.meta.metastring import Encoding
from pyfury.type import is_primitive_type
from pyfury.util import is_little_endian
-from pyfury.includes.libserialization cimport TypeId, IsNamespacedType
+from pyfury.includes.libserialization cimport \
+ (TypeId, IsNamespacedType, Fury_PyBooleanSequenceWriteToBuffer,
Fury_PyFloatSequenceWriteToBuffer)
from libc.stdint cimport int8_t, int16_t, int32_t, int64_t, uint64_t
from libc.stdint cimport *
@@ -69,6 +70,7 @@ cdef extern from *:
object int2obj(int64_t obj_addr)
int64_t obj2int(object obj_ref)
dict _PyDict_NewPresized(Py_ssize_t minused)
+ Py_ssize_t Py_SIZE(object obj)
cdef int8_t NULL_FLAG = -3
@@ -1297,8 +1299,15 @@ cdef class CollectionSerializer(Serializer):
cdef inline _write_bool(self, Buffer buffer, value):
buffer.write_int16(NOT_NULL_BOOL_FLAG)
- for s in value:
- buffer.write_bool(s)
+        # Fast path: an exact list/tuple is written in one call by the C++
+        # helper; any other iterable is written element by element.
+        value_type = type(value)
+        if value_type is list or value_type is tuple:
+            size = sizeof(bool) * Py_SIZE(value)
+            # Reserve the whole region up front; the helper stores with UnsafePut.
+            buffer.grow(<int32_t>size)
+            if Fury_PyBooleanSequenceWriteToBuffer(
+                    value, buffer.c_buffer.get(), buffer.writer_index) != 0:
+                # Never advance writer_index over bytes that were not written.
+                raise TypeError("failed to write bool sequence to buffer")
+            buffer.writer_index += size
+        else:
+            for s in value:
+                buffer.write_bool(s)
cdef inline _read_bool(self, Buffer buffer, int64_t len_, object
collection_):
assert buffer.read_int16() == NOT_NULL_BOOL_FLAG
@@ -1307,8 +1316,15 @@ cdef class CollectionSerializer(Serializer):
cdef inline _write_float(self, Buffer buffer, value):
buffer.write_int16(NOT_NULL_FLOAT64_FLAG)
- for s in value:
- buffer.write_double(s)
+        # Fast path: an exact list/tuple is written in one call by the C++
+        # helper; any other iterable is written element by element.
+        value_type = type(value)
+        if value_type is list or value_type is tuple:
+            size = sizeof(double) * Py_SIZE(value)
+            # Reserve the whole region up front; the helper stores with UnsafePut.
+            buffer.grow(<int32_t>size)
+            if Fury_PyFloatSequenceWriteToBuffer(
+                    value, buffer.c_buffer.get(), buffer.writer_index) != 0:
+                # Never advance writer_index over bytes that were not written.
+                raise TypeError("failed to write float sequence to buffer")
+            buffer.writer_index += size
+        else:
+            for s in value:
+                buffer.write_double(s)
cdef inline _read_float(self, Buffer buffer, int64_t len_, object
collection_):
assert buffer.read_int16() == NOT_NULL_FLOAT64_FLAG
diff --git a/python/pyfury/includes/libserialization.pxd
b/python/pyfury/includes/libserialization.pxd
index 3cb69bc1..be8601ac 100644
--- a/python/pyfury/includes/libserialization.pxd
+++ b/python/pyfury/includes/libserialization.pxd
@@ -17,6 +17,7 @@
from libc.stdint cimport int32_t
from libcpp cimport bool as c_bool
+from pyfury.includes.libutil cimport CBuffer
cdef extern from "fury/type/type.h" namespace "fury" nogil:
@@ -64,3 +65,7 @@ cdef extern from "fury/type/type.h" namespace "fury" nogil:
BOUND = 64
cdef c_bool IsNamespacedType(int32_t type_id)
+
+# Sequence writers declared in fury/python/pyfury.h. Each returns 0 on success
+# and -1 on failure; the status is a plain int and no Python exception is set.
+cdef extern from "fury/python/pyfury.h" namespace "fury":
+    int Fury_PyBooleanSequenceWriteToBuffer(
+        object collection, CBuffer *buffer, Py_ssize_t start_index)
+    int Fury_PyFloatSequenceWriteToBuffer(
+        object collection, CBuffer *buffer, Py_ssize_t start_index)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]