This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git


The following commit(s) were added to refs/heads/main by this push:
     new 40a7b259 perf(python): Optimize sequence buffer writing via the c-api (#2078)
40a7b259 is described below

commit 40a7b2595c268f813a965b2fcc44fa1c989bc1a5
Author: penguin_wwy <[email protected]>
AuthorDate: Fri Feb 21 18:40:33 2025 +0800

    perf(python): Optimize sequence buffer writing via the c-api (#2078)
    
    ## What does this PR do?
    
    Optimize the writing of float and bool type sequences via the C API
    
    ## Related issues
    
    ## Does this PR introduce any user-facing change?
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
    ```
    fury_large_float_tuple: Mean +- std dev: [base] 32.8 ms +- 0.9 ms -> [opt] 28.4 ms +- 0.8 ms: 1.16x faster
    fury_large_boolean_tuple: Mean +- std dev: [base] 22.1 ms +- 0.2 ms -> [opt] 13.6 ms +- 0.6 ms: 1.63x faster
    
    Geometric mean: 1.37x faster
    ```
---
 BUILD                                              |  1 +
 cpp/fury/python/BUILD                              | 33 ++++++++++++
 cpp/fury/python/pyfury.cc                          | 61 ++++++++++++++++++++++
 cpp/fury/python/pyfury.h                           | 29 ++++++++++
 .../cpython_benchmark/fury_benchmark.py            | 17 ++++++
 python/pyfury/_serialization.pyx                   | 26 +++++++--
 python/pyfury/includes/libserialization.pxd        |  5 ++
 7 files changed, 167 insertions(+), 5 deletions(-)

diff --git a/BUILD b/BUILD
index 37fa36df..d50feb9f 100644
--- a/BUILD
+++ b/BUILD
@@ -64,6 +64,7 @@ pyx_library(
     deps = [
         "//cpp/fury/util:fury_util",
         "//cpp/fury/type:fury_type",
+        "//cpp/fury/python:_pyfury",
         "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
diff --git a/cpp/fury/python/BUILD b/cpp/fury/python/BUILD
new file mode 100644
index 00000000..36a01038
--- /dev/null
+++ b/cpp/fury/python/BUILD
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
+load("@com_github_grpc_grpc//bazel:cython_library.bzl", "pyx_library")
+
+cc_library(  # C++ helper library built from pyfury.cc / pyfury.h in this package
+    name = "_pyfury",
+    srcs = ["pyfury.cc"],
+    hdrs = ["pyfury.h"],
+    alwayslink=True,
+    linkstatic=True,
+    strip_include_prefix = "/cpp",  # pyfury.cc includes the header as "fury/python/pyfury.h"
+    deps = [
+        "//cpp/fury/util:fury_util",  # presumably provides "fury/util/buffer.h" used by pyfury.h; confirm
+        "@local_config_python//:python_headers",  # pyfury.h includes "Python.h"
+    ],
+    visibility = ["//visibility:public"],  # the root BUILD pyx_library lists this target in its deps
+)
\ No newline at end of file
diff --git a/cpp/fury/python/pyfury.cc b/cpp/fury/python/pyfury.cc
new file mode 100644
index 00000000..a9ca00a6
--- /dev/null
+++ b/cpp/fury/python/pyfury.cc
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "fury/python/pyfury.h"
+
+static PyObject **PySequenceGetItems(PyObject *collection) { // returns the item-pointer array of an exact list or tuple, else nullptr
+  if (PyList_CheckExact(collection)) { // exact-type check only; anything that fails both checks ends at the nullptr return
+    return ((PyListObject *)collection)->ob_item; // reads the list object's ob_item field directly
+  } else if (PyTuple_CheckExact(collection)) {
+    return ((PyTupleObject *)collection)->ob_item; // reads the tuple object's ob_item field directly
+  }
+  return nullptr; // the writers in this file translate nullptr into a -1 return code
+}
+
+namespace fury {
+int Fury_PyBooleanSequenceWriteToBuffer(PyObject *collection, Buffer *buffer, // writes one bool per item of collection into buffer
+                                        Py_ssize_t start_index) { // start_index: offset of the first write; returns 0, or -1 for a non list/tuple
+  PyObject **items = PySequenceGetItems(collection);
+  if (items == nullptr) {
+    return -1; // only the status code signals the failure; no Python exception is set in this function
+  }
+  Py_ssize_t size = Py_SIZE(collection); // number of items to write
+  for (Py_ssize_t i = 0; i < size; i++) {
+    bool b = items[i] == Py_True; // identity comparison: every object other than Py_True is written as false
+    buffer->UnsafePut(start_index, b); // NOTE(review): looks unchecked; the Cython caller calls buffer.grow(size) beforehand (confirm)
+    start_index += sizeof(bool); // advance the local offset by the width just written
+  }
+  return 0;
+}
+
+int Fury_PyFloatSequenceWriteToBuffer(PyObject *collection, Buffer *buffer, // writes one double per item of collection into buffer
+                                      Py_ssize_t start_index) { // start_index: offset of the first write; returns 0, or -1 for a non list/tuple
+  PyObject **items = PySequenceGetItems(collection);
+  if (items == nullptr) {
+    return -1; // only the status code signals the failure; no Python exception is set in this function
+  }
+  Py_ssize_t size = Py_SIZE(collection); // number of items to write
+  for (Py_ssize_t i = 0; i < size; i++) {
+    auto *f = (PyFloatObject *)items[i]; // NOTE(review): unchecked cast, no float type check here; relies on the caller passing only floats (confirm)
+    buffer->UnsafePut(start_index, f->ob_fval); // reads the ob_fval field directly and stores it at the current offset
+    start_index += sizeof(double); // advance the local offset by the width just written
+  }
+  return 0;
+}
+} // namespace fury
diff --git a/cpp/fury/python/pyfury.h b/cpp/fury/python/pyfury.h
new file mode 100644
index 00000000..721bd61a
--- /dev/null
+++ b/cpp/fury/python/pyfury.h
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+#include "Python.h"
+#include "fury/util/buffer.h"
+
+namespace fury {
+int Fury_PyBooleanSequenceWriteToBuffer(PyObject *collection, Buffer *buffer, // defined in pyfury.cc: writes one bool per list/tuple item
+                                        Py_ssize_t start_index); // start_index: buffer offset of the first write; 0 on success, -1 if not a list/tuple
+int Fury_PyFloatSequenceWriteToBuffer(PyObject *collection, Buffer *buffer, // defined in pyfury.cc: writes one double per list/tuple item
+                                      Py_ssize_t start_index); // start_index: buffer offset of the first write; 0 on success, -1 if not a list/tuple
+} // namespace fury
\ No newline at end of file
diff --git a/integration_tests/cpython_benchmark/fury_benchmark.py b/integration_tests/cpython_benchmark/fury_benchmark.py
index 01be1d49..5039f528 100644
--- a/integration_tests/cpython_benchmark/fury_benchmark.py
+++ b/integration_tests/cpython_benchmark/fury_benchmark.py
@@ -92,6 +92,8 @@ TUPLE = (
     60,
 )
 LARGE_TUPLE = tuple(range(2**20 + 1))
+LARGE_FLOAT_TUPLE = tuple([random.random() * 10000 for _ in range(2**20 + 1)])
+LARGE_BOOLEAN_TUPLE = tuple([bool(random.random() > 0.5) for _ in range(2**20 + 1)])
 
 
 LIST = [[list(range(10)), list(range(10))] for _ in range(10)]
@@ -189,6 +191,21 @@ def micro_benchmark():
         "fury_large_tuple", fury_object, language, not args.no_ref, LARGE_TUPLE
     )
     runner.bench_func("fury_list", fury_object, language, not args.no_ref, LIST)
+    runner.bench_func(
+        "fury_large_float_tuple",
+        fury_object,
+        language,
+        not args.no_ref,
+        LARGE_FLOAT_TUPLE,
+    )
+    runner.bench_func(
+        "fury_large_boolean_tuple",
+        fury_object,
+        language,
+        not args.no_ref,
+        LARGE_BOOLEAN_TUPLE,
+    )
+    runner.bench_func("fury_list", fury_object, language, not args.no_ref, LIST)
     runner.bench_func(
         "fury_large_list", fury_object, language, not args.no_ref, LARGE_LIST
     )
diff --git a/python/pyfury/_serialization.pyx b/python/pyfury/_serialization.pyx
index 3bf3ade4..9b2a5e1a 100644
--- a/python/pyfury/_serialization.pyx
+++ b/python/pyfury/_serialization.pyx
@@ -34,7 +34,8 @@ from pyfury.lib import mmh3
 from pyfury.meta.metastring import Encoding
 from pyfury.type import is_primitive_type
 from pyfury.util import is_little_endian
-from pyfury.includes.libserialization cimport TypeId, IsNamespacedType
+from pyfury.includes.libserialization cimport \
+    (TypeId, IsNamespacedType, Fury_PyBooleanSequenceWriteToBuffer, Fury_PyFloatSequenceWriteToBuffer)
 
 from libc.stdint cimport int8_t, int16_t, int32_t, int64_t, uint64_t
 from libc.stdint cimport *
@@ -69,6 +70,7 @@ cdef extern from *:
     object int2obj(int64_t obj_addr)
     int64_t obj2int(object obj_ref)
     dict _PyDict_NewPresized(Py_ssize_t minused)
+    Py_ssize_t Py_SIZE(object obj)
 
 
 cdef int8_t NULL_FLAG = -3
@@ -1297,8 +1299,15 @@ cdef class CollectionSerializer(Serializer):
 
     cdef inline _write_bool(self, Buffer buffer, value):
         buffer.write_int16(NOT_NULL_BOOL_FLAG)
-        for s in value:
-            buffer.write_bool(s)
+        value_type = type(value)
+        if value_type is list or value_type is tuple:
+            size = sizeof(bool) * Py_SIZE(value)
+            buffer.grow(<int32_t>size)
+            Fury_PyBooleanSequenceWriteToBuffer(value, buffer.c_buffer.get(), buffer.writer_index)
+            buffer.writer_index += size
+        else:
+            for s in value:
+                buffer.write_bool(s)
 
     cdef inline _read_bool(self, Buffer buffer, int64_t len_, object collection_):
         assert buffer.read_int16() == NOT_NULL_BOOL_FLAG
@@ -1307,8 +1316,15 @@ cdef class CollectionSerializer(Serializer):
 
     cdef inline _write_float(self, Buffer buffer, value):
         buffer.write_int16(NOT_NULL_FLOAT64_FLAG)
-        for s in value:
-            buffer.write_double(s)
+        value_type = type(value)
+        if value_type is list or value_type is tuple:
+            size = sizeof(double) * Py_SIZE(value)
+            buffer.grow(<int32_t>size)
+            Fury_PyFloatSequenceWriteToBuffer(value, buffer.c_buffer.get(), buffer.writer_index)
+            buffer.writer_index += size
+        else:
+            for s in value:
+                buffer.write_double(s)
 
     cdef inline _read_float(self, Buffer buffer, int64_t len_, object collection_):
         assert buffer.read_int16() == NOT_NULL_FLOAT64_FLAG
diff --git a/python/pyfury/includes/libserialization.pxd b/python/pyfury/includes/libserialization.pxd
index 3cb69bc1..be8601ac 100644
--- a/python/pyfury/includes/libserialization.pxd
+++ b/python/pyfury/includes/libserialization.pxd
@@ -17,6 +17,7 @@
 
 from libc.stdint cimport int32_t
 from libcpp cimport bool as c_bool
+from pyfury.includes.libutil cimport CBuffer
 
 cdef extern from "fury/type/type.h" namespace "fury" nogil:
 
@@ -64,3 +65,7 @@ cdef extern from "fury/type/type.h" namespace "fury" nogil:
         BOUND = 64
 
     cdef c_bool IsNamespacedType(int32_t type_id)
+
+cdef extern from "fury/python/pyfury.h" namespace "fury":
+    int Fury_PyBooleanSequenceWriteToBuffer(object collection, CBuffer *buffer, Py_ssize_t start_index)
+    int Fury_PyFloatSequenceWriteToBuffer(object collection, CBuffer *buffer, Py_ssize_t start_index)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to