Tpt commented on code in PR #9439:
URL: https://github.com/apache/arrow-rs/pull/9439#discussion_r2955818325
##########
arrow-pyarrow/src/lib.rs:
##########
@@ -606,21 +595,52 @@ impl FromPyArrow for Table {
/// Convert a [`Table`] into `pyarrow.Table`.
impl IntoPyArrow for Table {
fn into_pyarrow(self, py: Python) -> PyResult<Bound<PyAny>> {
- let module = py.import(intern!(py, "pyarrow"))?;
- let class = module.getattr(intern!(py, "Table"))?;
-
let py_batches = PyList::new(py,
self.record_batches.into_iter().map(PyArrowType))?;
let py_schema = PyArrowType(Arc::unwrap_or_clone(self.schema));
let kwargs = PyDict::new(py);
kwargs.set_item("schema", py_schema)?;
- let reader = class.call_method("from_batches", (py_batches,),
Some(&kwargs))?;
+ let reader = table_class(py)?.call_method("from_batches",
(py_batches,), Some(&kwargs))?;
Ok(reader)
}
}
+fn array_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> {
+ static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
+ TYPE.import(py, "pyarrow", "Array")
Review Comment:
Thank you! I just ran a benchmark out of curiosity. Here are the results:
```
import_direct time: [272.33 ns 274.23 ns 276.52 ns]
Found 4 outliers among 100 measurements (4.00%)
1 (1.00%) high mild
3 (3.00%) high severe
import_intern time: [206.61 ns 207.60 ns 208.84 ns]
Found 8 outliers among 100 measurements (8.00%)
6 (6.00%) high mild
2 (2.00%) high severe
import_static time: [1.3524 ns 1.3578 ns 1.3648 ns]
Found 17 outliers among 100 measurements (17.00%)
4 (4.00%) high mild
13 (13.00%) high severe
```
The three benchmarks all import `uuid.UUID`:
- `import_direct` uses `Python::import()?.getattr()`
- `import_intern` uses `Python::import(intern!())?.getattr(intern!())` to
avoid always allocating the strings `"uuid"` and `"UUID"`
- `import_static` uses `PyOnceLock::import`
Code:
<details>
```rust
use std::hint::black_box;
use codspeed_criterion_compat::{criterion_group, criterion_main, Bencher,
Criterion};
use pyo3::prelude::*;
use pyo3::intern;
use pyo3::sync::PyOnceLock;
use pyo3::types::PyType;
/// Baseline benchmark: imports the `uuid` module and reads its `UUID`
/// attribute, passing both names as plain string literals on every iteration.
fn import_direct(b: &mut Bencher<'_>) {
    Python::attach(|py| {
        b.iter(|| {
            let module = py.import("uuid").unwrap();
            // `black_box` on both the module reference and the lookup result
            // keeps the optimizer from eliding either step.
            let class = black_box(&module).getattr("UUID");
            black_box(class).unwrap()
        });
    });
}
/// Same lookup as the direct variant, but both names go through `intern!`
/// instead of being passed as plain string literals.
fn import_intern(b: &mut Bencher<'_>) {
    Python::attach(|py| {
        b.iter(|| {
            let module = py.import(intern!(py, "uuid")).unwrap();
            // `black_box` on both the module reference and the lookup result
            // keeps the optimizer from eliding either step.
            let class = black_box(&module).getattr(intern!(py, "UUID"));
            black_box(class).unwrap()
        });
    });
}
/// Looks up `uuid.UUID` through a `static` `PyOnceLock` via
/// `PyOnceLock::import` on every iteration.
fn import_static(b: &mut Bencher<'_>) {
    // One lock shared by every iteration of the benchmark loop.
    static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();

    Python::attach(|py| {
        b.iter(|| {
            let class = TYPE.import(py, "uuid", "UUID");
            black_box(class).unwrap();
        });
    });
}
/// Registers the three import benchmarks with Criterion under the names
/// that appear in the reported results.
fn criterion_benchmark(c: &mut Criterion) {
// Plain-string import followed by `getattr`.
c.bench_function("import_direct", import_direct);
// Same lookup with both names passed through `intern!`.
c.bench_function("import_intern", import_intern);
// Lookup through a static `PyOnceLock`.
c.bench_function("import_static", import_static);
}
// Group the benchmarks above and hand the group to the Criterion macro that
// produces the benchmark entry point.
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]