paleolimbot opened a new pull request, #499:
URL: https://github.com/apache/sedona-db/pull/499
This needs some extensive testing but is very cool! The overhead here is
very low (we can execute `x + 1` just as fast as dplyr does on a native R
data frame!).
``` r
library(sedonadb)
df <- data.frame(x = 1:1e6)
df |> sedonadb:::sd_transmute(y = x + 1)
#> ┌─────────┐
#> │ y │
#> │ float64 │
#> ╞═════════╡
#> │ 2.0 │
#> ├╌╌╌╌╌╌╌╌╌┤
#> │ 3.0 │
#> ├╌╌╌╌╌╌╌╌╌┤
#> │ 4.0 │
#> ├╌╌╌╌╌╌╌╌╌┤
#> │ 5.0 │
#> ├╌╌╌╌╌╌╌╌╌┤
#> │ 6.0 │
#> ├╌╌╌╌╌╌╌╌╌┤
#> │ 7.0 │
#> └─────────┘
#> Preview of up to 6 row(s)
df |> sedonadb:::sd_transmute(y = sum(x))
#> ┌──────────────┐
#> │ y │
#> │ int64 │
#> ╞══════════════╡
#> │ 500000500000 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500000500000 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500000500000 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500000500000 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500000500000 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500000500000 │
#> └──────────────┘
#> Preview of up to 6 row(s)
df |> sedonadb:::sd_transmute(y = sum(x + 1) + x)
#> ┌────────────────┐
#> │ y │
#> │ float64 │
#> ╞════════════════╡
#> │ 500001500001.0 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500001500002.0 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500001500003.0 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500001500004.0 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500001500005.0 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500001500006.0 │
#> └────────────────┘
#> Preview of up to 6 row(s)
bench::mark(
sd = df |>
sedonadb:::sd_transmute(y = sum(x + 1)) |>
sd_collect(),
arrow = df |>
arrow::as_arrow_table() |>
dplyr::transmute(y = x + 1) |>
dplyr::collect(),
duckdb = df |>
arrow::as_arrow_table() |>
dplyr::transmute(y = x + 1) |>
dplyr::collect(),
dplyr = df |> dplyr::transmute(y = x + 1),
check = FALSE
)
#> # A tibble: 4 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 sd 1.25ms 2.77ms 334. 7.7MB 84.5
#> 2 arrow 9.09ms 9.8ms 100. 38.01MB 17.1
#> 3 duckdb 9.19ms 9.64ms 103. 145.85KB 16.4
#> 4 dplyr 1.27ms 1.37ms 697. 8.89MB 161.
```
<sup>Created on 2026-01-08 with [reprex
v2.1.1](https://reprex.tidyverse.org)</sup>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]