paleolimbot opened a new pull request #11777:
URL: https://github.com/apache/arrow/pull/11777


   This PR:
   
   - Improves error messages for aggregate expressions that are not supported
   - Allows a scalar to be passed into an aggregate expression. This is related
     because passing a scalar is valid in dplyr but currently produces
     confusing, unrelated error messages.
   
   Reprex before this PR:
   
   ``` r
   # remotes::install_github("apache/arrow/r@master")
   library(arrow, warn.conflicts = FALSE)
   library(dplyr, warn.conflicts = FALSE)
   
   record_batch(x = 4) %>% summarise(y = mean(mean(x)))
   #> Warning: Error in mean(..temp0) : object '..temp0' not found; pulling data into
   #> R
   #> # A tibble: 1 × 1
   #>       y
   #>   <dbl>
   #> 1     4
   record_batch(x = 4) %>% summarize(y = x + 1)
   #> Warning: Error : Expression x + 1 not supported in Arrow; pulling data into R
   #> # A tibble: 1 × 1
   #>       y
   #>   <dbl>
   #> 1     5
   record_batch(x = 4) %>% summarize(y = x)
   #> Warning: Error in .f(.x[[i]], ...) : attempt to apply non-function; pulling data
   #> into R
   #> # A tibble: 1 × 1
   #>       y
   #>   <dbl>
   #> 1     4
   
   record_batch(x = 4) %>% summarise(y = 1)
   #> Warning: Error in .$data : $ operator is invalid for atomic vectors; pulling
   #> data into R
   #> # A tibble: 1 × 1
   #>       y
   #>   <dbl>
   #> 1     1
   record_batch(x = 4) %>% summarise(y = Expression$scalar(1))
   #> InMemoryDataset (query)
   #> y: double (1)
   #> 
   #> See $.data for the source Arrow object
   record_batch(x = 4) %>% summarise(y = Scalar$create(1))
   #> Error in if (nzchar(name)) {: argument is of length zero
   
   some_scalar_value <- 3
   record_batch(x = 4) %>% summarise(y = some_scalar_value)
   #> Warning: Error in .$data : $ operator is invalid for atomic vectors; pulling
   #> data into R
   #> # A tibble: 1 × 1
   #>       y
   #>   <dbl>
   #> 1     3
   record_batch(x = 4) %>% summarise(y = !! some_scalar_value)
   #> Warning: Error in .$data : $ operator is invalid for atomic vectors; pulling
   #> data into R
   #> # A tibble: 1 × 1
   #>       y
   #>   <dbl>
   #> 1     3
   ```
   
   Reprex after this PR:
   
   ``` r
   # remotes::install_github("paleolimbot/arrow/r@r-summarise-eval")
   library(arrow, warn.conflicts = FALSE)
   library(dplyr, warn.conflicts = FALSE)
   
   record_batch(x = 4) %>% summarise(y = mean(mean(x)))
   #> Warning: Error : Aggregate within aggregate expression mean(mean(x)) not
   #> supported in Arrow; pulling data into R
   #> # A tibble: 1 × 1
   #>       y
   #>   <dbl>
   #> 1     4
   record_batch(x = 4) %>% summarize(y = x + 1)
   #> Warning: Error : Expression x + 1 is not an aggregate expression or is not
   #> supported in Arrow; pulling data into R
   #> # A tibble: 1 × 1
   #>       y
   #>   <dbl>
   #> 1     5
   record_batch(x = 4) %>% summarize(y = x)
   #> Warning: Error : Expression x is not an aggregate expression or is not supported
   #> in Arrow; pulling data into R
   #> # A tibble: 1 × 1
   #>       y
   #>   <dbl>
   #> 1     4
   
   record_batch(x = 4) %>% summarise(y = 1)
   #> InMemoryDataset (query)
   #> y: double (1)
   #> 
   #> See $.data for the source Arrow object
   record_batch(x = 4) %>% summarise(y = Expression$scalar(1))
   #> InMemoryDataset (query)
   #> y: double (1)
   #> 
   #> See $.data for the source Arrow object
   record_batch(x = 4) %>% summarise(y = Scalar$create(1))
   #> InMemoryDataset (query)
   #> y: double (1)
   #> 
   #> See $.data for the source Arrow object
   
   some_scalar_value <- 3
   record_batch(x = 4) %>% summarise(y = some_scalar_value)
   #> InMemoryDataset (query)
   #> y: double (3)
   #> 
   #> See $.data for the source Arrow object
   record_batch(x = 4) %>% summarise(y = !! some_scalar_value)
   #> InMemoryDataset (query)
   #> y: double (3)
   #> 
   #> See $.data for the source Arrow object
   ```
   
   <sup>Created on 2021-11-25 by the [reprex package](https://reprex.tidyverse.org) (v2.0.1)</sup>
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to