boshek commented on code in PR #13641:
URL: https://github.com/apache/arrow/pull/13641#discussion_r927973046


##########
r/R/dplyr.R:
##########
@@ -184,6 +184,29 @@ dim.arrow_dplyr_query <- function(x) {
   c(rows, cols)
 }
 
+#' @export
+unique.arrow_dplyr_query <- function(x, incomparables = FALSE, fromLast = 
FALSE, ...) {
+
+  if (incomparables == TRUE) {
+    arrow_not_supported("`unique()` with `incomparables = TRUE`")
+  }
+
+  if (fromLast == TRUE) {
+    arrow_not_supported("`unique()` with `fromLast = TRUE`")
+  }
+
+  x <- dplyr::distinct(x)
+  dplyr::collect(x)

Review Comment:
   It comes from 
[this](https://issues.apache.org/jira/browse/ARROW-12693?focusedCommentId=17568169&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-17568169)
 comment. I think it is a bit of a grey area. My thinking was that base 
functions don't fall into the lazy dbplyr paradigm, but then I remembered 
`head`. dbplyr does not have a `unique` method, so I don't think there is a 
precedent:
   
   ``` r
   library(arrow, warn.conflicts = FALSE)
   library(dplyr, warn.conflicts = FALSE)
   
   arrow_iris <- arrow_table(iris)
   duckdb_iris <- to_duckdb(arrow_iris)
   
   ## head
   head(arrow_iris)
   #> Table
   #> 6 rows x 5 columns
   #> $Sepal.Length <double>
   #> $Sepal.Width <double>
   #> $Petal.Length <double>
   #> $Petal.Width <double>
   #> $Species <dictionary<values=string, indices=int8>>
   #> 
   #> See $metadata for additional Schema metadata
   head(duckdb_iris)
   #> # Source:   SQL [6 x 5]
   #> # Database: DuckDB 0.3.5-dev1410 [root@Darwin 21.6.0:R 4.2.1/:memory:]
   #>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
   #>          <dbl>       <dbl>        <dbl>       <dbl> <chr>  
   #> 1          5.1         3.5          1.4         0.2 setosa 
   #> 2          4.9         3            1.4         0.2 setosa 
   #> 3          4.7         3.2          1.3         0.2 setosa 
   #> 4          4.6         3.1          1.5         0.2 setosa 
   #> 5          5           3.6          1.4         0.2 setosa 
   #> 6          5.4         3.9          1.7         0.4 setosa
   
   ## distinct
   distinct(arrow_iris)
   #> Table (query)
   #> Sepal.Length: double
   #> Sepal.Width: double
   #> Petal.Length: double
   #> Petal.Width: double
   #> Species: dictionary<values=string, indices=int8>
   #> 
   #> See $.data for the source Arrow object
   distinct(duckdb_iris)
   #> # Source:   SQL [?? x 5]
   #> # Database: DuckDB 0.3.5-dev1410 [root@Darwin 21.6.0:R 4.2.1/:memory:]
   #>    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
   #>           <dbl>       <dbl>        <dbl>       <dbl> <chr>  
   #>  1          5.1         3.5          1.4         0.2 setosa 
   #>  2          4.9         3            1.4         0.2 setosa 
   #>  3          4.7         3.2          1.3         0.2 setosa 
   #>  4          4.6         3.1          1.5         0.2 setosa 
   #>  5          5           3.6          1.4         0.2 setosa 
   #>  6          5.4         3.9          1.7         0.4 setosa 
   #>  7          4.6         3.4          1.4         0.3 setosa 
   #>  8          5           3.4          1.5         0.2 setosa 
   #>  9          4.4         2.9          1.4         0.2 setosa 
   #> 10          4.9         3.1          1.5         0.1 setosa 
   #> # … with more rows
   #> # ℹ Use `print(n = ...)` to see more rows
   
   ##
   unique(duckdb_iris)
   #> [[1]]
   #> src:  DuckDB 0.3.5-dev1410 [root@Darwin 21.6.0:R 4.2.1/:memory:]
   #> tbls:
   #> 
   #> [[2]]
   #> From: arrow_001
   #> <Table: arrow_001>
   ```
   
   I don't think users will have an expectation here so we are probably free to 
decide. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to