dvictori opened a new issue, #38358:
URL: https://github.com/apache/arrow/issues/38358

   ### Describe the enhancement requested
   
   I have an Arrow dataset that has some invalid dates. I'd like to convert 
those to NA, and I figured I'd use a combination of `mutate` and `if_else`. 
However, when I tried to do that, I got a _NotImplemented_ error.
   
   Any advice on achieving the same result without using `if_else`?
   
   Here is a simple reprex:
   ``` r
   # Arrow and mutate if_else do not play nice
   # example
   
   library(arrow)
   #> 
   #> Attaching package: 'arrow'
   #> The following object is masked from 'package:utils':
   #> 
   #>     timestamp
   library(dplyr)
   #> 
   #> Attaching package: 'dplyr'
   #> The following objects are masked from 'package:stats':
   #> 
   #>     filter, lag
   #> The following objects are masked from 'package:base':
   #> 
   #>     intersect, setdiff, setequal, union
   
   df <- data.frame(i = 1:4,
                    date = c(as.Date('2013-01-01'),
                             as.Date('2013-01-02'),
                             as.Date('2033-01-03'),
                             as.Date('2013-01-04')))
   tfile <- tempfile()
   write.csv(df, tfile,
             row.names = FALSE)
   
   arrow_dataset <- open_dataset(tfile, format = 'csv')
   # column type is date32[day]
   arrow_dataset
   #> FileSystemDataset with 1 csv file
   #> i: int64
   #> date: date32[day]
   
   arrow_dataset |>
     mutate(date = if_else(date > '2014-01-01', NA, date))
   #> Error in `map_chr()`:
   #> ℹ In index: 2.
   #> ℹ With name: date.
   #> Caused by error:
   #> ! NotImplemented: Function 'if_else' has no kernel matching input types 
(bool, bool, date32[day])
   #> Backtrace:
   #>      ▆
   #>   1. ├─base::tryCatch(...)
   #>   2. │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
   #>   3. │   ├─base (local) tryCatchOne(...)
   #>   4. │   │ └─base (local) doTryCatch(return(expr), name, parentenv, 
handler)
   #>   5. │   └─base (local) tryCatchList(expr, names[-nh], parentenv, 
handlers[-nh])
   #>   6. │     └─base (local) tryCatchOne(expr, names, parentenv, 
handlers[[1L]])
   #>   7. │       └─base (local) doTryCatch(return(expr), name, parentenv, 
handler)
   #>   8. ├─base::withCallingHandlers(...)
   #>   9. ├─base::saveRDS(...)
   #>  10. ├─base::do.call(...)
   #>  11. ├─base (local) `<fn>`(...)
   #>  12. ├─global `<fn>`(input = base::quote("flat-ram_reprex.R"))
   #>  13. │ └─rmarkdown::render(input, quiet = TRUE, envir = globalenv(), 
encoding = "UTF-8")
   #>  14. │   └─knitr::knit(knit_input, knit_output, envir = envir, quiet = 
quiet)
   #>  15. │     └─knitr:::process_file(text, output)
   #>  16. │       ├─base::withCallingHandlers(...)
   #>  17. │       ├─base::withCallingHandlers(...)
   #>  18. │       ├─knitr:::process_group(group)
   #>  19. │       └─knitr:::process_group.block(group)
   #>  20. │         └─knitr:::call_block(x)
   #>  21. │           └─knitr:::block_exec(params)
   #>  22. │             └─knitr:::eng_r(options)
   #>  23. │               ├─knitr:::in_input_dir(...)
   #>  24. │               │ └─knitr:::in_dir(input_dir(), expr)
   #>  25. │               └─knitr (local) evaluate(...)
   #>  26. │                 └─evaluate::evaluate(...)
   #>  27. │                   └─evaluate:::evaluate_call(...)
   #>  28. │                     ├─evaluate (local) handle(...)
   #>  29. │                     │ └─base::try(f, silent = TRUE)
   #>  30. │                     │   └─base::tryCatch(...)
   #>  31. │                     │     └─base (local) tryCatchList(expr, 
classes, parentenv, handlers)
   #>  32. │                     │       └─base (local) tryCatchOne(expr, 
names, parentenv, handlers[[1L]])
   #>  33. │                     │         └─base (local) 
doTryCatch(return(expr), name, parentenv, handler)
   #>  34. │                     ├─base::withCallingHandlers(...)
   #>  35. │                     ├─base::withVisible(value_fun(ev$value, 
ev$visible))
   #>  36. │                     └─knitr (local) value_fun(ev$value, ev$visible)
   #>  37. │                       └─knitr (local) fun(x, options = options)
   #>  38. │                         ├─base::withVisible(knit_print(x, ...))
   #>  39. │                         ├─knitr::knit_print(x, ...)
   #>  40. │                         └─knitr:::knit_print.default(x, ...)
   #>  41. │                           └─evaluate (local) normal_print(x)
   #>  42. │                             ├─base::print(x)
   #>  43. │                             └─arrow:::print.arrow_dplyr_query(x)
   #>  44. │                               └─purrr::map_chr(...)
   #>  45. │                                 └─purrr:::map_("character", .x, 
.f, ..., .progress = .progress)
   #>  46. │                                   
├─purrr:::with_indexed_errors(...)
   #>  47. │                                   │ 
└─base::withCallingHandlers(...)
   #>  48. │                                   ├─purrr:::call_with_cleanup(...)
   #>  49. │                                   └─arrow (local) .f(.x[[i]], ...)
   #>  50. │                                     ├─base::paste0(...)
   #>  51. │                                     └─expr$type(schm)
   #>  52. │                                       
└─arrow:::compute___expr__type(self, schema)
   #>  53. └─base::.handleSimpleError(...)
   #>  54.   └─purrr (local) h(simpleError(msg, call))
   #>  55.     └─cli::cli_abort(...)
   #>  56.       └─rlang::abort(...)
   
   sessionInfo()
   #> R version 4.3.1 (2023-06-16)
   #> Platform: x86_64-pc-linux-gnu (64-bit)
   #> Running under: Ubuntu 20.04.6 LTS
   #> 
   #> Matrix products: default
   #> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
   #> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3;  
LAPACK version 3.9.0
   #> 
   #> locale:
   #>  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
   #>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
   #>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
   #>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
   #>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
   #> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
   #> 
   #> time zone: America/Sao_Paulo
   #> tzcode source: system (glibc)
   #> 
   #> attached base packages:
   #> [1] stats     graphics  grDevices utils     datasets  methods   base     
   #> 
   #> other attached packages:
   #> [1] dplyr_1.1.2    arrow_12.0.1.1
   #> 
   #> loaded via a namespace (and not attached):
   #>  [1] vctrs_0.6.3       cli_3.6.1         knitr_1.43        rlang_1.1.1    
  
   #>  [5] xfun_0.39         purrr_1.0.1       styler_1.10.1     generics_0.1.3 
  
   #>  [9] assertthat_0.2.1  glue_1.6.2        bit_4.0.5         
htmltools_0.5.5  
   #> [13] fansi_1.0.4       rmarkdown_2.23    R.cache_0.16.0    tibble_3.2.1   
  
   #> [17] evaluate_0.21     fastmap_1.1.1     yaml_2.3.7        
lifecycle_1.0.3  
   #> [21] compiler_4.3.1    fs_1.6.2          pkgconfig_2.0.3   
rstudioapi_0.15.0
   #> [25] R.oo_1.25.0       R.utils_2.12.2    digest_0.6.33     R6_2.5.1       
  
   #> [29] utf8_1.2.3        tidyselect_1.2.0  reprex_2.0.2      pillar_1.9.0   
  
   #> [33] magrittr_2.0.3    R.methodsS3_1.8.2 tools_4.3.1       withr_2.5.0    
  
   #> [37] bit64_4.0.5
   ```
   
   <sup>Created on 2023-10-19 with [reprex 
v2.0.2](https://reprex.tidyverse.org)</sup>
   
   
   ### Component(s)
   
   R


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to