jonkeane commented on a change in pull request #12240:
URL: https://github.com/apache/arrow/pull/12240#discussion_r791779372



##########
File path: r/tests/testthat/test-Array.R
##########
@@ -985,3 +985,14 @@ test_that("Array to C-interface", {
   delete_arrow_schema(schema_ptr)
   delete_arrow_array(array_ptr)
 })
+
+test_that("Array coverts timestamps with missing timezone /assumed local tz 
correctly", {
+  withr::with_envvar(c(TZ = "America/Chicago"), {
+    a <- as.POSIXct("1970-01-01 00:00:00")
+    attr(a, "tzone") <- Sys.getenv("TZ")

Review comment:
       The phrase "the display" here is confusing / wrong in some 
circumstances. When printing arrays, currently AFAICT arrow prints the 
timestamp in UTC for datetimes regardless of whether there is a timezone 
attached or not:
   
   ``` r
   library(arrow, warn.conflicts = FALSE)
   
   # specifically setting the timezone, and the Arrow Array repl shows UTC
   ts <- as.POSIXct("2020-01-01 02:00:00", tz = "America/Chicago") + 1:10*3600
   ts
   #>  [1] "2020-01-01 03:00:00 CST" "2020-01-01 04:00:00 CST"
   #>  [3] "2020-01-01 05:00:00 CST" "2020-01-01 06:00:00 CST"
   #>  [5] "2020-01-01 07:00:00 CST" "2020-01-01 08:00:00 CST"
   #>  [7] "2020-01-01 09:00:00 CST" "2020-01-01 10:00:00 CST"
   #>  [9] "2020-01-01 11:00:00 CST" "2020-01-01 12:00:00 CST"
   attr(ts, "tzone")
   #> [1] "America/Chicago"
   
   arr <- Array$create(ts)
   arr
   #> Array
   #> <timestamp[us, tz=America/Chicago]>
   #> [
   #>   2020-01-01 09:00:00.000000,
   #>   2020-01-01 10:00:00.000000,
   #>   2020-01-01 11:00:00.000000,
   #>   2020-01-01 12:00:00.000000,
   #>   2020-01-01 13:00:00.000000,
   #>   2020-01-01 14:00:00.000000,
   #>   2020-01-01 15:00:00.000000,
   #>   2020-01-01 16:00:00.000000,
   #>   2020-01-01 17:00:00.000000,
   #>   2020-01-01 18:00:00.000000
   #> ]
   arr$type$timezone()
   #> [1] "America/Chicago"
   
   as.vector(arr)
   #>  [1] "2020-01-01 03:00:00 CST" "2020-01-01 04:00:00 CST"
   #>  [3] "2020-01-01 05:00:00 CST" "2020-01-01 06:00:00 CST"
   #>  [5] "2020-01-01 07:00:00 CST" "2020-01-01 08:00:00 CST"
   #>  [7] "2020-01-01 09:00:00 CST" "2020-01-01 10:00:00 CST"
   #>  [9] "2020-01-01 11:00:00 CST" "2020-01-01 12:00:00 CST"
   attr(as.vector(arr), "tzone")
   #> [1] "America/Chicago"
   
   
   # without setting the timezone, and the Arrow Array repl still shows UTC
   ts <- as.POSIXct("2020-01-01 02:00:00") + 1:10*3600
   ts
   #>  [1] "2020-01-01 03:00:00 CST" "2020-01-01 04:00:00 CST"
   #>  [3] "2020-01-01 05:00:00 CST" "2020-01-01 06:00:00 CST"
   #>  [5] "2020-01-01 07:00:00 CST" "2020-01-01 08:00:00 CST"
   #>  [7] "2020-01-01 09:00:00 CST" "2020-01-01 10:00:00 CST"
   #>  [9] "2020-01-01 11:00:00 CST" "2020-01-01 12:00:00 CST"
   attr(ts[[1]], "tzone")
   #> NULL
   
   arr <- Array$create(ts)
   arr
   #> Array
   #> <timestamp[us]>
   #> [
   #>   2020-01-01 09:00:00.000000,
   #>   2020-01-01 10:00:00.000000,
   #>   2020-01-01 11:00:00.000000,
   #>   2020-01-01 12:00:00.000000,
   #>   2020-01-01 13:00:00.000000,
   #>   2020-01-01 14:00:00.000000,
   #>   2020-01-01 15:00:00.000000,
   #>   2020-01-01 16:00:00.000000,
   #>   2020-01-01 17:00:00.000000,
   #>   2020-01-01 18:00:00.000000
   #> ]
   arr$type$timezone()
   #> [1] ""
   
   as.vector(arr)
   #>  [1] "2020-01-01 03:00:00 CST" "2020-01-01 04:00:00 CST"
   #>  [3] "2020-01-01 05:00:00 CST" "2020-01-01 06:00:00 CST"
   #>  [5] "2020-01-01 07:00:00 CST" "2020-01-01 08:00:00 CST"
   #>  [7] "2020-01-01 09:00:00 CST" "2020-01-01 10:00:00 CST"
   #>  [9] "2020-01-01 11:00:00 CST" "2020-01-01 12:00:00 CST"
   attr(as.vector(arr), "tzone")
   #> NULL
   ```
   
   But as I showed above, when pulling the data back in with 
`as.vector(arr)`, the timezone is pulled in with it, so that when R displays 
the timestamp it is faithful to the original timestamp.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to