thisisnic commented on code in PR #48086:
URL: https://github.com/apache/arrow/pull/48086#discussion_r2527566206


##########
r/tests/testthat/test-dataset-write.R:
##########
@@ -1015,3 +1015,47 @@ test_that("Dataset write wrappers can write flat files 
using readr::write_csv()
     c("true", "false", "NOVALUE", "true", "false", "true", "false", "NOVALUE", 
"true", "false")
   )
 })
+
+test_that("write_dataset respects url_encode_hive_values parameter", {
+  skip_if_not_available("parquet")
+
+  # Create test data with special characters that would be URL encoded
+  # Note: Forward slash (/) cannot be used in directory names on Unix systems,
+  # so we test with other special characters that are valid in filenames
+  test_df <- data.frame(
+    value = c(1, 2, 3, 4),
+    category = c("test space", "test+plus", "test%percent", "normal"),
+    stringsAsFactors = FALSE
+  )
+
+  # Test with URL encoding enabled (default)
+  dst_dir_encoded <- make_temp_dir()
+  write_dataset(test_df, dst_dir_encoded, partitioning = "category", 
hive_style = TRUE, url_encode_hive_values = TRUE)
+
+  # Test with URL encoding disabled
+  dst_dir_not_encoded <- make_temp_dir()
+  write_dataset(test_df, dst_dir_not_encoded, partitioning = "category", 
hive_style = TRUE, url_encode_hive_values = FALSE)
+
+  # Check that the directories are different (encoded vs not encoded)
+  encoded_dirs <- list.dirs(dst_dir_encoded, recursive = FALSE)
+  not_encoded_dirs <- list.dirs(dst_dir_not_encoded, recursive = FALSE)
+
+  # The encoded version should have URL-encoded directory names
+  expect_true(any(grepl("test%20space", encoded_dirs)))
+  expect_true(any(grepl("test%2Bplus", encoded_dirs)))
+  expect_true(any(grepl("test%25percent", encoded_dirs)))
+
+  # The non-encoded version should have raw directory names
+  expect_true(any(grepl("test space", not_encoded_dirs, fixed = TRUE)))
+  expect_true(any(grepl("test+plus", not_encoded_dirs, fixed = TRUE)))
+  expect_true(any(grepl("test%percent", not_encoded_dirs, fixed = TRUE)))
+
+  # Both datasets should be readable and equivalent when loaded
+  ds_encoded <- open_dataset(dst_dir_encoded)
+  ds_not_encoded <- open_dataset(dst_dir_not_encoded)
+
+  expect_equal(
+    arrange(ds_encoded %>% collect(), value),
+    arrange(ds_not_encoded %>% collect(), value)

Review Comment:
   Please use the base pipe instead



##########
r/R/dataset-write.R:
##########
@@ -124,22 +126,21 @@
 #'   write_dataset(two_levels_tree_no_hive, hive_style = FALSE)
 #' list.files(two_levels_tree_no_hive, recursive = TRUE)
 #' @export
-write_dataset <- function(
-  dataset,
+write_dataset <- function(dataset,
   path,
   format = c("parquet", "feather", "arrow", "ipc", "csv", "tsv", "txt", 
"text"),
   partitioning = dplyr::group_vars(dataset),
   basename_template = paste0("part-{i}.", as.character(format)),
   hive_style = TRUE,
+  url_encode_hive_values = TRUE,

Review Comment:
   What's the reason for inserting this parameter here in the function 
signature?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to