nealrichardson commented on a change in pull request #12133:
URL: https://github.com/apache/arrow/pull/12133#discussion_r783981221



##########
File path: r/R/dataset-factory.R
##########
@@ -60,16 +61,65 @@ DatasetFactory$create <- function(x,
     return(FileSystemDatasetFactory$create(path_and_fs$fs, NULL, 
path_and_fs$path, format))
   }
 
-  if (!is.null(partitioning)) {
-    if (inherits(partitioning, "Schema")) {
-      partitioning <- DirectoryPartitioning$create(partitioning)
-    } else if (is.character(partitioning)) {
-      # These are the column/field names, and we should autodetect their types
-      partitioning <- DirectoryPartitioningFactory$create(partitioning)
+  # Handle partitioning arg in cases where it is "character" or "Schema"
+  if (!is.null(partitioning) && !inherits(partitioning, c("Partitioning", 
"PartitioningFactory"))) {
+    # isFALSE() would be sufficient but that requires R >= 3.5
+    assert_that(is.logical(hive_style), length(hive_style) == 1)
+    if (hive_style %in% c(TRUE, NA)) {
+      # Default is NA, which means check to see if the paths could be hive_style
+      hive_factory <- HivePartitioningFactory$create()
+      paths <- path_and_fs$fs$ls(
+        path_and_fs$path,
+        allow_not_found = FALSE,
+        recursive = TRUE
+      )
+      hive_schema <- hive_factory$Inspect(paths)
+      # This is length-0 if there are no hive segments
+      if (is.na(hive_style)) {
+        hive_style <- length(hive_schema) > 0
+      }
+    }
+
+    if (hive_style) {
+      if (is.character(partitioning)) {
+        # These are not needed, the user probably provided them because they
+        # thought they needed to. Just make sure they aren't invalid.
+        if (!identical(names(hive_schema), partitioning)) {
+          stop(
+            '"partitioning" does not match the detected Hive-style partitions: 
',
+            deparse1(names(hive_schema)),

Review comment:
       Done, PTAL @thisisnic 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to