westonpace commented on code in PR #14132:
URL: https://github.com/apache/arrow/pull/14132#discussion_r978942979


##########
cpp/src/arrow/filesystem/filesystem_test.cc:
##########
@@ -308,28 +308,27 @@ TEST(InternalUtil, GlobFiles) {
     ASSERT_EQ(actual, expected);
   };
 
-  ASSERT_OK(fs->CreateDir("A/CD"));
-  ASSERT_OK(fs->CreateDir("AB/CD"));
-  ASSERT_OK(fs->CreateDir("AB/CD/ab"));
-  CreateFile(fs.get(), "A/CD/ab.txt", "data");
-  CreateFile(fs.get(), "AB/CD/a.txt", "data");
-  CreateFile(fs.get(), "AB/CD/abc.txt", "data");
-  CreateFile(fs.get(), "AB/CD/ab/c.txt", "data");
+  ASSERT_OK(fs->CreateDir(base_dir + "A/CD"));
+  ASSERT_OK(fs->CreateDir(base_dir + "AB/CD"));
+  ASSERT_OK(fs->CreateDir(base_dir + "AB/CD/ab"));
+  CreateFile(fs.get(), base_dir + "A/CD/ab.txt", "data");
+  CreateFile(fs.get(), base_dir + "AB/CD/a.txt", "data");
+  CreateFile(fs.get(), base_dir + "AB/CD/abc.txt", "data");
+  CreateFile(fs.get(), base_dir + "AB/CD/ab/c.txt", "data");
 
   FileInfoVector infos;
-  ASSERT_OK_AND_ASSIGN(infos, GlobFiles(fs, "A*/CD/?b*.txt"));
+  ASSERT_OK_AND_ASSIGN(infos, GlobFiles(fs, base_dir + "A*/CD/?b*.txt"));
   ASSERT_EQ(infos.size(), 2);
-  check_entries(infos, {"A/CD/ab.txt", "AB/CD/abc.txt"});
+  check_entries(infos, {base_dir + "A/CD/ab.txt", base_dir + "AB/CD/abc.txt"});
 
-  // Leading slash is optional but doesn't change behavior
-  ASSERT_OK_AND_ASSIGN(infos, GlobFiles(fs, "/A*/CD/?b*.txt"));
-  ASSERT_EQ(infos.size(), 2);
-  check_entries(infos, {"A/CD/ab.txt", "AB/CD/abc.txt"});

Review Comment:
   Yes, that seems fine to me.



##########
cpp/src/arrow/engine/substrait/serde_test.cc:
##########
@@ -3078,5 +3089,97 @@ TEST(Substrait, AggregateRelEmit) {
                        buf, {}, conversion_options);
 }
 
+TEST(Substrait, ReadRelWithGlobFiles) {
+#ifdef _WIN32
+  GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows";
+#endif
+  compute::ExecContext exec_context;
+  arrow::dataset::internal::Initialize();
+
+  auto dummy_schema =
+      schema({field("A", int32()), field("B", int32()), field("C", int32())});
+
+  // creating a dummy dataset using a dummy table
+  auto table_1 = TableFromJSON(dummy_schema, {R"([
+      [1, 1, 10],
+      [3, 4, 20]
+    ])"});
+  auto table_2 = TableFromJSON(dummy_schema, {R"([
+      [11, 11, 110],
+      [13, 14, 120]
+    ])"});
+  auto table_3 = TableFromJSON(dummy_schema, {R"([
+      [21, 21, 210],
+      [23, 24, 220]
+    ])"});
+  auto expected_table = TableFromJSON(dummy_schema, {R"([
+      [1, 1, 10],
+      [3, 4, 20],
+      [11, 11, 110],
+      [13, 14, 120],
+      [21, 21, 210],
+      [23, 24, 220]
+    ])"});
+
+  std::vector<std::shared_ptr<Table>> input_tables = {table_1, table_2, 
table_3};
+  auto format = std::make_shared<arrow::dataset::IpcFileFormat>();
+  auto filesystem = std::make_shared<fs::LocalFileSystem>();
+  const std::vector<std::string> file_names = {"serde_test_1.arrow", 
"serde_test_2.arrow",
+                                               "serde_test_3.arrow"};
+
+  const std::string path_prefix = "substrait-globfiles-";
+  std::string glob_like_path;
+  int idx = 0;
+
+  // creating a vector to avoid out-of-scoping Temporary directory
+  // if out-of-scoped the written folder get wiped out
+  std::vector<std::unique_ptr<arrow::internal::TemporaryDir>> tempdirs;
+  for (size_t i = 0; i < file_names.size(); i++) {
+    ASSERT_OK_AND_ASSIGN(auto tempdir, 
arrow::internal::TemporaryDir::Make(path_prefix));
+    tempdirs.push_back(std::move(tempdir));
+  }
+  for (const auto& file_name : file_names) {

Review Comment:
   ```suggestion
     }
     
     std::string sample_tempdir_path = tempdirs[0]->path();
     std::string base_tempdir_path = sample_tempdir_path.substr(0, 
sample_tempdir_path.find(path_prefix));
     std::string glob_like_path = "file://" + base_tempdir_path + path_prefix + 
"*/serde_test_*.arrow";
     
     for (const auto& file_name : file_names) {
   ```
   Nit: what you have works but I think it might be a little readable / clearer 
to calculate `glob_like_path` outside of the for loop.



##########
cpp/src/arrow/filesystem/path_util.h:
##########
@@ -75,6 +75,9 @@ util::string_view RemoveTrailingSlash(util::string_view s);
 ARROW_EXPORT
 Status AssertNoTrailingSlash(util::string_view s);
 
+ARROW_EXPORT
+Status AssertLeadingSlash(util::string_view s);

Review Comment:
   Yes, it should probably return a bool and be named `HasLeadingSlash` but, at 
that point, the method is just...
   
   ```
   bool HasLeadingSlash(util::string_view key) {
     return key.front() == '/';
   }
   ```
   
   ...and I'm not really sure such a method would be really needed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to