This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 134b65e23f6c15238b839f0f734d72d73fd06802
Author: Antoine Pitrou <anto...@python.org>
AuthorDate: Thu Oct 3 18:37:33 2019 +0200

    ARROW-6613: [C++] Minimize usage of boost::filesystem
    
    Allow building Arrow core (without tests, without filesystem layer) without 
boost dependencies.
    
    The `minimal_build` example produces something like this:
    ```
    # size  /usr/local/lib/libarrow.so
       text        data     bss     dec     hex filename
    4001919       84992    1712 4088623  3e632f /usr/local/lib/libarrow.so
    # ldd  /usr/local/lib/libarrow.so
        linux-vdso.so.1 (0x00007fff45f40000)
        libstdc++.so.6 => /usr/lib/x86_64-linux-gnu/libstdc++.so.6 
(0x00007f7089b08000)
        libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007f708976a000)
        libgcc_s.so.1 => /lib/x86_64-linux-gnu/libgcc_s.so.1 
(0x00007f7089552000)
        libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 
(0x00007f7089333000)
        libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f7088f42000)
        /lib64/ld-linux-x86-64.so.2 (0x00007f708a478000)
    ```
    
    Closes #5545 from pitrou/ARROW-6613-boost-fs-optional and squashes the 
following commits:
    
    4757240b6 <Antoine Pitrou> Address review comments
    6ac957778 <Antoine Pitrou> ARROW-6613:  Minimize usage of boost::filesystem
    
    Authored-by: Antoine Pitrou <anto...@python.org>
    Signed-off-by: Antoine Pitrou <anto...@python.org>
---
 cpp/CMakeLists.txt                         |  15 +-
 cpp/src/arrow/CMakeLists.txt               |   4 +
 cpp/src/arrow/io/hdfs_internal.cc          |  24 +-
 cpp/src/arrow/ipc/json_integration_test.cc |  29 +-
 cpp/src/arrow/util/io_util.cc              | 465 +++++++++++++++++++++++------
 cpp/src/arrow/util/io_util.h               |  31 +-
 cpp/src/arrow/util/io_util_test.cc         | 182 +++++++++++
 7 files changed, 626 insertions(+), 124 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 1e49a4b..8015c27 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -673,12 +673,19 @@ if(ARROW_STATIC_LINK_LIBS)
   add_dependencies(arrow_dependencies ${ARROW_STATIC_LINK_LIBS})
 endif()
 
-set(ARROW_SHARED_PRIVATE_LINK_LIBS ${ARROW_STATIC_LINK_LIBS} 
${BOOST_FILESYSTEM_LIBRARY}
-                                   ${BOOST_SYSTEM_LIBRARY})
+set(ARROW_SHARED_PRIVATE_LINK_LIBS ${ARROW_STATIC_LINK_LIBS})
 
-list(APPEND ARROW_STATIC_LINK_LIBS ${BOOST_FILESYSTEM_LIBRARY} 
${BOOST_SYSTEM_LIBRARY})
+# Is boost::filesystem needed?
+if(ARROW_BUILD_INTEGRATION OR ARROW_BUILD_TESTS OR ARROW_FILESYSTEM OR 
ARROW_HDFS)
+  set(ARROW_WITH_BOOST_FILESYSTEM ON)
 
-list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS boost_filesystem boost_system 
boost_regex)
+  list(APPEND ARROW_SHARED_PRIVATE_LINK_LIBS ${BOOST_FILESYSTEM_LIBRARY}
+              ${BOOST_SYSTEM_LIBRARY})
+
+  list(APPEND ARROW_STATIC_LINK_LIBS ${BOOST_FILESYSTEM_LIBRARY} 
${BOOST_SYSTEM_LIBRARY})
+
+  list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS boost_filesystem 
boost_system)
+endif()
 
 if(NOT MSVC)
   list(APPEND ARROW_LINK_LIBS ${CMAKE_DL_LIBS})
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index bf5a77e..8aaf5f5 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -298,6 +298,10 @@ if(ARROW_WITH_URIPARSER)
   list(APPEND ARROW_SRCS util/uri.cc)
 endif()
 
+if(ARROW_WITH_BOOST_FILESYSTEM)
+  add_definitions(-DARROW_WITH_BOOST_FILESYSTEM)
+endif()
+
 if(ARROW_WITH_BROTLI)
   add_definitions(-DARROW_WITH_BROTLI)
   list(APPEND ARROW_SRCS util/compression_brotli.cc)
diff --git a/cpp/src/arrow/io/hdfs_internal.cc 
b/cpp/src/arrow/io/hdfs_internal.cc
index ae2c4f0..0b1bd66 100644
--- a/cpp/src/arrow/io/hdfs_internal.cc
+++ b/cpp/src/arrow/io/hdfs_internal.cc
@@ -41,11 +41,19 @@
 #include <dlfcn.h>
 #endif
 
+#ifdef ARROW_WITH_BOOST_FILESYSTEM
 #include <boost/filesystem.hpp>  // NOLINT
+#endif
 
 #include "arrow/status.h"
 #include "arrow/util/logging.h"
 
+namespace arrow {
+namespace io {
+namespace internal {
+
+#ifdef ARROW_WITH_BOOST_FILESYSTEM
+
 namespace fs = boost::filesystem;
 
 #ifndef _WIN32
@@ -282,10 +290,6 @@ static inline void* GetLibrarySymbol(void* handle, const 
char* symbol) {
         GetLibrarySymbol(SHIM->handle, "" #SYMBOL_NAME); \
   }
 
-namespace arrow {
-namespace io {
-namespace internal {
-
 static LibHdfsShim libhdfs_shim;
 static LibHdfsShim libhdfs3_shim;
 
@@ -570,6 +574,18 @@ Status ConnectLibHdfs3(LibHdfsShim** driver) {
   return shim->GetRequiredSymbols();
 }
 
+#else  // ARROW_WITH_BOOST_FILESYSTEM
+
+Status ConnectLibHdfs(LibHdfsShim** driver) {
+  return Status::NotImplemented("ConnectLibHdfs not available in this Arrow 
build");
+}
+
+Status ConnectLibHdfs3(LibHdfsShim** driver) {
+  return Status::NotImplemented("ConnectLibHdfs3 not available in this Arrow 
build");
+}
+
+#endif
+
 }  // namespace internal
 }  // namespace io
 }  // namespace arrow
diff --git a/cpp/src/arrow/ipc/json_integration_test.cc 
b/cpp/src/arrow/ipc/json_integration_test.cc
index 931ede3..5598805 100644
--- a/cpp/src/arrow/ipc/json_integration_test.cc
+++ b/cpp/src/arrow/ipc/json_integration_test.cc
@@ -21,14 +21,13 @@
 #include <fstream>  // IWYU pragma: keep
 #include <iostream>
 #include <memory>
+#include <sstream>
 #include <string>
 #include <vector>
 
 #include <gflags/gflags.h>
 #include <gtest/gtest.h>
 
-#include <boost/filesystem.hpp>  // NOLINT
-
 #include "arrow/io/file.h"
 #include "arrow/ipc/json_integration.h"
 #include "arrow/ipc/reader.h"
@@ -38,6 +37,7 @@
 #include "arrow/status.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type.h"
+#include "arrow/util/io_util.h"
 
 DEFINE_string(arrow, "", "Arrow file name");
 DEFINE_string(json, "", "JSON file name");
@@ -47,11 +47,10 @@ DEFINE_string(
 DEFINE_bool(integration, false, "Run in integration test mode");
 DEFINE_bool(verbose, true, "Verbose output");
 
-namespace fs = boost::filesystem;
-
 namespace arrow {
 
 class Buffer;
+using internal::TemporaryDir;
 
 namespace ipc {
 
@@ -228,18 +227,15 @@ Status RunCommand(const std::string& json_path, const 
std::string& arrow_path,
   }
 }
 
-static std::string temp_path() {
-  return (fs::temp_directory_path() / fs::unique_path()).string();
-}
-
 class TestJSONIntegration : public ::testing::Test {
  public:
-  void SetUp() {}
+  void SetUp() { ASSERT_OK(TemporaryDir::Make("json-integration-test-", 
&temp_dir_)); }
 
   std::string mkstemp() {
-    auto path = temp_path();
-    tmp_paths_.push_back(path);
-    return path;
+    std::stringstream ss;
+    ss << temp_dir_->path().ToString();
+    ss << "file" << ntemp_++;
+    return ss.str();
   }
 
   Status WriteJson(const char* data, const std::string& path) {
@@ -251,14 +247,11 @@ class TestJSONIntegration : public ::testing::Test {
     return Status::OK();
   }
 
-  void TearDown() {
-    for (const std::string path : tmp_paths_) {
-      ARROW_UNUSED(std::remove(path.c_str()));
-    }
-  }
+  void TearDown() { temp_dir_.reset(); }
 
  protected:
-  std::vector<std::string> tmp_paths_;
+  std::unique_ptr<TemporaryDir> temp_dir_;
+  int ntemp_ = 1;
 };
 
 static const char* JSON_EXAMPLE = R"example(
diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc
index 45f2afd..a2c680e 100644
--- a/cpp/src/arrow/util/io_util.cc
+++ b/cpp/src/arrow/util/io_util.cc
@@ -33,6 +33,7 @@
 #include <sstream>
 #include <string>
 #include <utility>
+#include <vector>
 
 #include <fcntl.h>
 #include <signal.h>
@@ -49,7 +50,9 @@
 #define ARROW_WRITE_SHMODE S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH
 #endif
 
+#ifdef ARROW_WITH_BOOST_FILESYSTEM
 #include <boost/filesystem.hpp>
+#endif
 
 // ----------------------------------------------------------------------
 // file compatibility stuff
@@ -178,10 +181,24 @@ Status StdinStream::Read(int64_t nbytes, 
std::shared_ptr<Buffer>* out) {
 
 namespace internal {
 
+#ifdef ARROW_WITH_BOOST_FILESYSTEM
 namespace bfs = ::boost::filesystem;
+#endif
 
 namespace {
 
+template <typename CharT>
+std::basic_string<CharT> ReplaceChars(std::basic_string<CharT> s, CharT find, 
CharT rep) {
+  if (find != rep) {
+    for (size_t i = 0; i < s.length(); ++i) {
+      if (s[i] == find) {
+        s[i] = rep;
+      }
+    }
+  }
+  return s;
+}
+
 Status StringToNative(const std::string& s, NativePathString* out) {
 #if _WIN32
   std::wstring ws;
@@ -193,8 +210,73 @@ Status StringToNative(const std::string& s, 
NativePathString* out) {
   return Status::OK();
 }
 
+#if _WIN32
+Status NativeToString(const NativePathString& ws, std::string* out) {
+  std::string s;
+  RETURN_NOT_OK(::arrow::util::WideStringToUTF8(ws, &s));
+  *out = std::move(s);
+  return Status::OK();
+}
+#endif
+
+#if _WIN32
+const wchar_t kNativeSep = L'\\';
+const wchar_t kGenericSep = L'/';
+const wchar_t* kAllSeps = L"\\/";
+#else
+const char kNativeSep = '/';
+const char kGenericSep = '/';
+const char* kAllSeps = "/";
+#endif
+
+NativePathString NativeSlashes(NativePathString s) {
+  return ReplaceChars(std::move(s), kGenericSep, kNativeSep);
+}
+
+NativePathString GenericSlashes(NativePathString s) {
+  return ReplaceChars(std::move(s), kNativeSep, kGenericSep);
+}
+
+NativePathString NativeParent(const NativePathString& s) {
+  auto last_sep = s.find_last_of(kAllSeps);
+  if (last_sep == s.length() - 1) {
+    // Last separator is a trailing separator, skip all trailing separators
+    // and try again
+    auto before_last_seps = s.find_last_not_of(kAllSeps);
+    if (before_last_seps == NativePathString::npos) {
+      // Only separators in path
+      return s;
+    }
+    last_sep = s.find_last_of(kAllSeps, before_last_seps);
+  }
+  if (last_sep == NativePathString::npos) {
+    // No (other) separator in path
+    return s;
+  }
+  // There may be multiple contiguous separators, skip all of them
+  auto before_last_seps = s.find_last_not_of(kAllSeps, last_sep);
+  if (before_last_seps == NativePathString::npos) {
+    // All separators are at start of string, keep them all
+    return s.substr(0, last_sep + 1);
+  } else {
+    return s.substr(0, before_last_seps + 1);
+  }
+}
+
+Status ValidatePath(const std::string& s) {
+  if (s.find_first_of('\0') != std::string::npos) {
+    return Status::Invalid("Embedded NUL char in path: '", s, "'");
+  }
+  return Status::OK();
+}
+
 }  // namespace
 
+#ifdef ARROW_WITH_BOOST_FILESYSTEM
+
+// NOTE: catching filesystem_error gives more context than system::error_code
+// (it includes the file path(s) in the error message)
+
 #define BOOST_FILESYSTEM_TRY try {
 #define BOOST_FILESYSTEM_CATCH           \
   }                                      \
@@ -202,25 +284,11 @@ Status StringToNative(const std::string& s, 
NativePathString* out) {
     return ToStatus(_err);               \
   }
 
-// NOTE: catching filesystem_error gives more context than system::error_code
-// (it includes the file path(s) in the error message)
-
 static Status ToStatus(const bfs::filesystem_error& err) {
   return Status::IOError(err.what());
 }
 
-static std::string MakeRandomName(int num_chars) {
-  static const std::string chars = "0123456789abcdefghijklmnopqrstuvwxyz";
-  std::random_device gen;
-  std::uniform_int_distribution<int> dist(0, static_cast<int>(chars.length() - 
1));
-
-  std::string s;
-  s.reserve(num_chars);
-  for (int i = 0; i < num_chars; ++i) {
-    s += chars[dist(gen)];
-  }
-  return s;
-}
+#endif  // ARROW_WITH_BOOST_FILESYSTEM
 
 std::string ErrnoMessage(int errnum) { return std::strerror(errnum); }
 
@@ -245,27 +313,32 @@ std::string WinErrorMessage(int errnum) {
 
 struct PlatformFilename::Impl {
   Impl() = default;
-  explicit Impl(bfs::path p) : path(p.make_preferred()) {}
+  explicit Impl(NativePathString p) : native_(NativeSlashes(std::move(p))) {}
 
-  bfs::path path;
+  NativePathString native_;
+
+  // '/'-separated
+  NativePathString generic() const { return GenericSlashes(native_); }
+
+#ifdef ARROW_WITH_BOOST_FILESYSTEM
+  bfs::path boost_path() const { return bfs::path(native_); }
+#endif
 };
 
 PlatformFilename::PlatformFilename() : impl_(new Impl{}) {}
 
 PlatformFilename::~PlatformFilename() {}
 
-PlatformFilename::PlatformFilename(const Impl& impl) : impl_(new Impl(impl)) {}
-
-PlatformFilename::PlatformFilename(Impl&& impl) : impl_(new 
Impl(std::move(impl))) {}
+PlatformFilename::PlatformFilename(Impl impl) : impl_(new 
Impl(std::move(impl))) {}
 
 PlatformFilename::PlatformFilename(const PlatformFilename& other)
-    : PlatformFilename(Impl{other.impl_->path}) {}
+    : PlatformFilename(Impl{other.impl_->native_}) {}
 
 PlatformFilename::PlatformFilename(PlatformFilename&& other)
     : impl_(std::move(other.impl_)) {}
 
 PlatformFilename& PlatformFilename::operator=(const PlatformFilename& other) {
-  this->impl_.reset(new Impl{other.impl_->path});
+  this->impl_.reset(new Impl{other.impl_->native_});
   return *this;
 }
 
@@ -277,15 +350,20 @@ PlatformFilename& 
PlatformFilename::operator=(PlatformFilename&& other) {
 PlatformFilename::PlatformFilename(const NativePathString& path)
     : PlatformFilename(Impl{path}) {}
 
-const NativePathString& PlatformFilename::ToNative() const {
-  return impl_->path.native();
+bool PlatformFilename::operator==(const PlatformFilename& other) const {
+  return impl_->native_ == other.impl_->native_;
+}
+
+bool PlatformFilename::operator!=(const PlatformFilename& other) const {
+  return impl_->native_ != other.impl_->native_;
 }
 
+const NativePathString& PlatformFilename::ToNative() const { return 
impl_->native_; }
+
 std::string PlatformFilename::ToString() const {
 #if _WIN32
-  std::wstring ws = impl_->path.generic_wstring();
   std::string s;
-  Status st = ::arrow::util::WideStringToUTF8(ws, &s);
+  Status st = NativeToString(impl_->generic(), &s);
   if (!st.ok()) {
     std::stringstream ss;
     ss << "<Unrepresentable filename: " << st.ToString() << ">";
@@ -293,14 +371,16 @@ std::string PlatformFilename::ToString() const {
   }
   return s;
 #else
-  return impl_->path.generic_string();
+  return impl_->generic();
 #endif
 }
 
+PlatformFilename PlatformFilename::Parent() const {
+  return PlatformFilename(NativeParent(ToNative()));
+}
+
 Status PlatformFilename::FromString(const std::string& file_name, 
PlatformFilename* out) {
-  if (file_name.find_first_of('\0') != std::string::npos) {
-    return Status::Invalid("Embedded NUL char in file name: '", file_name, 
"'");
-  }
+  RETURN_NOT_OK(ValidatePath(file_name));
   NativePathString ns;
   RETURN_NOT_OK(StringToNative(file_name, &ns));
   *out = PlatformFilename(std::move(ns));
@@ -309,38 +389,93 @@ Status PlatformFilename::FromString(const std::string& 
file_name, PlatformFilena
 
 Status PlatformFilename::Join(const std::string& child_name,
                               PlatformFilename* out) const {
-  NativePathString ns;
-  RETURN_NOT_OK(StringToNative(child_name, &ns));
-  auto path = impl_->path / ns;
-  *out = PlatformFilename(Impl{std::move(path)});
-  return Status::OK();
+  PlatformFilename child;
+  RETURN_NOT_OK(PlatformFilename::FromString(child_name, &child));
+  if (impl_->native_.empty() || impl_->native_.back() == kNativeSep) {
+    *out = PlatformFilename(Impl{impl_->native_ + child.impl_->native_});
+    return Status::OK();
+  } else {
+    *out = PlatformFilename(Impl{impl_->native_ + kNativeSep + 
child.impl_->native_});
+    return Status::OK();
+  }
+}
+
+Status FileNameFromString(const std::string& file_name, PlatformFilename* out) 
{
+  return PlatformFilename::FromString(file_name, out);
+}
+
+//
+// Filesystem access routines
+//
+
+namespace {
+
+Status DoCreateDir(const PlatformFilename& dir_path, bool create_parents, 
bool* created) {
+#ifdef _WIN32
+  if (CreateDirectoryW(dir_path.ToNative().c_str(), nullptr)) {
+    *created = true;
+    return Status::OK();
+  }
+  int errnum = GetLastError();
+  if (errnum == ERROR_ALREADY_EXISTS) {
+    *created = false;
+    return Status::OK();
+  }
+  if (create_parents && errnum == ERROR_PATH_NOT_FOUND) {
+    auto parent_path = dir_path.Parent();
+    if (parent_path != dir_path) {
+      RETURN_NOT_OK(DoCreateDir(parent_path, create_parents, created));
+      return DoCreateDir(dir_path, false, created);  // Retry
+    }
+  }
+  return Status::IOError("Cannot create directory '", dir_path.ToString(),
+                         "': ", WinErrorMessage(errnum));
+#else
+  if (mkdir(dir_path.ToNative().c_str(), S_IRWXU | S_IRWXG | S_IRWXO) == 0) {
+    *created = true;
+    return Status::OK();
+  }
+  if (errno == EEXIST) {
+    *created = false;
+    return Status::OK();
+  }
+  if (create_parents && errno == ENOENT) {
+    auto parent_path = dir_path.Parent();
+    if (parent_path != dir_path) {
+      RETURN_NOT_OK(DoCreateDir(parent_path, create_parents, created));
+      return DoCreateDir(dir_path, false, created);  // Retry
+    }
+  }
+  return Status::IOError("Cannot create directory '", dir_path.ToString(),
+                         "': ", ErrnoMessage(errno));
+#endif
 }
 
+}  // namespace
+
 Status CreateDir(const PlatformFilename& dir_path, bool* created) {
-  bool res;
-  BOOST_FILESYSTEM_TRY
-  res = bfs::create_directory(dir_path.impl_->path);
-  BOOST_FILESYSTEM_CATCH
+  bool did_create = false;
+  RETURN_NOT_OK(DoCreateDir(dir_path, false, &did_create));
   if (created) {
-    *created = res;
+    *created = did_create;
   }
   return Status::OK();
 }
 
 Status CreateDirTree(const PlatformFilename& dir_path, bool* created) {
-  bool res;
-  BOOST_FILESYSTEM_TRY
-  res = bfs::create_directories(dir_path.impl_->path);
-  BOOST_FILESYSTEM_CATCH
+  bool did_create = false;
+  RETURN_NOT_OK(DoCreateDir(dir_path, true, &did_create));
   if (created) {
-    *created = res;
+    *created = did_create;
   }
   return Status::OK();
 }
 
+#ifdef ARROW_WITH_BOOST_FILESYSTEM
+
 Status DeleteDirTree(const PlatformFilename& dir_path, bool* deleted) {
   BOOST_FILESYSTEM_TRY
-  const auto& path = dir_path.impl_->path;
+  const auto& path = dir_path.impl()->boost_path();
   // XXX There is a race here.
   auto st = bfs::symlink_status(path);
   if (st.type() != bfs::file_not_found && st.type() != bfs::directory_file) {
@@ -356,7 +491,7 @@ Status DeleteDirTree(const PlatformFilename& dir_path, 
bool* deleted) {
 
 Status DeleteDirContents(const PlatformFilename& dir_path, bool* deleted) {
   BOOST_FILESYSTEM_TRY
-  const auto& path = dir_path.impl_->path;
+  const auto& path = dir_path.impl()->boost_path();
   // XXX There is a race here.
   auto st = bfs::symlink_status(path);
   if (st.type() == bfs::file_not_found) {
@@ -380,41 +515,74 @@ Status DeleteDirContents(const PlatformFilename& 
dir_path, bool* deleted) {
   return Status::OK();
 }
 
+#else  // ARROW_WITH_BOOST_FILESYSTEM
+
+Status DeleteDirTree(const PlatformFilename& dir_path, bool* deleted) {
+  return Status::NotImplemented("DeleteDirTree not available in this Arrow 
build");
+}
+
+Status DeleteDirContents(const PlatformFilename& dir_path, bool* deleted) {
+  return Status::NotImplemented("DeleteDirContents not available in this Arrow 
build");
+}
+
+#endif
+
 Status DeleteFile(const PlatformFilename& file_path, bool* deleted) {
-  BOOST_FILESYSTEM_TRY
-  bool res = false;
-  const auto& path = file_path.impl_->path;
-  // XXX There is a race here, and boost::filesystem doesn't allow deleting
-  // only files and not empty directories.
-  auto st = bfs::symlink_status(path);
-  if (!bfs::is_directory(st)) {
-    res = bfs::remove(path);
+  bool did_delete = false;
+#ifdef _WIN32
+  if (DeleteFileW(file_path.ToNative().c_str())) {
+    did_delete = true;
+  } else {
+    int errnum = GetLastError();
+    if (errnum != ERROR_FILE_NOT_FOUND) {
+      return Status::IOError("Cannot delete file '", file_path.ToString(),
+                             "': ", WinErrorMessage(errnum));
+    }
+  }
+#else
+  if (unlink(file_path.ToNative().c_str()) == 0) {
+    did_delete = true;
   } else {
-    return Status::IOError("Cannot delete directory '", path.string(), "'");
+    if (errno != ENOENT) {
+      return Status::IOError("Cannot delete file '", file_path.ToString(),
+                             "': ", ErrnoMessage(errno));
+    }
   }
+#endif
   if (deleted) {
-    *deleted = res;
+    *deleted = did_delete;
   }
-  BOOST_FILESYSTEM_CATCH
   return Status::OK();
 }
 
 Status FileExists(const PlatformFilename& path, bool* out) {
-  BOOST_FILESYSTEM_TRY
-  *out = bfs::exists(path.impl_->path);
-  BOOST_FILESYSTEM_CATCH
+#ifdef _WIN32
+  if (GetFileAttributesW(path.ToNative().c_str()) != INVALID_FILE_ATTRIBUTES) {
+    *out = true;
+  } else {
+    int errnum = GetLastError();
+    if (errnum != ERROR_PATH_NOT_FOUND && errnum != ERROR_FILE_NOT_FOUND) {
+      return Status::IOError("Failed getting information for path '", 
path.ToString(),
+                             "': ", WinErrorMessage(errnum));
+    }
+    *out = false;
+  }
+#else
+  struct stat st;
+  if (stat(path.ToNative().c_str(), &st) == 0) {
+    *out = true;
+  } else {
+    if (errno != ENOENT && errno != ENOTDIR) {
+      return Status::IOError("Failed getting information for path '", 
path.ToString(),
+                             "': ", ErrnoMessage(errno));
+    }
+    *out = false;
+  }
+#endif
   return Status::OK();
 }
 
 //
-// File name handling
-//
-
-Status FileNameFromString(const std::string& file_name, PlatformFilename* out) 
{
-  return PlatformFilename::FromString(file_name, out);
-}
-
-//
 // Functions for creating file descriptors
 //
 
@@ -862,6 +1030,28 @@ Status GetEnvVar(const std::string& name, std::string* 
out) {
   return GetEnvVar(name.c_str(), out);
 }
 
+#ifdef _WIN32
+Status GetEnvVar(const std::string& name, NativePathString* out) {
+  NativePathString w_name;
+  constexpr int32_t bufsize = 2000;
+  wchar_t w_str[bufsize];
+
+  RETURN_NOT_OK(StringToNative(name, &w_name));
+  auto res = GetEnvironmentVariableW(w_name.c_str(), w_str, bufsize);
+  if (res >= bufsize) {
+    return Status::CapacityError("environment variable value too long");
+  } else if (res == 0) {
+    return Status::KeyError("environment variable undefined");
+  }
+  *out = NativePathString(w_str);
+  return Status::OK();
+}
+
+Status GetEnvVar(const char* name, NativePathString* out) {
+  return GetEnvVar(std::string(name), out);
+}
+#endif
+
 Status SetEnvVar(const char* name, const char* value) {
 #ifdef _WIN32
   if (SetEnvironmentVariableA(name, value)) {
@@ -900,33 +1090,134 @@ Status DelEnvVar(const char* name) {
 
 Status DelEnvVar(const std::string& name) { return DelEnvVar(name.c_str()); }
 
-TemporaryDir::TemporaryDir(PlatformFilename&& path) : path_(std::move(path)) {}
+//
+// Temporary directories
+//
 
-TemporaryDir::~TemporaryDir() {
-  Status st = DeleteDirTree(path_);
-  if (!st.ok()) {
-    ARROW_LOG(WARNING) << "When trying to delete temporary directory: " << st;
+#ifdef ARROW_WITH_BOOST_FILESYSTEM
+
+namespace {
+
+#if _WIN32
+NativePathString GetWindowsDirectoryPath() {
+  auto size = GetWindowsDirectoryW(nullptr, 0);
+  ARROW_CHECK_GT(size, 0) << "GetWindowsDirectoryW failed";
+  std::vector<wchar_t> w_str(size);
+  size = GetWindowsDirectoryW(w_str.data(), size);
+  ARROW_CHECK_GT(size, 0) << "GetWindowsDirectoryW failed";
+  return {w_str.data(), size};
+}
+#endif
+
+// Return a list of preferred locations for temporary files
+std::vector<NativePathString> GetPlatformTemporaryDirs() {
+  struct TempDirSelector {
+    std::string env_var;
+    NativePathString path_append;
+  };
+
+  std::vector<TempDirSelector> selectors;
+  NativePathString fallback_tmp;
+
+#if _WIN32
+  selectors = {
+      {"TMP", L""}, {"TEMP", L""}, {"LOCALAPPDATA", L"Temp"}, {"USERPROFILE", 
L"Temp"}};
+  fallback_tmp = GetWindowsDirectoryPath();
+
+#else
+  selectors = {{"TMPDIR", ""}, {"TMP", ""}, {"TEMP", ""}, {"TEMPDIR", ""}};
+#ifdef __ANDROID__
+  fallback_tmp = "/data/local/tmp";
+#else
+  fallback_tmp = "/tmp";
+#endif
+#endif
+
+  std::vector<NativePathString> temp_dirs;
+  for (const auto& sel : selectors) {
+    NativePathString p;
+    Status st = GetEnvVar(sel.env_var, &p);
+    if (st.IsKeyError()) {
+      // Environment variable absent, skip
+      continue;
+    }
+    if (!st.ok()) {
+      ARROW_LOG(WARNING) << "Failed getting env var '" << sel.env_var
+                         << "': " << st.ToString();
+      continue;
+    }
+    if (p.empty()) {
+      // Environment variable set to empty string, skip
+      continue;
+    }
+    if (sel.path_append.empty()) {
+      temp_dirs.push_back(p);
+    } else {
+      temp_dirs.push_back(p + kNativeSep + sel.path_append);
+    }
+  }
+  temp_dirs.push_back(fallback_tmp);
+  return temp_dirs;
+}
+
+std::string MakeRandomName(int num_chars) {
+  static const std::string chars = "0123456789abcdefghijklmnopqrstuvwxyz";
+  std::random_device gen;
+  std::uniform_int_distribution<int> dist(0, static_cast<int>(chars.length() - 
1));
+
+  std::string s;
+  s.reserve(num_chars);
+  for (int i = 0; i < num_chars; ++i) {
+    s += chars[dist(gen)];
   }
+  return s;
 }
+}  // namespace
 
 Status TemporaryDir::Make(const std::string& prefix, 
std::unique_ptr<TemporaryDir>* out) {
-  bfs::path path;
   std::string suffix = MakeRandomName(8);
+  NativePathString base_name;
+  RETURN_NOT_OK(StringToNative(prefix + suffix, &base_name));
+
+  auto base_dirs = GetPlatformTemporaryDirs();
+  DCHECK_NE(base_dirs.size(), 0);
+
+  auto st = Status::OK();
+  for (const auto& p : base_dirs) {
+    PlatformFilename fn(p + kNativeSep + base_name + kNativeSep);
+    bool created = false;
+    st = CreateDir(fn, &created);
+    if (!st.ok()) {
+      continue;
+    }
+    if (!created) {
+      // XXX Should we retry with another random name?
+      return Status::IOError("Path already exists: '", fn.ToString(), "'");
+    } else {
+      out->reset(new TemporaryDir(std::move(fn)));
+      return Status::OK();
+    }
+  }
 
-  BOOST_FILESYSTEM_TRY
-  path = bfs::temp_directory_path() / (prefix + suffix);
-  path += "/";
-  BOOST_FILESYSTEM_CATCH
+  DCHECK(!st.ok());
+  return st;
+}
 
-  PlatformFilename fn(path.native());
-  bool created = false;
-  RETURN_NOT_OK(CreateDir(fn, &created));
-  if (!created) {
-    // XXX Should we retry?
-    return Status::IOError("Path already exists: '", fn.ToString(), "'");
+#else  // ARROW_WITH_BOOST_FILESYSTEM
+
+Status TemporaryDir::Make(const std::string& prefix, 
std::unique_ptr<TemporaryDir>* out) {
+  return Status::NotImplemented("TemporaryDir not available in this Arrow 
build");
+}
+
+#endif
+
+TemporaryDir::TemporaryDir(PlatformFilename&& path) : path_(std::move(path)) {}
+
+TemporaryDir::~TemporaryDir() {
+  Status st = DeleteDirTree(path_);
+  if (!st.ok()) {
+    ARROW_LOG(WARNING) << "When trying to delete temporary directory: " << st;
   }
-  out->reset(new TemporaryDir(std::move(fn)));
-  return Status::OK();
 }
 
 SignalHandler::SignalHandler() : SignalHandler(static_cast<Callback>(nullptr)) 
{}
diff --git a/cpp/src/arrow/util/io_util.h b/cpp/src/arrow/util/io_util.h
index 1464aaf..b0fd11c 100644
--- a/cpp/src/arrow/util/io_util.h
+++ b/cpp/src/arrow/util/io_util.h
@@ -113,6 +113,8 @@ using NativePathString = std::string;
 
 class ARROW_EXPORT PlatformFilename {
  public:
+  struct Impl;
+
   ~PlatformFilename();
   PlatformFilename();
   PlatformFilename(const PlatformFilename&);
@@ -124,24 +126,22 @@ class ARROW_EXPORT PlatformFilename {
   const NativePathString& ToNative() const;
   std::string ToString() const;
 
+  PlatformFilename Parent() const;
+
   // These functions can fail for character encoding reasons.
   static Status FromString(const std::string& file_name, PlatformFilename* 
out);
   Status Join(const std::string& child_name, PlatformFilename* out) const;
 
+  bool operator==(const PlatformFilename& other) const;
+  bool operator!=(const PlatformFilename& other) const;
+
+  // Made public to avoid the proliferation of friend declarations.
+  const Impl* impl() const { return impl_.get(); }
+
  private:
-  struct Impl;
   std::unique_ptr<Impl> impl_;
 
-  explicit PlatformFilename(const Impl& impl);
-  explicit PlatformFilename(Impl&& impl);
-
-  // Those functions need access to the embedded path object
-  friend ARROW_EXPORT Status CreateDir(const PlatformFilename&, bool*);
-  friend ARROW_EXPORT Status CreateDirTree(const PlatformFilename&, bool*);
-  friend ARROW_EXPORT Status DeleteDirContents(const PlatformFilename&, bool*);
-  friend ARROW_EXPORT Status DeleteDirTree(const PlatformFilename&, bool*);
-  friend ARROW_EXPORT Status DeleteFile(const PlatformFilename&, bool*);
-  friend ARROW_EXPORT Status FileExists(const PlatformFilename&, bool*);
+  explicit PlatformFilename(Impl impl);
 };
 
 ARROW_EXPORT
@@ -199,6 +199,12 @@ ARROW_EXPORT
 Status GetEnvVar(const char* name, std::string* out);
 ARROW_EXPORT
 Status GetEnvVar(const std::string& name, std::string* out);
+#ifdef _WIN32
+ARROW_EXPORT
+Status GetEnvVar(const char* name, NativePathString* out);
+ARROW_EXPORT
+Status GetEnvVar(const std::string& name, NativePathString* out);
+#endif
 ARROW_EXPORT
 Status SetEnvVar(const char* name, const char* value);
 ARROW_EXPORT
@@ -219,8 +225,11 @@ class ARROW_EXPORT TemporaryDir {
  public:
   ~TemporaryDir();
 
+  /// '/'-terminated path to the temporary dir
   const PlatformFilename& path() { return path_; }
 
+  /// Create a temporary subdirectory in the system temporary dir,
+  /// named starting with `prefix`.
   static Status Make(const std::string& prefix, std::unique_ptr<TemporaryDir>* 
out);
 
  private:
diff --git a/cpp/src/arrow/util/io_util_test.cc 
b/cpp/src/arrow/util/io_util_test.cc
index 23001f0..016f715 100644
--- a/cpp/src/arrow/util/io_util_test.cc
+++ b/cpp/src/arrow/util/io_util_test.cc
@@ -77,6 +77,188 @@ TEST(PlatformFilename, Invalid) {
   ASSERT_RAISES(Invalid, PlatformFilename::FromString(s, &fn));
 }
 
+TEST(PlatformFilename, Join) {
+  PlatformFilename fn, joined;
+  ASSERT_OK(PlatformFilename::FromString("a/b", &fn));
+  ASSERT_OK(fn.Join("c/d", &joined));
+  ASSERT_EQ(joined.ToString(), "a/b/c/d");
+#if _WIN32
+  ASSERT_EQ(joined.ToNative(), L"a\\b\\c\\d");
+#else
+  ASSERT_EQ(joined.ToNative(), "a/b/c/d");
+#endif
+
+  ASSERT_OK(PlatformFilename::FromString("a/b/", &fn));
+  ASSERT_OK(fn.Join("c/d", &joined));
+  ASSERT_EQ(joined.ToString(), "a/b/c/d");
+#if _WIN32
+  ASSERT_EQ(joined.ToNative(), L"a\\b\\c\\d");
+#else
+  ASSERT_EQ(joined.ToNative(), "a/b/c/d");
+#endif
+
+  ASSERT_OK(PlatformFilename::FromString("", &fn));
+  ASSERT_OK(fn.Join("c/d", &joined));
+  ASSERT_EQ(joined.ToString(), "c/d");
+#if _WIN32
+  ASSERT_EQ(joined.ToNative(), L"c\\d");
+#else
+  ASSERT_EQ(joined.ToNative(), "c/d");
+#endif
+
+#if _WIN32
+  ASSERT_OK(PlatformFilename::FromString("a\\b", &fn));
+  ASSERT_OK(fn.Join("c\\d", &joined));
+  ASSERT_EQ(joined.ToString(), "a/b/c/d");
+  ASSERT_EQ(joined.ToNative(), L"a\\b\\c\\d");
+
+  ASSERT_OK(PlatformFilename::FromString("a\\b\\", &fn));
+  ASSERT_OK(fn.Join("c\\d", &joined));
+  ASSERT_EQ(joined.ToString(), "a/b/c/d");
+  ASSERT_EQ(joined.ToNative(), L"a\\b\\c\\d");
+#endif
+}
+
+TEST(PlatformFilename, JoinInvalid) {
+  PlatformFilename fn, joined;
+  ASSERT_OK(PlatformFilename::FromString("a/b", &fn));
+  std::string s = "foo";
+  s += '\x00';
+  ASSERT_RAISES(Invalid, fn.Join(s, &joined));
+}
+
+TEST(PlatformFilename, Parent) {
+  PlatformFilename fn;
+
+  // Relative
+  ASSERT_OK(PlatformFilename::FromString("ab/cd", &fn));
+  ASSERT_EQ(fn.ToString(), "ab/cd");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab");
+#if _WIN32
+  ASSERT_OK(PlatformFilename::FromString("ab/cd\\ef", &fn));
+  ASSERT_EQ(fn.ToString(), "ab/cd/ef");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab/cd");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab");
+#endif
+
+  // Absolute
+  ASSERT_OK(PlatformFilename::FromString("/ab/cd/ef", &fn));
+  ASSERT_EQ(fn.ToString(), "/ab/cd/ef");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "/ab/cd");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "/ab");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "/");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "/");
+#if _WIN32
+  ASSERT_OK(PlatformFilename::FromString("\\ab\\cd/ef", &fn));
+  ASSERT_EQ(fn.ToString(), "/ab/cd/ef");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "/ab/cd");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "/ab");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "/");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "/");
+#endif
+
+  // Empty
+  ASSERT_OK(PlatformFilename::FromString("", &fn));
+  ASSERT_EQ(fn.ToString(), "");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "");
+
+  // Multiple separators, relative
+  ASSERT_OK(PlatformFilename::FromString("ab//cd///ef", &fn));
+  ASSERT_EQ(fn.ToString(), "ab//cd///ef");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab//cd");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab");
+#if _WIN32
+  ASSERT_OK(PlatformFilename::FromString("ab\\\\cd\\\\\\ef", &fn));
+  ASSERT_EQ(fn.ToString(), "ab//cd///ef");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab//cd");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab");
+#endif
+
+  // Multiple separators, absolute
+  ASSERT_OK(PlatformFilename::FromString("//ab//cd///ef", &fn));
+  ASSERT_EQ(fn.ToString(), "//ab//cd///ef");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "//ab//cd");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "//ab");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "//");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "//");
+#if _WIN32
+  ASSERT_OK(PlatformFilename::FromString("\\\\ab\\cd\\ef", &fn));
+  ASSERT_EQ(fn.ToString(), "//ab/cd/ef");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "//ab/cd");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "//ab");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "//");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "//");
+#endif
+
+  // Trailing slashes
+  ASSERT_OK(PlatformFilename::FromString("/ab/cd/ef/", &fn));
+  ASSERT_EQ(fn.ToString(), "/ab/cd/ef/");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "/ab/cd");
+  ASSERT_OK(PlatformFilename::FromString("/ab/cd/ef//", &fn));
+  ASSERT_EQ(fn.ToString(), "/ab/cd/ef//");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "/ab/cd");
+  ASSERT_OK(PlatformFilename::FromString("ab/", &fn));
+  ASSERT_EQ(fn.ToString(), "ab/");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab/");
+  ASSERT_OK(PlatformFilename::FromString("ab//", &fn));
+  ASSERT_EQ(fn.ToString(), "ab//");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab//");
+#if _WIN32
+  ASSERT_OK(PlatformFilename::FromString("\\ab\\cd\\ef\\", &fn));
+  ASSERT_EQ(fn.ToString(), "/ab/cd/ef/");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "/ab/cd");
+  ASSERT_OK(PlatformFilename::FromString("\\ab\\cd\\ef\\\\", &fn));
+  ASSERT_EQ(fn.ToString(), "/ab/cd/ef//");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "/ab/cd");
+  ASSERT_OK(PlatformFilename::FromString("ab\\", &fn));
+  ASSERT_EQ(fn.ToString(), "ab/");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab/");
+  ASSERT_OK(PlatformFilename::FromString("ab\\\\", &fn));
+  ASSERT_EQ(fn.ToString(), "ab//");
+  fn = fn.Parent();
+  ASSERT_EQ(fn.ToString(), "ab//");
+#endif
+}
+
 TEST(CreateDirDeleteDir, Basics) {
   const std::string BASE = "xxx-io-util-test-dir";
   bool created, deleted;

Reply via email to