paleolimbot commented on code in PR #2918:
URL: https://github.com/apache/arrow-adbc/pull/2918#discussion_r2127436917


##########
c/vendor/toml++/toml.hpp:
##########


Review Comment:
   Just pointing out the change in scope introduced here (the current driver 
manager is ~2000 lines, and this is ~18,000 lines of vendored parser). Opting 
out of the driver manager is pretty easy (e.g., how we were able to get GDAL to 
support ADBC in its default build) and so I think this is OK, but there's a 
chance it could result in more opting out of the driver manager.



##########
c/driver_manager/adbc_driver_manager.cc:
##########
@@ -138,6 +329,174 @@ struct ManagedLibrary {
     // release() from the DLL - how to handle this?
   }
 
+  AdbcStatusCode GetDriverInfo(const std::string_view driver_name,
+                               const AdbcLoadOption load_options, DriverInfo& 
info,
+                               struct AdbcError* error) {
+    if (driver_name.empty()) {
+      SetError(error, "[DriverManager] Driver name is empty");
+      return ADBC_STATUS_INVALID_ARGUMENT;
+    }
+
+    std::filesystem::path driver_path(driver_name);
+    const bool allow_relative_paths =
+        load_options & ADBC_LOAD_OPTION_ALLOW_RELATIVE_PATHS;
+    if (driver_path.has_extension()) {
+      if (driver_path.is_relative() && !allow_relative_paths) {
+        SetError(
+            error,
+            "[DriverManager] Driver path is relative and relative paths are 
not allowed");
+        return ADBC_STATUS_INVALID_ARGUMENT;
+      }
+
+      if (HasExtension(driver_path, ".toml")) {
+        // if the extension is .toml, attempt to load the manifest
+        // erroring if we fail
+
+        auto status = LoadDriverManifest(driver_path, info, error);
+        if (status == ADBC_STATUS_OK) {
+          return Load(info.lib_path.c_str(), error);
+        }
+        return status;
+      }
+
+      // if the extension is not .toml, then just try to load the provided
+      // path as if it was an absolute path to a driver library
+      return Load(driver_path.string().c_str(), error);
+    }
+
+    if (driver_path.is_absolute()) {
+      // if we have an absolute path without an extension, first see if 
there's a
+      // toml file with the same name.
+      driver_path.replace_extension(".toml");
+      if (std::filesystem::exists(driver_path)) {
+        auto status = LoadDriverManifest(driver_path, info, error);
+        if (status == ADBC_STATUS_OK) {
+          return Load(info.lib_path.c_str(), error);
+        }
+      }
+
+      // otherwise just try to load the provided path as if it was an absolute 
path
+      return Load(driver_name.data(), error);
+    }
+
+    if (driver_path.has_extension()) {
+      if (driver_path.is_relative() &&
+          (load_options & ADBC_LOAD_OPTION_ALLOW_RELATIVE_PATHS) == 0) {
+        SetError(
+            error,
+            "[DriverManager] Driver path is relative and relative paths are 
not allowed");
+        return ADBC_STATUS_INVALID_ARGUMENT;
+      }
+
+#if defined(_WIN32)
+      static const std::wstring kPlatformLibrarySuffix = ".dll";
+#elif defined(__APPLE__)
+      static const std::string kPlatformLibrarySuffix = ".dylib";
+#else
+      static const std::string kPlatformLibrarySuffix = ".so";
+#endif
+      if (HasExtension(driver_path, kPlatformLibrarySuffix)) {
+        return Load(driver_path.string().c_str(), error);
+      }
+
+      SetError(error, "[DriverManager] Driver name has unrecognized extension: 
" +
+                          driver_path.extension().string());
+      return ADBC_STATUS_INVALID_ARGUMENT;
+    }
+
+    // not an absolute path, no extension. Let's search the configured paths
+    // based on the options
+    return FindDriver(driver_path, load_options, info, error);
+  }
+
+  AdbcStatusCode SearchPaths(const std::filesystem::path& driver_path,
+                             const std::vector<std::filesystem::path>& 
search_paths,
+                             DriverInfo& info, struct AdbcError* error) {
+    for (const auto& search_path : search_paths) {
+      std::filesystem::path full_path = search_path / driver_path;
+
+      // check for toml first, then dll
+      full_path.replace_extension(".toml");
+      if (std::filesystem::exists(full_path)) {
+        auto status = LoadDriverManifest(full_path, info, nullptr);
+        if (status == ADBC_STATUS_OK) {
+          return Load(info.lib_path.c_str(), error);
+        }
+      }
+
+      full_path.replace_extension("");  // remove the .toml extension
+      auto status = Load(full_path.string().c_str(), nullptr);
+      if (status == ADBC_STATUS_OK) {
+        return status;
+      }
+    }
+
+    return ADBC_STATUS_NOT_FOUND;
+  }
+
+  AdbcStatusCode FindDriver(const std::filesystem::path& driver_path,
+                            const AdbcLoadOption load_options, DriverInfo& 
info,
+                            struct AdbcError* error) {
+    if (driver_path.empty()) {
+      SetError(error, "[DriverManager] Driver path is empty");
+      return ADBC_STATUS_INVALID_ARGUMENT;
+    }
+
+#ifdef _WIN32
+    // windows is slightly more complex since we also need to check registry 
keys
+    // so we can't just grab the search paths only.
+    if (load_options & ADBC_LOAD_OPTION_SEARCH_ENV) {
+      auto search_paths = GetSearchPaths(ADBC_LOAD_OPTION_SEARCH_ENV);
+      auto status = SearchPaths(driver_path, search_paths, info, error);
+      if (status == ADBC_STATUS_OK) {
+        return status;
+      }
+    }
+
+    if (load_options & ADBC_LOAD_OPTION_SEARCH_USER) {
+      // Check the user registry for the driver
+      auto status =
+          LoadDriverFromRegistry(HKEY_CURRENT_USER, driver_path.string(), 
info, error);
+      if (status == ADBC_STATUS_OK) {
+        return Load(info.lib_path.c_str(), error);
+      }
+
+      status = SearchPaths(driver_path, 
GetSearchPaths(ADBC_LOAD_OPTION_SEARCH_USER),
+                           info, error);
+      if (status == ADBC_STATUS_OK) {
+        return status;
+      }
+    }
+
+    if (load_options & ADBC_LOAD_OPTION_SEARCH_SYSTEM) {
+      // Check the system registry for the driver
+      auto status =
+          LoadDriverFromRegistry(HKEY_LOCAL_MACHINE, driver_path.string(), 
info, error);
+      if (status == ADBC_STATUS_OK) {
+        return Load(info.lib_path.c_str(), error);
+      }
+
+      status = SearchPaths(driver_path, 
GetSearchPaths(ADBC_LOAD_OPTION_SEARCH_SYSTEM),
+                           info, error);
+      if (status == ADBC_STATUS_OK) {
+        return status;
+      }
+    }
+
+    SetError(error, "[DriverManager] Driver not found: " + 
driver_path.string());
+    return ADBC_STATUS_NOT_FOUND;
+#else
+    // Otherwise, search the configured paths
+    auto search_paths = GetSearchPaths(load_options);
+    auto status = SearchPaths(driver_path, search_paths, info, error);
+    if (status == ADBC_STATUS_NOT_FOUND) {
+      // If we reach here, we didn't find the driver
+      SetError(error, "[DriverManager] Driver not found: " + 
driver_path.string());

Review Comment:
   It is probably worth listing the search path in the error (or in its extra 
key/value data, if we can do that here) for debugging purposes? I can see that 
being one of the first questions when a driver fails to load.



##########
c/driver_manager/adbc_driver_manager.cc:
##########
@@ -632,6 +985,36 @@ static const char kDefaultEntrypoint[] = "AdbcDriverInit";
 
 // Other helpers (intentionally not in an anonymous namespace so they can be 
tested)
 
+std::vector<std::filesystem::path> AdbcParsePath(const std::string_view& path) 
{
+  std::vector<std::filesystem::path> result;
+  if (path.empty()) {
+    return result;
+  }
+
+#ifdef _WIN32
+  constexpr char delimiter = ';';
+#else
+  constexpr char delimiter = ':';
+#endif  // _WIN32
+
+  bool in_quotes = false;

Review Comment:
   This is perhaps a good reason to expose a version that just takes a list of 
strings (the caller may have opinions/need to work around something if this 
logic isn't correct)



##########
r/adbcdrivermanager/src/Makevars:
##########
@@ -16,7 +16,7 @@
 # under the License.
 
 CXX_STD = CXX17
-PKG_CPPFLAGS=-I../src/c/include -I../src/c -DADBC_EXPORT=""
+PKG_CPPFLAGS=-I../src/c/include -I../src/c -I../src/c/vendor -DADBC_EXPORT=""

Review Comment:
   Thanks for handling this 😬 



##########
c/driver_manager/adbc_driver_manager.cc:
##########
@@ -114,7 +125,187 @@ struct OwnedError {
   }
 };
 
+std::filesystem::path UserConfigDir() {
+  std::filesystem::path config_dir;
+#if defined(_WIN32)
+  auto dir = std::getenv("AppData");
+  if (dir) {
+    config_dir = std::filesystem::path(dir);
+    config_dir /= "ADBC/drivers";
+  }
+#elif defined(__APPLE__)
+  auto dir = std::getenv("HOME");
+  if (dir) {
+    config_dir = std::filesystem::path(dir);
+    config_dir /= "Library/Application Support/ADBC";
+  }
+#elif defined(__linux__)
+  auto dir = std::getenv("XDG_CONFIG_HOME");
+  if (!dir) {
+    dir = std::getenv("HOME");
+    if (dir) {
+      config_dir = std::filesystem::path(dir);
+    }
+  } else {
+    config_dir = std::filesystem::path(dir);
+  }
+
+  if (!config_dir.empty()) {
+    config_dir /= "adbc";
+  }
+#endif
+
+  return config_dir;

Review Comment:
   Do you need this (and the System config directory) to have an architecture 
suffix? (i.e., will Python running on ARM Windows accidentally load an x64 
binary?). I'm not sure how many people are running x64 on MacOS these days but 
it's potentially a problem there, too. (As a data point, DuckDB separates 
extensions built for these platforms in separate folders in a user config 
directory)



##########
c/include/arrow-adbc/adbc_driver_manager.h:
##########
@@ -51,6 +63,52 @@ ADBC_EXPORT
 AdbcStatusCode AdbcLoadDriver(const char* driver_name, const char* entrypoint,
                               int version, void* driver, struct AdbcError* 
error);
 
+/// \brief Common entry point to search for and load a driver or manifest.
+///
+/// The driver manager can fill in default implementations of some ADBC 
functions
+/// for drivers. Drivers must implement a minimum level of functionality for 
this
+/// to be possible, however, and some functions must be implemented by the 
driver.
+///
+/// This function is different from AdbcLoadDriver in that it also accepts the 
name
+/// of a driver manifest file, and allows specifying options to control what
+/// directories it will search through. The behavior is as follows:
+///
+/// If the passed in driver_name is an absolute path:
+/// - If the path has a `.toml` extension, it will attempt to parse the 
manifest and load
+///   the driver specified within it. Erroring if this fails.
+/// - If the path has an extension other than `.toml`, it will attempt to load 
the path as
+///   a shared library. Erroring if this fails.
+///
+/// If the passed in driver_name does not have an extension and is not an 
absolute path:
+/// - The load_options parameter will control whether the driver manager will 
search
+///   the ADBC_CONFIG_PATH environment variable, the user configuration 
directory, and/or
+///   the system level directory of /etc/adbc for either a manifest file or a 
shared
+///   library.
+/// - For each path to be search, it will first look for 
<path>/<driver_name>.toml. If
+///   that file exists, it will attempt to parse the manifest and load the 
driver
+///   specified within it. Erroring if this fails.
+/// - If the manifest file does not exist, it will then look for
+/// <path>/<driver_name>.<extension>
+///   where <extension> is one of the following: `.so`, `.dll`, `.dylib`. If 
it can load
+///   that shared library, then success is returned. Otherwise it moves to the 
next
+///   directory until the search is either successful, or all directories have 
been
+///   searched.
+///
+/// \param[in] driver_name An identifier for the driver (e.g. a path to a
+///   shared library on Linux or the basename of a manifest file).
+/// \param[in] entrypoint  An identifier for the entrypoint (e.g. the symbol to
+///   call for AdbcDriverInitFunc on Linux).  If not provided, search for an
+///   entrypoint based on the driver name.
+/// \param[in] version The ADBC revision to attempt to initialize.
+/// \param[in] load_options bit mask of AdbcLoadOption to control the 
directories searched
+/// \param[out] raw_driver The table of function pointers to initialize
+/// \param[out] error An optional location to return an error message
+/// @return
+ADBC_EXPORT
+AdbcStatusCode AdbcFindLoadDriver(const char* driver_name, const char* 
entrypoint,
+                                  const int version, const AdbcLoadOption 
load_options,
+                                  void* driver, struct AdbcError* error);

Review Comment:
   I think we probably want a version that accepts a `const char** search_path` 
as an escape hatch in the event an application has specific requirements (e.g., 
maybe it has some user-configurable ADBC configuration of its own) but doesn't 
want to rewrite the TOML parsing logic?



##########
c/driver_manager/adbc_driver_manager.cc:
##########
@@ -114,7 +129,175 @@ struct OwnedError {
   }
 };
 
+std::filesystem::path UserConfigDir() {
+  std::filesystem::path config_dir;
+#if defined(_WIN32)
+  auto dir = std::getenv("AppData");

Review Comment:
   Also, does this need to use the Windows API? (The only other place I've seen 
this does:)
   
   
https://github.com/r-lib/rappdirs/blob/955f2e7997ed6d65cc47a3d77d0cf324c2685897/src/win-path.c#L13



##########
c/driver_manager/adbc_driver_manager.cc:
##########
@@ -114,7 +129,175 @@ struct OwnedError {
   }
 };
 
+std::filesystem::path UserConfigDir() {
+  std::filesystem::path config_dir;
+#if defined(_WIN32)
+  auto dir = std::getenv("AppData");

Review Comment:
   A realistic test for this would be to create a Windows user with a é (e.g.) 
in the name and ensure that this works (at least that's what happened the last 
time I had to debug this!)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to