paleolimbot commented on code in PR #2918:
URL: https://github.com/apache/arrow-adbc/pull/2918#discussion_r2127436917
##########
c/vendor/toml++/toml.hpp:
##########
Review Comment:
Just pointing out the change in scope introduced here (the current driver
manager is ~2000 lines, and this is ~18,000 lines of vendored parser). Opting
out of the driver manager is pretty easy (e.g., how we were able to get GDAL to
support ADBC in its default build) and so I think this is OK, but there's a
chance it could result in more opting out of the driver manager.
##########
c/driver_manager/adbc_driver_manager.cc:
##########
@@ -138,6 +329,174 @@ struct ManagedLibrary {
// release() from the DLL - how to handle this?
}
+ AdbcStatusCode GetDriverInfo(const std::string_view driver_name,
+ const AdbcLoadOption load_options, DriverInfo&
info,
+ struct AdbcError* error) {
+ if (driver_name.empty()) {
+ SetError(error, "[DriverManager] Driver name is empty");
+ return ADBC_STATUS_INVALID_ARGUMENT;
+ }
+
+ std::filesystem::path driver_path(driver_name);
+ const bool allow_relative_paths =
+ load_options & ADBC_LOAD_OPTION_ALLOW_RELATIVE_PATHS;
+ if (driver_path.has_extension()) {
+ if (driver_path.is_relative() && !allow_relative_paths) {
+ SetError(
+ error,
+ "[DriverManager] Driver path is relative and relative paths are
not allowed");
+ return ADBC_STATUS_INVALID_ARGUMENT;
+ }
+
+ if (HasExtension(driver_path, ".toml")) {
+ // if the extension is .toml, attempt to load the manifest
+ // erroring if we fail
+
+ auto status = LoadDriverManifest(driver_path, info, error);
+ if (status == ADBC_STATUS_OK) {
+ return Load(info.lib_path.c_str(), error);
+ }
+ return status;
+ }
+
+ // if the extension is not .toml, then just try to load the provided
+ // path as if it was an absolute path to a driver library
+ return Load(driver_path.string().c_str(), error);
+ }
+
+ if (driver_path.is_absolute()) {
+ // if we have an absolute path without an extension, first see if
there's a
+ // toml file with the same name.
+ driver_path.replace_extension(".toml");
+ if (std::filesystem::exists(driver_path)) {
+ auto status = LoadDriverManifest(driver_path, info, error);
+ if (status == ADBC_STATUS_OK) {
+ return Load(info.lib_path.c_str(), error);
+ }
+ }
+
+ // otherwise just try to load the provided path as if it was an absolute
path
+ return Load(driver_name.data(), error);
+ }
+
+ if (driver_path.has_extension()) {
+ if (driver_path.is_relative() &&
+ (load_options & ADBC_LOAD_OPTION_ALLOW_RELATIVE_PATHS) == 0) {
+ SetError(
+ error,
+ "[DriverManager] Driver path is relative and relative paths are
not allowed");
+ return ADBC_STATUS_INVALID_ARGUMENT;
+ }
+
+#if defined(_WIN32)
+ static const std::wstring kPlatformLibrarySuffix = ".dll";
+#elif defined(__APPLE__)
+ static const std::string kPlatformLibrarySuffix = ".dylib";
+#else
+ static const std::string kPlatformLibrarySuffix = ".so";
+#endif
+ if (HasExtension(driver_path, kPlatformLibrarySuffix)) {
+ return Load(driver_path.string().c_str(), error);
+ }
+
+ SetError(error, "[DriverManager] Driver name has unrecognized extension:
" +
+ driver_path.extension().string());
+ return ADBC_STATUS_INVALID_ARGUMENT;
+ }
+
+ // not an absolute path, no extension. Let's search the configured paths
+ // based on the options
+ return FindDriver(driver_path, load_options, info, error);
+ }
+
+ AdbcStatusCode SearchPaths(const std::filesystem::path& driver_path,
+ const std::vector<std::filesystem::path>&
search_paths,
+ DriverInfo& info, struct AdbcError* error) {
+ for (const auto& search_path : search_paths) {
+ std::filesystem::path full_path = search_path / driver_path;
+
+ // check for toml first, then dll
+ full_path.replace_extension(".toml");
+ if (std::filesystem::exists(full_path)) {
+ auto status = LoadDriverManifest(full_path, info, nullptr);
+ if (status == ADBC_STATUS_OK) {
+ return Load(info.lib_path.c_str(), error);
+ }
+ }
+
+ full_path.replace_extension(""); // remove the .toml extension
+ auto status = Load(full_path.string().c_str(), nullptr);
+ if (status == ADBC_STATUS_OK) {
+ return status;
+ }
+ }
+
+ return ADBC_STATUS_NOT_FOUND;
+ }
+
+ AdbcStatusCode FindDriver(const std::filesystem::path& driver_path,
+ const AdbcLoadOption load_options, DriverInfo&
info,
+ struct AdbcError* error) {
+ if (driver_path.empty()) {
+ SetError(error, "[DriverManager] Driver path is empty");
+ return ADBC_STATUS_INVALID_ARGUMENT;
+ }
+
+#ifdef _WIN32
+ // windows is slightly more complex since we also need to check registry
keys
+ // so we can't just grab the search paths only.
+ if (load_options & ADBC_LOAD_OPTION_SEARCH_ENV) {
+ auto search_paths = GetSearchPaths(ADBC_LOAD_OPTION_SEARCH_ENV);
+ auto status = SearchPaths(driver_path, search_paths, info, error);
+ if (status == ADBC_STATUS_OK) {
+ return status;
+ }
+ }
+
+ if (load_options & ADBC_LOAD_OPTION_SEARCH_USER) {
+ // Check the user registry for the driver
+ auto status =
+ LoadDriverFromRegistry(HKEY_CURRENT_USER, driver_path.string(),
info, error);
+ if (status == ADBC_STATUS_OK) {
+ return Load(info.lib_path.c_str(), error);
+ }
+
+ status = SearchPaths(driver_path,
GetSearchPaths(ADBC_LOAD_OPTION_SEARCH_USER),
+ info, error);
+ if (status == ADBC_STATUS_OK) {
+ return status;
+ }
+ }
+
+ if (load_options & ADBC_LOAD_OPTION_SEARCH_SYSTEM) {
+ // Check the system registry for the driver
+ auto status =
+ LoadDriverFromRegistry(HKEY_LOCAL_MACHINE, driver_path.string(),
info, error);
+ if (status == ADBC_STATUS_OK) {
+ return Load(info.lib_path.c_str(), error);
+ }
+
+ status = SearchPaths(driver_path,
GetSearchPaths(ADBC_LOAD_OPTION_SEARCH_SYSTEM),
+ info, error);
+ if (status == ADBC_STATUS_OK) {
+ return status;
+ }
+ }
+
+ SetError(error, "[DriverManager] Driver not found: " +
driver_path.string());
+ return ADBC_STATUS_NOT_FOUND;
+#else
+ // Otherwise, search the configured paths
+ auto search_paths = GetSearchPaths(load_options);
+ auto status = SearchPaths(driver_path, search_paths, info, error);
+ if (status == ADBC_STATUS_NOT_FOUND) {
+ // If we reach here, we didn't find the driver
+ SetError(error, "[DriverManager] Driver not found: " +
driver_path.string());
Review Comment:
It is probably worth listing the search path in the error (or in its extra
key/value data, if we can do that here) for debugging purposes? I can see that
being one of the first questions when a driver fails to load.
##########
c/driver_manager/adbc_driver_manager.cc:
##########
@@ -632,6 +985,36 @@ static const char kDefaultEntrypoint[] = "AdbcDriverInit";
// Other helpers (intentionally not in an anonymous namespace so they can be
tested)
+std::vector<std::filesystem::path> AdbcParsePath(const std::string_view& path)
{
+ std::vector<std::filesystem::path> result;
+ if (path.empty()) {
+ return result;
+ }
+
+#ifdef _WIN32
+ constexpr char delimiter = ';';
+#else
+ constexpr char delimiter = ':';
+#endif // _WIN32
+
+ bool in_quotes = false;
Review Comment:
This is perhaps a good reason to expose a version that just takes a list of
strings (the caller may have opinions/need to work around something if this
logic isn't correct)
##########
r/adbcdrivermanager/src/Makevars:
##########
@@ -16,7 +16,7 @@
# under the License.
CXX_STD = CXX17
-PKG_CPPFLAGS=-I../src/c/include -I../src/c -DADBC_EXPORT=""
+PKG_CPPFLAGS=-I../src/c/include -I../src/c -I../src/c/vendor -DADBC_EXPORT=""
Review Comment:
Thanks for handling this 😬
##########
c/driver_manager/adbc_driver_manager.cc:
##########
@@ -114,7 +125,187 @@ struct OwnedError {
}
};
+std::filesystem::path UserConfigDir() {
+ std::filesystem::path config_dir;
+#if defined(_WIN32)
+ auto dir = std::getenv("AppData");
+ if (dir) {
+ config_dir = std::filesystem::path(dir);
+ config_dir /= "ADBC/drivers";
+ }
+#elif defined(__APPLE__)
+ auto dir = std::getenv("HOME");
+ if (dir) {
+ config_dir = std::filesystem::path(dir);
+ config_dir /= "Library/Application Support/ADBC";
+ }
+#elif defined(__linux__)
+ auto dir = std::getenv("XDG_CONFIG_HOME");
+ if (!dir) {
+ dir = std::getenv("HOME");
+ if (dir) {
+ config_dir = std::filesystem::path(dir);
+ }
+ } else {
+ config_dir = std::filesystem::path(dir);
+ }
+
+ if (!config_dir.empty()) {
+ config_dir /= "adbc";
+ }
+#endif
+
+ return config_dir;
Review Comment:
Do you need this (and the System config directory) to have an architecture
suffix? (i.e., will Python running on ARM Windows accidentally load an x64
binary?). I'm not sure how many people are running x64 on MacOS these days but
it's potentially a problem there, too. (As a data point, DuckDB separates
extensions built for these platforms in separate folders in a user config
directory)
##########
c/include/arrow-adbc/adbc_driver_manager.h:
##########
@@ -51,6 +63,52 @@ ADBC_EXPORT
AdbcStatusCode AdbcLoadDriver(const char* driver_name, const char* entrypoint,
int version, void* driver, struct AdbcError*
error);
+/// \brief Common entry point to search for and load a driver or manifest.
+///
+/// The driver manager can fill in default implementations of some ADBC
functions
+/// for drivers. Drivers must implement a minimum level of functionality for
this
+/// to be possible, however, and some functions must be implemented by the
driver.
+///
+/// This function is different from AdbcLoadDriver in that it also accepts the
name
+/// of a driver manifest file, and allows specifying options to control what
+/// directories it will search through. The behavior is as follows:
+///
+/// If the passed in driver_name is an absolute path:
+/// - If the path has a `.toml` extension, it will attempt to parse the
manifest and load
+/// the driver specified within it. Erroring if this fails.
+/// - If the path has an extension other than `.toml`, it will attempt to load
the path as
+/// a shared library. Erroring if this fails.
+///
+/// If the passed in driver_name does not have an extension and is not an
absolute path:
+/// - The load_options parameter will control whether the driver manager will
search
+/// the ADBC_CONFIG_PATH environment variable, the user configuration
directory, and/or
+/// the system level directory of /etc/adbc for either a manifest file or a
shared
+/// library.
+/// - For each path to be search, it will first look for
<path>/<driver_name>.toml. If
+/// that file exists, it will attempt to parse the manifest and load the
driver
+/// specified within it. Erroring if this fails.
+/// - If the manifest file does not exist, it will then look for
+/// <path>/<driver_name>.<extension>
+/// where <extension> is one of the following: `.so`, `.dll`, `.dylib`. If
it can load
+/// that shared library, then success is returned. Otherwise it moves to the
next
+/// directory until the search is either successful, or all directories have
been
+/// searched.
+///
+/// \param[in] driver_name An identifier for the driver (e.g. a path to a
+/// shared library on Linux or the basename of a manifest file).
+/// \param[in] entrypoint An identifier for the entrypoint (e.g. the symbol to
+/// call for AdbcDriverInitFunc on Linux). If not provided, search for an
+/// entrypoint based on the driver name.
+/// \param[in] version The ADBC revision to attempt to initialize.
+/// \param[in] load_options bit mask of AdbcLoadOption to control the
directories searched
+/// \param[out] raw_driver The table of function pointers to initialize
+/// \param[out] error An optional location to return an error message
+/// @return
+ADBC_EXPORT
+AdbcStatusCode AdbcFindLoadDriver(const char* driver_name, const char*
entrypoint,
+ const int version, const AdbcLoadOption
load_options,
+ void* driver, struct AdbcError* error);
Review Comment:
I think we probably want a version that accepts a `const char** search_path`
as an escape hatch in the event an application has specific requirements (e.g.,
maybe it has some user-configurable ADBC configuration of its own) but doesn't
want to rewrite the TOML parsing logic?
##########
c/driver_manager/adbc_driver_manager.cc:
##########
@@ -114,7 +129,175 @@ struct OwnedError {
}
};
+std::filesystem::path UserConfigDir() {
+ std::filesystem::path config_dir;
+#if defined(_WIN32)
+ auto dir = std::getenv("AppData");
Review Comment:
Also, does this need to use the Windows API? (The only other place I've seen
this does:)
https://github.com/r-lib/rappdirs/blob/955f2e7997ed6d65cc47a3d77d0cf324c2685897/src/win-path.c#L13
##########
c/driver_manager/adbc_driver_manager.cc:
##########
@@ -114,7 +129,175 @@ struct OwnedError {
}
};
+std::filesystem::path UserConfigDir() {
+ std::filesystem::path config_dir;
+#if defined(_WIN32)
+ auto dir = std::getenv("AppData");
Review Comment:
A realistic test for this would be to create a Windows user with a é (e.g.)
in the name and ensure that this works (at least that's what happened the last
time I had to debug this!)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]