This is an automated email from the ASF dual-hosted git repository. curth pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
     new 27c68c011 feat(csharp/src/Drivers/Databricks): Databricks Proxy Configurator (#2789)
27c68c011 is described below

commit 27c68c011138d5c64e2a641730e820e9537a0875
Author: Todd Meng <todd.m...@databricks.com>
AuthorDate: Wed May 14 12:49:30 2025 -0700

    feat(csharp/src/Drivers/Databricks): Databricks Proxy Configurator (#2789)

    First PR for Proxy support for Databricks Driver. Includes a
    SparkProxyConfigurator, HttpClientFactory that contains the
    Configurator, and necessary SparkParameters. Also includes some unit
    tests. Tested integration with rest of driver in follow up PR.

    Follow-up: Integrate with the rest of the driver. This will also include
    behavior for Oauth and CloudFetch

    Other off-line discussions:
    - Kerberos proxy login not currently supported in JDBC OSS, so we're not
    currently supporting here.
---
 .../Drivers/Apache/Hive2/HiveServer2Parameters.cs  |  46 +++++
 .../Apache/Hive2/HiveServer2ProxyConfigurator.cs   | 215 +++++++++++++++++++++
 .../Hive2/HiveServer2ProxyConfiguratorTests.cs     | 149 ++++++++++++++
 3 files changed, 410 insertions(+)

diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs b/csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs
index d40ee2cc9..e4ae62069 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs
@@ -64,4 +64,50 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
         public const string TrustedCertificatePath = "adbc.standard_options.tls.trusted_certificate_path";
         public const string DisableServerCertificateValidation = "adbc.standard_options.tls.disable_server_certificate_validation";
     }
+
+    /// <summary>
+    /// Connection property keys that control whether, and how, HTTP connections go through a proxy.
+    /// These keys are read by HiveServer2ProxyConfigurator.FromProperties.
+    /// </summary>
+    public static class HttpProxyOptions
+    {
+        /// <summary>
+        /// Whether to use a proxy for HTTP connections.
+        /// Default value is false (disabled) if not specified.
+        /// The value is parsed as a boolean ("true" / "false"); a value that does not parse
+        /// as a boolean is treated the same as false.
+        /// </summary>
+        public const string UseProxy = "adbc.proxy_options.use_proxy";
+
+        /// <summary>
+        /// Hostname or IP address of the proxy server.
+        /// Required when <see cref="UseProxy"/> is true.
+        /// </summary>
+        public const string ProxyHost = "adbc.proxy_options.proxy_host";
+
+        /// <summary>
+        /// Port number of the proxy server.
+        /// Required when <see cref="UseProxy"/> is true; must be an integer between 1 and 65535.
+        /// </summary>
+        public const string ProxyPort = "adbc.proxy_options.proxy_port";
+
+        /// <summary>
+        /// Comma-separated list of hosts or domains that should bypass the proxy.
+        /// For example: "localhost,127.0.0.1,*.internal.domain.com"
+        /// Each entry is trimmed and converted to an anchored pattern in which "*" matches any
+        /// run of characters and "?" matches a single character, so use a leading "*" (as in
+        /// "*.internal.domain.com") to cover sub-domains; a bare ".internal.domain.com" entry
+        /// is not treated as a suffix match.
+        /// </summary>
+        public const string ProxyIgnoreList = "adbc.proxy_options.proxy_ignore_list";
+
+        /// <summary>
+        /// Whether to enable proxy authentication.
+        /// Default value is false (disabled) if not specified.
+        /// The value is parsed as a boolean ("true" / "false").
+        /// </summary>
+        public const string ProxyAuth = "adbc.proxy_options.proxy_auth";
+
+        /// <summary>
+        /// Username for proxy authentication.
+        /// Required when <see cref="ProxyAuth"/> is true.
+        /// </summary>
+        public const string ProxyUID = "adbc.proxy_options.proxy_uid";
+
+        /// <summary>
+        /// Password for proxy authentication.
+        /// Required when <see cref="ProxyAuth"/> is true.
+        /// </summary>
+        public const string ProxyPWD = "adbc.proxy_options.proxy_pwd";
+    }
 }
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2ProxyConfigurator.cs b/csharp/src/Drivers/Apache/Hive2/HiveServer2ProxyConfigurator.cs
new file mode 100644
index 000000000..a16efd7f2
--- /dev/null
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2ProxyConfigurator.cs
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Net;
+using System.Net.Http;
+using System.Text.RegularExpressions;
+
+namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
+{
+    /// <summary>
+    /// Default implementation of proxy configuration for HTTP connections.
+    /// Holds validated proxy settings and applies them to an <see cref="HttpClientHandler"/>.
+    /// </summary>
+    internal class HiveServer2ProxyConfigurator
+    {
+        private readonly bool _useProxy;
+        private readonly string? _proxyHost;
+        private readonly int? _proxyPort;
+        private readonly bool _proxyAuth;
+        private readonly string? _proxyUid;
+        private readonly string? _proxyPwd;
+        private readonly string[]? _proxyBypassList;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="HiveServer2ProxyConfigurator"/> class.
+        /// </summary>
+        /// <param name="useProxy">Whether to use a proxy</param>
+        /// <param name="proxyPort">The proxy port</param>
+        /// <param name="proxyHost">The proxy host</param>
+        /// <param name="proxyAuth">Whether to use proxy authentication</param>
+        /// <param name="proxyUid">The proxy username</param>
+        /// <param name="proxyPwd">The proxy password</param>
+        /// <param name="proxyIgnoreList">Comma-separated list of hosts to bypass the proxy</param>
+        /// <exception cref="ArgumentNullException">
+        /// <paramref name="useProxy"/> is true and the host or port is null, or
+        /// <paramref name="proxyAuth"/> is true and the username or password is null.
+        /// </exception>
+        internal HiveServer2ProxyConfigurator(
+            bool useProxy,
+            int? proxyPort = null,
+            string? proxyHost = null,
+            bool proxyAuth = false,
+            string? proxyUid = null,
+            string? proxyPwd = null,
+            string? proxyIgnoreList = null)
+        {
+            if (useProxy)
+            {
+                if (proxyHost == null)
+                {
+                    throw new ArgumentNullException(nameof(proxyHost));
+                }
+                if (proxyPort == null)
+                {
+                    throw new ArgumentNullException(nameof(proxyPort));
+                }
+            }
+
+            if (proxyAuth)
+            {
+                if (proxyUid == null)
+                {
+                    throw new ArgumentNullException(nameof(proxyUid));
+                }
+                if (proxyPwd == null)
+                {
+                    throw new ArgumentNullException(nameof(proxyPwd));
+                }
+            }
+
+            _useProxy = useProxy;
+            _proxyHost = proxyHost;
+            _proxyPort = proxyPort;
+            _proxyAuth = proxyAuth;
+            _proxyUid = proxyUid;
+            _proxyPwd = proxyPwd;
+
+            if (!string.IsNullOrEmpty(proxyIgnoreList))
+            {
+                _proxyBypassList = ParseProxyIgnoreList(proxyIgnoreList);
+            }
+        }
+
+        /// <summary>
+        /// Creates a <see cref="HiveServer2ProxyConfigurator"/> from connection properties.
+        /// </summary>
+        /// <param name="properties">The connection properties</param>
+        /// <returns>
+        /// A configurator with the proxy disabled when <see cref="HttpProxyOptions.UseProxy"/> is absent
+        /// or does not parse as boolean true; otherwise a configurator for the requested proxy.
+        /// </returns>
+        /// <remarks>
+        /// Boolean options are read with <see cref="bool.TryParse(string, out bool)"/>, so only
+        /// "true" / "false" are recognized; any other value (for example "1") is treated as false.
+        /// </remarks>
+        /// <exception cref="ArgumentException">
+        /// The proxy is enabled but the host or port is missing, or proxy authentication is enabled
+        /// but the username or password is missing.
+        /// </exception>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The proxy port is not an integer between 1 and 65535.
+        /// </exception>
+        internal static HiveServer2ProxyConfigurator FromProperties(IReadOnlyDictionary<string, string> properties)
+        {
+            bool useProxy = properties.TryGetValue(HttpProxyOptions.UseProxy, out string? useProxyStr) && bool.TryParse(useProxyStr, out bool useProxyBool) && useProxyBool;
+
+            if (!useProxy)
+            {
+                // The remaining proxy options are deliberately not validated when the proxy is off.
+                return new HiveServer2ProxyConfigurator(useProxy: false);
+            }
+
+            // Get proxy host
+            if (!properties.TryGetValue(HttpProxyOptions.ProxyHost, out string? proxyHost) ||
+                string.IsNullOrEmpty(proxyHost))
+            {
+                throw new ArgumentException($"Parameter '{HttpProxyOptions.UseProxy}' is set to 'true' but '{HttpProxyOptions.ProxyHost}' is not specified");
+            }
+
+            // Get proxy port
+            if (!properties.TryGetValue(HttpProxyOptions.ProxyPort, out string? proxyPortStr))
+            {
+                throw new ArgumentException($"Parameter '{HttpProxyOptions.ProxyPort}' is required when '{HttpProxyOptions.UseProxy}' is set to 'true'");
+            }
+
+            if (!int.TryParse(proxyPortStr, out int proxyPort) || proxyPort <= 0 || proxyPort > 65535)
+            {
+                throw new ArgumentOutOfRangeException(
+                    HttpProxyOptions.ProxyPort,
+                    $"Invalid proxy port: {proxyPortStr}. Must be between 1 and 65535.");
+            }
+
+            // Get proxy authentication settings
+            bool proxyAuth = properties.TryGetValue(HttpProxyOptions.ProxyAuth, out string? proxyAuthStr) && bool.TryParse(proxyAuthStr, out bool proxyAuthBool) && proxyAuthBool;
+
+            string? proxyUid = null;
+            string? proxyPwd = null;
+
+            if (proxyAuth)
+            {
+                properties.TryGetValue(HttpProxyOptions.ProxyUID, out proxyUid);
+                properties.TryGetValue(HttpProxyOptions.ProxyPWD, out proxyPwd);
+
+                if (string.IsNullOrEmpty(proxyUid))
+                {
+                    throw new ArgumentException($"Parameter '{HttpProxyOptions.ProxyAuth}' is set to 'true' but '{HttpProxyOptions.ProxyUID}' is not specified");
+                }
+
+                if (string.IsNullOrEmpty(proxyPwd))
+                {
+                    throw new ArgumentException($"Parameter '{HttpProxyOptions.ProxyAuth}' is set to 'true' but '{HttpProxyOptions.ProxyPWD}' is not specified");
+                }
+            }
+
+            // Get proxy bypass list
+            properties.TryGetValue(HttpProxyOptions.ProxyIgnoreList, out string? proxyIgnoreList);
+
+            return new HiveServer2ProxyConfigurator(
+                useProxy,
+                proxyPort,
+                proxyHost,
+                proxyAuth,
+                proxyUid,
+                proxyPwd,
+                proxyIgnoreList);
+        }
+
+        /// <summary>
+        /// Configures proxy settings on an HttpClientHandler
+        /// </summary>
+        /// <param name="handler">The HttpClientHandler to configure</param>
+        internal void ConfigureProxy(HttpClientHandler handler)
+        {
+            if (_useProxy)
+            {
+                // Create and configure the proxy; host and port were validated in the constructor.
+                var proxy = new WebProxy(_proxyHost!, _proxyPort!.Value);
+
+                // Configure authentication if needed
+                if (_proxyAuth && !string.IsNullOrEmpty(_proxyUid))
+                {
+                    proxy.Credentials = new NetworkCredential(_proxyUid, _proxyPwd);
+                }
+
+                // Configure bypass list
+                if (_proxyBypassList != null && _proxyBypassList.Length > 0)
+                {
+                    proxy.BypassList = _proxyBypassList;
+                }
+
+                // Apply proxy to handler
+                handler.Proxy = proxy;
+                handler.UseProxy = true;
+            }
+            else
+            {
+                // No proxy configuration
+                handler.UseProxy = false;
+            }
+        }
+
+        /// <summary>
+        /// Converts a comma-separated ignore list into the regular-expression strings that
+        /// <see cref="WebProxy.BypassList"/> expects.
+        /// </summary>
+        /// <param name="proxyIgnoreList">Comma-separated hosts; "*" and "?" are treated as wildcards</param>
+        /// <returns>
+        /// One anchored pattern per non-blank entry, in input order. Blank entries (for example from
+        /// "a,,b" or a trailing comma) are dropped so the result never contains null elements.
+        /// </returns>
+        // Users are not expected to pass regex strings, so every entry is escaped and only the
+        // wildcard characters are translated.
+        // NOTE(review): WebProxy appears to match bypass patterns against "scheme://host[:port]"
+        // rather than the bare host, in which case fully anchored host-only patterns such as
+        // "^localhost$" would never match - confirm against the WebProxy documentation.
+        private static string[] ParseProxyIgnoreList(string? proxyIgnoreList)
+        {
+            if (string.IsNullOrEmpty(proxyIgnoreList))
+            {
+                return [];
+            }
+
+            var patterns = new List<string>();
+
+            foreach (string rawHost in proxyIgnoreList!.Split(','))
+            {
+                string host = rawHost.Trim();
+                if (host.Length == 0)
+                {
+                    continue;
+                }
+
+                // Convert wildcard pattern to regex pattern
+                string pattern = "^" + Regex.Escape(host)
+                    .Replace("\\*", ".*")
+                    .Replace("\\?", ".") + "$";
+                patterns.Add(pattern);
+            }
+
+            return patterns.ToArray();
+        }
+    }
+}
diff --git a/csharp/test/Drivers/Apache/Hive2/HiveServer2ProxyConfiguratorTests.cs b/csharp/test/Drivers/Apache/Hive2/HiveServer2ProxyConfiguratorTests.cs
new file mode 100644
index 000000000..c077d620d
--- /dev/null
+++ b/csharp/test/Drivers/Apache/Hive2/HiveServer2ProxyConfiguratorTests.cs
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Net;
+using System.Net.Http;
+using Xunit;
+
+namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2.Tests
+{
+    /// <summary>
+    /// Unit tests for <see cref="HiveServer2ProxyConfigurator"/>: building it from connection
+    /// properties and applying it to an <see cref="HttpClientHandler"/>.
+    /// </summary>
+    /// <remarks>
+    /// Boolean options are passed as "true" because the configurator reads them with
+    /// bool.TryParse, which does not accept "1".
+    /// </remarks>
+    public class HiveServer2ProxyConfiguratorTests
+    {
+        /// <summary>With no proxy properties at all, the handler must have its proxy disabled.</summary>
+        [Fact]
+        public void ConfigureProxy_NoProxySettings_DisablesProxy()
+        {
+            // Arrange
+            var properties = new Dictionary<string, string>();
+            var configurator = HiveServer2ProxyConfigurator.FromProperties(properties);
+            var handler = new HttpClientHandler();
+
+            // Act
+            configurator.ConfigureProxy(handler);
+
+            // Assert
+            Assert.False(handler.UseProxy);
+        }
+
+        /// <summary>An out-of-range port is rejected, with the option key reported as the parameter name.</summary>
+        [Fact]
+        public void ConfigureProxy_UseProxyWithInvalidPort_ThrowsArgumentOutOfRangeException()
+        {
+            // Arrange
+            var properties = new Dictionary<string, string>
+            {
+                { HttpProxyOptions.UseProxy, "true" },
+                { HttpProxyOptions.ProxyHost, "proxy.example.com" },
+                { HttpProxyOptions.ProxyPort, "99999" } // Invalid port
+            };
+
+            // Act & Assert
+            var ex = Assert.Throws<ArgumentOutOfRangeException>(() => HiveServer2ProxyConfigurator.FromProperties(properties));
+            Assert.Equal(HttpProxyOptions.ProxyPort, ex.ParamName);
+        }
+
+        /// <summary>A valid host and port result in a <see cref="WebProxy"/> being installed on the handler.</summary>
+        [Fact]
+        public void ConfigureProxy_ValidProxySettings_ConfiguresProxy()
+        {
+            // Arrange
+            var properties = new Dictionary<string, string>
+            {
+                { HttpProxyOptions.UseProxy, "true" },
+                { HttpProxyOptions.ProxyHost, "proxy.example.com" },
+                { HttpProxyOptions.ProxyPort, "8080" }
+            };
+            var configurator = HiveServer2ProxyConfigurator.FromProperties(properties);
+            var handler = new HttpClientHandler();
+
+            // Act
+            configurator.ConfigureProxy(handler);
+
+            // Assert
+            Assert.True(handler.UseProxy);
+            Assert.NotNull(handler.Proxy);
+            Assert.IsType<WebProxy>(handler.Proxy);
+        }
+
+        /// <summary>With proxy authentication enabled, the username and password become the proxy credentials.</summary>
+        [Fact]
+        public void ConfigureProxy_ProxyWithAuthentication_ConfiguresProxyCredentials()
+        {
+            // Arrange
+            var properties = new Dictionary<string, string>
+            {
+                { HttpProxyOptions.UseProxy, "true" },
+                { HttpProxyOptions.ProxyHost, "proxy.example.com" },
+                { HttpProxyOptions.ProxyPort, "8080" },
+                { HttpProxyOptions.ProxyAuth, "true" },
+                { HttpProxyOptions.ProxyUID, "username" },
+                { HttpProxyOptions.ProxyPWD, "password" }
+            };
+            var configurator = HiveServer2ProxyConfigurator.FromProperties(properties);
+            var handler = new HttpClientHandler();
+
+            // Act
+            configurator.ConfigureProxy(handler);
+
+            // Assert
+            Assert.True(handler.UseProxy);
+            Assert.NotNull(handler.Proxy);
+            Assert.IsType<WebProxy>(handler.Proxy);
+
+            var proxy = (WebProxy)handler.Proxy;
+            var credentials = proxy.Credentials;
+            Assert.NotNull(credentials);
+            Assert.IsType<NetworkCredential>(credentials);
+
+            var networkCredential = (NetworkCredential)credentials;
+            Assert.Equal("username", networkCredential.UserName);
+            Assert.Equal("password", networkCredential.Password);
+        }
+
+        /// <summary>Each ignore-list entry is translated into an anchored regex in the proxy's bypass list.</summary>
+        [Fact]
+        public void ConfigureProxy_ProxyWithBypassList_ConfiguresProxyBypassList()
+        {
+            // Arrange
+            var properties = new Dictionary<string, string>
+            {
+                { HttpProxyOptions.UseProxy, "true" },
+                { HttpProxyOptions.ProxyHost, "proxy.example.com" },
+                { HttpProxyOptions.ProxyPort, "8080" },
+                { HttpProxyOptions.ProxyIgnoreList, "localhost,127.0.0.1,*.internal.domain.com" }
+            };
+            var configurator = HiveServer2ProxyConfigurator.FromProperties(properties);
+            var handler = new HttpClientHandler();
+
+            // Act
+            configurator.ConfigureProxy(handler);
+
+            // Assert
+            Assert.True(handler.UseProxy);
+            Assert.NotNull(handler.Proxy);
+            Assert.IsType<WebProxy>(handler.Proxy);
+
+            // The generated patterns are exposed through the public WebProxy.BypassList property
+            var proxy = (WebProxy)handler.Proxy;
+            var bypassList = proxy.BypassList;
+            Assert.NotNull(bypassList);
+            Assert.NotEmpty(bypassList);
+
+            // Check if the bypass list contains the expected patterns
+            Assert.Contains("^localhost$", bypassList);
+            Assert.Contains("^127\\.0\\.0\\.1$", bypassList);
+            Assert.Contains("^.*\\.internal\\.domain\\.com$", bypassList);
+        }
+    }
+}