This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new 423162371 feat(csharp/src/Drivers/Apache/Spark): add option to ignore 
TLS/SSL certificate exceptions (#2188)
423162371 is described below

commit 423162371b420e11d2603a8ab6775bd01c391d3c
Author: Bruce Irschick <[email protected]>
AuthorDate: Thu Sep 26 14:03:16 2024 -0700

    feat(csharp/src/Drivers/Apache/Spark): add option to ignore TLS/SSL 
certificate exceptions (#2188)
    
    Add connection property with TLS/SSL options.
    
    The options allow server certificate errors to be ignored; such errors
    are typically caused by self-signed certificates or SSH tunneling.
    
    | Property               | Description | Default |
    | :---                   | :---        | :---    |
    | `adbc.spark.tls_options` | Comma-separated list of TLS/SSL options.
    Each option relaxes one aspect of server certificate validation when
    connecting to a Spark server. <br><br>Allowed values: `allow_self_signed`,
    `allow_hostname_mismatch`. <br><br>Option `allow_self_signed` allows
    certificate errors due to an unknown certificate authority, typically
    when using a self-signed certificate. Option `allow_hostname_mismatch`
    allows certificate errors due to a mismatch of the hostname (e.g., when
    connecting through an SSH tunnel). Example:
    `adbc.spark.tls_options=allow_self_signed`. | |
---
 .../Drivers/Apache/Hive2/HiveServer2Connection.cs  |  2 +
 .../Drivers/Apache/Hive2/HiveServer2Parameters.cs  |  6 +++
 .../Drivers/Apache/Hive2/HiveServer2TlsOption.cs   | 53 ++++++++++++++++++++++
 csharp/src/Drivers/Apache/Spark/README.md          |  1 +
 .../Apache/Spark/SparkDatabricksConnection.cs      |  3 ++
 .../Drivers/Apache/Spark/SparkHttpConnection.cs    | 24 +++++++++-
 csharp/src/Drivers/Apache/Spark/SparkParameters.cs |  1 +
 .../Apache/Hive2/HiveServer2ParametersTest.cs      | 37 +++++++++++++--
 .../Drivers/Apache/Spark/SparkTestConfiguration.cs |  3 ++
 .../Drivers/Apache/Spark/SparkTestEnvironment.cs   |  4 ++
 10 files changed, 130 insertions(+), 4 deletions(-)

diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs 
b/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
index 2100f5744..5d614652e 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
@@ -73,6 +73,8 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
 
         protected internal DataTypeConversion DataTypeConversion { get; set; } 
= DataTypeConversion.None;
 
+        protected internal HiveServer2TlsOption TlsOptions { get; set; } = 
HiveServer2TlsOption.Empty;
+
         protected abstract Task<TTransport> CreateTransportAsync();
 
         protected abstract Task<TProtocol> CreateProtocolAsync(TTransport 
transport);
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs 
b/csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs
index 5eec97823..2170cd17b 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs
@@ -24,4 +24,10 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
         public const string None = "none";
         public const string Scalar = "scalar";
     }
+
+    public static class TlsOptions
+    {
+        public const string AllowSelfSigned = "allow_self_signed";
+        public const string AllowHostnameMismatch = "allow_hostname_mismatch";
+    }
 }
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2TlsOption.cs 
b/csharp/src/Drivers/Apache/Hive2/HiveServer2TlsOption.cs
new file mode 100644
index 000000000..84f56a485
--- /dev/null
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2TlsOption.cs
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
+{
+    [Flags]
+    internal enum HiveServer2TlsOption
+    {
+        Empty = 0,
+        AllowSelfSigned = 1,
+        AllowHostnameMismatch = 2,
+    }
+
+    internal static class TlsOptionsParser
+    {
+        internal const string SupportedList = TlsOptions.AllowSelfSigned + "," 
+ TlsOptions.AllowHostnameMismatch;
+
+        internal static HiveServer2TlsOption Parse(string? tlsOptions)
+        {
+            HiveServer2TlsOption options = HiveServer2TlsOption.Empty;
+            if (tlsOptions == null) return options;
+
+            string[] valueList = tlsOptions.Split(',');
+            foreach (string tlsOption in valueList)
+            {
+                options |= (tlsOption?.Trim().ToLowerInvariant()) switch
+                {
+                    null or "" => HiveServer2TlsOption.Empty,
+                    TlsOptions.AllowSelfSigned => 
HiveServer2TlsOption.AllowSelfSigned,
+                    TlsOptions.AllowHostnameMismatch => 
HiveServer2TlsOption.AllowHostnameMismatch,
+                    _ => throw new 
ArgumentOutOfRangeException(nameof(tlsOptions), tlsOption, "Invalid or 
unsupported TLS option"),
+                };
+            }
+            return options;
+        }
+    }
+}
diff --git a/csharp/src/Drivers/Apache/Spark/README.md 
b/csharp/src/Drivers/Apache/Spark/README.md
index 6928b9bb9..b35e9b16a 100644
--- a/csharp/src/Drivers/Apache/Spark/README.md
+++ b/csharp/src/Drivers/Apache/Spark/README.md
@@ -36,6 +36,7 @@ but can also be passed in the call to `AdbcDatabase.Connect`.
 | `username`             | The user name used for basic authentication | |
 | `password`             | The password for the user name used for basic 
authentication. | |
 | `adbc.spark.data_type_conv` | Comma-separated list of data conversion 
options. Each option indicates the type of conversion to perform on data 
returned from the Spark server. <br><br>Allowed values: `none`, `scalar`. 
<br><br>Option `none` indicates there is no conversion from Spark type to 
native type (i.e., no conversion from String to Timestamp for Apache Spark over 
HTTP). Example `adbc.spark.conv_data_type=none`. <br><br>Option `scalar` will 
perform conversion (if necessary) from th [...]
+| `adbc.spark.tls_options` | Comma-separated list of TLS/SSL options. Each 
option indicates the TLS/SSL option when connecting to a Spark server. 
<br><br>Allowed values: `allow_self_signed`, `allow_hostname_mismatch`. 
<br><br>Option `allow_self_signed` allows certificate errors due to an unknown 
certificate authority, typically when using a self-signed certificate. Option 
`allow_hostname_mismatch` allow certificate errors due to a mismatch of the 
hostname. (e.g., when connecting through  [...]
 | `adbc.statement.batch_size` | Sets the maximum number of rows to retrieve in 
a single batch request. | `50000` |
 | `adbc.statement.polltime_milliseconds` | If polling is necessary to get a 
result, this option sets the length of time (in milliseconds) to wait between 
polls. | `500` |
 
diff --git a/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs 
b/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
index a20964ce4..64bb8672a 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
@@ -49,6 +49,9 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
             Properties.TryGetValue(SparkParameters.DataTypeConv, out string? 
dataTypeConv);
             // Note: In Databricks, scalar types are provided implicitly.
             DataTypeConversion = DataTypeConversionParser.Parse(dataTypeConv);
+
+            Properties.TryGetValue(SparkParameters.TLSOptions, out string? 
tlsOptions);
+            TlsOptions = TlsOptionsParser.Parse(tlsOptions);
         }
 
         protected override Task<TGetResultSetMetadataResp> 
GetResultSetMetadataAsync(TGetSchemasResp response) =>
diff --git a/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs 
b/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
index 8d7dc6fe6..c1199b1b5 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
@@ -21,6 +21,7 @@ using System.Diagnostics;
 using System.Net;
 using System.Net.Http;
 using System.Net.Http.Headers;
+using System.Net.Security;
 using System.Text;
 using System.Threading;
 using System.Threading.Tasks;
@@ -116,6 +117,8 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
         {
             Properties.TryGetValue(SparkParameters.DataTypeConv, out string? 
dataTypeConv);
             DataTypeConversion = DataTypeConversionParser.Parse(dataTypeConv);
+            Properties.TryGetValue(SparkParameters.TLSOptions, out string? 
tlsOptions);
+            TlsOptions = Hive2.TlsOptionsParser.Parse(tlsOptions);
         }
 
         internal override IArrowArrayStream NewReader<T>(T statement, Schema 
schema) => new HiveServer2Reader(statement, schema, dataTypeConversion: 
statement.Connection.DataTypeConversion);
@@ -141,7 +144,8 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
             Uri baseAddress = GetBaseAddress(uri, hostName, path, port);
             AuthenticationHeaderValue? authenticationHeaderValue = 
GetAuthenticationHeaderValue(authTypeValue, token, username, password);
 
-            HttpClient httpClient = new();
+            HttpClientHandler httpClientHandler = NewHttpClientHandler();
+            HttpClient httpClient = new(httpClientHandler);
             httpClient.BaseAddress = baseAddress;
             httpClient.DefaultRequestHeaders.Authorization = 
authenticationHeaderValue;
             httpClient.DefaultRequestHeaders.UserAgent.ParseAdd(s_userAgent);
@@ -154,6 +158,24 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
             return Task.FromResult<TTransport>(transport);
         }
 
+        private HttpClientHandler NewHttpClientHandler()
+        {
+            HttpClientHandler httpClientHandler = new();
+            if (TlsOptions != HiveServer2TlsOption.Empty)
+            {
+                httpClientHandler.ServerCertificateCustomValidationCallback = 
(request, certificate, chain, policyErrors) =>
+                {
+                    if (policyErrors == SslPolicyErrors.None) return true;
+
+                    return
+                       
(!policyErrors.HasFlag(SslPolicyErrors.RemoteCertificateChainErrors) || 
TlsOptions.HasFlag(HiveServer2TlsOption.AllowSelfSigned))
+                    && 
(!policyErrors.HasFlag(SslPolicyErrors.RemoteCertificateNameMismatch) || 
TlsOptions.HasFlag(HiveServer2TlsOption.AllowHostnameMismatch));
+                };
+            }
+
+            return httpClientHandler;
+        }
+
         private static AuthenticationHeaderValue? 
GetAuthenticationHeaderValue(SparkAuthType authType, string? token, string? 
username, string? password)
         {
             if (!string.IsNullOrEmpty(token) && (authType == 
SparkAuthType.Empty || authType == SparkAuthType.Token))
diff --git a/csharp/src/Drivers/Apache/Spark/SparkParameters.cs 
b/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
index f2251c648..a27873038 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
@@ -31,6 +31,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
         public const string AuthType = "adbc.spark.auth_type";
         public const string Type = "adbc.spark.type";
         public const string DataTypeConv = "adbc.spark.data_type_conv";
+        public const string TLSOptions = "adbc.spark.tls_options";
     }
 
     public static class SparkAuthTypeConstants
diff --git a/csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs 
b/csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs
index a10e95329..992e5ffb1 100644
--- a/csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs
+++ b/csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs
@@ -25,8 +25,8 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
     public class HiveServer2ParametersTest
     {
         [SkippableTheory]
-        [MemberData(nameof(GetParametersTestData))]
-        internal void TestParametersParse(string? dataTypeConversion, 
DataTypeConversion expected, Type? exceptionType = default)
+        [MemberData(nameof(GetParametersDataTypeConvTestData))]
+        internal void TestParametersDataTypeConvParse(string? 
dataTypeConversion, DataTypeConversion expected, Type? exceptionType = default)
         {
             if (exceptionType == default)
                 Assert.Equal(expected, 
DataTypeConversionParser.Parse(dataTypeConversion));
@@ -34,7 +34,17 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
                 Assert.Throws(exceptionType, () => 
DataTypeConversionParser.Parse(dataTypeConversion));
         }
 
-        public static IEnumerable<object?[]> GetParametersTestData()
+        [SkippableTheory]
+        [MemberData(nameof(GetParametersTlsOptionTestData))]
+        internal void TestParametersTlsOptionParse(string? tlsOptions, 
HiveServer2TlsOption expected, Type? exceptionType = default)
+        {
+            if (exceptionType == default)
+                Assert.Equal(expected, TlsOptionsParser.Parse(tlsOptions));
+            else
+                Assert.Throws(exceptionType, () => 
TlsOptionsParser.Parse(tlsOptions));
+        }
+
+        public static IEnumerable<object?[]> 
GetParametersDataTypeConvTestData()
         {
             // Default
             yield return new object?[] { null, DataTypeConversion.Scalar };
@@ -58,5 +68,26 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
             yield return new object?[] { $"xxx", DataTypeConversion.Empty, 
typeof(ArgumentOutOfRangeException) };
             yield return new object?[] { $"none,scalar,xxx", 
DataTypeConversion.None | DataTypeConversion.Scalar, 
typeof(ArgumentOutOfRangeException)  };
         }
+
+        public static IEnumerable<object?[]> GetParametersTlsOptionTestData()
+        {
+            // Default
+            yield return new object?[] { null, HiveServer2TlsOption.Empty };
+            yield return new object?[] { "", HiveServer2TlsOption.Empty};
+            yield return new object?[] { " ", HiveServer2TlsOption.Empty };
+            // Explicit
+            yield return new object?[] { $"{TlsOptions.AllowSelfSigned}", 
HiveServer2TlsOption.AllowSelfSigned };
+            yield return new object?[] { 
$"{TlsOptions.AllowHostnameMismatch}", 
HiveServer2TlsOption.AllowHostnameMismatch };
+            // Ignore empty
+            yield return new object?[] { $",{TlsOptions.AllowSelfSigned}", 
HiveServer2TlsOption.AllowSelfSigned };
+            yield return new object?[] { 
$",{TlsOptions.AllowHostnameMismatch},", 
HiveServer2TlsOption.AllowHostnameMismatch };
+            // Combined, embedded space, mixed-case
+            yield return new object?[] { 
$"{TlsOptions.AllowSelfSigned},{TlsOptions.AllowHostnameMismatch}", 
HiveServer2TlsOption.AllowSelfSigned | 
HiveServer2TlsOption.AllowHostnameMismatch };
+            yield return new object?[] { 
$"{TlsOptions.AllowHostnameMismatch},{TlsOptions.AllowSelfSigned}", 
HiveServer2TlsOption.AllowSelfSigned  | 
HiveServer2TlsOption.AllowHostnameMismatch };
+            yield return new object?[] { $" {TlsOptions.AllowHostnameMismatch} 
, {TlsOptions.AllowSelfSigned} ", HiveServer2TlsOption.AllowSelfSigned | 
HiveServer2TlsOption.AllowHostnameMismatch };
+            yield return new object?[] { 
$"{TlsOptions.AllowSelfSigned.ToUpperInvariant()},{TlsOptions.AllowHostnameMismatch.ToUpperInvariant()}",
 HiveServer2TlsOption.AllowSelfSigned | 
HiveServer2TlsOption.AllowHostnameMismatch };
+            // Invalid
+            yield return new object?[] { 
$"xxx,{TlsOptions.AllowSelfSigned.ToUpperInvariant()},{TlsOptions.AllowHostnameMismatch.ToUpperInvariant()}",
 HiveServer2TlsOption.Empty, typeof(ArgumentOutOfRangeException) };
+        }
     }
 }
diff --git a/csharp/test/Drivers/Apache/Spark/SparkTestConfiguration.cs 
b/csharp/test/Drivers/Apache/Spark/SparkTestConfiguration.cs
index e8614d753..7eb513d31 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkTestConfiguration.cs
+++ b/csharp/test/Drivers/Apache/Spark/SparkTestConfiguration.cs
@@ -27,5 +27,8 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
 
         [JsonPropertyName("data_type_conv"), JsonIgnore(Condition = 
JsonIgnoreCondition.WhenWritingDefault)]
         public string DataTypeConversion { get; set; } = string.Empty;
+
+        [JsonPropertyName("tls_options"), JsonIgnore(Condition = 
JsonIgnoreCondition.WhenWritingDefault)]
+        public string TlsOptions { get; set; } = string.Empty;
     }
 }
diff --git a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs 
b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
index 7b2a4fe41..7812aa363 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
+++ b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
@@ -98,6 +98,10 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
             {
                 parameters.Add(SparkParameters.DataTypeConv, 
testConfiguration.DataTypeConversion!);
             }
+            if (!string.IsNullOrEmpty(testConfiguration.TlsOptions))
+            {
+                parameters.Add(SparkParameters.TLSOptions, 
testConfiguration.TlsOptions!);
+            }
 
             return parameters;
         }

Reply via email to