This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 423162371 feat(csharp/src/Drivers/Apache/Spark): add option to ignore
TLS/SSL certificate exceptions (#2188)
423162371 is described below
commit 423162371b420e11d2603a8ab6775bd01c391d3c
Author: Bruce Irschick <[email protected]>
AuthorDate: Thu Sep 26 14:03:16 2024 -0700
feat(csharp/src/Drivers/Apache/Spark): add option to ignore TLS/SSL
certificate exceptions (#2188)
Add connection property with TLS/SSL options.
The options allow ignoring server certificate errors that are typically
caused by self-signed certificates or by connecting through an SSH tunnel.
| Property | Description | Default |
| :--- | :--- | :--- |
| `adbc.spark.tls_options` | Comma-separated list of TLS/SSL options.
Each option indicates the TLS/SSL option when connecting to a Spark
server. <br><br>Allowed values: `allow_self_signed`,
`allow_hostname_mismatch`. <br><br>Option `allow_self_signed` allows
certificate errors due to an unknown certificate authority, typically
when using a self-signed certificate. Option `allow_hostname_mismatch`
allows certificate errors due to a mismatch of the hostname (e.g., when
connecting through an SSH tunnel). Example:
`adbc.spark.tls_options=allow_self_signed` | |
---
.../Drivers/Apache/Hive2/HiveServer2Connection.cs | 2 +
.../Drivers/Apache/Hive2/HiveServer2Parameters.cs | 6 +++
.../Drivers/Apache/Hive2/HiveServer2TlsOption.cs | 53 ++++++++++++++++++++++
csharp/src/Drivers/Apache/Spark/README.md | 1 +
.../Apache/Spark/SparkDatabricksConnection.cs | 3 ++
.../Drivers/Apache/Spark/SparkHttpConnection.cs | 24 +++++++++-
csharp/src/Drivers/Apache/Spark/SparkParameters.cs | 1 +
.../Apache/Hive2/HiveServer2ParametersTest.cs | 37 +++++++++++++--
.../Drivers/Apache/Spark/SparkTestConfiguration.cs | 3 ++
.../Drivers/Apache/Spark/SparkTestEnvironment.cs | 4 ++
10 files changed, 130 insertions(+), 4 deletions(-)
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
b/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
index 2100f5744..5d614652e 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
@@ -73,6 +73,8 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
protected internal DataTypeConversion DataTypeConversion { get; set; }
= DataTypeConversion.None;
+ protected internal HiveServer2TlsOption TlsOptions { get; set; } =
HiveServer2TlsOption.Empty;
+
protected abstract Task<TTransport> CreateTransportAsync();
protected abstract Task<TProtocol> CreateProtocolAsync(TTransport
transport);
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs
b/csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs
index 5eec97823..2170cd17b 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs
@@ -24,4 +24,10 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
public const string None = "none";
public const string Scalar = "scalar";
}
+
+ public static class TlsOptions
+ {
+ public const string AllowSelfSigned = "allow_self_signed";
+ public const string AllowHostnameMismatch = "allow_hostname_mismatch";
+ }
}
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2TlsOption.cs
b/csharp/src/Drivers/Apache/Hive2/HiveServer2TlsOption.cs
new file mode 100644
index 000000000..84f56a485
--- /dev/null
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2TlsOption.cs
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
+{
+ [Flags]
+ internal enum HiveServer2TlsOption
+ {
+ Empty = 0,
+ AllowSelfSigned = 1,
+ AllowHostnameMismatch = 2,
+ }
+
+ internal static class TlsOptionsParser
+ {
+ internal const string SupportedList = TlsOptions.AllowSelfSigned + ","
+ TlsOptions.AllowHostnameMismatch;
+
+ internal static HiveServer2TlsOption Parse(string? tlsOptions)
+ {
+ HiveServer2TlsOption options = HiveServer2TlsOption.Empty;
+ if (tlsOptions == null) return options;
+
+ string[] valueList = tlsOptions.Split(',');
+ foreach (string tlsOption in valueList)
+ {
+ options |= (tlsOption?.Trim().ToLowerInvariant()) switch
+ {
+ null or "" => HiveServer2TlsOption.Empty,
+ TlsOptions.AllowSelfSigned =>
HiveServer2TlsOption.AllowSelfSigned,
+ TlsOptions.AllowHostnameMismatch =>
HiveServer2TlsOption.AllowHostnameMismatch,
+ _ => throw new
ArgumentOutOfRangeException(nameof(tlsOptions), tlsOption, "Invalid or
unsupported TLS option"),
+ };
+ }
+ return options;
+ }
+ }
+}
diff --git a/csharp/src/Drivers/Apache/Spark/README.md
b/csharp/src/Drivers/Apache/Spark/README.md
index 6928b9bb9..b35e9b16a 100644
--- a/csharp/src/Drivers/Apache/Spark/README.md
+++ b/csharp/src/Drivers/Apache/Spark/README.md
@@ -36,6 +36,7 @@ but can also be passed in the call to `AdbcDatabase.Connect`.
| `username` | The user name used for basic authentication | |
| `password` | The password for the user name used for basic
authentication. | |
| `adbc.spark.data_type_conv` | Comma-separated list of data conversion
options. Each option indicates the type of conversion to perform on data
returned from the Spark server. <br><br>Allowed values: `none`, `scalar`.
<br><br>Option `none` indicates there is no conversion from Spark type to
native type (i.e., no conversion from String to Timestamp for Apache Spark over
HTTP). Example `adbc.spark.conv_data_type=none`. <br><br>Option `scalar` will
perform conversion (if necessary) from th [...]
+| `adbc.spark.tls_options` | Comma-separated list of TLS/SSL options. Each
option indicates the TLS/SSL option when connecting to a Spark server.
<br><br>Allowed values: `allow_self_signed`, `allow_hostname_mismatch`.
<br><br>Option `allow_self_signed` allows certificate errors due to an unknown
certificate authority, typically when using a self-signed certificate. Option
`allow_hostname_mismatch` allows certificate errors due to a mismatch of the
hostname (e.g., when connecting through [...]
| `adbc.statement.batch_size` | Sets the maximum number of rows to retrieve in
a single batch request. | `50000` |
| `adbc.statement.polltime_milliseconds` | If polling is necessary to get a
result, this option sets the length of time (in milliseconds) to wait between
polls. | `500` |
diff --git a/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
b/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
index a20964ce4..64bb8672a 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
@@ -49,6 +49,9 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
Properties.TryGetValue(SparkParameters.DataTypeConv, out string?
dataTypeConv);
// Note: In Databricks, scalar types are provided implicitly.
DataTypeConversion = DataTypeConversionParser.Parse(dataTypeConv);
+
+ Properties.TryGetValue(SparkParameters.TLSOptions, out string?
tlsOptions);
+ TlsOptions = TlsOptionsParser.Parse(tlsOptions);
}
protected override Task<TGetResultSetMetadataResp>
GetResultSetMetadataAsync(TGetSchemasResp response) =>
diff --git a/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
b/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
index 8d7dc6fe6..c1199b1b5 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
@@ -21,6 +21,7 @@ using System.Diagnostics;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
+using System.Net.Security;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
@@ -116,6 +117,8 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
{
Properties.TryGetValue(SparkParameters.DataTypeConv, out string?
dataTypeConv);
DataTypeConversion = DataTypeConversionParser.Parse(dataTypeConv);
+ Properties.TryGetValue(SparkParameters.TLSOptions, out string?
tlsOptions);
+ TlsOptions = Hive2.TlsOptionsParser.Parse(tlsOptions);
}
internal override IArrowArrayStream NewReader<T>(T statement, Schema
schema) => new HiveServer2Reader(statement, schema, dataTypeConversion:
statement.Connection.DataTypeConversion);
@@ -141,7 +144,8 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
Uri baseAddress = GetBaseAddress(uri, hostName, path, port);
AuthenticationHeaderValue? authenticationHeaderValue =
GetAuthenticationHeaderValue(authTypeValue, token, username, password);
- HttpClient httpClient = new();
+ HttpClientHandler httpClientHandler = NewHttpClientHandler();
+ HttpClient httpClient = new(httpClientHandler);
httpClient.BaseAddress = baseAddress;
httpClient.DefaultRequestHeaders.Authorization =
authenticationHeaderValue;
httpClient.DefaultRequestHeaders.UserAgent.ParseAdd(s_userAgent);
@@ -154,6 +158,24 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
return Task.FromResult<TTransport>(transport);
}
+ private HttpClientHandler NewHttpClientHandler()
+ {
+ HttpClientHandler httpClientHandler = new();
+ if (TlsOptions != HiveServer2TlsOption.Empty)
+ {
+ httpClientHandler.ServerCertificateCustomValidationCallback =
(request, certificate, chain, policyErrors) =>
+ {
+ if (policyErrors == SslPolicyErrors.None) return true;
+
+ return
+
(!policyErrors.HasFlag(SslPolicyErrors.RemoteCertificateChainErrors) ||
TlsOptions.HasFlag(HiveServer2TlsOption.AllowSelfSigned))
+ &&
(!policyErrors.HasFlag(SslPolicyErrors.RemoteCertificateNameMismatch) ||
TlsOptions.HasFlag(HiveServer2TlsOption.AllowHostnameMismatch));
+ };
+ }
+
+ return httpClientHandler;
+ }
+
private static AuthenticationHeaderValue?
GetAuthenticationHeaderValue(SparkAuthType authType, string? token, string?
username, string? password)
{
if (!string.IsNullOrEmpty(token) && (authType ==
SparkAuthType.Empty || authType == SparkAuthType.Token))
diff --git a/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
b/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
index f2251c648..a27873038 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
@@ -31,6 +31,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
public const string AuthType = "adbc.spark.auth_type";
public const string Type = "adbc.spark.type";
public const string DataTypeConv = "adbc.spark.data_type_conv";
+ public const string TLSOptions = "adbc.spark.tls_options";
}
public static class SparkAuthTypeConstants
diff --git a/csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs
b/csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs
index a10e95329..992e5ffb1 100644
--- a/csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs
+++ b/csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs
@@ -25,8 +25,8 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
public class HiveServer2ParametersTest
{
[SkippableTheory]
- [MemberData(nameof(GetParametersTestData))]
- internal void TestParametersParse(string? dataTypeConversion,
DataTypeConversion expected, Type? exceptionType = default)
+ [MemberData(nameof(GetParametersDataTypeConvTestData))]
+ internal void TestParametersDataTypeConvParse(string?
dataTypeConversion, DataTypeConversion expected, Type? exceptionType = default)
{
if (exceptionType == default)
Assert.Equal(expected,
DataTypeConversionParser.Parse(dataTypeConversion));
@@ -34,7 +34,17 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
Assert.Throws(exceptionType, () =>
DataTypeConversionParser.Parse(dataTypeConversion));
}
- public static IEnumerable<object?[]> GetParametersTestData()
+ [SkippableTheory]
+ [MemberData(nameof(GetParametersTlsOptionTestData))]
+ internal void TestParametersTlsOptionParse(string? tlsOptions,
HiveServer2TlsOption expected, Type? exceptionType = default)
+ {
+ if (exceptionType == default)
+ Assert.Equal(expected, TlsOptionsParser.Parse(tlsOptions));
+ else
+ Assert.Throws(exceptionType, () =>
TlsOptionsParser.Parse(tlsOptions));
+ }
+
+ public static IEnumerable<object?[]>
GetParametersDataTypeConvTestData()
{
// Default
yield return new object?[] { null, DataTypeConversion.Scalar };
@@ -58,5 +68,26 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
yield return new object?[] { $"xxx", DataTypeConversion.Empty,
typeof(ArgumentOutOfRangeException) };
yield return new object?[] { $"none,scalar,xxx",
DataTypeConversion.None | DataTypeConversion.Scalar,
typeof(ArgumentOutOfRangeException) };
}
+
+ public static IEnumerable<object?[]> GetParametersTlsOptionTestData()
+ {
+ // Default
+ yield return new object?[] { null, HiveServer2TlsOption.Empty };
+ yield return new object?[] { "", HiveServer2TlsOption.Empty};
+ yield return new object?[] { " ", HiveServer2TlsOption.Empty };
+ // Explicit
+ yield return new object?[] { $"{TlsOptions.AllowSelfSigned}",
HiveServer2TlsOption.AllowSelfSigned };
+ yield return new object?[] {
$"{TlsOptions.AllowHostnameMismatch}",
HiveServer2TlsOption.AllowHostnameMismatch };
+ // Ignore empty
+ yield return new object?[] { $",{TlsOptions.AllowSelfSigned}",
HiveServer2TlsOption.AllowSelfSigned };
+ yield return new object?[] {
$",{TlsOptions.AllowHostnameMismatch},",
HiveServer2TlsOption.AllowHostnameMismatch };
+ // Combined, embedded space, mixed-case
+ yield return new object?[] {
$"{TlsOptions.AllowSelfSigned},{TlsOptions.AllowHostnameMismatch}",
HiveServer2TlsOption.AllowSelfSigned |
HiveServer2TlsOption.AllowHostnameMismatch };
+ yield return new object?[] {
$"{TlsOptions.AllowHostnameMismatch},{TlsOptions.AllowSelfSigned}",
HiveServer2TlsOption.AllowSelfSigned |
HiveServer2TlsOption.AllowHostnameMismatch };
+ yield return new object?[] { $" {TlsOptions.AllowHostnameMismatch}
, {TlsOptions.AllowSelfSigned} ", HiveServer2TlsOption.AllowSelfSigned |
HiveServer2TlsOption.AllowHostnameMismatch };
+ yield return new object?[] {
$"{TlsOptions.AllowSelfSigned.ToUpperInvariant()},{TlsOptions.AllowHostnameMismatch.ToUpperInvariant()}",
HiveServer2TlsOption.AllowSelfSigned |
HiveServer2TlsOption.AllowHostnameMismatch };
+ // Invalid
+ yield return new object?[] {
$"xxx,{TlsOptions.AllowSelfSigned.ToUpperInvariant()},{TlsOptions.AllowHostnameMismatch.ToUpperInvariant()}",
HiveServer2TlsOption.Empty, typeof(ArgumentOutOfRangeException) };
+ }
}
}
diff --git a/csharp/test/Drivers/Apache/Spark/SparkTestConfiguration.cs
b/csharp/test/Drivers/Apache/Spark/SparkTestConfiguration.cs
index e8614d753..7eb513d31 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkTestConfiguration.cs
+++ b/csharp/test/Drivers/Apache/Spark/SparkTestConfiguration.cs
@@ -27,5 +27,8 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
[JsonPropertyName("data_type_conv"), JsonIgnore(Condition =
JsonIgnoreCondition.WhenWritingDefault)]
public string DataTypeConversion { get; set; } = string.Empty;
+
+ [JsonPropertyName("tls_options"), JsonIgnore(Condition =
JsonIgnoreCondition.WhenWritingDefault)]
+ public string TlsOptions { get; set; } = string.Empty;
}
}
diff --git a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
index 7b2a4fe41..7812aa363 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
+++ b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
@@ -98,6 +98,10 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
{
parameters.Add(SparkParameters.DataTypeConv,
testConfiguration.DataTypeConversion!);
}
+ if (!string.IsNullOrEmpty(testConfiguration.TlsOptions))
+ {
+ parameters.Add(SparkParameters.TLSOptions,
testConfiguration.TlsOptions!);
+ }
return parameters;
}