birschick-bq commented on code in PR #2376:
URL: https://github.com/apache/arrow-adbc/pull/2376#discussion_r1890817780
##########
csharp/src/Drivers/Apache/Hive2/HiveServer2SslImpl.cs:
##########
@@ -0,0 +1,72 @@
+using System;
Review Comment:
Needs copyright/license comments.
##########
csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs:
##########
@@ -112,10 +112,9 @@ internal async Task OpenAsync()
protected internal DataTypeConversion DataTypeConversion { get; set; }
= DataTypeConversion.None;
- protected internal HiveServer2TlsOption TlsOptions { get; set; } =
HiveServer2TlsOption.Empty;
+ protected internal Dictionary<string, string> tlsOptions { get; set; }
= new Dictionary<string, string>();
Review Comment:
I'm not a fan of using a Dictionary of string options. It means the
TryParse/Parse/Convert step needs to be done multiple times. Could you make
TlsOptions a `class` instead of `enum`?
##########
csharp/src/Drivers/Apache/Hive2/HiveServer2SslImpl.cs:
##########
@@ -0,0 +1,72 @@
+using System;
+using System.Collections.Generic;
+using System.Net.Security;
+using System.Security.Cryptography.X509Certificates;
+using System.Net.Http;
+
+namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
+{
+ static class HiveServer2SslImpl
+ {
+ static internal Dictionary<string, string>
ValidateTlsOptions(IReadOnlyDictionary<string, string> Properties)
+ {
+ Dictionary<string, string> tlsOptions = new Dictionary<string,
string>();
+ Properties.TryGetValue(TlsOptions.IsSslEnabled, out string?
isSslEnabled);
+ if (!Convert.ToBoolean(isSslEnabled))
+ {
+ tlsOptions[TlsOptions.IsSslEnabled] = "False";
+ return tlsOptions;
+ }
+ tlsOptions[TlsOptions.IsSslEnabled] = "True";
+ Properties.TryGetValue(TlsOptions.AllowSelfSigned, out string?
allowSelfSigned);
+ if (!Convert.ToBoolean(allowSelfSigned))
+ {
+ tlsOptions[TlsOptions.AllowSelfSigned] = "False";
+ return tlsOptions;
+ }
+ tlsOptions[TlsOptions.AllowSelfSigned] = "True";
+ Properties.TryGetValue(TlsOptions.AllowHostnameMismatch, out
string? allowHostnameMismatch);
+ tlsOptions[TlsOptions.AllowHostnameMismatch] =
(allowHostnameMismatch != null) ? allowHostnameMismatch : "False";
+ Properties.TryGetValue(TlsOptions.TrustedCertificatePath, out
string? trustedCertificatePath);
+ tlsOptions[TlsOptions.TrustedCertificatePath] =
(trustedCertificatePath != null) ? trustedCertificatePath : "";
+ return tlsOptions;
+ }
+ static internal HttpClientHandler
NewHttpClientHandler(Dictionary<string, string> tlsOptions)
+ {
+ HttpClientHandler httpClientHandler = new();
+ tlsOptions.TryGetValue(TlsOptions.IsSslEnabled, out string? isSSl);
+ if (Convert.ToBoolean(isSSl))
+ {
+ tlsOptions.TryGetValue(TlsOptions.AllowSelfSigned, out string?
allowSelfSigned);
+ tlsOptions.TryGetValue(TlsOptions.AllowHostnameMismatch, out
string? allowHostnameMismatch);
+ tlsOptions.TryGetValue(TlsOptions.TrustedCertificatePath, out
string? trustedCertificatePath);
+ httpClientHandler.ServerCertificateCustomValidationCallback =
(request, certificate, chain, policyErrors) =>
+ {
+ if (policyErrors == SslPolicyErrors.None) return true;
+ if (trustedCertificatePath == null ||
trustedCertificatePath == "")
Review Comment:
```suggestion
if (string.IsNullOrEmpty(trustedCertificatePath))
```
##########
csharp/src/Drivers/Apache/Hive2/HiveServer2SslImpl.cs:
##########
@@ -0,0 +1,72 @@
+using System;
+using System.Collections.Generic;
+using System.Net.Security;
+using System.Security.Cryptography.X509Certificates;
+using System.Net.Http;
+
+namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
+{
+ static class HiveServer2SslImpl
+ {
+ static internal Dictionary<string, string>
ValidateTlsOptions(IReadOnlyDictionary<string, string> Properties)
+ {
+ Dictionary<string, string> tlsOptions = new Dictionary<string,
string>();
+ Properties.TryGetValue(TlsOptions.IsSslEnabled, out string?
isSslEnabled);
+ if (!Convert.ToBoolean(isSslEnabled))
+ {
+ tlsOptions[TlsOptions.IsSslEnabled] = "False";
+ return tlsOptions;
+ }
+ tlsOptions[TlsOptions.IsSslEnabled] = "True";
+ Properties.TryGetValue(TlsOptions.AllowSelfSigned, out string?
allowSelfSigned);
+ if (!Convert.ToBoolean(allowSelfSigned))
+ {
+ tlsOptions[TlsOptions.AllowSelfSigned] = "False";
+ return tlsOptions;
+ }
+ tlsOptions[TlsOptions.AllowSelfSigned] = "True";
+ Properties.TryGetValue(TlsOptions.AllowHostnameMismatch, out
string? allowHostnameMismatch);
+ tlsOptions[TlsOptions.AllowHostnameMismatch] =
(allowHostnameMismatch != null) ? allowHostnameMismatch : "False";
+ Properties.TryGetValue(TlsOptions.TrustedCertificatePath, out
string? trustedCertificatePath);
+ tlsOptions[TlsOptions.TrustedCertificatePath] =
(trustedCertificatePath != null) ? trustedCertificatePath : "";
+ return tlsOptions;
+ }
+ static internal HttpClientHandler
NewHttpClientHandler(Dictionary<string, string> tlsOptions)
Review Comment:
nit:
```suggestion
static internal HttpClientHandler
NewHttpClientHandler(Dictionary<string, string> tlsOptions)
```
##########
csharp/src/Drivers/Apache/Spark/README.md:
##########
@@ -41,6 +41,11 @@ but can also be passed in the call to `AdbcDatabase.Connect`.
| `adbc.apache.statement.batch_size` | Sets the maximum number of rows to
retrieve in a single batch request. | `50000` |
| `adbc.apache.statement.polltime_ms` | If polling is necessary to get a
result, this option sets the length of time (in milliseconds) to wait between
polls. | `500` |
| `adbc.apache.statement.query_timeout_s` | Sets the maximum time (in seconds)
for a query to complete. Values can be 0 (infinite) or greater than zero. |
`60` |
+| `ssl` | If ssl needs to enabled or not | `false` |
+| `allow_self_signed` | If self signed ssl certificate needs to be allowed or
not | `false` |
+| `allow_hostname_mismatch` | If hostname mismatch is allowed for ssl |
`false` |
Review Comment:
`allow_self_signed` and `allow_hostname_mismatch` are already defined in the
`adbc.spark.tls_options`. No need to redefine them.
You could add the `tls`, or `enable` to `adbc.spark.tls_options` to
explicitly enable tls/ssl?
##########
csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs:
##########
@@ -112,10 +112,9 @@ internal async Task OpenAsync()
protected internal DataTypeConversion DataTypeConversion { get; set; }
= DataTypeConversion.None;
- protected internal HiveServer2TlsOption TlsOptions { get; set; } =
HiveServer2TlsOption.Empty;
+ protected internal Dictionary<string, string> tlsOptions { get; set; }
= new Dictionary<string, string>();
Review Comment:
nit: properties should start with a capital letter
```suggestion
protected internal Dictionary<string, string> TlsOptions { get; set;
} = new Dictionary<string, string>();
```
##########
csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs:
##########
@@ -34,16 +34,6 @@ internal void TestParametersDataTypeConvParse(string?
dataTypeConversion, DataTy
Assert.Throws(exceptionType, () =>
DataTypeConversionParser.Parse(dataTypeConversion));
}
- [SkippableTheory]
- [MemberData(nameof(GetParametersTlsOptionTestData))]
- internal void TestParametersTlsOptionParse(string? tlsOptions,
HiveServer2TlsOption expected, Type? exceptionType = default)
Review Comment:
Will you add some tests for the options?
##########
csharp/src/Drivers/Apache/Spark/SparkConnection.cs:
##########
@@ -258,6 +258,7 @@ internal SparkConnection(IReadOnlyDictionary<string,
string> properties)
private void ValidateProperties()
{
+ tlsOptions = HiveServer2SslImpl.ValidateTlsOptions(Properties);
Review Comment:
Can we put this inside of `ValidateOptions`? I'd prefer we validate the
Authentication/Connection first.
##########
csharp/src/Drivers/Apache/Spark/README.md:
##########
@@ -41,6 +41,11 @@ but can also be passed in the call to `AdbcDatabase.Connect`.
| `adbc.apache.statement.batch_size` | Sets the maximum number of rows to
retrieve in a single batch request. | `50000` |
| `adbc.apache.statement.polltime_ms` | If polling is necessary to get a
result, this option sets the length of time (in milliseconds) to wait between
polls. | `500` |
| `adbc.apache.statement.query_timeout_s` | Sets the maximum time (in seconds)
for a query to complete. Values can be 0 (infinite) or greater than zero. |
`60` |
+| `ssl` | If ssl needs to enabled or not | `false` |
+| `allow_self_signed` | If self signed ssl certificate needs to be allowed or
not | `false` |
+| `allow_hostname_mismatch` | If hostname mismatch is allowed for ssl |
`false` |
+| `trusted_certificate_path` | The full path of the ssl certificate file
containing custom CA certificates for verifying the server when connecting over
TLS | `` |
Review Comment:
We need to continue to use namespace/canonical name convention.
```suggestion
| `adbc.spark.trusted_certificate_path` | The full path of the tls/ssl
certificate file containing custom CA certificates for verifying the server
when connecting over TLS | |
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]