This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 5f1f675f5 feat(csharp/src/Drivers/Apache/Spark): add
request_timeout_ms option to allow longer HTTP request length (#2218)
5f1f675f5 is described below
commit 5f1f675f53f7bb844fcb970f4d2f3c6d901ad5cb
Author: Bruce Irschick <[email protected]>
AuthorDate: Wed Oct 9 14:56:52 2024 -0700
feat(csharp/src/Drivers/Apache/Spark): add request_timeout_ms option to
allow longer HTTP request length (#2218)
Adds a new connection option to allow a longer HTTP request timeout.
| Property | Description | Default |
| :--- | :--- | :--- |
| `adbc.spark.http_request_timeout_ms` | Sets the timeout (in
milliseconds) when making requests to the Spark server (type: `http`).
Set the value higher than the default if you notice errors due to
network timeouts. | `30000` |
---
csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs | 2 ++
csharp/src/Drivers/Apache/Spark/README.md | 1 +
csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs | 15 +++++++++++++--
csharp/src/Drivers/Apache/Spark/SparkParameters.cs | 1 +
csharp/test/Drivers/Apache/ApacheTestConfiguration.cs | 3 +++
csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs | 5 +++++
csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs | 4 ++++
7 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
b/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
index 3878f771e..2853bbe04 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
@@ -88,6 +88,8 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
protected internal HiveServer2TlsOption TlsOptions { get; set; } =
HiveServer2TlsOption.Empty;
+ protected internal int HttpRequestTimeout { get; set; } = 30000;
+
protected abstract Task<TTransport> CreateTransportAsync();
protected abstract Task<TProtocol> CreateProtocolAsync(TTransport
transport);
diff --git a/csharp/src/Drivers/Apache/Spark/README.md
b/csharp/src/Drivers/Apache/Spark/README.md
index b35e9b16a..0fddb4838 100644
--- a/csharp/src/Drivers/Apache/Spark/README.md
+++ b/csharp/src/Drivers/Apache/Spark/README.md
@@ -37,6 +37,7 @@ but can also be passed in the call to `AdbcDatabase.Connect`.
| `password` | The password for the user name used for basic
authentication. | |
| `adbc.spark.data_type_conv` | Comma-separated list of data conversion
options. Each option indicates the type of conversion to perform on data
returned from the Spark server. <br><br>Allowed values: `none`, `scalar`.
<br><br>Option `none` indicates there is no conversion from Spark type to
native type (i.e., no conversion from String to Timestamp for Apache Spark over
HTTP). Example `adbc.spark.data_type_conv=none`. <br><br>Option `scalar` will
perform conversion (if necessary) from th [...]
| `adbc.spark.tls_options` | Comma-separated list of TLS/SSL options. Each
option indicates the TLS/SSL option when connecting to a Spark server.
<br><br>Allowed values: `allow_self_signed`, `allow_hostname_mismatch`.
<br><br>Option `allow_self_signed` allows certificate errors due to an unknown
certificate authority, typically when using a self-signed certificate. Option
`allow_hostname_mismatch` allows certificate errors due to a mismatch of the
hostname. (e.g., when connecting through [...]
+| `adbc.spark.http_request_timeout_ms` | Sets the timeout (in milliseconds)
when making requests to the Spark server (type: `http`). Set the value higher
than the default if you notice errors due to network timeouts. | `30000` |
| `adbc.statement.batch_size` | Sets the maximum number of rows to retrieve in
a single batch request. | `50000` |
| `adbc.statement.polltime_milliseconds` | If polling is necessary to get a
result, this option sets the length of time (in milliseconds) to wait between
polls. | `500` |
diff --git a/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
b/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
index cdc015f76..f67b3316e 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
@@ -18,6 +18,7 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
+using System.Globalization;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
@@ -118,7 +119,14 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
Properties.TryGetValue(SparkParameters.DataTypeConv, out string?
dataTypeConv);
DataTypeConversion = DataTypeConversionParser.Parse(dataTypeConv);
Properties.TryGetValue(SparkParameters.TLSOptions, out string?
tlsOptions);
- TlsOptions = Hive2.TlsOptionsParser.Parse(tlsOptions);
+ TlsOptions = TlsOptionsParser.Parse(tlsOptions);
+
Properties.TryGetValue(SparkParameters.HttpRequestTimeoutMilliseconds, out
string? requestTimeoutMs);
+ if (requestTimeoutMs != null)
+ {
+ HttpRequestTimeout = int.TryParse(requestTimeoutMs,
NumberStyles.Integer, CultureInfo.InvariantCulture, out int
requestTimeoutMsValue) && requestTimeoutMsValue > 0
+ ? requestTimeoutMsValue
+ : throw new
ArgumentOutOfRangeException(SparkParameters.HttpRequestTimeoutMilliseconds,
requestTimeoutMs, $"must be a value between 1 .. {int.MaxValue}. default is
30000 milliseconds.");
+ }
}
internal override IArrowArrayStream NewReader<T>(T statement, Schema
schema, CancellationToken cancellationToken = default) => new
HiveServer2Reader(statement, schema, dataTypeConversion:
statement.Connection.DataTypeConversion, cancellationToken);
@@ -154,7 +162,10 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
httpClient.DefaultRequestHeaders.ExpectContinue = false;
TConfiguration config = new();
- ThriftHttpTransport transport = new(httpClient, config);
+ ThriftHttpTransport transport = new(httpClient, config)
+ {
+ ConnectTimeout = HttpRequestTimeout,
+ };
return Task.FromResult<TTransport>(transport);
}
diff --git a/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
b/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
index a27873038..4722efce5 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
@@ -32,6 +32,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
public const string Type = "adbc.spark.type";
public const string DataTypeConv = "adbc.spark.data_type_conv";
public const string TLSOptions = "adbc.spark.tls_options";
+ public const string HttpRequestTimeoutMilliseconds =
"adbc.spark.http_request_timeout_ms";
}
public static class SparkAuthTypeConstants
diff --git a/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
b/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
index 2a46300e8..7d1229203 100644
--- a/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
+++ b/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
@@ -51,5 +51,8 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache
[JsonPropertyName("polltime_milliseconds"), JsonIgnore(Condition =
JsonIgnoreCondition.WhenWritingDefault)]
public string PollTimeMilliseconds { get; set; } = string.Empty;
+ [JsonPropertyName("http_request_timeout_ms"), JsonIgnore(Condition =
JsonIgnoreCondition.WhenWritingDefault)]
+ public string HttpRequestTimeoutMilliseconds { get; set; } =
string.Empty;
+
}
}
diff --git a/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
b/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
index 77d93f65f..c2faa9d12 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
+++ b/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
@@ -85,6 +85,11 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.HostName] =
"valid.server.com", [SparkParameters.Token] = "abcdef", [AdbcOptions.Uri] =
"httpxxz://hostname.com" }, typeof(ArgumentOutOfRangeException)));
Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.HostName] =
"valid.server.com", [SparkParameters.Token] = "abcdef", [AdbcOptions.Uri] =
"http-//hostname.com" }, typeof(UriFormatException)));
Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.HostName] =
"valid.server.com", [SparkParameters.Token] = "abcdef", [AdbcOptions.Uri] =
"httpxxz://hostname.com:1234567890" }, typeof(UriFormatException)));
+ Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword" ,
[SparkParameters.HttpRequestTimeoutMilliseconds] = "0" },
typeof(ArgumentOutOfRangeException)));
+ Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword",
[SparkParameters.HttpRequestTimeoutMilliseconds] = "-1" },
typeof(ArgumentOutOfRangeException)));
+ Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword",
[SparkParameters.HttpRequestTimeoutMilliseconds] = ((long)int.MaxValue +
1).ToString() }, typeof(ArgumentOutOfRangeException)));
+ Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword",
[SparkParameters.HttpRequestTimeoutMilliseconds] = "non-numeric" },
typeof(ArgumentOutOfRangeException)));
+ Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword",
[SparkParameters.HttpRequestTimeoutMilliseconds] = "" },
typeof(ArgumentOutOfRangeException)));
}
}
}
diff --git a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
index 336ae9677..1b79facf4 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
+++ b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
@@ -110,6 +110,10 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
{
parameters.Add(HiveServer2Statement.Options.PollTimeMilliseconds,
testConfiguration.PollTimeMilliseconds!);
}
+ if
(!string.IsNullOrEmpty(testConfiguration.HttpRequestTimeoutMilliseconds))
+ {
+ parameters.Add(SparkParameters.HttpRequestTimeoutMilliseconds,
testConfiguration.HttpRequestTimeoutMilliseconds!);
+ }
return parameters;
}