This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new 5f1f675f5 feat(csharp/src/Drivers/Apache/Spark): add 
request_timeout_ms option to allow longer HTTP request length (#2218)
5f1f675f5 is described below

commit 5f1f675f53f7bb844fcb970f4d2f3c6d901ad5cb
Author: Bruce Irschick <[email protected]>
AuthorDate: Wed Oct 9 14:56:52 2024 -0700

    feat(csharp/src/Drivers/Apache/Spark): add request_timeout_ms option to 
allow longer HTTP request length (#2218)
    
    Adds a new connection option that allows a longer HTTP request timeout.
    
    | Property               | Description | Default |
    | :---                   | :---        | :---    |
    | `adbc.spark.http_request_timeout_ms` | Sets the timeout (in
    milliseconds) when making requests to the Spark server (type: `http`).
    Set the value higher than the default if you notice errors due to
    network timeouts. | `30000` |
---
 csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs |  2 ++
 csharp/src/Drivers/Apache/Spark/README.md                |  1 +
 csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs   | 15 +++++++++++++--
 csharp/src/Drivers/Apache/Spark/SparkParameters.cs       |  1 +
 csharp/test/Drivers/Apache/ApacheTestConfiguration.cs    |  3 +++
 csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs  |  5 +++++
 csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs |  4 ++++
 7 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs 
b/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
index 3878f771e..2853bbe04 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2Connection.cs
@@ -88,6 +88,8 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
 
         protected internal HiveServer2TlsOption TlsOptions { get; set; } = 
HiveServer2TlsOption.Empty;
 
+        protected internal int HttpRequestTimeout { get; set; } = 30000;
+
         protected abstract Task<TTransport> CreateTransportAsync();
 
         protected abstract Task<TProtocol> CreateProtocolAsync(TTransport 
transport);
diff --git a/csharp/src/Drivers/Apache/Spark/README.md 
b/csharp/src/Drivers/Apache/Spark/README.md
index b35e9b16a..0fddb4838 100644
--- a/csharp/src/Drivers/Apache/Spark/README.md
+++ b/csharp/src/Drivers/Apache/Spark/README.md
@@ -37,6 +37,7 @@ but can also be passed in the call to `AdbcDatabase.Connect`.
 | `password`             | The password for the user name used for basic 
authentication. | |
 | `adbc.spark.data_type_conv` | Comma-separated list of data conversion 
options. Each option indicates the type of conversion to perform on data 
returned from the Spark server. <br><br>Allowed values: `none`, `scalar`. 
<br><br>Option `none` indicates there is no conversion from Spark type to 
native type (i.e., no conversion from String to Timestamp for Apache Spark over 
HTTP). Example `adbc.spark.conv_data_type=none`. <br><br>Option `scalar` will 
perform conversion (if necessary) from th [...]
 | `adbc.spark.tls_options` | Comma-separated list of TLS/SSL options. Each 
option indicates the TLS/SSL option when connecting to a Spark server. 
<br><br>Allowed values: `allow_self_signed`, `allow_hostname_mismatch`. 
<br><br>Option `allow_self_signed` allows certificate errors due to an unknown 
certificate authority, typically when using a self-signed certificate. Option 
`allow_hostname_mismatch` allow certificate errors due to a mismatch of the 
hostname. (e.g., when connecting through  [...]
+| `adbc.spark.http_request_timeout_ms` | Sets the timeout (in milliseconds) 
when making requests to the Spark server (type: `http`). Set the value higher 
than the default if you notice errors due to network timeouts. | `30000` |
 | `adbc.statement.batch_size` | Sets the maximum number of rows to retrieve in 
a single batch request. | `50000` |
 | `adbc.statement.polltime_milliseconds` | If polling is necessary to get a 
result, this option sets the length of time (in milliseconds) to wait between 
polls. | `500` |
 
diff --git a/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs 
b/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
index cdc015f76..f67b3316e 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
@@ -18,6 +18,7 @@
 using System;
 using System.Collections.Generic;
 using System.Diagnostics;
+using System.Globalization;
 using System.Net;
 using System.Net.Http;
 using System.Net.Http.Headers;
@@ -118,7 +119,14 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
             Properties.TryGetValue(SparkParameters.DataTypeConv, out string? 
dataTypeConv);
             DataTypeConversion = DataTypeConversionParser.Parse(dataTypeConv);
             Properties.TryGetValue(SparkParameters.TLSOptions, out string? 
tlsOptions);
-            TlsOptions = Hive2.TlsOptionsParser.Parse(tlsOptions);
+            TlsOptions = TlsOptionsParser.Parse(tlsOptions);
+            
Properties.TryGetValue(SparkParameters.HttpRequestTimeoutMilliseconds, out 
string? requestTimeoutMs);
+            if (requestTimeoutMs != null)
+            {
+                HttpRequestTimeout = int.TryParse(requestTimeoutMs, 
NumberStyles.Integer, CultureInfo.InvariantCulture, out int 
requestTimeoutMsValue) && requestTimeoutMsValue > 0
+                    ? requestTimeoutMsValue
+                    : throw new 
ArgumentOutOfRangeException(SparkParameters.HttpRequestTimeoutMilliseconds, 
requestTimeoutMs, $"must be a value between 1 .. {int.MaxValue}. default is 
30000 milliseconds.");
+            }
         }
 
         internal override IArrowArrayStream NewReader<T>(T statement, Schema 
schema, CancellationToken cancellationToken = default) => new 
HiveServer2Reader(statement, schema, dataTypeConversion: 
statement.Connection.DataTypeConversion, cancellationToken);
@@ -154,7 +162,10 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
             httpClient.DefaultRequestHeaders.ExpectContinue = false;
 
             TConfiguration config = new();
-            ThriftHttpTransport transport = new(httpClient, config);
+            ThriftHttpTransport transport = new(httpClient, config)
+            {
+                ConnectTimeout = HttpRequestTimeout,
+            };
             return Task.FromResult<TTransport>(transport);
         }
 
diff --git a/csharp/src/Drivers/Apache/Spark/SparkParameters.cs 
b/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
index a27873038..4722efce5 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
@@ -32,6 +32,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
         public const string Type = "adbc.spark.type";
         public const string DataTypeConv = "adbc.spark.data_type_conv";
         public const string TLSOptions = "adbc.spark.tls_options";
+        public const string HttpRequestTimeoutMilliseconds = 
"adbc.spark.http_request_timeout_ms";
     }
 
     public static class SparkAuthTypeConstants
diff --git a/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs 
b/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
index 2a46300e8..7d1229203 100644
--- a/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
+++ b/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
@@ -51,5 +51,8 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache
         [JsonPropertyName("polltime_milliseconds"), JsonIgnore(Condition = 
JsonIgnoreCondition.WhenWritingDefault)]
         public string PollTimeMilliseconds { get; set; } = string.Empty;
 
+        [JsonPropertyName("http_request_timeout_ms"), JsonIgnore(Condition = 
JsonIgnoreCondition.WhenWritingDefault)]
+        public string HttpRequestTimeoutMilliseconds { get; set; } = 
string.Empty;
+
     }
 }
diff --git a/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs 
b/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
index 77d93f65f..c2faa9d12 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
+++ b/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
@@ -85,6 +85,11 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
                 Add(new(new() { [SparkParameters.Type] = 
SparkServerTypeConstants.Databricks, [SparkParameters.HostName] = 
"valid.server.com", [SparkParameters.Token] = "abcdef", [AdbcOptions.Uri] = 
"httpxxz://hostname.com" }, typeof(ArgumentOutOfRangeException)));
                 Add(new(new() { [SparkParameters.Type] = 
SparkServerTypeConstants.Databricks, [SparkParameters.HostName] = 
"valid.server.com", [SparkParameters.Token] = "abcdef", [AdbcOptions.Uri] = 
"http-//hostname.com" }, typeof(UriFormatException)));
                 Add(new(new() { [SparkParameters.Type] = 
SparkServerTypeConstants.Databricks, [SparkParameters.HostName] = 
"valid.server.com", [SparkParameters.Token] = "abcdef", [AdbcOptions.Uri] = 
"httpxxz://hostname.com:1234567890" }, typeof(UriFormatException)));
+                Add(new(new() { [SparkParameters.Type] = 
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com", 
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword" , 
[SparkParameters.HttpRequestTimeoutMilliseconds] = "0" }, 
typeof(ArgumentOutOfRangeException)));
+                Add(new(new() { [SparkParameters.Type] = 
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com", 
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword", 
[SparkParameters.HttpRequestTimeoutMilliseconds] = "-1" }, 
typeof(ArgumentOutOfRangeException)));
+                Add(new(new() { [SparkParameters.Type] = 
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com", 
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword", 
[SparkParameters.HttpRequestTimeoutMilliseconds] = ((long)int.MaxValue + 
1).ToString() }, typeof(ArgumentOutOfRangeException)));
+                Add(new(new() { [SparkParameters.Type] = 
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com", 
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword", 
[SparkParameters.HttpRequestTimeoutMilliseconds] = "non-numeric" }, 
typeof(ArgumentOutOfRangeException)));
+                Add(new(new() { [SparkParameters.Type] = 
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com", 
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword", 
[SparkParameters.HttpRequestTimeoutMilliseconds] = "" }, 
typeof(ArgumentOutOfRangeException)));
             }
         }
     }
diff --git a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs 
b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
index 336ae9677..1b79facf4 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
+++ b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
@@ -110,6 +110,10 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
             {
                 
parameters.Add(HiveServer2Statement.Options.PollTimeMilliseconds, 
testConfiguration.PollTimeMilliseconds!);
             }
+            if 
(!string.IsNullOrEmpty(testConfiguration.HttpRequestTimeoutMilliseconds))
+            {
+                parameters.Add(SparkParameters.HttpRequestTimeoutMilliseconds, 
testConfiguration.HttpRequestTimeoutMilliseconds!);
+            }
 
             return parameters;
         }

Reply via email to