This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new 155c2f519 fix(csharp/src/Drivers/Apache/Spark): correct BatchSize 
implementation for base reader (#2199)
155c2f519 is described below

commit 155c2f519dc71cc13076a39d755d087a5c089b2c
Author: Bruce Irschick <[email protected]>
AuthorDate: Mon Sep 30 11:55:43 2024 -0700

    fix(csharp/src/Drivers/Apache/Spark): correct BatchSize implementation for 
base reader (#2199)
    
    Fixes
    1. How HiveServer2Statement.BatchSize is used in the HiveServer2Reader
    2. Batch size valid range
    3. Test cases
---
 .../src/Drivers/Apache/Hive2/HiveServer2Reader.cs  |  7 ++-----
 .../Drivers/Apache/Hive2/HiveServer2Statement.cs   |  6 +++---
 .../Apache.Arrow.Adbc.Tests/TestConfiguration.cs   |  5 ++++-
 .../test/Drivers/Apache/ApacheTestConfiguration.cs |  6 ++++++
 .../Drivers/Apache/Spark/SparkTestEnvironment.cs   |  8 ++++++++
 csharp/test/Drivers/Apache/Spark/StatementTests.cs | 22 +++++++++++++++++++---
 6 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs 
b/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
index e72076b13..828a5fe58 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
@@ -37,7 +37,6 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
         private const char AsciiPeriod = '.';
 
         private HiveServer2Statement? _statement;
-        private readonly long _batchSize;
         private readonly DataTypeConversion _dataTypeConversion;
         private static readonly IReadOnlyDictionary<ArrowTypeId, 
Func<StringArray, IArrowType, IArrowArray>> s_arrowStringConverters =
             new Dictionary<ArrowTypeId, Func<StringArray, IArrowType, 
IArrowArray>>()
@@ -51,12 +50,10 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
         public HiveServer2Reader(
             HiveServer2Statement statement,
             Schema schema,
-            DataTypeConversion dataTypeConversion,
-            long batchSize = HiveServer2Connection.BatchSizeDefault)
+            DataTypeConversion dataTypeConversion)
         {
             _statement = statement;
             Schema = schema;
-            _batchSize = batchSize;
             _dataTypeConversion = dataTypeConversion;
         }
 
@@ -69,7 +66,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
                 return null;
             }
 
-            var request = new TFetchResultsReq(_statement.OperationHandle, 
TFetchOrientation.FETCH_NEXT, _batchSize);
+            var request = new TFetchResultsReq(_statement.OperationHandle, 
TFetchOrientation.FETCH_NEXT, _statement.BatchSize);
             TFetchResultsResp response = await 
_statement.Connection.Client.FetchResults(request, cancellationToken);
 
             int columnCount = response.Results.Columns.Count;
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2Statement.cs 
b/csharp/src/Drivers/Apache/Hive2/HiveServer2Statement.cs
index 97594161e..824feceb9 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2Statement.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2Statement.cs
@@ -143,11 +143,11 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
 
         private void UpdatePollTimeIfValid(string key, string value) => 
PollTimeMilliseconds = !string.IsNullOrEmpty(key) && int.TryParse(value, 
result: out int pollTimeMilliseconds) && pollTimeMilliseconds >= 0
             ? pollTimeMilliseconds
-            : throw new ArgumentException($"The value '{value}' for option 
'{key}' is invalid. Must be a numeric value greater than or equal to zero.", 
nameof(value));
+            : throw new ArgumentOutOfRangeException(key, value, $"The value 
'{value}' for option '{key}' is invalid. Must be a numeric value greater than 
or equal to -1.");
 
-        private void UpdateBatchSizeIfValid(string key, string value) => 
BatchSize = !string.IsNullOrEmpty(value) && int.TryParse(value, out int 
batchSize) && batchSize > 0
+        private void UpdateBatchSizeIfValid(string key, string value) => 
BatchSize = !string.IsNullOrEmpty(value) && long.TryParse(value, out long 
batchSize) && batchSize > 0
             ? batchSize
-            : throw new ArgumentException($"The value '{value}' for option 
'{key}' is invalid. Must be a numeric value greater than zero.", nameof(value));
+            : throw new ArgumentOutOfRangeException(key, value, $"The value 
'{value}' for option '{key}' is invalid. Must be a numeric value greater than 
zero.");
 
         public override void Dispose()
         {
diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/TestConfiguration.cs 
b/csharp/test/Apache.Arrow.Adbc.Tests/TestConfiguration.cs
index 6365acd50..36a46a55a 100644
--- a/csharp/test/Apache.Arrow.Adbc.Tests/TestConfiguration.cs
+++ b/csharp/test/Apache.Arrow.Adbc.Tests/TestConfiguration.cs
@@ -15,6 +15,7 @@
 * limitations under the License.
 */
 
+using System;
 using System.Text.Json.Serialization;
 
 namespace Apache.Arrow.Adbc.Tests
@@ -22,7 +23,7 @@ namespace Apache.Arrow.Adbc.Tests
     /// <summary>
     /// Base test configuration values.
     /// </summary>
-    public abstract class TestConfiguration
+    public abstract class TestConfiguration : ICloneable
     {
         /// <summary>
         /// The query to run.
@@ -41,6 +42,8 @@ namespace Apache.Arrow.Adbc.Tests
         /// </summary>
         [JsonPropertyName("metadata")]
         public TestMetadata Metadata { get; set; } = new TestMetadata();
+
+        public virtual object Clone() => MemberwiseClone();
     }
 
     /// <summary>
diff --git a/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs 
b/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
index 056d3890b..2a46300e8 100644
--- a/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
+++ b/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
@@ -45,5 +45,11 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache
         [JsonPropertyName("uri"), JsonIgnore(Condition = 
JsonIgnoreCondition.WhenWritingDefault)]
         public string Uri { get; set; } = string.Empty;
 
+        [JsonPropertyName("batch_size"), JsonIgnore(Condition = 
JsonIgnoreCondition.WhenWritingDefault)]
+        public string BatchSize { get; set; } = string.Empty;
+
+        [JsonPropertyName("polltime_milliseconds"), JsonIgnore(Condition = 
JsonIgnoreCondition.WhenWritingDefault)]
+        public string PollTimeMilliseconds { get; set; } = string.Empty;
+
     }
 }
diff --git a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs 
b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
index 7812aa363..336ae9677 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
+++ b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
@@ -102,6 +102,14 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
             {
                 parameters.Add(SparkParameters.TLSOptions, 
testConfiguration.TlsOptions!);
             }
+            if (!string.IsNullOrEmpty(testConfiguration.BatchSize))
+            {
+                parameters.Add(HiveServer2Statement.Options.BatchSize, 
testConfiguration.BatchSize!);
+            }
+            if (!string.IsNullOrEmpty(testConfiguration.PollTimeMilliseconds))
+            {
+                
parameters.Add(HiveServer2Statement.Options.PollTimeMilliseconds, 
testConfiguration.PollTimeMilliseconds!);
+            }
 
             return parameters;
         }
diff --git a/csharp/test/Drivers/Apache/Spark/StatementTests.cs 
b/csharp/test/Drivers/Apache/Spark/StatementTests.cs
index 783d9fd33..08a738c76 100644
--- a/csharp/test/Drivers/Apache/Spark/StatementTests.cs
+++ b/csharp/test/Drivers/Apache/Spark/StatementTests.cs
@@ -55,10 +55,17 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
         [InlineData("2147483647")]
         public void CanSetOptionPollTime(string value, bool throws = false)
         {
+            var testConfiguration = TestConfiguration.Clone() as 
SparkTestConfiguration;
+            testConfiguration!.PollTimeMilliseconds = value;
+            if (throws)
+            {
+                Assert.Throws<ArgumentOutOfRangeException>(() => 
NewConnection(testConfiguration).CreateStatement());
+            }
+
             AdbcStatement statement = NewConnection().CreateStatement();
             if (throws)
             {
-                Assert.Throws<ArgumentException>(() => 
statement.SetOption(SparkStatement.Options.PollTimeMilliseconds, value));
+                Assert.Throws<ArgumentOutOfRangeException>(() => 
statement.SetOption(SparkStatement.Options.PollTimeMilliseconds, value));
             }
             else
             {
@@ -73,16 +80,25 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
         [InlineData("-1", true)]
         [InlineData("one", true)]
         [InlineData("-2147483648", true)]
-        [InlineData("2147483648", true)]
+        [InlineData("2147483648", false)]
+        [InlineData("9223372036854775807", false)]
+        [InlineData("9223372036854775808", true)]
         [InlineData("0", true)]
         [InlineData("1")]
         [InlineData("2147483647")]
         public void CanSetOptionBatchSize(string value, bool throws = false)
         {
+            var testConfiguration = TestConfiguration.Clone() as 
SparkTestConfiguration;
+            testConfiguration!.BatchSize = value;
+            if (throws)
+            {
+                Assert.Throws<ArgumentOutOfRangeException>(() => 
NewConnection(testConfiguration).CreateStatement());
+            }
+
             AdbcStatement statement = NewConnection().CreateStatement();
             if (throws)
             {
-                Assert.Throws<ArgumentException>(() => 
statement.SetOption(SparkStatement.Options.BatchSize, value));
+                Assert.Throws<ArgumentOutOfRangeException>(() => 
statement!.SetOption(SparkStatement.Options.BatchSize, value));
             }
             else
             {

Reply via email to