This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 155c2f519 fix(csharp/src/Drivers/Apache/Spark): correct BatchSize
implementation for base reader (#2199)
155c2f519 is described below
commit 155c2f519dc71cc13076a39d755d087a5c089b2c
Author: Bruce Irschick <[email protected]>
AuthorDate: Mon Sep 30 11:55:43 2024 -0700
fix(csharp/src/Drivers/Apache/Spark): correct BatchSize implementation for
base reader (#2199)
Fixes
1. How HiveServer2Statement.BatchSize is used in the HiveServer2Reader
2. Batch size valid range
3. Test cases
---
.../src/Drivers/Apache/Hive2/HiveServer2Reader.cs | 7 ++-----
.../Drivers/Apache/Hive2/HiveServer2Statement.cs | 6 +++---
.../Apache.Arrow.Adbc.Tests/TestConfiguration.cs | 5 ++++-
.../test/Drivers/Apache/ApacheTestConfiguration.cs | 6 ++++++
.../Drivers/Apache/Spark/SparkTestEnvironment.cs | 8 ++++++++
csharp/test/Drivers/Apache/Spark/StatementTests.cs | 22 +++++++++++++++++++---
6 files changed, 42 insertions(+), 12 deletions(-)
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
b/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
index e72076b13..828a5fe58 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
@@ -37,7 +37,6 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
private const char AsciiPeriod = '.';
private HiveServer2Statement? _statement;
- private readonly long _batchSize;
private readonly DataTypeConversion _dataTypeConversion;
private static readonly IReadOnlyDictionary<ArrowTypeId,
Func<StringArray, IArrowType, IArrowArray>> s_arrowStringConverters =
new Dictionary<ArrowTypeId, Func<StringArray, IArrowType,
IArrowArray>>()
@@ -51,12 +50,10 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
public HiveServer2Reader(
HiveServer2Statement statement,
Schema schema,
- DataTypeConversion dataTypeConversion,
- long batchSize = HiveServer2Connection.BatchSizeDefault)
+ DataTypeConversion dataTypeConversion)
{
_statement = statement;
Schema = schema;
- _batchSize = batchSize;
_dataTypeConversion = dataTypeConversion;
}
@@ -69,7 +66,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
return null;
}
- var request = new TFetchResultsReq(_statement.OperationHandle,
TFetchOrientation.FETCH_NEXT, _batchSize);
+ var request = new TFetchResultsReq(_statement.OperationHandle,
TFetchOrientation.FETCH_NEXT, _statement.BatchSize);
TFetchResultsResp response = await
_statement.Connection.Client.FetchResults(request, cancellationToken);
int columnCount = response.Results.Columns.Count;
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2Statement.cs
b/csharp/src/Drivers/Apache/Hive2/HiveServer2Statement.cs
index 97594161e..824feceb9 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2Statement.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2Statement.cs
@@ -143,11 +143,11 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
private void UpdatePollTimeIfValid(string key, string value) =>
PollTimeMilliseconds = !string.IsNullOrEmpty(key) && int.TryParse(value,
result: out int pollTimeMilliseconds) && pollTimeMilliseconds >= 0
? pollTimeMilliseconds
- : throw new ArgumentException($"The value '{value}' for option
'{key}' is invalid. Must be a numeric value greater than or equal to zero.",
nameof(value));
+ : throw new ArgumentOutOfRangeException(key, value, $"The value
'{value}' for option '{key}' is invalid. Must be a numeric value greater than
or equal to -1.");
- private void UpdateBatchSizeIfValid(string key, string value) =>
BatchSize = !string.IsNullOrEmpty(value) && int.TryParse(value, out int
batchSize) && batchSize > 0
+ private void UpdateBatchSizeIfValid(string key, string value) =>
BatchSize = !string.IsNullOrEmpty(value) && long.TryParse(value, out long
batchSize) && batchSize > 0
? batchSize
- : throw new ArgumentException($"The value '{value}' for option
'{key}' is invalid. Must be a numeric value greater than zero.", nameof(value));
+ : throw new ArgumentOutOfRangeException(key, value, $"The value
'{value}' for option '{key}' is invalid. Must be a numeric value greater than
zero.");
public override void Dispose()
{
diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/TestConfiguration.cs
b/csharp/test/Apache.Arrow.Adbc.Tests/TestConfiguration.cs
index 6365acd50..36a46a55a 100644
--- a/csharp/test/Apache.Arrow.Adbc.Tests/TestConfiguration.cs
+++ b/csharp/test/Apache.Arrow.Adbc.Tests/TestConfiguration.cs
@@ -15,6 +15,7 @@
* limitations under the License.
*/
+using System;
using System.Text.Json.Serialization;
namespace Apache.Arrow.Adbc.Tests
@@ -22,7 +23,7 @@ namespace Apache.Arrow.Adbc.Tests
/// <summary>
/// Base test configuration values.
/// </summary>
- public abstract class TestConfiguration
+ public abstract class TestConfiguration : ICloneable
{
/// <summary>
/// The query to run.
@@ -41,6 +42,8 @@ namespace Apache.Arrow.Adbc.Tests
/// </summary>
[JsonPropertyName("metadata")]
public TestMetadata Metadata { get; set; } = new TestMetadata();
+
+ public virtual object Clone() => MemberwiseClone();
}
/// <summary>
diff --git a/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
b/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
index 056d3890b..2a46300e8 100644
--- a/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
+++ b/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
@@ -45,5 +45,11 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache
[JsonPropertyName("uri"), JsonIgnore(Condition =
JsonIgnoreCondition.WhenWritingDefault)]
public string Uri { get; set; } = string.Empty;
+ [JsonPropertyName("batch_size"), JsonIgnore(Condition =
JsonIgnoreCondition.WhenWritingDefault)]
+ public string BatchSize { get; set; } = string.Empty;
+
+ [JsonPropertyName("polltime_milliseconds"), JsonIgnore(Condition =
JsonIgnoreCondition.WhenWritingDefault)]
+ public string PollTimeMilliseconds { get; set; } = string.Empty;
+
}
}
diff --git a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
index 7812aa363..336ae9677 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
+++ b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
@@ -102,6 +102,14 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
{
parameters.Add(SparkParameters.TLSOptions,
testConfiguration.TlsOptions!);
}
+ if (!string.IsNullOrEmpty(testConfiguration.BatchSize))
+ {
+ parameters.Add(HiveServer2Statement.Options.BatchSize,
testConfiguration.BatchSize!);
+ }
+ if (!string.IsNullOrEmpty(testConfiguration.PollTimeMilliseconds))
+ {
+
parameters.Add(HiveServer2Statement.Options.PollTimeMilliseconds,
testConfiguration.PollTimeMilliseconds!);
+ }
return parameters;
}
diff --git a/csharp/test/Drivers/Apache/Spark/StatementTests.cs
b/csharp/test/Drivers/Apache/Spark/StatementTests.cs
index 783d9fd33..08a738c76 100644
--- a/csharp/test/Drivers/Apache/Spark/StatementTests.cs
+++ b/csharp/test/Drivers/Apache/Spark/StatementTests.cs
@@ -55,10 +55,17 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
[InlineData("2147483647")]
public void CanSetOptionPollTime(string value, bool throws = false)
{
+ var testConfiguration = TestConfiguration.Clone() as
SparkTestConfiguration;
+ testConfiguration!.PollTimeMilliseconds = value;
+ if (throws)
+ {
+ Assert.Throws<ArgumentOutOfRangeException>(() =>
NewConnection(testConfiguration).CreateStatement());
+ }
+
AdbcStatement statement = NewConnection().CreateStatement();
if (throws)
{
- Assert.Throws<ArgumentException>(() =>
statement.SetOption(SparkStatement.Options.PollTimeMilliseconds, value));
+ Assert.Throws<ArgumentOutOfRangeException>(() =>
statement.SetOption(SparkStatement.Options.PollTimeMilliseconds, value));
}
else
{
@@ -73,16 +80,25 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
[InlineData("-1", true)]
[InlineData("one", true)]
[InlineData("-2147483648", true)]
- [InlineData("2147483648", true)]
+ [InlineData("2147483648", false)]
+ [InlineData("9223372036854775807", false)]
+ [InlineData("9223372036854775808", true)]
[InlineData("0", true)]
[InlineData("1")]
[InlineData("2147483647")]
public void CanSetOptionBatchSize(string value, bool throws = false)
{
+ var testConfiguration = TestConfiguration.Clone() as
SparkTestConfiguration;
+ testConfiguration!.BatchSize = value;
+ if (throws)
+ {
+ Assert.Throws<ArgumentOutOfRangeException>(() =>
NewConnection(testConfiguration).CreateStatement());
+ }
+
AdbcStatement statement = NewConnection().CreateStatement();
if (throws)
{
- Assert.Throws<ArgumentException>(() =>
statement.SetOption(SparkStatement.Options.BatchSize, value));
+ Assert.Throws<ArgumentOutOfRangeException>(() =>
statement!.SetOption(SparkStatement.Options.BatchSize, value));
}
else
{