This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 6027c11be feat(csharp/src/Drivers/BigQuery): support evaluation kind
and statement type setting (#2698)
6027c11be is described below
commit 6027c11bec69671aa509006c94f3667c9e525187
Author: qifanzhang-ms <[email protected]>
AuthorDate: Sat Apr 19 03:26:08 2025 +0800
feat(csharp/src/Drivers/BigQuery): support evaluation kind and statement
type setting (#2698)
Support evaluation kind, statement type, and statement index settings for the
BigQuery (GBQ) driver, and improve the code in BigQueryStatement.cs.
---
csharp/src/Drivers/BigQuery/BigQueryConnection.cs | 5 +-
csharp/src/Drivers/BigQuery/BigQueryParameters.cs | 3 ++
csharp/src/Drivers/BigQuery/BigQueryStatement.cs | 61 ++++++++++++++--------
csharp/src/Drivers/BigQuery/readme.md | 9 ++++
.../Drivers/BigQuery/BigQueryTestConfiguration.cs | 9 ++++
.../test/Drivers/BigQuery/BigQueryTestingUtils.cs | 15 ++++++
csharp/test/Drivers/BigQuery/DriverTests.cs | 23 ++++++++
csharp/test/Drivers/BigQuery/readme.md | 3 ++
8 files changed, 104 insertions(+), 24 deletions(-)
diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
index 447f288f3..0e84e6959 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
@@ -1058,7 +1058,10 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery
BigQueryParameters.LargeDecimalsAsString,
BigQueryParameters.LargeResultsDestinationTable,
BigQueryParameters.GetQueryResultsOptionsTimeout,
- BigQueryParameters.MaxFetchConcurrency
+ BigQueryParameters.MaxFetchConcurrency,
+ BigQueryParameters.StatementType,
+ BigQueryParameters.StatementIndex,
+ BigQueryParameters.EvaluationKind
};
foreach (string key in statementOptions)
diff --git a/csharp/src/Drivers/BigQuery/BigQueryParameters.cs
b/csharp/src/Drivers/BigQuery/BigQueryParameters.cs
index abfb7c102..02873c2f1 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryParameters.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryParameters.cs
@@ -39,6 +39,9 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery
public const string GetQueryResultsOptionsTimeout =
"adbc.bigquery.get_query_results_options.timeout";
public const string MaxFetchConcurrency =
"adbc.bigquery.max_fetch_concurrency";
public const string IncludePublicProjectId =
"adbc.bigquery.include_public_project_id";
+ public const string StatementType =
"adbc.bigquery.multiple_statement.statement_type";
+ public const string StatementIndex =
"adbc.bigquery.multiple_statement.statement_index";
+ public const string EvaluationKind =
"adbc.bigquery.multiple_statement.evaluation_kind";
}
/// <summary>
diff --git a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
index ddf34104f..578f2eef7 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
@@ -51,52 +51,71 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery
public override QueryResult ExecuteQuery()
{
+ // Create job
QueryOptions queryOptions = ValidateOptions();
-
BigQueryJob job = this.client.CreateQueryJob(SqlQuery, null,
queryOptions);
+ // Get results
GetQueryResultsOptions getQueryResultsOptions = new
GetQueryResultsOptions();
-
if
(this.Options?.TryGetValue(BigQueryParameters.GetQueryResultsOptionsTimeout,
out string? timeoutSeconds) == true &&
int.TryParse(timeoutSeconds, out int seconds) &&
seconds >= 0)
{
getQueryResultsOptions.Timeout = TimeSpan.FromSeconds(seconds);
}
-
BigQueryResults results =
job.GetQueryResults(getQueryResultsOptions);
- BigQueryReadClientBuilder readClientBuilder = new
BigQueryReadClientBuilder();
- readClientBuilder.Credential = this.credential;
- BigQueryReadClient readClient = readClientBuilder.Build();
-
+ // For multi-statement queries, the results.TableReference is null
if (results.TableReference == null)
{
- // To get the results of all statements in a multi-statement
query, enumerate the child jobs and call jobs.getQueryResults on each of them.
- // Related public docs:
https://cloud.google.com/bigquery/docs/multi-statement-queries#get_all_executed_statements
+ string statementType = string.Empty;
+ if
(this.Options?.TryGetValue(BigQueryParameters.StatementType, out string?
statementTypeString) == true)
+ {
+ statementType = statementTypeString;
+ }
+ int statementIndex = 1;
+ if
(this.Options?.TryGetValue(BigQueryParameters.StatementIndex, out string?
statementIndexString) == true &&
+ int.TryParse(statementIndexString, out int
statementIndexInt) &&
+ statementIndexInt > 0)
+ {
+ statementIndex = statementIndexInt;
+ }
+ string evaluationKind = string.Empty;
+ if
(this.Options?.TryGetValue(BigQueryParameters.EvaluationKind, out string?
evaluationKindString) == true)
+ {
+ evaluationKind = evaluationKindString;
+ }
+
+ // To get the results of all statements in a multi-statement
query, enumerate the child jobs. Related public docs:
https://cloud.google.com/bigquery/docs/multi-statement-queries#get_all_executed_statements.
+ // Can filter by StatementType and EvaluationKind. Related
public docs:
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobstatistics2,
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#evaluationkind
ListJobsOptions listJobsOptions = new ListJobsOptions();
listJobsOptions.ParentJobId = results.JobReference.JobId;
- PagedEnumerable<JobList, BigQueryJob> joblist =
client.ListJobs(listJobsOptions);
- BigQueryJob firstQueryJob = new BigQueryJob(client,
job.Resource);
- foreach (BigQueryJob childJob in joblist)
+ var joblist = client.ListJobs(listJobsOptions)
+ .Select(job => client.GetJob(job.Reference))
+ .Where(job => string.IsNullOrEmpty(evaluationKind) ||
job.Statistics.ScriptStatistics.EvaluationKind.Equals(evaluationKind,
StringComparison.OrdinalIgnoreCase))
+ .Where(job => string.IsNullOrEmpty(statementType) ||
job.Statistics.Query.StatementType.Equals(statementType,StringComparison.OrdinalIgnoreCase))
+ .OrderBy(job => job.Resource.Statistics.CreationTime)
+ .ToList();
+
+ if (joblist.Count > 0)
{
- var tempJob = client.GetJob(childJob.Reference);
- var query = tempJob.Resource?.Configuration?.Query;
- if (query != null && query.DestinationTable != null &&
query.DestinationTable.ProjectId != null && query.DestinationTable.DatasetId !=
null && query.DestinationTable.TableId != null)
+ if (statementIndex < 1 || statementIndex > joblist.Count)
{
- firstQueryJob = tempJob;
+ throw new ArgumentOutOfRangeException($"The specified
index {statementIndex} is out of range. There are {joblist.Count} jobs
available.");
}
+ results = joblist[statementIndex -
1].GetQueryResults(getQueryResultsOptions);
}
- results = firstQueryJob.GetQueryResults();
}
-
if (results.TableReference == null)
{
throw new AdbcException("There is no query statement");
}
+ // BigQuery Read Client for streaming
+ BigQueryReadClientBuilder readClientBuilder = new
BigQueryReadClientBuilder();
+ readClientBuilder.Credential = this.credential;
+ BigQueryReadClient readClient = readClientBuilder.Build();
string table =
$"projects/{results.TableReference.ProjectId}/datasets/{results.TableReference.DatasetId}/tables/{results.TableReference.TableId}";
-
int maxStreamCount = 1;
if
(this.Options?.TryGetValue(BigQueryParameters.MaxFetchConcurrency, out string?
maxStreamCountString) == true)
{
@@ -110,16 +129,12 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery
}
ReadSession rs = new ReadSession { Table = table, DataFormat =
DataFormat.Arrow };
ReadSession rrs = readClient.CreateReadSession("projects/" +
results.TableReference.ProjectId, rs, maxStreamCount);
-
long totalRows = results.TotalRows == null ? -1L :
(long)results.TotalRows.Value;
-
var readers = rrs.Streams
.Select(s => ReadChunk(readClient, s.Name))
.Where(chunk => chunk != null)
.Cast<IArrowReader>();
-
IArrowArrayStream stream = new
MultiArrowReader(TranslateSchema(results.Schema), readers);
-
return new QueryResult(totalRows, stream);
}
diff --git a/csharp/src/Drivers/BigQuery/readme.md
b/csharp/src/Drivers/BigQuery/readme.md
index 823a066fa..6b733ca2f 100644
--- a/csharp/src/Drivers/BigQuery/readme.md
+++ b/csharp/src/Drivers/BigQuery/readme.md
@@ -63,6 +63,15 @@
https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.V2/latest/G
**adbc.bigquery.max_fetch_concurrency**<br>
Optional. Sets the
[maxStreamCount](https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.Storage.V1/latest/Google.Cloud.BigQuery.Storage.V1.BigQueryReadClient#Google_Cloud_BigQuery_Storage_V1_BigQueryReadClient_CreateReadSession_System_String_Google_Cloud_BigQuery_Storage_V1_ReadSession_System_Int32_Google_Api_Gax_Grpc_CallSettings_)
for the CreateReadSession method. If not set, defaults to 1.
+**adbc.bigquery.multiple_statement.statement_type**<br>
+ Optional. When executing multiple statements, limit
the type of statement returned. If not set, all types of statements are
returned.
+
+**adbc.bigquery.multiple_statement.statement_index**<br>
+ Optional. When executing multiple statements, specify
the result of the statement to be returned (Minimum value is 1). If not set,
the result of the first statement is returned.
+
+**adbc.bigquery.multiple_statement.evaluation_kind**<br>
+ Optional. When executing multiple statements, limit
the evaluation kind returned. If not set, all evaluation kinds are returned.
+
**adbc.bigquery.include_constraints_getobjects**<br>
Optional. Some callers do not need the constraint
details when they get the table information and can improve the speed of
obtaining the results. Setting this value to `"false"` will not include the
constraint details. The default value is `"true"`.
diff --git a/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs
b/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs
index 9be82bd5c..8b36e1db8 100644
--- a/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs
+++ b/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs
@@ -97,6 +97,15 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery
[JsonPropertyName("maxStreamCount")]
public int? MaxStreamCount { get; set; }
+ [JsonPropertyName("statementType")]
+ public string StatementType { get; set; } = string.Empty;
+
+ [JsonPropertyName("statementIndex")]
+ public int? StatementIndex { get; set; }
+
+ [JsonPropertyName("evaluationKind")]
+ public string EvaluationKind { get; set; } = string.Empty;
+
/// <summary>
/// How structs should be handled by the ADO.NET client for this
environment.
/// </summary>
diff --git a/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs
b/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs
index 5768b7e0c..07e1876b2 100644
--- a/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs
+++ b/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs
@@ -115,6 +115,21 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery
parameters.Add(BigQueryParameters.MaxFetchConcurrency,
testEnvironment.MaxStreamCount.Value.ToString());
}
+ if (!string.IsNullOrEmpty(testEnvironment.StatementType))
+ {
+ parameters.Add(BigQueryParameters.StatementType,
testEnvironment.StatementType);
+ }
+
+ if (testEnvironment.StatementIndex.HasValue)
+ {
+ parameters.Add(BigQueryParameters.StatementIndex,
testEnvironment.StatementIndex.Value.ToString());
+ }
+
+ if (!string.IsNullOrEmpty(testEnvironment.EvaluationKind))
+ {
+ parameters.Add(BigQueryParameters.EvaluationKind,
testEnvironment.EvaluationKind);
+ }
+
return parameters;
}
diff --git a/csharp/test/Drivers/BigQuery/DriverTests.cs
b/csharp/test/Drivers/BigQuery/DriverTests.cs
index 12f061a6a..15dafc0ec 100644
--- a/csharp/test/Drivers/BigQuery/DriverTests.cs
+++ b/csharp/test/Drivers/BigQuery/DriverTests.cs
@@ -346,5 +346,28 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery
}
}
}
+
+ /// <summary>
+ /// Validates if the driver can connect to a live server and
+ /// parse the results of multi-statements.
+ /// </summary>
+ [SkippableFact, Order(9)]
+ public void CanExecuteMultiStatementQuery()
+ {
+ foreach (BigQueryTestEnvironment environment in _environments)
+ {
+ AdbcConnection adbcConnection =
GetAdbcConnection(environment.Name);
+ AdbcStatement statement = adbcConnection.CreateStatement();
+ string query1 = "SELECT * FROM
bigquery-public-data.covid19_ecdc.covid_19_geographic_distribution_worldwide";
+ string query2 = "SELECT " +
+ "CAST(1.7976931348623157e+308 as FLOAT64) as number,
" +
+
"PARSE_NUMERIC(\"9.99999999999999999999999999999999E+28\") as decimal, " +
+
"PARSE_BIGNUMERIC(\"5.7896044618658097711785492504343953926634992332820282019728792003956564819968E+37\")
as big_decimal";
+ string combinedQuery = query1 + ";" + query2 + ";";
+ statement.SqlQuery = combinedQuery;
+ QueryResult queryResult = statement.ExecuteQuery();
+ Tests.DriverTests.CanExecuteQuery(queryResult, 61900,
environment.Name);
+ }
+ }
}
}
diff --git a/csharp/test/Drivers/BigQuery/readme.md
b/csharp/test/Drivers/BigQuery/readme.md
index 5d6db3d16..fdd1e18ac 100644
--- a/csharp/test/Drivers/BigQuery/readme.md
+++ b/csharp/test/Drivers/BigQuery/readme.md
@@ -47,6 +47,9 @@ The following values can be setup in the configuration
- **scopes** - Comma separated list (string) of scopes applied during the
test.
- **queryTimeout** - The timeout (in seconds) for a query. Similar to a
CommandTimeout.
- **maxStreamCount** - The max stream count.
+ - **statementType** - When executing multiple statements, limit the type of
statement returned.
+ - **statementIndex** - When executing multiple statements, specify the
result of the statement to be returned.
+ - **evaluationKind** - When executing multiple statements, limit the
evaluation kind returned.
- **includeTableConstraints** - Whether to include table constraints in the
GetObjects query.
- **largeResultsDestinationTable** - Sets the
[DestinationTable](https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.V2/latest/Google.Cloud.BigQuery.V2.QueryOptions#Google_Cloud_BigQuery_V2_QueryOptions_DestinationTable)
value of the QueryOptions if configured. Expects the format to be
`{projectId}.{datasetId}.{tableId}` to set the corresponding values in the
[TableReference](https://github.com/googleapis/google-api-dotnet-client/blob/6c415c73788b848711e47c6dd33c2f93c7
[...]
- **allowLargeResults** - Whether to allow large results .