CurtHagenlocher commented on code in PR #2655:
URL: https://github.com/apache/arrow-adbc/pull/2655#discussion_r2054534280
##########
csharp/src/Drivers/BigQuery/BigQueryConnection.cs:
##########
@@ -58,14 +57,44 @@ public class BigQueryConnection : AdbcConnection
public BigQueryConnection(IReadOnlyDictionary<string, string>
properties)
{
- this.properties = properties;
+ if (properties == null)
+ {
+ this.properties = new Dictionary<string, string>();
+ }
+ else
+ {
+ this.properties = properties.ToDictionary(k => k.Key, v =>
v.Value);
+ }
// add the default value for now and set to true until C# has a
BigDecimal
- Dictionary<string, string> modifiedProperties =
this.properties.ToDictionary(k => k.Key, v => v.Value);
- modifiedProperties[BigQueryParameters.LargeDecimalsAsString] =
BigQueryConstants.TreatLargeDecimalAsString;
- this.properties = new ReadOnlyDictionary<string,
string>(modifiedProperties);
+ this.properties[BigQueryParameters.LargeDecimalsAsString] =
BigQueryConstants.TreatLargeDecimalAsString;
+ this.httpClient = new HttpClient();
+
+ if
(this.properties.TryGetValue(BigQueryParameters.MaximumRetryAttempts, out
string? sRetryAttempts) &&
+ int.TryParse(sRetryAttempts, out int retries) &&
+ retries >= 0)
+ {
+ MaxRetryAttempts = retries;
+ }
+
+ if (this.properties.TryGetValue(BigQueryParameters.RetryDelayMs,
out string? sRetryDelay) &&
+ int.TryParse(sRetryDelay, out int delay) &&
+ delay >= 0)
+ {
+ RetryDelayMs = delay;
+ }
}
+ public Func<Task>? UpdateToken { get; set; }
Review Comment:
Should this get a doc comment?
##########
csharp/src/Drivers/BigQuery/BigQueryConnection.cs:
##########
@@ -337,33 +419,49 @@ private IArrowArray[] GetCatalogs(
StringArray.Builder catalogNameBuilder = new StringArray.Builder();
List<IArrowArray?> catalogDbSchemasValues = new
List<IArrowArray?>();
string catalogRegexp = PatternToRegEx(catalogPattern);
- PagedEnumerable<ProjectList, CloudProject>? catalogs =
this.client?.ListProjects();
+ PagedEnumerable<ProjectList, CloudProject>? catalogs;
+ List<string> projectIds = new List<string>();
- if (catalogs != null)
+ try
{
- List<string> projectIds = catalogs.Select(x =>
x.ProjectId).ToList();
+ Func<Task<PagedEnumerable<ProjectList, CloudProject>?>> func =
() => Task.Run(() =>
+ {
+ // stick with this call because PagedAsyncEnumerable has
different behaviors for selecting items
+ return Client?.ListProjects();
+ });
- if (this.includePublicProjectIds &&
!projectIds.Contains(publicProjectId))
- projectIds.Add(publicProjectId);
+ catalogs =
ExecuteWithRetriesAsync<PagedEnumerable<ProjectList,
CloudProject>?>(func).GetAwaiter().GetResult();
+
+ if (catalogs != null)
+ {
+ projectIds = catalogs.Select(x => x.ProjectId).ToList();
+ }
+ }
+ catch
+ {
+ // TODO: Logging
+ }
Review Comment:
We weren't swallowing exceptions here before; why are we doing it now?
Consider adding a comment with the justification.
##########
csharp/src/Drivers/BigQuery/BigQueryConnection.cs:
##########
@@ -434,7 +538,7 @@ private StructArray GetDbSchemas(
nullBitmapBuffer.Build());
}
- private StructArray GetTableSchemas(
+ StructArray GetTableSchemas(
Review Comment:
We should have a documented set of "preferred coding conventions". Much of the
other C# code in this repo explicitly uses "private", and removing it here has
added a lot of unrelated churn to this PR.
##########
csharp/src/Drivers/BigQuery/BigQueryConnection.cs:
##########
@@ -36,17 +37,15 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery
/// <summary>
/// BigQuery-specific implementation of <see cref="AdbcConnection"/>
/// </summary>
- public class BigQueryConnection : AdbcConnection
+ public class BigQueryConnection : AdbcConnection, ITokenProtectedResource
Review Comment:
I can't say I'm super happy about this, but the class was already `public`,
so we can defer a change until perhaps there's better platform support
for the scenario.
##########
csharp/src/Drivers/BigQuery/BigQueryStatement.cs:
##########
@@ -127,51 +198,57 @@ public override QueryResult ExecuteQuery()
}
}
}
+
ReadSession rs = new ReadSession { Table = table, DataFormat =
DataFormat.Arrow };
- ReadSession rrs = readClient.CreateReadSession("projects/" +
results.TableReference.ProjectId, rs, maxStreamCount);
+ ReadSession rrs =
clientMgr.ReadClient.CreateReadSession("projects/" +
results.TableReference.ProjectId, rs, maxStreamCount);
Review Comment:
Should this have a retry? Given that there's a `CreateReadSessionAsync` I
imagine it must be making a network request.
##########
csharp/src/Drivers/BigQuery/RetryManager.cs:
##########
@@ -0,0 +1,90 @@
+
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System;
+using System.Threading.Tasks;
+
+namespace Apache.Arrow.Adbc.Drivers.BigQuery
+{
+ /// <summary>
+ /// Class that will retry calling a method with an exponential backoff.
+ /// </summary>
+ class RetryManager
+ {
+ public static async Task<T> ExecuteWithRetriesAsync<T>(
+ Func<Task<T>> action,
+ int maxRetries = 5,
+ int initialDelayMilliseconds = 200)
+ {
+ return await ExecuteWithRetriesAsync<T>(null, action, maxRetries,
initialDelayMilliseconds);
+ }
+
+ public static async Task<T> ExecuteWithRetriesAsync<T>(
+ ITokenProtectedResource? tokenProtectedResource,
+ Func<Task<T>> action,
+ int maxRetries = 5,
+ int initialDelayMilliseconds = 200)
+ {
+ if (action == null)
+ {
+ throw new AdbcException("There is no method to retry",
AdbcStatusCode.InvalidArgument);
+ }
+
+ int retryCount = 0;
+ int delay = initialDelayMilliseconds;
+
+ while (retryCount < maxRetries)
+ {
+ try
+ {
+ T result = await action();
+ return result;
+ }
+ catch (Exception ex)
+ {
+ retryCount++;
+ if (retryCount >= maxRetries)
+ {
+ if ((tokenProtectedResource?.UpdateToken != null))
+ {
+ if
(tokenProtectedResource?.TokenRequiresUpdate(ex) == true)
+ {
+ throw new AdbcException($"Cannot update access
token after {maxRetries} tries", AdbcStatusCode.Unauthenticated, ex);
+ }
+ }
+
+ throw new AdbcException($"Cannot execute
{action.Method.Name} after {maxRetries} tries", AdbcStatusCode.UnknownError,
ex);
+ }
+
+ if ((tokenProtectedResource?.UpdateToken != null))
+ {
+ if (tokenProtectedResource.TokenRequiresUpdate(ex) ==
true)
+ {
+ await tokenProtectedResource.UpdateToken();
+ }
+ }
+
+ await Task.Delay(delay);
+ delay *= 2;
Review Comment:
I think this should stop doubling at some point. After ten doublings of the
default 200 ms, a single wait is nearly 3.5 minutes (on top of roughly the same
amount already spent waiting). If the operation would succeed around thirty
seconds into that eleventh wait, then we still have to wait almost three more
minutes before checking again.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]