[GitHub] [beam] reuvenlax commented on a diff in pull request #24145: Handle updates to table schema when using Storage API writes.

GitBox Wed, 11 Jan 2023 09:56:21 -0800


reuvenlax commented on code in PR #24145:
URL: https://github.com/apache/beam/pull/24145#discussion_r1067295939



##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiWritesShardedRecords.java:
##########
@@ -385,34 +399,57 @@ public void process(
 
       Supplier<String> getOrCreateStream =
           () -> getOrCreateStream(tableId, streamName, streamOffset, 
idleTimer, datasetService);
-      Function<Boolean, AppendClientInfo> getAppendClientInfo =
-          createAppendClient -> {
-            try {
-              @Nullable
-              TableSchema tableSchema =
-                  messageConverters
-                      .get(element.getKey().getKey(), dynamicDestinations, 
datasetService)
-                      .getTableSchema();
-              AppendClientInfo info =
-                  new AppendClientInfo(
-                      tableSchema,
-                      // Make sure that the client is always closed in a 
different thread to avoid
-                      // blocking.
-                      client -> runAsyncIgnoreFailure(closeWriterExecutor, 
client::close));
-              if (createAppendClient) {
-                info = info.createAppendClient(datasetService, 
getOrCreateStream, false);
-              }
-              return info;
-            } catch (Exception e) {
-              throw new RuntimeException(e);
-            }
-          };
-
       AtomicReference<AppendClientInfo> appendClientInfo =
           new AtomicReference<>(
-              APPEND_CLIENTS.get(element.getKey(), () -> 
getAppendClientInfo.apply(true)));
+              APPEND_CLIENTS.get(
+                  element.getKey(),
+                  () -> {
+                    @Nullable TableSchema tableSchema;
+                    if (autoUpdateSchema && updatedSchema.read() != null) {
+                      // We've seen an updated schema, so we use that.
+                      tableSchema = updatedSchema.read();
+                    } else {
+                      // Start off with the base schema. As we get notified of 
schema updates, we
+                      // will
+                      // update the
+                      // descriptor.
+                      tableSchema =
+                          messageConverters
+                              .get(element.getKey().getKey(), 
dynamicDestinations, datasetService)
+                              .getTableSchema();
+                    }
+                    return AppendClientInfo.of(
+                            tableSchema,
+                            // Make sure that the client is always closed in a 
different thread to
+                            // avoid
+                            // blocking.
+                            client -> 
runAsyncIgnoreFailure(closeWriterExecutor, client::close))
+                        .withAppendClient(datasetService, getOrCreateStream, 
false);
+                  }));
+      if (autoUpdateSchema && updatedSchema.read() != null) {
+        if (appendClientInfo.get().hasSchemaChanged(updatedSchema.read())) {
+          appendClientInfo.set(
+              AppendClientInfo.of(
+                  updatedSchema.read(), 
appendClientInfo.get().getCloseAppendClient()));
+          // TODO: invalidate?

Review Comment:
   Added the invalidate call, resolving this TODO.



##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiWritesShardedRecords.java:
##########
@@ -640,6 +678,18 @@ public void process(
         }
         appendSplitDistribution.update(numAppends);
 
+        if (updatedSchemaReturned.get() != null) {
+          // Update the table schema and clear the append client.
+          if 
(appendClientInfo.get().hasSchemaChanged(updatedSchemaReturned.get())) {
+            appendClientInfo.set(
+                AppendClientInfo.of(
+                    updatedSchemaReturned.get(), 
appendClientInfo.get().getCloseAppendClient()));
+            // TODO: invalidate?

Review Comment:
   Added the invalidate call, resolving this TODO.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [beam] reuvenlax commented on a diff in pull request #24145: Handle updates to table schema when using Storage API writes.

Reply via email to