yihua commented on code in PR #8441:
URL: https://github.com/apache/hudi/pull/8441#discussion_r1164508712


##########
hudi-aws/pom.xml:
##########
@@ -77,9 +77,8 @@
         <dependency>
             <groupId>com.amazonaws</groupId>
             <artifactId>dynamodb-lock-client</artifactId>
-            <version>${dynamodb.lockclient.version}</version>
+            <version>1.2.0</version>

Review Comment:
   nit: could you keep the version variable?



##########
hudi-aws/src/main/java/org/apache/hudi/aws/credentials/HoodieAWSCredentialsProviderFactory.java:
##########
@@ -30,16 +30,23 @@
  * Factory class for Hoodie AWSCredentialsProvider.
  */
 public class HoodieAWSCredentialsProviderFactory {
-  public static AWSCredentialsProvider getAwsCredentialsProvider(Properties 
props) {
+  public static AwsCredentialsProvider getAwsCredentialsProvider(Properties 
props) {
     return getAwsCredentialsProviderChain(props);
   }
 
-  private static AWSCredentialsProvider 
getAwsCredentialsProviderChain(Properties props) {
-    List<AWSCredentialsProvider> providers = new ArrayList<>();
-    providers.add(new HoodieConfigAWSCredentialsProvider(props));
-    providers.add(new DefaultAWSCredentialsProviderChain());
-    AWSCredentialsProviderChain providerChain = new 
AWSCredentialsProviderChain(providers);
-    providerChain.setReuseLastProvider(true);
+  private static AwsCredentialsProvider 
getAwsCredentialsProviderChain(Properties props) {
+    List<AwsCredentialsProvider> providers = new ArrayList<>();
+    HoodieConfigAWSCredentialsProvider hoodieConfigAWSCredentialsProvider = 
new HoodieConfigAWSCredentialsProvider(props);
+    if (hoodieConfigAWSCredentialsProvider.resolveCredentials() != null) {
+      providers.add(hoodieConfigAWSCredentialsProvider);
+    }
+    providers.add(DefaultCredentialsProvider.builder()

Review Comment:
   Does this provide default chain as before (`new 
DefaultAWSCredentialsProviderChain()`) or only a single credentials provider?



##########
hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java:
##########
@@ -246,22 +248,22 @@ public void updateTableSchema(String tableName, 
MessageType newSchema) {
       Table table = getTable(awsGlue, databaseName, tableName);
       Map<String, String> newSchemaMap = parquetSchemaToMapSchema(newSchema, 
config.getBoolean(HIVE_SUPPORT_TIMESTAMP_TYPE), false);
       List<Column> newColumns = getColumnsFromSchema(newSchemaMap);
-      StorageDescriptor sd = table.getStorageDescriptor();
-      sd.setColumns(newColumns);
-
-      final Date now = new Date();
-      TableInput updatedTableInput = new TableInput()
-          .withName(tableName)
-          .withTableType(table.getTableType())
-          .withParameters(table.getParameters())
-          .withPartitionKeys(table.getPartitionKeys())
-          .withStorageDescriptor(sd)
-          .withLastAccessTime(now)
-          .withLastAnalyzedTime(now);
-
-      UpdateTableRequest request = new UpdateTableRequest()
-          .withDatabaseName(databaseName)
-          .withTableInput(updatedTableInput);
+      StorageDescriptor sd = table.storageDescriptor();
+      StorageDescriptor partitionSD = sd.copy(copySd -> 
copySd.columns(newColumns));
+      final Instant now = Instant.now();
+      TableInput updatedTableInput = TableInput.builder()
+          .tableType(table.tableType())

Review Comment:
   table name is missed?



##########
hudi-aws/src/main/java/org/apache/hudi/aws/credentials/HoodieAWSCredentialsProviderFactory.java:
##########
@@ -30,16 +30,23 @@
  * Factory class for Hoodie AWSCredentialsProvider.
  */
 public class HoodieAWSCredentialsProviderFactory {
-  public static AWSCredentialsProvider getAwsCredentialsProvider(Properties 
props) {
+  public static AwsCredentialsProvider getAwsCredentialsProvider(Properties 
props) {
     return getAwsCredentialsProviderChain(props);
   }
 
-  private static AWSCredentialsProvider 
getAwsCredentialsProviderChain(Properties props) {
-    List<AWSCredentialsProvider> providers = new ArrayList<>();
-    providers.add(new HoodieConfigAWSCredentialsProvider(props));
-    providers.add(new DefaultAWSCredentialsProviderChain());
-    AWSCredentialsProviderChain providerChain = new 
AWSCredentialsProviderChain(providers);
-    providerChain.setReuseLastProvider(true);
+  private static AwsCredentialsProvider 
getAwsCredentialsProviderChain(Properties props) {
+    List<AwsCredentialsProvider> providers = new ArrayList<>();
+    HoodieConfigAWSCredentialsProvider hoodieConfigAWSCredentialsProvider = 
new HoodieConfigAWSCredentialsProvider(props);
+    if (hoodieConfigAWSCredentialsProvider.resolveCredentials() != null) {
+      providers.add(hoodieConfigAWSCredentialsProvider);

Review Comment:
   Previously there is no such check so wondering if the if check is needed.  
Could it just use default AWS credentials if `awsCredentials` is null?



##########
hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java:
##########
@@ -246,22 +248,22 @@ public void updateTableSchema(String tableName, 
MessageType newSchema) {
       Table table = getTable(awsGlue, databaseName, tableName);
       Map<String, String> newSchemaMap = parquetSchemaToMapSchema(newSchema, 
config.getBoolean(HIVE_SUPPORT_TIMESTAMP_TYPE), false);
       List<Column> newColumns = getColumnsFromSchema(newSchemaMap);
-      StorageDescriptor sd = table.getStorageDescriptor();
-      sd.setColumns(newColumns);
-
-      final Date now = new Date();
-      TableInput updatedTableInput = new TableInput()
-          .withName(tableName)
-          .withTableType(table.getTableType())
-          .withParameters(table.getParameters())
-          .withPartitionKeys(table.getPartitionKeys())
-          .withStorageDescriptor(sd)
-          .withLastAccessTime(now)
-          .withLastAnalyzedTime(now);
-
-      UpdateTableRequest request = new UpdateTableRequest()
-          .withDatabaseName(databaseName)
-          .withTableInput(updatedTableInput);
+      StorageDescriptor sd = table.storageDescriptor();
+      StorageDescriptor partitionSD = sd.copy(copySd -> 
copySd.columns(newColumns));

Review Comment:
   Here it makes a copy instead of in-place change.  Any reason of doing this?



##########
hudi-aws/src/main/java/org/apache/hudi/aws/cloudwatch/CloudWatchReporter.java:
##########
@@ -169,9 +168,9 @@ protected CloudWatchReporter(MetricRegistry registry,
     this.maxDatumsPerRequest = maxDatumsPerRequest;
   }
 
-  private static AmazonCloudWatchAsync getAmazonCloudWatchClient(Properties 
props) {
-    return AmazonCloudWatchAsyncClientBuilder.standard()
-        
.withCredentials(HoodieAWSCredentialsProviderFactory.getAwsCredentialsProvider(props))
+  private static CloudWatchAsyncClient getAmazonCloudWatchClient(Properties 
props) {
+    return CloudWatchAsyncClient.builder()

Review Comment:
   What does `.standard()` do before?  Is it by default in the builder now?



##########
hudi-aws/src/main/java/org/apache/hudi/aws/utils/DynamoTableUtils.java:
##########
@@ -0,0 +1,265 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at
+ *
+ *  http://aws.amazon.com/apache2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package org.apache.hudi.aws.utils;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.core.exception.SdkClientException;
+import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
+import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DeleteTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.ResourceInUseException;
+import 
software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
+import software.amazon.awssdk.services.dynamodb.model.TableDescription;
+import software.amazon.awssdk.services.dynamodb.model.TableStatus;
+
+/**
+ * Reused code from 
https://github.com/aws/aws-sdk-java-v2/blob/master/services/dynamodb/src/test/java/utils/test/util/TableUtils.java
+ *
+ * Utility methods for working with DynamoDB tables.
+ *
+ * <pre class="brush: java">
+ * // ... create DynamoDB table ...
+ * try {
+ *     waitUntilActive(dynamoDB, myTableName());
+ * } catch (SdkClientException e) {
+ *     // table didn't become active
+ * }
+ * // ... start making calls to table ...
+ * </pre>
+ */
+public class DynamoTableUtils {

Review Comment:
   Why do we have to copy the code from the library?



##########
hudi-utilities/pom.xml:
##########
@@ -456,6 +468,14 @@
       <scope>test</scope>
     </dependency>
 
+    <!-- AWS Services -->
+    <!-- 
https://mvnrepository.com/artifact/software.amazon.awssdk/aws-java-sdk-sqs -->
+    <dependency>
+      <groupId>software.amazon.awssdk</groupId>
+      <artifactId>sqs</artifactId>
+      <version>${aws.sdk.version}</version>
+    </dependency>
+    

Review Comment:
   Should this be in `hudi-aws` module?  Why is it required now?



##########
hudi-utilities/pom.xml:
##########
@@ -136,6 +136,18 @@
       <artifactId>hudi-aws</artifactId>
       <version>${project.version}</version>
     </dependency>
+
+    <dependency>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpclient</artifactId>
+      <version>${aws.sdk.httpclient.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpcore</artifactId>
+      <version>${aws.sdk.httpcore.version}</version>
+    </dependency>
+

Review Comment:
   `hudi-aws` module already adds this so there is no need to add both 
dependencies again?



##########
packaging/hudi-utilities-bundle/pom.xml:
##########
@@ -153,6 +153,10 @@
                   <include>commons-codec:commons-codec</include>
                   <include>commons-io:commons-io</include>
                   <include>org.openjdk.jol:jol-core</include>
+                  <include>org.apache.hudi:hudi-aws</include>
+                  <include>software.amazon.awssdk:*</include>
+                  <include>org.apache.httpcomponents:httpclient</include>
+                  <include>org.apache.httpcomponents:httpcore</include>

Review Comment:
   We should avoid this.  If AWS ecosystem is used, `hudi-aws-bundle` should be 
used with `hudi-utilities-bundle`: 
https://hudi.apache.org/releases/release-0.12.2#bundle-updates.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to