giovannifumarola commented on a change in pull request #1823:
URL: https://github.com/apache/iceberg/pull/1823#discussion_r530607329
##########
File path: aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java
##########
@@ -114,6 +115,33 @@
*/
public static final String S3FILEIO_ACL = "s3fileio.acl";
+ /**
+ * If {@link org.apache.iceberg.aws.glue.GlueCatalog} should use external
lock or not, default to false.
+ * If set to true, it will use DynamoDB to enforce locking during commits.
+ */
+ public static final String GLUE_CATALOG_LOCK_ENABLED =
"gluecatalog.lock.enabled";
+ public static final boolean GLUE_CATALOG_LOCK_ENABLED_DEFAULT = false;
+
+ /**
+ * The DynamoDB table used for locking.
+ * One lock table is designed to be used for only one catalog.
Review comment:
This is not clear. Are we using a single table for a single customer?
##########
File path: aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java
##########
@@ -114,6 +115,33 @@
*/
public static final String S3FILEIO_ACL = "s3fileio.acl";
+ /**
+ * If {@link org.apache.iceberg.aws.glue.GlueCatalog} should use external
lock or not, default to false.
+ * If set to true, it will use DynamoDB to enforce locking during commits.
+ */
+ public static final String GLUE_CATALOG_LOCK_ENABLED =
"gluecatalog.lock.enabled";
Review comment:
NIT:
this should be built from a shared prefix, declared first so that it is not a
forward reference:
public static final String AWS_GLUE_CATALOG = "aws.gluecatalog.";
public static final String AWS_GLUE_CATALOG_LOCK_ENABLED = AWS_GLUE_CATALOG
+ "lock.enabled";
You can reuse AWS_GLUE_CATALOG for the other properties.
I would also add the "aws" prefix for readability.
##########
File path: aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java
##########
@@ -182,6 +218,14 @@ public AwsProperties(Map<String, String> properties) {
this.s3FileIoAcl = ObjectCannedACL.fromValue(aclType);
Preconditions.checkArgument(s3FileIoAcl == null ||
!s3FileIoAcl.equals(ObjectCannedACL.UNKNOWN_TO_SDK_VERSION),
"Cannot support S3 CannedACL " + aclType);
+
+ this.glueCatalogLockEnabled = PropertyUtil.propertyAsBoolean(properties,
+ GLUE_CATALOG_LOCK_ENABLED, GLUE_CATALOG_LOCK_ENABLED_DEFAULT);
+ this.glueCatalogLockTable =
properties.getOrDefault(GLUE_CATALOG_LOCK_TABLE,
GLUE_CATALOG_LOCK_TABLE_DEFAULT);
Review comment:
NIT: You can use PropertyUtil.propertyAsString here, consistent with the
propertyAsBoolean call above.
##########
File path: aws/src/main/java/org/apache/iceberg/aws/glue/DynamoLockManager.java
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.aws.glue;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.aws.AwsClientUtil;
+import org.apache.iceberg.aws.AwsProperties;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
+import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
+import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
+import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
+import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement;
+import software.amazon.awssdk.services.dynamodb.model.KeyType;
+import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
+import
software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
+import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType;
+import software.amazon.awssdk.services.dynamodb.model.TableStatus;
+
+class DynamoLockManager implements LockManager {
+
+ private static final Logger LOG =
LoggerFactory.getLogger(DynamoLockManager.class);
+
+ private static final String LOCK_TABLE_COL_TABLE_ID = "tableId";
+ private static final String LOCK_TABLE_COL_EXPIRE_TS_MILLIS =
"expireTimestampMillis";
+
+ private static final List<KeySchemaElement> LOCK_TABLE_SCHEMA =
Lists.newArrayList(
+ KeySchemaElement.builder()
+ .attributeName(LOCK_TABLE_COL_TABLE_ID)
+ .keyType(KeyType.HASH)
+ .build()
+ );
+
+ private static final List<AttributeDefinition> LOCK_TABLE_COL_DEFINITIONS =
Lists.newArrayList(
+ AttributeDefinition.builder()
+ .attributeName(LOCK_TABLE_COL_TABLE_ID)
+ .attributeType(ScalarAttributeType.S)
+ .build()
+ );
+
+ private final DynamoDbClient dynamo;
+ private final AwsProperties awsProperties;
+
+ DynamoLockManager(AwsProperties awsProperties) {
+ this(AwsClientUtil.defaultDynamoClient(), awsProperties);
+ }
+
+ DynamoLockManager(DynamoDbClient dynamo, AwsProperties awsProperties) {
+ this.dynamo = dynamo;
+ this.awsProperties = awsProperties;
+ ensureLockTableExists();
+ }
+
+ private void ensureLockTableExists() {
+ ensureTableExists(awsProperties.glueCatalogLockTable(), LOCK_TABLE_SCHEMA,
LOCK_TABLE_COL_DEFINITIONS);
+ }
+
+ private void ensureTableExists(String tableName, List<KeySchemaElement>
schema,
+ List<AttributeDefinition> definitions) {
+ try {
+ dynamo.describeTable(DescribeTableRequest.builder()
+ .tableName(tableName)
+ .build());
+ } catch (ResourceNotFoundException e) {
+ LOG.info("Glue lock DynamoDB table <{}> not found, try to create",
tableName);
+ dynamo.createTable(CreateTableRequest.builder()
+ .tableName(tableName)
+ .keySchema(schema)
+ .attributeDefinitions(definitions)
+ .billingMode(BillingMode.PAY_PER_REQUEST)
+ .build());
+
+ boolean isTableActive = false;
+ while (!isTableActive) {
+ LOG.info("waiting for DynamoDB table <{}> to be active", tableName);
+ try {
+ Thread.sleep(5000);
+ } catch (InterruptedException ie) {
+ LOG.warn("Glue lock DynamoDB table creation sleep interrupted", e);
+ }
+ DescribeTableResponse describeTableResponse =
dynamo.describeTable(DescribeTableRequest.builder()
+ .tableName(tableName)
+ .build());
+ isTableActive =
describeTableResponse.table().tableStatus().equals(TableStatus.ACTIVE);
+ }
+ }
+ }
+
+
+ @Override
+ public boolean tryLock(String database, String table, long expireMillis) {
+ String tableId = tableId(database, table);
+
+ Map<String, AttributeValue> key = Maps.newHashMap();
+ key.put(LOCK_TABLE_COL_TABLE_ID,
AttributeValue.builder().s(tableId).build());
+ key.put(LOCK_TABLE_COL_EXPIRE_TS_MILLIS, AttributeValue.builder().n(
+ Long.toString(System.currentTimeMillis() + expireMillis)
+ ).build());
+
+ Map<String, AttributeValue> expressionValues = Maps.newHashMap();
+ expressionValues.put(":tid", AttributeValue.builder().s(tableId).build());
+ expressionValues.put(":ts",
AttributeValue.builder().n(Long.toString(System.currentTimeMillis())).build());
+
+ try {
+
+ dynamo.putItem(PutItemRequest.builder()
+ .tableName(awsProperties.glueCatalogLockTable())
+ .item(key)
+ // succeed only if there is no lock, or the lock is there but
already expired
+ .conditionExpression("attribute_not_exists(" +
+ LOCK_TABLE_COL_TABLE_ID + ") OR (" +
+ LOCK_TABLE_COL_TABLE_ID + " = :tid AND " +
+ LOCK_TABLE_COL_EXPIRE_TS_MILLIS + " < :ts)")
+ .expressionAttributeValues(expressionValues)
+ .build());
+ return true;
+ } catch (Exception e) {
+ // most likely it's ConditionalCheckFailedException, but we will catch
any exception
+ LOG.debug("Acquiring lock {}.{} failed", database, table, e);
+ return false;
+ }
+ }
+
+ @Override
+ public void unlock(String database, String table) {
+ String tableId = tableId(database, table);
+ Map<String, AttributeValue> key = Maps.newHashMap();
+ key.put(LOCK_TABLE_COL_TABLE_ID,
AttributeValue.builder().s(tableId).build());
+
+ dynamo.deleteItem(DeleteItemRequest.builder()
Review comment:
Add a log line here.
We should inform the user if the item does not exist in the table.
##########
File path: aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java
##########
@@ -114,6 +115,33 @@
*/
public static final String S3FILEIO_ACL = "s3fileio.acl";
+ /**
+ * If {@link org.apache.iceberg.aws.glue.GlueCatalog} should use external
lock or not, default to false.
+ * If set to true, it will use DynamoDB to enforce locking during commits.
+ */
+ public static final String GLUE_CATALOG_LOCK_ENABLED =
"gluecatalog.lock.enabled";
+ public static final boolean GLUE_CATALOG_LOCK_ENABLED_DEFAULT = false;
+
+ /**
+ * The DynamoDB table used for locking.
+ * One lock table is designed to be used for only one catalog.
+ * It is recommended to use a different table name for each Glue catalog.
+ * If the table does not exist, it will be created at runtime.
+ */
+ public static final String GLUE_CATALOG_LOCK_TABLE =
"gluecatalog.lock.table";
+ public static final String GLUE_CATALOG_LOCK_TABLE_DEFAULT =
"IcebergGlueCatalogLockTable";
+
+ /**
+ * After the given time in milliseconds, the process will give up the
attempt to acquire a lock.
+ */
+ public static final String GLUE_CATALOG_LOCK_WAIT = "gluecatalog.lock.wait";
Review comment:
NIT: name this "timeout" rather than "wait", e.g. GLUE_CATALOG_LOCK_TIMEOUT =
"gluecatalog.lock.timeout".
##########
File path: aws/src/main/java/org/apache/iceberg/aws/glue/DynamoLockManager.java
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.aws.glue;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.aws.AwsClientUtil;
+import org.apache.iceberg.aws.AwsProperties;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
+import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
+import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
+import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
+import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement;
+import software.amazon.awssdk.services.dynamodb.model.KeyType;
+import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
+import
software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
+import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType;
+import software.amazon.awssdk.services.dynamodb.model.TableStatus;
+
+class DynamoLockManager implements LockManager {
+
+ private static final Logger LOG =
LoggerFactory.getLogger(DynamoLockManager.class);
+
+ private static final String LOCK_TABLE_COL_TABLE_ID = "tableId";
+ private static final String LOCK_TABLE_COL_EXPIRE_TS_MILLIS =
"expireTimestampMillis";
+
+ private static final List<KeySchemaElement> LOCK_TABLE_SCHEMA =
Lists.newArrayList(
+ KeySchemaElement.builder()
+ .attributeName(LOCK_TABLE_COL_TABLE_ID)
+ .keyType(KeyType.HASH)
+ .build()
+ );
+
+ private static final List<AttributeDefinition> LOCK_TABLE_COL_DEFINITIONS =
Lists.newArrayList(
+ AttributeDefinition.builder()
+ .attributeName(LOCK_TABLE_COL_TABLE_ID)
+ .attributeType(ScalarAttributeType.S)
+ .build()
+ );
+
+ private final DynamoDbClient dynamo;
Review comment:
NIT: rename this field to dynamoDbClient.
##########
File path: aws/src/main/java/org/apache/iceberg/aws/glue/DefaultLockManager.java
##########
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.aws.glue;
+
+public class DefaultLockManager implements LockManager {
+
Review comment:
NIT: Javadoc.
e.g.
/** Default implementation of the lock manager. It always returns true, etc.
It is mainly used for testing purposes or when no external locking system is
needed.
*/
##########
File path: aws/src/main/java/org/apache/iceberg/aws/glue/DynamoLockManager.java
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.aws.glue;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.aws.AwsClientUtil;
+import org.apache.iceberg.aws.AwsProperties;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
+import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
+import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
+import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
+import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement;
+import software.amazon.awssdk.services.dynamodb.model.KeyType;
+import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
+import
software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
+import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType;
+import software.amazon.awssdk.services.dynamodb.model.TableStatus;
+
+class DynamoLockManager implements LockManager {
Review comment:
NIT: Javadoc.
##########
File path: aws/src/main/java/org/apache/iceberg/aws/glue/DynamoLockManager.java
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.aws.glue;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.aws.AwsClientUtil;
+import org.apache.iceberg.aws.AwsProperties;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
+import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
+import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
+import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
+import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement;
+import software.amazon.awssdk.services.dynamodb.model.KeyType;
+import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
+import
software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
+import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType;
+import software.amazon.awssdk.services.dynamodb.model.TableStatus;
+
+class DynamoLockManager implements LockManager {
+
+ private static final Logger LOG =
LoggerFactory.getLogger(DynamoLockManager.class);
+
+ private static final String LOCK_TABLE_COL_TABLE_ID = "tableId";
+ private static final String LOCK_TABLE_COL_EXPIRE_TS_MILLIS =
"expireTimestampMillis";
+
+ private static final List<KeySchemaElement> LOCK_TABLE_SCHEMA =
Lists.newArrayList(
+ KeySchemaElement.builder()
+ .attributeName(LOCK_TABLE_COL_TABLE_ID)
+ .keyType(KeyType.HASH)
+ .build()
+ );
+
+ private static final List<AttributeDefinition> LOCK_TABLE_COL_DEFINITIONS =
Lists.newArrayList(
+ AttributeDefinition.builder()
+ .attributeName(LOCK_TABLE_COL_TABLE_ID)
+ .attributeType(ScalarAttributeType.S)
+ .build()
+ );
+
+ private final DynamoDbClient dynamo;
+ private final AwsProperties awsProperties;
+
+ DynamoLockManager(AwsProperties awsProperties) {
+ this(AwsClientUtil.defaultDynamoClient(), awsProperties);
+ }
+
+ DynamoLockManager(DynamoDbClient dynamo, AwsProperties awsProperties) {
+ this.dynamo = dynamo;
+ this.awsProperties = awsProperties;
+ ensureLockTableExists();
+ }
+
+ private void ensureLockTableExists() {
+ ensureTableExists(awsProperties.glueCatalogLockTable(), LOCK_TABLE_SCHEMA,
LOCK_TABLE_COL_DEFINITIONS);
+ }
+
+ private void ensureTableExists(String tableName, List<KeySchemaElement>
schema,
+ List<AttributeDefinition> definitions) {
+ try {
+ dynamo.describeTable(DescribeTableRequest.builder()
+ .tableName(tableName)
+ .build());
+ } catch (ResourceNotFoundException e) {
+ LOG.info("Glue lock DynamoDB table <{}> not found, try to create",
tableName);
+ dynamo.createTable(CreateTableRequest.builder()
+ .tableName(tableName)
+ .keySchema(schema)
+ .attributeDefinitions(definitions)
+ .billingMode(BillingMode.PAY_PER_REQUEST)
+ .build());
+
+ boolean isTableActive = false;
+ while (!isTableActive) {
+ LOG.info("waiting for DynamoDB table <{}> to be active", tableName);
+ try {
+ Thread.sleep(5000);
Review comment:
Should this be added as a config? Or is it a standard AWS timeout?
##########
File path: aws/src/main/java/org/apache/iceberg/aws/glue/DynamoLockManager.java
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.aws.glue;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.aws.AwsClientUtil;
+import org.apache.iceberg.aws.AwsProperties;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
+import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
+import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
+import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
+import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement;
+import software.amazon.awssdk.services.dynamodb.model.KeyType;
+import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
+import
software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
+import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType;
+import software.amazon.awssdk.services.dynamodb.model.TableStatus;
+
+class DynamoLockManager implements LockManager {
+
+ private static final Logger LOG =
LoggerFactory.getLogger(DynamoLockManager.class);
+
+ private static final String LOCK_TABLE_COL_TABLE_ID = "tableId";
+ private static final String LOCK_TABLE_COL_EXPIRE_TS_MILLIS =
"expireTimestampMillis";
+
+ private static final List<KeySchemaElement> LOCK_TABLE_SCHEMA =
Lists.newArrayList(
+ KeySchemaElement.builder()
+ .attributeName(LOCK_TABLE_COL_TABLE_ID)
+ .keyType(KeyType.HASH)
+ .build()
+ );
+
+ private static final List<AttributeDefinition> LOCK_TABLE_COL_DEFINITIONS =
Lists.newArrayList(
+ AttributeDefinition.builder()
+ .attributeName(LOCK_TABLE_COL_TABLE_ID)
+ .attributeType(ScalarAttributeType.S)
+ .build()
+ );
+
+ private final DynamoDbClient dynamo;
+ private final AwsProperties awsProperties;
+
+ DynamoLockManager(AwsProperties awsProperties) {
+ this(AwsClientUtil.defaultDynamoClient(), awsProperties);
+ }
+
+ DynamoLockManager(DynamoDbClient dynamo, AwsProperties awsProperties) {
+ this.dynamo = dynamo;
+ this.awsProperties = awsProperties;
+ ensureLockTableExists();
+ }
+
+ private void ensureLockTableExists() {
+ ensureTableExists(awsProperties.glueCatalogLockTable(), LOCK_TABLE_SCHEMA,
LOCK_TABLE_COL_DEFINITIONS);
+ }
+
+ private void ensureTableExists(String tableName, List<KeySchemaElement>
schema,
+ List<AttributeDefinition> definitions) {
+ try {
+ dynamo.describeTable(DescribeTableRequest.builder()
+ .tableName(tableName)
+ .build());
+ } catch (ResourceNotFoundException e) {
+ LOG.info("Glue lock DynamoDB table <{}> not found, try to create",
tableName);
+ dynamo.createTable(CreateTableRequest.builder()
+ .tableName(tableName)
+ .keySchema(schema)
+ .attributeDefinitions(definitions)
+ .billingMode(BillingMode.PAY_PER_REQUEST)
Review comment:
Why pay-per-request instead of provisioned?
##########
File path: aws/src/main/java/org/apache/iceberg/aws/glue/LockManager.java
##########
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.aws.glue;
+
+interface LockManager {
+
+ /**
+ * Try to lock a table once
+ * @param database Glue database name
+ * @param table Glue table name
+ * @param expireMillis max duration to hold the lock in milliseconds
+ * @return if lock acquisition succeeded or not
+ */
+ boolean tryLock(String database, String table, long expireMillis);
Review comment:
This code only locks at the table level. I think in the future we can add
additional methods here, e.g. depending on the implementation we could have
locks at the row level, folder level, etc.
##########
File path: aws/src/main/java/org/apache/iceberg/aws/glue/DynamoLockManager.java
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.aws.glue;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.aws.AwsClientUtil;
+import org.apache.iceberg.aws.AwsProperties;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
+import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
+import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
+import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
+import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement;
+import software.amazon.awssdk.services.dynamodb.model.KeyType;
+import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
+import
software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
+import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType;
+import software.amazon.awssdk.services.dynamodb.model.TableStatus;
+
+class DynamoLockManager implements LockManager {
+
+ private static final Logger LOG =
LoggerFactory.getLogger(DynamoLockManager.class);
+
+ private static final String LOCK_TABLE_COL_TABLE_ID = "tableId";
+ private static final String LOCK_TABLE_COL_EXPIRE_TS_MILLIS =
"expireTimestampMillis";
+
+ private static final List<KeySchemaElement> LOCK_TABLE_SCHEMA =
Lists.newArrayList(
+ KeySchemaElement.builder()
+ .attributeName(LOCK_TABLE_COL_TABLE_ID)
+ .keyType(KeyType.HASH)
+ .build()
+ );
+
+ private static final List<AttributeDefinition> LOCK_TABLE_COL_DEFINITIONS =
Lists.newArrayList(
+ AttributeDefinition.builder()
+ .attributeName(LOCK_TABLE_COL_TABLE_ID)
+ .attributeType(ScalarAttributeType.S)
+ .build()
+ );
+
+ private final DynamoDbClient dynamo;
+ private final AwsProperties awsProperties;
+
+ DynamoLockManager(AwsProperties awsProperties) {
+ this(AwsClientUtil.defaultDynamoClient(), awsProperties);
+ }
+
+ DynamoLockManager(DynamoDbClient dynamo, AwsProperties awsProperties) {
+ this.dynamo = dynamo;
+ this.awsProperties = awsProperties;
+ ensureLockTableExists();
+ }
+
+ private void ensureLockTableExists() {
+ ensureTableExists(awsProperties.glueCatalogLockTable(), LOCK_TABLE_SCHEMA,
LOCK_TABLE_COL_DEFINITIONS);
+ }
+
+ private void ensureTableExists(String tableName, List<KeySchemaElement>
schema,
+ List<AttributeDefinition> definitions) {
+ try {
+ dynamo.describeTable(DescribeTableRequest.builder()
+ .tableName(tableName)
+ .build());
+ } catch (ResourceNotFoundException e) {
+ LOG.info("Glue lock DynamoDB table <{}> not found, try to create",
tableName);
+ dynamo.createTable(CreateTableRequest.builder()
+ .tableName(tableName)
+ .keySchema(schema)
+ .attributeDefinitions(definitions)
+ .billingMode(BillingMode.PAY_PER_REQUEST)
+ .build());
+
+ boolean isTableActive = false;
+ while (!isTableActive) {
+ LOG.info("waiting for DynamoDB table <{}> to be active", tableName);
+ try {
+ Thread.sleep(5000);
+ } catch (InterruptedException ie) {
+ LOG.warn("Glue lock DynamoDB table creation sleep interrupted", e);
+ }
+ DescribeTableResponse describeTableResponse =
dynamo.describeTable(DescribeTableRequest.builder()
+ .tableName(tableName)
+ .build());
+ isTableActive =
describeTableResponse.table().tableStatus().equals(TableStatus.ACTIVE);
+ }
+ }
+ }
+
+
+ @Override
+ public boolean tryLock(String database, String table, long expireMillis) {
+ String tableId = tableId(database, table);
+
+ Map<String, AttributeValue> key = Maps.newHashMap();
+ key.put(LOCK_TABLE_COL_TABLE_ID,
AttributeValue.builder().s(tableId).build());
+ key.put(LOCK_TABLE_COL_EXPIRE_TS_MILLIS, AttributeValue.builder().n(
+ Long.toString(System.currentTimeMillis() + expireMillis)
+ ).build());
+
+ Map<String, AttributeValue> expressionValues = Maps.newHashMap();
+ expressionValues.put(":tid", AttributeValue.builder().s(tableId).build());
+ expressionValues.put(":ts",
AttributeValue.builder().n(Long.toString(System.currentTimeMillis())).build());
+
+ try {
+
+ dynamo.putItem(PutItemRequest.builder()
+ .tableName(awsProperties.glueCatalogLockTable())
+ .item(key)
+ // succeed only if there is no lock, or the lock is there but
already expired
+ .conditionExpression("attribute_not_exists(" +
+ LOCK_TABLE_COL_TABLE_ID + ") OR (" +
+ LOCK_TABLE_COL_TABLE_ID + " = :tid AND " +
+ LOCK_TABLE_COL_EXPIRE_TS_MILLIS + " < :ts)")
+ .expressionAttributeValues(expressionValues)
+ .build());
+ return true;
+ } catch (Exception e) {
+ // most likely it's ConditionalCheckFailedException, but we will catch
any exception
+ LOG.debug("Acquiring lock {}.{} failed", database, table, e);
Review comment:
This should be logged at WARN level.
##########
File path: aws/src/main/java/org/apache/iceberg/aws/glue/DynamoLockManager.java
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.aws.glue;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.aws.AwsClientUtil;
+import org.apache.iceberg.aws.AwsProperties;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
+import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
+import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
+import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
+import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement;
+import software.amazon.awssdk.services.dynamodb.model.KeyType;
+import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
+import
software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
+import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType;
+import software.amazon.awssdk.services.dynamodb.model.TableStatus;
+
+class DynamoLockManager implements LockManager {
+
+ private static final Logger LOG =
LoggerFactory.getLogger(DynamoLockManager.class);
+
+ private static final String LOCK_TABLE_COL_TABLE_ID = "tableId";
+ private static final String LOCK_TABLE_COL_EXPIRE_TS_MILLIS =
"expireTimestampMillis";
+
+ private static final List<KeySchemaElement> LOCK_TABLE_SCHEMA =
Lists.newArrayList(
+ KeySchemaElement.builder()
+ .attributeName(LOCK_TABLE_COL_TABLE_ID)
+ .keyType(KeyType.HASH)
+ .build()
+ );
+
+ private static final List<AttributeDefinition> LOCK_TABLE_COL_DEFINITIONS =
Lists.newArrayList(
+ AttributeDefinition.builder()
+ .attributeName(LOCK_TABLE_COL_TABLE_ID)
+ .attributeType(ScalarAttributeType.S)
+ .build()
+ );
+
+ private final DynamoDbClient dynamo;
+ private final AwsProperties awsProperties;
+
+ DynamoLockManager(AwsProperties awsProperties) {
+ this(AwsClientUtil.defaultDynamoClient(), awsProperties);
+ }
+
+ DynamoLockManager(DynamoDbClient dynamo, AwsProperties awsProperties) {
+ this.dynamo = dynamo;
+ this.awsProperties = awsProperties;
+ ensureLockTableExists();
+ }
+
+ private void ensureLockTableExists() {
+ ensureTableExists(awsProperties.glueCatalogLockTable(), LOCK_TABLE_SCHEMA,
LOCK_TABLE_COL_DEFINITIONS);
+ }
+
+ private void ensureTableExists(String tableName, List<KeySchemaElement>
schema,
Review comment:
NIT: rename to ensureTableExistsOrCreate,
because the function creates the table if it does not exist.
"Ensure" gives the impression of a boolean check.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]