YuriGusev commented on code in PR #1:
URL: 
https://github.com/apache/flink-connector-dynamodb/pull/1#discussion_r1003526661


##########
flink-connector-dynamodb/src/main/java/org/apache/flink/streaming/connectors/dynamodb/sink/DynamoDbSinkWriter.java:
##########
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.streaming.connectors.dynamodb.sink;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.api.connector.sink2.Sink.InitContext;
+import org.apache.flink.connector.base.sink.writer.AsyncSinkWriter;
+import org.apache.flink.connector.base.sink.writer.BufferedRequestState;
+import org.apache.flink.connector.base.sink.writer.ElementConverter;
+import org.apache.flink.metrics.Counter;
+import org.apache.flink.metrics.groups.SinkWriterMetricGroup;
+import org.apache.flink.streaming.connectors.dynamodb.util.AWSDynamoDbUtil;
+import 
org.apache.flink.streaming.connectors.dynamodb.util.DynamoDbExceptionUtils;
+
+import org.apache.flink.shaded.guava30.com.google.common.collect.ImmutableMap;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
+import software.amazon.awssdk.services.dynamodb.model.BatchWriteItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.BatchWriteItemResponse;
+import software.amazon.awssdk.services.dynamodb.model.WriteRequest;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Properties;
+import java.util.concurrent.CompletableFuture;
+import java.util.function.Consumer;
+
+/**
+ * Sink writer created by {@link DynamoDbSink} to write to DynamoDB. More 
details on the operation
+ * of this sink writer may be found in the doc for {@link DynamoDbSink}. More 
details on the
+ * internals of this sink writer may be found in {@link AsyncSinkWriter}.
+ *
+ * <p>The {@link DynamoDbAsyncClient} used here may be configured in the 
standard way for the AWS
+ * SDK 2.x. e.g. the provision of {@code AWS_REGION}, {@code 
AWS_ACCESS_KEY_ID} and {@code
+ * AWS_SECRET_ACCESS_KEY} through environment variables etc.
+ */
+@Internal
+class DynamoDbSinkWriter<InputT> extends AsyncSinkWriter<InputT, 
DynamoDbWriteRequest> {
+    private static final Logger LOG = 
LoggerFactory.getLogger(DynamoDbSinkWriter.class);
+
+    /* A counter for the total number of records that have encountered an 
error during put */
+    private final Counter numRecordsOutErrorsCounter;
+
+    /* The sink writer metric group */
+    private final SinkWriterMetricGroup metrics;
+
+    private final DynamoDbAsyncClient client;
+    private final boolean failOnError;
+    private final String tableName;
+
+    private List<String> overwriteByPKeys;
+
+    public DynamoDbSinkWriter(
+            ElementConverter<InputT, DynamoDbWriteRequest> elementConverter,
+            InitContext context,
+            int maxBatchSize,
+            int maxInFlightRequests,
+            int maxBufferedRequests,
+            long maxBatchSizeInBytes,
+            long maxTimeInBufferMS,
+            long maxRecordSizeInBytes,
+            boolean failOnError,
+            String tableName,
+            List<String> overwriteByPKeys,
+            Properties dynamoDbClientProperties,
+            Collection<BufferedRequestState<DynamoDbWriteRequest>> states) {
+        super(
+                elementConverter,
+                context,
+                maxBatchSize,
+                maxInFlightRequests,
+                maxBufferedRequests,
+                maxBatchSizeInBytes,
+                maxTimeInBufferMS,
+                maxRecordSizeInBytes,
+                states);
+        this.failOnError = failOnError;
+        this.tableName = tableName;
+        this.overwriteByPKeys = overwriteByPKeys;
+        this.metrics = context.metricGroup();
+        this.numRecordsOutErrorsCounter = 
metrics.getNumRecordsOutErrorsCounter();
+        this.client = AWSDynamoDbUtil.createClient(dynamoDbClientProperties);
+    }
+
+    @Override
+    protected void submitRequestEntries(
+            List<DynamoDbWriteRequest> requestEntries,
+            Consumer<List<DynamoDbWriteRequest>> requestResultConsumer) {
+
+        TableRequestsContainer container = new 
TableRequestsContainer(overwriteByPKeys);
+        requestEntries.forEach(container::put);
+
+        CompletableFuture<BatchWriteItemResponse> future =
+                client.batchWriteItem(
+                        BatchWriteItemRequest.builder()
+                                .requestItems(
+                                        ImmutableMap.of(tableName, 
container.getRequestItems()))
+                                .build());
+
+        future.whenComplete(
+                (response, err) -> {
+                    if (err != null) {
+                        handleFullyFailedRequest(err, requestEntries, 
requestResultConsumer);
+                    } else if (response.unprocessedItems() != null
+                            && !response.unprocessedItems().isEmpty()) {
+                        handlePartiallyUnprocessedRequest(response, 
requestResultConsumer);
+                    } else {
+                        requestResultConsumer.accept(Collections.emptyList());
+                    }
+                });
+    }
+
+    private void handlePartiallyUnprocessedRequest(
+            BatchWriteItemResponse response, 
Consumer<List<DynamoDbWriteRequest>> requestResult) {
+        List<DynamoDbWriteRequest> unprocessed = new ArrayList<>();
+
+        for (WriteRequest request : 
response.unprocessedItems().get(tableName)) {
+            unprocessed.add(new DynamoDbWriteRequest(request));
+        }
+
+        LOG.warn(
+                "DynamoDB Sink failed to persist {} entries. Adding these 
entries back to retry.",
+                unprocessed.size());
+        numRecordsOutErrorsCounter.inc(unprocessed.size());
+
+        requestResult.accept(unprocessed);
+    }
+
+    private void handleFullyFailedRequest(
+            Throwable err,
+            List<DynamoDbWriteRequest> requestEntries,
+            Consumer<List<DynamoDbWriteRequest>> requestResult) {
+        LOG.warn(
+                "DynamoDB Sink failed to persist {} entries. Adding these 
entries back to retry.",
+                requestEntries.size(),
+                err);
+        numRecordsOutErrorsCounter.inc(requestEntries.size());
+
+        if (DynamoDbExceptionUtils.isNotRetryableException(err.getCause())) {
+            getFatalExceptionCons()
+                    .accept(
+                            new DynamoDbSinkException(
+                                    "Encountered non-recoverable exception", 
err));
+        } else if (failOnError) {
+            getFatalExceptionCons()
+                    .accept(new 
DynamoDbSinkException.DynamoDbSinkFailFastException(err));
+        } else {
+            requestResult.accept(requestEntries);
+        }
+    }
+
+    @Override
+    protected long getSizeInBytes(DynamoDbWriteRequest requestEntry) {
+        // dynamodb calculates item size as a sum of all attributes and all 
values, but doing so on
+        // every operation may be too expensive, so this is just an estimate
+        return 
requestEntry.getWriteRequest().toString().getBytes(StandardCharsets.UTF_8).length;

Review Comment:
   Hi @dannycranmer,
   
   The `toString()` call returns the write request serialised into a string 
that includes all attribute types (much like the DynamoDB JSON format), for example:
   
   `WriteRequest(PutRequest=PutRequest(Item={sk=AttributeValue(S=1), 
pk=AttributeValue(S=1), ...}))`
   
   The actual data written is smaller than this string, so the result is an 
over-estimate.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to