dariuszseweryn commented on code in PR #10053:
URL: https://github.com/apache/nifi/pull/10053#discussion_r2185569094


##########
nifi-extension-bundles/nifi-aws-bundle/nifi-aws-processors/src/main/java/org/apache/nifi/processors/aws/kinesis/stream/record/KinesisRecordProcessorRecord.java:
##########
@@ -75,51 +82,93 @@ public KinesisRecordProcessorRecord(final 
ProcessSessionFactory sessionFactory,
     @Override
     void startProcessingRecords() {
         super.startProcessingRecords();
-        outputStream = null;
-        writer = null;
+        if (flowFileState != null) {
+            getLogger().warn("FlowFile State is not null at the start of 
processing records, this is not expected.");
+            closeSafe(flowFileState, "FlowFile State");
+            flowFileState = null;
+        }
     }
 
     @Override
-    void processRecord(final List<FlowFile> flowFiles, final 
KinesisClientRecord kinesisRecord, final boolean lastRecord,
+    void finishProcessingRecords(final ProcessSession session, final 
List<FlowFile> flowFiles, final StopWatch stopWatch) {
+        super.finishProcessingRecords(session, flowFiles, stopWatch);
+        try {
+            if (flowFileState == null) {
+                return;
+            }
+            if (!flowFiles.contains(flowFileState.flowFile)) {
+                getLogger().warn("Currently processed FlowFile is no longer 
available at processing end, this is not expected.", flowFiles);
+                closeSafe(flowFileState, "FlowFile State");
+                return;
+            }
+            completeFlowFile(flowFiles, session, stopWatch);
+        } catch (CompleteFlowFileSingleKinesisRecordException e) {
+            final boolean removeFirstFlowFileIfAvailable = true;
+            final KinesisClientRecord kinesisRecord = e.kinesisClientRecord;
+            final byte[] data = getData(kinesisRecord);
+            outputRawRecordOnException(removeFirstFlowFileIfAvailable, 
flowFiles, session, data, kinesisRecord, e);
+        } finally {
+            flowFileState = null;
+            failedKinesisRecordCausingDataLoss = null;
+            failedKinesisRecordCausingDataLossException = null;
+        }
+    }
+
+    @Override
+    void processRecord(final List<FlowFile> flowFiles, final 
KinesisClientRecord kinesisRecord,
                        final ProcessSession session, final StopWatch 
stopWatch) {
-        boolean firstOutputRecord = true;
-        int recordCount = 0;
-        final ByteBuffer dataBuffer = kinesisRecord.data();
-        byte[] data = dataBuffer != null ? new byte[dataBuffer.remaining()] : 
new byte[0];
-        if (dataBuffer != null) {
-            dataBuffer.get(data);
+        if (flowFileState != null && 
!flowFiles.contains(flowFileState.flowFile)) {
+            getLogger().warn("Currently processed FlowFile is no longer 
available, this is not expected.", flowFiles);
+            closeSafe(flowFileState, "FlowFile State");
+            flowFileState = null;
         }
+        if (kinesisRecord == failedKinesisRecordCausingDataLoss)  {
+            // AbstractKinesisRecordProcessor does retry processing of failed 
records. however in case of CompleteFlowFileMultipleKinesisRecordException it 
is impossible to determine the state of
+            // the affected FlowFile and replay all records that were in it. 
To prevent data loss, the exception needs to be rethrown until it is given up 
by the abstract processor. still there may be
+            // other cases where we end up in undetermined state.
+            throw failedKinesisRecordCausingDataLossException;

Review Comment:
   Yes, I added a `KinesisUnrecoverableException` in 
`AbstractKinesisRecordProcessor`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to