z3d1k commented on code in PR #145:
URL:
https://github.com/apache/flink-connector-aws/pull/145#discussion_r1670444385
##########
flink-connector-aws/flink-connector-aws-kinesis-streams/src/main/java/org/apache/flink/connector/kinesis/source/enumerator/tracker/SplitTracker.java:
##########
@@ -0,0 +1,147 @@
+package org.apache.flink.connector.kinesis.source.enumerator.tracker;
+
+import org.apache.flink.annotation.Internal;
+import
org.apache.flink.connector.kinesis.source.enumerator.KinesisShardSplitWithAssignmentStatus;
+import
org.apache.flink.connector.kinesis.source.enumerator.SplitAssignmentStatus;
+import org.apache.flink.connector.kinesis.source.split.KinesisShardSplit;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.stream.Collectors;
+
+/** This class is used to track shard hierarchy. */
+@Internal
+public class SplitTracker {
+ /**
+ * Flag controlling if tracker should wait before all parent splits will
be completed before
+ * assigning split to readers.
+ */
+ private final boolean preserveShardOrdering;
+
+ /** Map of all discovered splits that have not been completed. */
+ private final Map<String, KinesisShardSplit> knownSplits = new
ConcurrentHashMap<>();
Review Comment:
Added info about the key
##########
flink-connector-aws/flink-connector-aws-kinesis-streams/src/main/java/org/apache/flink/connector/kinesis/source/enumerator/KinesisStreamsSourceEnumeratorState.java:
##########
@@ -19,32 +19,31 @@
package org.apache.flink.connector.kinesis.source.enumerator;
import org.apache.flink.annotation.Internal;
-import org.apache.flink.connector.kinesis.source.split.KinesisShardSplit;
import javax.annotation.Nullable;
-import java.util.Set;
+import java.util.List;
/**
* State for the {@link KinesisStreamsSourceEnumerator}. This class is stored
in state, so any
* changes need to be backwards compatible
*/
@Internal
public class KinesisStreamsSourceEnumeratorState {
- private final Set<KinesisShardSplit> unassignedSplits;
+ private final List<KinesisShardSplitWithAssignmentStatus> splits;
@Nullable private final String lastSeenShardId;
public KinesisStreamsSourceEnumeratorState(
- Set<KinesisShardSplit> unassignedSplits, String lastSeenShardId) {
- this.unassignedSplits = unassignedSplits;
+ List<KinesisShardSplitWithAssignmentStatus> splits, String
lastSeenShardId) {
Review Comment:
Renamed
##########
flink-connector-aws/flink-connector-aws-kinesis-streams/src/main/java/org/apache/flink/connector/kinesis/source/enumerator/KinesisStreamsSourceEnumeratorStateSerializer.java:
##########
@@ -109,13 +110,22 @@ public KinesisStreamsSourceEnumeratorState deserialize(
+ ". Serializer version is "
+ splitSerializer.getVersion());
}
- Set<KinesisShardSplit> unassignedSplits = new
HashSet<>(numUnassignedSplits);
+ List<KinesisShardSplitWithAssignmentStatus> unassignedSplits =
+ new ArrayList<>(numUnassignedSplits);
for (int i = 0; i < numUnassignedSplits; i++) {
int serializedLength = in.readInt();
byte[] serializedSplit = new byte[serializedLength];
if (in.read(serializedSplit) != -1) {
+ KinesisShardSplit deserializedSplit =
+
splitSerializer.deserialize(splitSerializerVersion, serializedSplit);
+ SplitAssignmentStatus assignmentStatus =
SplitAssignmentStatus.UNASSIGNED;
+ if (version == CURRENT_VERSION) {
+ assignmentStatus =
SplitAssignmentStatus.fromStatusCode(in.readInt());
+ }
Review Comment:
Yes, the previous version contains only unassigned splits.
##########
flink-connector-aws/flink-connector-aws-kinesis-streams/src/main/java/org/apache/flink/connector/kinesis/source/enumerator/tracker/SplitTracker.java:
##########
@@ -0,0 +1,147 @@
+package org.apache.flink.connector.kinesis.source.enumerator.tracker;
+
+import org.apache.flink.annotation.Internal;
+import
org.apache.flink.connector.kinesis.source.enumerator.KinesisShardSplitWithAssignmentStatus;
+import
org.apache.flink.connector.kinesis.source.enumerator.SplitAssignmentStatus;
+import org.apache.flink.connector.kinesis.source.split.KinesisShardSplit;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.stream.Collectors;
+
+/** This class is used to track shard hierarchy. */
+@Internal
+public class SplitTracker {
+ /**
+ * Flag controlling if tracker should wait before all parent splits will
be completed before
+ * assigning split to readers.
+ */
+ private final boolean preserveShardOrdering;
+
+ /** Map of all discovered splits that have not been completed. */
+ private final Map<String, KinesisShardSplit> knownSplits = new
ConcurrentHashMap<>();
+
+ /** Set of currently assigned split id. */
Review Comment:
fixed
##########
flink-connector-aws/flink-connector-aws-kinesis-streams/src/main/java/org/apache/flink/connector/kinesis/source/reader/KinesisStreamsSourceReader.java:
##########
@@ -64,6 +66,8 @@ public KinesisStreamsSourceReader(
@Override
protected void onSplitFinished(Map<String, KinesisShardSplitState>
finishedSplitIds) {
finishedSplitIds.keySet().forEach(this::unregisterShardMetricGroup);
+ context.sendSourceEventToCoordinator(
+ new SplitsFinishedEvent(new
HashSet<>(finishedSplitIds.keySet())));
}
Review Comment:
Done.
##########
flink-connector-aws/flink-connector-aws-kinesis-streams/src/main/java/org/apache/flink/connector/kinesis/source/event/SplitsFinishedEvent.java:
##########
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.kinesis.source.event;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.api.connector.source.SourceEvent;
+
+import java.util.Set;
+
+/** Source event used by source reader to communicate that splits are finished
to enumerator. */
+@Internal
+public class SplitsFinishedEvent implements SourceEvent {
Review Comment:
We don't need to implement these, as we don't perform equality checks or use
this event as a key.
Also, here is an example of such an event without these methods:
https://github.com/apache/flink/blob/master/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/hybrid/SwitchSourceEvent.java
##########
flink-connector-aws/flink-connector-aws-kinesis-streams/src/main/java/org/apache/flink/connector/kinesis/source/enumerator/KinesisShardSplitWithAssignmentStatus.java:
##########
@@ -0,0 +1,45 @@
+package org.apache.flink.connector.kinesis.source.enumerator;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.connector.kinesis.source.split.KinesisShardSplit;
+
+import java.util.Objects;
+
+/** Kinesis shard split with assignment status. */
+@Internal
+public class KinesisShardSplitWithAssignmentStatus {
+ private final KinesisShardSplit kinesisShardSplit;
+ private final SplitAssignmentStatus splitAssignmentStatus;
+
+ public KinesisShardSplitWithAssignmentStatus(
+ KinesisShardSplit kinesisShardSplit, SplitAssignmentStatus
splitAssignmentStatus) {
+ this.kinesisShardSplit = kinesisShardSplit;
+ this.splitAssignmentStatus = splitAssignmentStatus;
+ }
+
+ public KinesisShardSplit split() {
+ return kinesisShardSplit;
+ }
+
+ public SplitAssignmentStatus assignmentStatus() {
+ return splitAssignmentStatus;
+ }
+
+ @Override
+ public boolean equals(Object o) {
Review Comment:
Added
##########
flink-connector-aws/flink-connector-aws-kinesis-streams/src/test/java/org/apache/flink/connector/kinesis/source/enumerator/KinesisStreamsSourceEnumeratorTest.java:
##########
@@ -275,53 +262,8 @@ void testAddSplitsBackWithoutSplitIsNoOp() throws
Throwable {
// Given enumerator has no assigned splits
// When we add splits back
// Then handled gracefully with no exception thrown
Review Comment:
Removed
##########
flink-connector-aws/flink-connector-aws-kinesis-streams/src/main/java/org/apache/flink/connector/kinesis/source/enumerator/KinesisStreamsSourceEnumerator.java:
##########
@@ -143,17 +156,101 @@ public void addReader(int subtaskId) {
@Override
public KinesisStreamsSourceEnumeratorState snapshotState(long
checkpointId) throws Exception {
- return new KinesisStreamsSourceEnumeratorState(unassignedSplits,
lastSeenShardId);
+ List<KinesisShardSplitWithAssignmentStatus> splitStates =
+ splitTracker.snapshotState(checkpointId);
+ return new KinesisStreamsSourceEnumeratorState(splitStates,
lastSeenShardId);
}
@Override
public void close() throws IOException {
streamProxy.close();
}
- private List<KinesisShardSplit> initialDiscoverSplits() {
- List<Shard> shards = streamProxy.listShards(streamArn,
lastSeenShardId);
- return mapToSplits(shards, sourceConfig.get(STREAM_INITIAL_POSITION));
+ @VisibleForTesting
Review Comment:
In this case the logic does not fit in the StreamsProxy, but it still needs to
be tested.
Discussed with @hlteoh37 offline, and we agreed that VisibleForTesting is OK
here.
##########
flink-connector-aws/flink-connector-aws-kinesis-streams/src/main/java/org/apache/flink/connector/kinesis/source/enumerator/KinesisStreamsSourceEnumerator.java:
##########
@@ -211,32 +301,33 @@ private void assignSplits(List<KinesisShardSplit>
discoveredSplits, Throwable th
throw new KinesisStreamsSourceException("Failed to list shards.",
throwable);
}
+ splitTracker.addSplits(discoveredSplits);
+ updateLastSeenShardId(discoveredSplits);
+
if (context.registeredReaders().size() < context.currentParallelism())
{
LOG.info(
"Insufficient registered readers, skipping assignment of
discovered splits until all readers are registered. Required number of readers:
{}, Registered readers: {}",
context.currentParallelism(),
context.registeredReaders().size());
- unassignedSplits.addAll(discoveredSplits);
return;
}
+ assignSplits();
+ }
+
+ private void assignSplits() {
Review Comment:
This method performs split assignment to readers; IMO the name represents
the action well.
Renamed the method above to represent its meaning more clearly.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]