scwhittle commented on code in PR #33755:
URL: https://github.com/apache/beam/pull/33755#discussion_r1940837760
##########
runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkStateTest.java:
##########
@@ -439,7 +445,84 @@ public void testActivateWorkForKey_matchingCacheTokens_newWorkTokenLesser_STALE(
    ActivateWorkResult activateWorkResult = activeWorkState.activateWorkForKey(newWork);
    assertEquals(ActivateWorkResult.STALE, activateWorkResult);
- assertFalse(readOnlyActiveWork.get(shardedKey).contains(newWork));
- assertEquals(queuedWork, readOnlyActiveWork.get(shardedKey).peek());
+ assertFalse(readOnlyActiveWork.get(shardedKey.shardingKey()).containsValue(newWork));
+ assertEquals(queuedWork, firstValue(readOnlyActiveWork.get(shardedKey.shardingKey())));
+ }
+
+ @Test
+ public void testFailWork() {
+ ShardedKey shardedKey1 = shardedKey("someKey1", 1L);
+ ShardedKey shardedKey2 = shardedKey("someKey2", 2L);
Review Comment:
Could you also have a key with the same sharding_key but a different key value?
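Something like the following (rough sketch, reusing the test's existing shardedKey/createWork/createWorkItem helpers; names and token values are just illustrative):

```java
// Hypothetical: a third key that shares shardedKey1's sharding key (1L) but has a
// different user key. Activate a work item for it alongside work1, then verify that
// failing work1's WorkId does not mark this item as failed.
ShardedKey shardedKey1OtherUserKey = shardedKey("someOtherKey", 1L);
ExecutableWork workOtherUserKey =
    createWork(
        createWorkItem(/* workToken= */ 30L, /* cacheToken= */ 30L, shardedKey1OtherUserKey));
```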
##########
runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkStateTest.java:
##########
@@ -439,7 +445,84 @@ public void testActivateWorkForKey_matchingCacheTokens_newWorkTokenLesser_STALE(
    ActivateWorkResult activateWorkResult = activeWorkState.activateWorkForKey(newWork);
    assertEquals(ActivateWorkResult.STALE, activateWorkResult);
- assertFalse(readOnlyActiveWork.get(shardedKey).contains(newWork));
- assertEquals(queuedWork, readOnlyActiveWork.get(shardedKey).peek());
+ assertFalse(readOnlyActiveWork.get(shardedKey.shardingKey()).containsValue(newWork));
+ assertEquals(queuedWork, firstValue(readOnlyActiveWork.get(shardedKey.shardingKey())));
+ }
+
+ @Test
+ public void testFailWork() {
+ ShardedKey shardedKey1 = shardedKey("someKey1", 1L);
+ ShardedKey shardedKey2 = shardedKey("someKey2", 2L);
+
+ for (long workToken : Arrays.asList(1L, 2L)) {
+ for (long cacheToken : Arrays.asList(1L, 2L)) {
Review Comment:
Can we use different values for sharding_key, work token, and cache token, to avoid the
code possibly passing only because the values happen to overlap?
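E.g. something like (values are purely illustrative):

```java
// Keep sharding keys, work tokens, and cache tokens in disjoint ranges so that a
// comparison against the wrong field cannot pass by coincidence.
ShardedKey shardedKey1 = shardedKey("someKey1", 100L);
ShardedKey shardedKey2 = shardedKey("someKey2", 200L);
ImmutableList<Long> workTokens = ImmutableList.of(10L, 20L);
ImmutableList<Long> cacheTokens = ImmutableList.of(1L, 2L);
```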
##########
runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkStateTest.java:
##########
@@ -439,7 +445,84 @@ public void testActivateWorkForKey_matchingCacheTokens_newWorkTokenLesser_STALE(
    ActivateWorkResult activateWorkResult = activeWorkState.activateWorkForKey(newWork);
    assertEquals(ActivateWorkResult.STALE, activateWorkResult);
- assertFalse(readOnlyActiveWork.get(shardedKey).contains(newWork));
- assertEquals(queuedWork, readOnlyActiveWork.get(shardedKey).peek());
+ assertFalse(readOnlyActiveWork.get(shardedKey.shardingKey()).containsValue(newWork));
+ assertEquals(queuedWork, firstValue(readOnlyActiveWork.get(shardedKey.shardingKey())));
+ }
+
+ @Test
+ public void testFailWork() {
+ ShardedKey shardedKey1 = shardedKey("someKey1", 1L);
+ ShardedKey shardedKey2 = shardedKey("someKey2", 2L);
+
+ for (long workToken : Arrays.asList(1L, 2L)) {
+ for (long cacheToken : Arrays.asList(1L, 2L)) {
+ ExecutableWork work1 = createWork(createWorkItem(workToken, cacheToken, shardedKey1));
+ ExecutableWork work2 = createWork(createWorkItem(workToken, cacheToken, shardedKey2));
+ assertEquals(ActivateWorkResult.EXECUTE, activeWorkState.activateWorkForKey(work1));
+ assertEquals(ActivateWorkResult.EXECUTE, activeWorkState.activateWorkForKey(work2));
Review Comment:
I think it could be a better test to activate more work for the key and then verify that
failing only fails the specific items, i.e. just call failWorkForKey once outside the loop
after adding everything.
With the current structure it seems the test could still pass if failWorkForKey only
looked at the sharding key and failed everything for that key.
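Rough sketch of what I mean (helper names are taken from the existing test, token values
are illustrative, and this is untested, so the expected ActivateWorkResult for the second
activation may need adjusting):

```java
// Activate two distinct work items for the same sharding key up front.
ExecutableWork workA =
    createWork(createWorkItem(/* workToken= */ 10L, /* cacheToken= */ 1L, shardedKey1));
ExecutableWork workB =
    createWork(createWorkItem(/* workToken= */ 20L, /* cacheToken= */ 2L, shardedKey1));
assertEquals(ActivateWorkResult.EXECUTE, activeWorkState.activateWorkForKey(workA));
assertEquals(ActivateWorkResult.QUEUED, activeWorkState.activateWorkForKey(workB));

// Fail only workA's WorkId and check that workB is untouched. This would catch an
// implementation that fails everything queued under the sharding key.
activeWorkState.failWorkForKey(
    ImmutableList.of(
        WorkIdWithShardingKey.create(
            shardedKey1.shardingKey(),
            WorkId.builder().setWorkToken(10L).setCacheToken(1L).build())));
assertTrue(workA.work().isFailed());
assertFalse(workB.work().isFailed());
```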
##########
runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkStateTest.java:
##########
@@ -439,7 +445,84 @@ public void testActivateWorkForKey_matchingCacheTokens_newWorkTokenLesser_STALE(
    ActivateWorkResult activateWorkResult = activeWorkState.activateWorkForKey(newWork);
    assertEquals(ActivateWorkResult.STALE, activateWorkResult);
- assertFalse(readOnlyActiveWork.get(shardedKey).contains(newWork));
- assertEquals(queuedWork, readOnlyActiveWork.get(shardedKey).peek());
+ assertFalse(readOnlyActiveWork.get(shardedKey.shardingKey()).containsValue(newWork));
+ assertEquals(queuedWork, firstValue(readOnlyActiveWork.get(shardedKey.shardingKey())));
+ }
+
+ @Test
+ public void testFailWork() {
+ ShardedKey shardedKey1 = shardedKey("someKey1", 1L);
+ ShardedKey shardedKey2 = shardedKey("someKey2", 2L);
+
+ for (long workToken : Arrays.asList(1L, 2L)) {
+ for (long cacheToken : Arrays.asList(1L, 2L)) {
+ ExecutableWork work1 = createWork(createWorkItem(workToken, cacheToken, shardedKey1));
+ ExecutableWork work2 = createWork(createWorkItem(workToken, cacheToken, shardedKey2));
+ assertEquals(ActivateWorkResult.EXECUTE, activeWorkState.activateWorkForKey(work1));
+ assertEquals(ActivateWorkResult.EXECUTE, activeWorkState.activateWorkForKey(work2));
+ assertEquals(1, readOnlyActiveWork.get(shardedKey1.shardingKey()).size());
+ assertEquals(1, readOnlyActiveWork.get(shardedKey2.shardingKey()).size());
+ activeWorkState.failWorkForKey(
+     ImmutableList.of(
+         WorkIdWithShardingKey.create(
+             shardedKey1.shardingKey(),
+             WorkId.builder().setWorkToken(workToken).setCacheToken(cacheToken).build())));
+ assertTrue(firstValue(readOnlyActiveWork.get(shardedKey1.shardingKey())).work().isFailed());
+ assertFalse(firstValue(readOnlyActiveWork.get(shardedKey2.shardingKey())).work().isFailed());
+ activeWorkState.failWorkForKey(
+     ImmutableList.of(
+         WorkIdWithShardingKey.create(
+             shardedKey2.shardingKey(),
+             WorkId.builder().setWorkToken(workToken).setCacheToken(cacheToken).build())));
+ assertTrue(firstValue(readOnlyActiveWork.get(shardedKey1.shardingKey())).work().isFailed());
+ assertTrue(firstValue(readOnlyActiveWork.get(shardedKey2.shardingKey())).work().isFailed());
+
+ activeWorkState.completeWorkAndGetNextWorkForKey(
+ shardedKey1, workId(workToken, cacheToken));
+ activeWorkState.completeWorkAndGetNextWorkForKey(
+ shardedKey2, workId(workToken, cacheToken));
+ }
+ }
+ }
+
+ @Test
+ public void testFailWork_batchFail() {
+ ShardedKey shardedKey1 = shardedKey("someKey1", 1L);
Review Comment:
ditto
##########
runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkStateTest.java:
##########
@@ -439,7 +445,84 @@ public void testActivateWorkForKey_matchingCacheTokens_newWorkTokenLesser_STALE(
    ActivateWorkResult activateWorkResult = activeWorkState.activateWorkForKey(newWork);
    assertEquals(ActivateWorkResult.STALE, activateWorkResult);
- assertFalse(readOnlyActiveWork.get(shardedKey).contains(newWork));
- assertEquals(queuedWork, readOnlyActiveWork.get(shardedKey).peek());
+ assertFalse(readOnlyActiveWork.get(shardedKey.shardingKey()).containsValue(newWork));
+ assertEquals(queuedWork, firstValue(readOnlyActiveWork.get(shardedKey.shardingKey())));
+ }
+
+ @Test
+ public void testFailWork() {
+ ShardedKey shardedKey1 = shardedKey("someKey1", 1L);
+ ShardedKey shardedKey2 = shardedKey("someKey2", 2L);
+
+ for (long workToken : Arrays.asList(1L, 2L)) {
+ for (long cacheToken : Arrays.asList(1L, 2L)) {
+ ExecutableWork work1 = createWork(createWorkItem(workToken, cacheToken, shardedKey1));
+ ExecutableWork work2 = createWork(createWorkItem(workToken, cacheToken, shardedKey2));
+ assertEquals(ActivateWorkResult.EXECUTE, activeWorkState.activateWorkForKey(work1));
+ assertEquals(ActivateWorkResult.EXECUTE, activeWorkState.activateWorkForKey(work2));
+ assertEquals(1, readOnlyActiveWork.get(shardedKey1.shardingKey()).size());
+ assertEquals(1, readOnlyActiveWork.get(shardedKey2.shardingKey()).size());
+ activeWorkState.failWorkForKey(
+     ImmutableList.of(
+         WorkIdWithShardingKey.create(
+             shardedKey1.shardingKey(),
+             WorkId.builder().setWorkToken(workToken).setCacheToken(cacheToken).build())));
+ assertTrue(firstValue(readOnlyActiveWork.get(shardedKey1.shardingKey())).work().isFailed());
+ assertFalse(firstValue(readOnlyActiveWork.get(shardedKey2.shardingKey())).work().isFailed());
+ activeWorkState.failWorkForKey(
+     ImmutableList.of(
+         WorkIdWithShardingKey.create(
+             shardedKey2.shardingKey(),
+             WorkId.builder().setWorkToken(workToken).setCacheToken(cacheToken).build())));
+ assertTrue(firstValue(readOnlyActiveWork.get(shardedKey1.shardingKey())).work().isFailed());
+ assertTrue(firstValue(readOnlyActiveWork.get(shardedKey2.shardingKey())).work().isFailed());
+
+ activeWorkState.completeWorkAndGetNextWorkForKey(
+ shardedKey1, workId(workToken, cacheToken));
+ activeWorkState.completeWorkAndGetNextWorkForKey(
+ shardedKey2, workId(workToken, cacheToken));
+ }
+ }
+ }
+
+ @Test
+ public void testFailWork_batchFail() {
+ ShardedKey shardedKey1 = shardedKey("someKey1", 1L);
+ ShardedKey shardedKey2 = shardedKey("someKey2", 2L);
+
+ for (long workToken : Arrays.asList(1L, 2L)) {
+ for (long cacheToken : Arrays.asList(1L, 2L)) {
+ ExecutableWork work1 = createWork(createWorkItem(workToken, cacheToken, shardedKey1));
+ ExecutableWork work2 = createWork(createWorkItem(workToken, cacheToken, shardedKey2));
+ assertEquals(ActivateWorkResult.EXECUTE, activeWorkState.activateWorkForKey(work1));
+ assertEquals(ActivateWorkResult.EXECUTE, activeWorkState.activateWorkForKey(work2));
+ assertEquals(1, readOnlyActiveWork.get(shardedKey1.shardingKey()).size());
+ assertEquals(1, readOnlyActiveWork.get(shardedKey2.shardingKey()).size());
+ activeWorkState.failWorkForKey(
Review Comment:
ditto
##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/ActiveWorkState.java:
##########
@@ -167,54 +165,28 @@ synchronized ActivateWorkResult activateWorkForKey(ExecutableWork executableWork
    *
    * @param failedWork a map from sharding_key to tokens for the corresponding work.
    */
- synchronized void failWorkForKey(Multimap<Long, WorkId> failedWork) {
- // Note we can't construct a ShardedKey and look it up in activeWork directly since
- // HeartbeatResponse doesn't include the user key.
- for (Entry<ShardedKey, Deque<ExecutableWork>> entry : activeWork.entrySet()) {
- Collection<WorkId> failedWorkIds = failedWork.get(entry.getKey().shardingKey());
- for (WorkId failedWorkId : failedWorkIds) {
- for (ExecutableWork queuedWork : entry.getValue()) {
- WorkItem workItem = queuedWork.work().getWorkItem();
- if (workItem.getWorkToken() == failedWorkId.workToken()
- && workItem.getCacheToken() == failedWorkId.cacheToken()) {
- LOG.debug(
- "Failing work "
- + computationStateCache.getComputation()
- + " "
- + entry.getKey().shardingKey()
- + " "
- + failedWorkId.workToken()
- + " "
- + failedWorkId.cacheToken()
- + ". The work will be retried and is not lost.");
- queuedWork.work().setFailed();
- break;
- }
- }
+ synchronized void failWorkForKey(ImmutableList<WorkIdWithShardingKey> failedWork) {
+ for (WorkIdWithShardingKey failedId : failedWork) {
+ LinkedHashMap<WorkId, ExecutableWork> workQueue = activeWork.get(failedId.shardingKey());
Review Comment:
Mark this @Nullable, and the executableWork lookup below as well.
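E.g. (sketch only; I'm assuming WorkIdWithShardingKey exposes a workId() accessor and that
the lookup below mirrors your current code, adjust to the actual names):

```java
@Nullable LinkedHashMap<WorkId, ExecutableWork> workQueue = activeWork.get(failedId.shardingKey());
if (workQueue == null) {
  // No active work for this sharding key; nothing to fail.
  continue;
}
@Nullable ExecutableWork executableWork = workQueue.get(failedId.workId());
if (executableWork == null) {
  continue;
}
executableWork.work().setFailed();
```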
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]