This is an automated email from the ASF dual-hosted git repository. yong pushed a commit to branch branch-4.15 in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
commit d03f6a25a72753de47dfac270a42d9a2d7c72771 Author: wenbingshen <[email protected]> AuthorDate: Wed Sep 28 12:18:12 2022 +0800 ledgerFragment check and results keep order (#3504) ### Motivation When `LedgerChecker` checks Ledger's fragments, it does not keep the order, and the results of the check are not kept in order, which works correctly most of the time. I'm not sure if we should keep the order of checks and final results, but I observed a flaky-test like this: https://github.com/apache/bookkeeper/actions/runs/3124965464/jobs/5068848112 I have printed some verbose logs as follows: testShouldGetTwoFrgamentsIfTwoBookiesFailedInSameEnsemble In the case of success and failure: ### When the test execution is successful: 2022-09-26T15:00:13,790 - INFO - [Time-limited test:LedgerChecker@373] - checkLedger nowing. 2022-09-26T15:00:13,792 - INFO - [Time-limited test:LedgerChecker@398] - LedgerCheck fragments before[Fragment(LedgerID: 0, FirstEntryID: 0[0], LastKnownEntryID: 0[0], Host: [127.0.0.1:50466, 127.0.0.1:50474, 127.0.0.1:50476], Closed: true)] 2022-09-26T15:00:13,793 - INFO - [Time-limited test:LedgerChecker@430] - not execute check fragments. curEnsemble [127.0.0.1:50466, 127.0.0.1:50486, 127.0.0.1:50476], curEntryId 1 **2022-09-26T15:00:13,809 - INFO - [BookKeeperClientWorker-OrderedExecutor-0-0:LedgerChecker$1@440] - check2Fragments [Fragment(LedgerID: 0, **FirstEntryID: 0[0]**, LastKnownEntryID: 0[0], Host: [127.0.0.1:50466, 127.0.0.1:50474, 127.0.0.1:50476], Closed: true), Fragment(LedgerID: 0, **FirstEntryID: 1[1]**, LastKnownEntryID: 1[1], Host: [127.0.0.1:50466, 127.0.0.1:50476, 127.0.0.1:50486], Closed: false)]** 2022-09-26T15:00:13,810 - INFO - [BookKeeperClientWorker-OrderedExecutor-0-0:LedgerChecker@479] - Checking fragment Fragment(LedgerID: 0, FirstEntryID: 0[0], LastKnownEntryID: 0[0], Host: [127.0.0.1:50466, 127.0.0.1:50474, 127.0.0.1:50476], Closed: true) 2022-09-26T15:00:13,813 - INFO - [BookKeeperClientWorker-OrderedExecutor-0-0:LedgerChecker$FullLedgerCallback@360] - operationComplete Fragment(LedgerID: 0, FirstEntryID: 0[0], LastKnownEntryID: 0[0], Host: [127.0.0.1:50466, 127.0.0.1:50474], Closed: true), -8 2022-09-26T15:00:13,813 - INFO - [BookKeeperClientWorker-OrderedExecutor-0-0:LedgerChecker@479] - Checking fragment Fragment(LedgerID: 0, FirstEntryID: 1[1], LastKnownEntryID: 1[1], Host: [127.0.0.1:50466, 127.0.0.1:50476, 127.0.0.1:50486], Closed: false) 2022-09-26T15:00:13,817 - INFO - [BookKeeperClientWorker-OrderedExecutor-0-0:LedgerChecker$FullLedgerCallback@360] - operationComplete Fragment(LedgerID: 0, FirstEntryID: 1[-1], LastKnownEntryID: 1[-1], Host: [127.0.0.1:50466], Closed: false), -8 **2022-09-26T15:00:13,817 - INFO - [Time-limited test:TestLedgerChecker@534] - waitAndGetResult [Fragment(LedgerID: 0, **FirstEntryID: 0[0]**, LastKnownEntryID: 0[0], Host: [127.0.0.1:50466, 127.0.0.1:50474], Closed: true), Fragment(LedgerID: 0, **FirstEntryID: 1[-1]**, LastKnownEntryID: 1[-1], Host: [127.0.0.1:50466], Closed: false)]** 2022-09-26T15:00:13,818 - INFO - [Time-limited test:TestLedgerChecker@201] - other Ensemble {0=[127.0.0.1:50466, 127.0.0.1:50474, 127.0.0.1:50476], 1=[127.0.0.1:50466, 127.0.0.1:50486, 127.0.0.1:50476]} 2022-09-26T15:00:13,818 - INFO - [Time-limited test:TestLedgerChecker@206] - unreplicated fragment: Fragment(LedgerID: 0, FirstEntryID: 0[0], LastKnownEntryID: 0[0], Host: [127.0.0.1:50466, 127.0.0.1:50474], Closed: true) 2022-09-26T15:00:13,819 - INFO - [Time-limited test:TestLedgerChecker@206] - unreplicated fragment: Fragment(LedgerID: 0, FirstEntryID: 1[-1], LastKnownEntryID: 1[-1], Host: [127.0.0.1:50466], Closed: false) 2022-09-26T15:00:13,819 - INFO - [Time-limited test:TestLedgerChecker@212] - bookies fragments [127.0.0.1:50466, 127.0.0.1:50474, 127.0.0.1:50476], [0, 1] 2022-09-26T15:00:13,819 - INFO - [Time-limited test:BookKeeperClusterTestCase@199] - TearDown ### When test execution failed: 2022-09-26T14:58:09,687 - INFO - [Time-limited test:LedgerChecker@373] - checkLedger nowing. 2022-09-26T14:58:09,689 - INFO - [Time-limited test:LedgerChecker@398] - LedgerCheck fragments before[Fragment(LedgerID: 0, FirstEntryID: 0[0], LastKnownEntryID: 0[0], Host: [127.0.0.1:50105, 127.0.0.1:50107, 127.0.0.1:50109], Closed: true)] 2022-09-26T14:58:09,690 - INFO - [Time-limited test:LedgerChecker@430] - not execute check fragments. curEnsemble [127.0.0.1:50105, 127.0.0.1:50122, 127.0.0.1:50107], curEntryId 1 **2022-09-26T14:58:09,704 - INFO - [BookKeeperClientWorker-OrderedExecutor-0-0:LedgerChecker$1@440] - check2Fragments [Fragment(LedgerID: 0, **FirstEntryID: 1[1]**, LastKnownEntryID: 1[1], Host: [127.0.0.1:50105, 127.0.0.1:50122, 127.0.0.1:50107], Closed: false), Fragment(LedgerID: 0, **FirstEntryID: 0[0]**, LastKnownEntryID: 0[0], Host: [127.0.0.1:50105, 127.0.0.1:50107, 127.0.0.1:50109], Closed: true)]** 2022-09-26T14:58:09,705 - INFO - [BookKeeperClientWorker-OrderedExecutor-0-0:LedgerChecker@479] - Checking fragment Fragment(LedgerID: 0, FirstEntryID: 1[1], LastKnownEntryID: 1[1], Host: [127.0.0.1:50105, 127.0.0.1:50122, 127.0.0.1:50107], Closed: false) 2022-09-26T14:58:09,707 - INFO - [BookKeeperClientWorker-OrderedExecutor-0-0:LedgerChecker@479] - Checking fragment Fragment(LedgerID: 0, FirstEntryID: 0[0], LastKnownEntryID: 0[0], Host: [127.0.0.1:50105, 127.0.0.1:50107, 127.0.0.1:50109], Closed: true) 2022-09-26T14:58:09,708 - INFO - [BookKeeperClientWorker-OrderedExecutor-0-0:LedgerChecker$FullLedgerCallback@360] - operationComplete Fragment(LedgerID: 0, FirstEntryID: 0[0], LastKnownEntryID: 0[0], Host: [127.0.0.1:50105, 127.0.0.1:50109], Closed: true), -8 2022-09-26T14:58:09,708 - INFO - [BookKeeperClientWorker-OrderedExecutor-0-0:LedgerChecker$FullLedgerCallback@360] - operationComplete Fragment(LedgerID: 0, FirstEntryID: 1[-1], LastKnownEntryID: 1[-1], Host: [127.0.0.1:50105], Closed: false), -8 **2022-09-26T14:58:09,709 - INFO - [Time-limited test:TestLedgerChecker@534] - waitAndGetResult [Fragment(LedgerID: 0, **FirstEntryID: 1[-1]**, LastKnownEntryID: 1[-1], Host: [127.0.0.1:50105], Closed: false), Fragment(LedgerID: 0, **FirstEntryID: 0[0]**, LastKnownEntryID: 0[0], Host: [127.0.0.1:50105, 127.0.0.1:50109], Closed: true)]** 2022-09-26T14:58:09,709 - INFO - [Time-limited test:TestLedgerChecker@201] - other Ensemble {0=[127.0.0.1:50105, 127.0.0.1:50109, 127.0.0.1:50107], 1=[127.0.0.1:50105, 127.0.0.1:50122, 127.0.0.1:50107]} 2022-09-26T14:58:09,709 - INFO - [Time-limited test:TestLedgerChecker@206] - unreplicated fragment: Fragment(LedgerID: 0, FirstEntryID: 1[-1], LastKnownEntryID: 1[-1], Host: [127.0.0.1:50105], Closed: false) 2022-09-26T14:58:09,709 - INFO - [Time-limited test:TestLedgerChecker@206] - unreplicated fragment: Fragment(LedgerID: 0, FirstEntryID: 0[0], LastKnownEntryID: 0[0], Host: [127.0.0.1:50105, 127.0.0.1:50109], Closed: true) 2022-09-26T14:58:09,709 - INFO - [Time-limited test:TestLedgerChecker@212] - bookies fragments [127.0.0.1:50105, 127.0.0.1:50122, 127.0.0.1:50107], [0] 2022-09-26T14:58:09,710 - INFO - [Time-limited test:BookKeeperClusterTestCase@199] - TearDown ### Changes Replace `HashSet` with `LinkedHashSet` (cherry picked from commit 74a87e260bd0cf9eca2d229cc181638741b5f575) --- .../src/main/java/org/apache/bookkeeper/client/LedgerChecker.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerChecker.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerChecker.java index 68c6adb0b0..b9df5f4a22 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerChecker.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerChecker.java @@ -22,6 +22,7 @@ package org.apache.bookkeeper.client; import io.netty.buffer.ByteBuf; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -344,7 +345,7 @@ public class LedgerChecker { FullLedgerCallback(long numFragments, GenericCallback<Set<LedgerFragment>> cb) { - badFragments = new HashSet<LedgerFragment>(); + badFragments = new LinkedHashSet<>(); this.numFragments = new AtomicLong(numFragments); this.cb = cb; } @@ -376,7 +377,7 @@ public class LedgerChecker { final GenericCallback<Set<LedgerFragment>> cb, long percentageOfLedgerFragmentToBeVerified) { // build a set of all fragment replicas - final Set<LedgerFragment> fragments = new HashSet<LedgerFragment>(); + final Set<LedgerFragment> fragments = new LinkedHashSet<>(); Long curEntryId = null; List<BookieId> curEnsemble = null;
