This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new bb654e5af Fix flaky TestDownloadSuperblockInBatch
bb654e5af is described below
commit bb654e5afb0dc7ce925b9e8f36ad4413f80cfd20
Author: Marton Greber <[email protected]>
AuthorDate: Mon Jul 17 13:08:44 2023 +0000
Fix flaky TestDownloadSuperblockInBatch
The test has been failing from time to time with the following message:
Expected: has substring "recv error: Network error: RPC frame had a
length" Actual: "... UNKNOWN_ERROR: received error code Illegal state:
The tablet is not in a running state: BOOTSTRAPPING from remote service"
The fix is to wait for the source tablet server to finish starting
(WaitStarted()) after it is restarted in the test.
Prior to this patch, this issue made kudu-tool-test quite flaky:
http://dist-test.cloudera.org/job?job_id=root.1689595728.503176
* 125/400 failed
With this patch:
http://dist-test.cloudera.org/job?job_id=root.1689597615.525960
* 6/400 failed
^ I verified that those 6 failures are unrelated to what is
being fixed in this patch.
(The command which has been used to produce the above dist-test results:
KUDU_ALLOW_SLOW_TESTS=1 ../../build-support/dist_test.py loop -n 100 \
-- ./bin/kudu-tool-test --stress_cpu_threads=16)
Moreover, TSAN reported a race condition on 'StringVectorSink
capture_logs'. To address it, this patch moves the ScopedRegisterSink and
the tool command invocation into their own scope, so that it works as
expected.
Change-Id: Id65cf0586416f70c72f61b2e6886bfc5d0690c0f
Reviewed-on: http://gerrit.cloudera.org:8080/20205
Tested-by: Kudu Jenkins
Reviewed-by: Mahesh Reddy <[email protected]>
Reviewed-by: Alexey Serbin <[email protected]>
---
src/kudu/tools/kudu-tool-test.cc | 53 +++++++++++++++++++++-------------------
1 file changed, 28 insertions(+), 25 deletions(-)
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index cfd331a84..6feac6b32 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -9439,6 +9439,7 @@ TEST_P(DownloadSuperblockInBatchTest,
TestDownloadSuperblockInBatch) {
// So it is easy to make the size of superblock over the value of
rpc_max_message_size.
FLAGS_rpc_max_message_size = kSuperblockSize / 4;
ASSERT_OK(src_tserver->Restart());
+ ASSERT_OK(src_tserver->WaitStarted());
string source_tserver_rpc_addr = src_tserver->bound_rpc_addr().ToString();
string wal_dir = dst_tserver->options()->fs_opts.wal_root;
@@ -9447,32 +9448,34 @@ TEST_P(DownloadSuperblockInBatchTest,
TestDownloadSuperblockInBatch) {
// Copy tablet replicas from source tserver to destination tserver.
StringVectorSink capture_logs;
- ScopedRegisterSink reg(&capture_logs);
string stderr;
- RunActionStdoutStderrString(
- Substitute("local_replica copy_from_remote $0 $1 "
- "-fs_data_dirs=$2 -fs_wal_dir=$3 "
- "--tablet_copy_support_download_superblock_in_batch=$4 "
- // Disable --rpc_max_message_size_enable_validation, so
- // --rpc_max_message_size can be set a small value.
- "--rpc_max_message_size_enable_validation=false "
- // Set --rpc_max_message_size very small, so it is easy for
the size of
- // superblock over --rpc_max_message_size. It is used to
repeat the network
- // error, see line 9477.
- "--rpc_max_message_size=$5 "
- // This flag and --rpc_max_message_size are in a group flag
validator, so
- // it is also should be set a small value.
- "--consensus_max_batch_size_bytes=$6 "
- "--encrypt_data_at_rest=$7 "
- "--tablet_copy_transfer_chunk_size_bytes=50",
- tablet_id_to_copy,
- source_tserver_rpc_addr,
- data_dirs,
- wal_dir,
- FLAGS_tablet_copy_support_download_superblock_in_batch,
- (kSuperblockSize / 4),
- (kSuperblockSize / 8),
- FLAGS_encrypt_data_at_rest), nullptr, &stderr);
+ {
+ ScopedRegisterSink rs(&capture_logs);
+ RunActionStderrString(
+ Substitute("local_replica copy_from_remote $0 $1 "
+ "-fs_data_dirs=$2 -fs_wal_dir=$3 "
+ "--tablet_copy_support_download_superblock_in_batch=$4 "
+ // Disable --rpc_max_message_size_enable_validation, so
+ // --rpc_max_message_size can be set a small value.
+ "--rpc_max_message_size_enable_validation=false "
+ // Set --rpc_max_message_size very small, so it is easy for
the size of
+ // superblock over --rpc_max_message_size. It is used to
repeat the network
+ // error, see line 9477.
+ "--rpc_max_message_size=$5 "
+ // This flag and --rpc_max_message_size are in a group flag
validator, so
+ // it is also should be set a small value.
+ "--consensus_max_batch_size_bytes=$6 "
+ "--encrypt_data_at_rest=$7 "
+ "--tablet_copy_transfer_chunk_size_bytes=50",
+ tablet_id_to_copy,
+ source_tserver_rpc_addr,
+ data_dirs,
+ wal_dir,
+ FLAGS_tablet_copy_support_download_superblock_in_batch,
+ (kSuperblockSize / 4),
+ (kSuperblockSize / 8),
+ FLAGS_encrypt_data_at_rest), &stderr);
+ }
// The size of superblock is larger than rpc_max_message_size, it will cause
a network error.
// Downloading superblock will fail.
if (!FLAGS_tablet_copy_support_download_superblock_in_batch) {