This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 898d0d42f1 [improvement](load)add more log for better bug tracing
experience for be write (#14424)
898d0d42f1 is described below
commit 898d0d42f133914dbcd24dbbb6ec4a3d1d2086ae
Author: AlexYue <[email protected]>
AuthorDate: Tue Nov 29 22:28:39 2022 +0800
[improvement](load)add more log for better bug tracing experience for be
write (#14424)
Recently when tracing one bug happened in version 1.1.4
I found out there were some places we can add more log for a better tracing.
---
be/src/exec/tablet_sink.cpp | 13 ++++++++++++-
be/src/olap/delta_writer.cpp | 10 +++++++---
be/src/runtime/tablets_channel.cpp | 5 +++++
3 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp
index fa9584a060..7c0051c191 100644
--- a/be/src/exec/tablet_sink.cpp
+++ b/be/src/exec/tablet_sink.cpp
@@ -216,6 +216,7 @@ Status NodeChannel::open_wait() {
// if this is last rpc, will must set _add_batches_finished.
otherwise, node channel's close_wait
// will be blocked.
_add_batches_finished = true;
+ VLOG_PROGRESS << "node channel " << channel_info() <<
"add_batches_finished";
}
});
@@ -271,6 +272,9 @@ Status NodeChannel::open_wait() {
}
}
_add_batches_finished = true;
+ VLOG_PROGRESS << "node channel " << channel_info()
+ << "add_batches_finished and handled "
+ << result.tablet_errors().size() << "
tablets errors";
}
} else {
_cancel_with_msg(
@@ -408,7 +412,8 @@ void NodeChannel::mark_close() {
DCHECK(_pending_batches.back().second.eos());
_close_time_ms = UnixMillis();
LOG(INFO) << channel_info()
- << " mark closed, left pending batch size: " <<
_pending_batches.size();
+ << " mark closed, left pending batch size: " <<
_pending_batches.size()
+ << " left pending batch size: " << _pending_batches_bytes;
}
_eos_is_produced = true;
@@ -718,6 +723,8 @@ Status IndexChannel::init(RuntimeState* state, const
std::vector<TTabletWithPart
void IndexChannel::mark_as_failed(int64_t node_id, const std::string& host,
const std::string& err,
int64_t tablet_id) {
+ VLOG_PROGRESS << "mark node_id:" << node_id << " tablet_id: " << tablet_id
+ << " as failed, err: " << err;
const auto& it = _tablets_by_channel.find(node_id);
if (it == _tablets_by_channel.end()) {
return;
@@ -950,6 +957,10 @@ Status OlapTableSink::prepare(RuntimeState* state) {
tablets.emplace_back(std::move(tablet_with_partition));
}
}
+ if (UNLIKELY(tablets.empty())) {
+ LOG(WARNING) << "load job:" << state->load_job_id() << " index: "
<< index->index_id
+ << " would open 0 tablet";
+ }
_channels.emplace_back(new IndexChannel(this, index->index_id,
use_vec));
RETURN_IF_ERROR(_channels.back()->init(state, tablets));
}
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index c1bb34f1d6..d330e430fe 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -223,7 +223,7 @@ Status DeltaWriter::write(const vectorized::Block* block,
const std::vector<int>
if (_mem_table->is_flush()) {
auto s = _flush_memtable_async();
_reset_mem_table();
- if (OLAP_UNLIKELY(!s.ok())) {
+ if (UNLIKELY(!s.ok())) {
return s;
}
}
@@ -255,7 +255,7 @@ Status DeltaWriter::flush_memtable_and_wait(bool need_wait)
{
<< ", load id: " << print_id(_req.load_id);
auto s = _flush_memtable_async();
_reset_mem_table();
- if (OLAP_UNLIKELY(!s.ok())) {
+ if (UNLIKELY(!s.ok())) {
return s;
}
@@ -346,7 +346,11 @@ Status DeltaWriter::close_wait(const PSlaveTabletNodes&
slave_tablet_nodes,
return _cancel_status;
}
// return error if previous flush failed
- RETURN_NOT_OK(_flush_token->wait());
+ auto st = _flush_token->wait();
+ if (UNLIKELY(!st.ok())) {
+ LOG(WARNING) << "previous flush failed tablet " <<
_tablet->tablet_id();
+ return st;
+ }
_mem_table.reset();
diff --git a/be/src/runtime/tablets_channel.cpp
b/be/src/runtime/tablets_channel.cpp
index 85ab150203..d6c81fb845 100644
--- a/be/src/runtime/tablets_channel.cpp
+++ b/be/src/runtime/tablets_channel.cpp
@@ -130,6 +130,8 @@ Status TabletsChannel::close(
// just skip this tablet(writer) and continue to close
others
continue;
}
+ VLOG_PROGRESS << "cancel tablet writer successfully,
tablet_id=" << it.first
+ << ", transaction_id=" << _txn_id;
}
}
@@ -189,6 +191,8 @@ void TabletsChannel::_close_wait(DeltaWriter* writer,
PTabletError* tablet_error = tablet_errors->Add();
tablet_error->set_tablet_id(writer->tablet_id());
tablet_error->set_msg(st.get_error_msg());
+ VLOG_PROGRESS << "close wait failed tablet " << writer->tablet_id() <<
" transaction_id "
+ << _txn_id << "err msg " << st.get_error_msg();
}
}
@@ -443,6 +447,7 @@ Status TabletsChannel::add_batch(const
TabletWriterAddRequest& request,
int64_t tablet_id = request.tablet_ids(i);
if (_broken_tablets.find(tablet_id) != _broken_tablets.end()) {
// skip broken tablets
+ VLOG_PROGRESS << "skip broken tablet tablet=" << tablet_id;
continue;
}
auto it = tablet_to_rowidxs.find(tablet_id);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]