This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 31d282aa458 [feature](journal) Add log and metric to improve the
observability of journal batch (#30401)
31d282aa458 is described below
commit 31d282aa45835fac45280bb6b191326f13cc724d
Author: walter <[email protected]>
AuthorDate: Fri Jan 26 21:14:59 2024 +0800
[feature](journal) Add log and metric to improve the observability of
journal batch (#30401)
---
.../java/org/apache/doris/journal/JournalBatch.java | 3 +++
.../apache/doris/journal/bdbje/BDBJEJournal.java | 21 +++++++++++++++++++++
.../java/org/apache/doris/metric/MetricRepo.java | 8 +++++++-
3 files changed, 31 insertions(+), 1 deletion(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalBatch.java
b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalBatch.java
index 0fc0ccf9355..12d62b68717 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalBatch.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalBatch.java
@@ -41,6 +41,9 @@ public class JournalBatch {
//
// The writable data will be serialized and saved in the journal batch
with an internal
// representation, so it is safe to update the data object once this
function has returned.
+ //
+ // If the batch is too large, it may cause a latency spike. Generally, we
recommend controlling
+ // the number of batch entities to less than 32 and the batch data size to
less than 640KB.
public void addJournal(short op, Writable data) throws IOException {
if (op == OperationType.OP_TIMESTAMP) {
// OP_TIMESTAMP is not supported, see `BDBJEJournal.write` for
details.
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
index 95efbfa3780..9f90a3e2a11 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
@@ -49,6 +49,7 @@ import com.sleepycat.je.rep.ReplicaWriteException;
import com.sleepycat.je.rep.ReplicatedEnvironment;
import com.sleepycat.je.rep.RollbackException;
import com.sleepycat.je.rep.TimeConsistencyPolicy;
+import org.apache.commons.lang3.time.StopWatch;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -60,6 +61,7 @@ import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
+
/*
* This is the bdb implementation of Journal interface.
* First, we open() this journal, then read from or write to the bdb
environment
@@ -132,9 +134,11 @@ public class BDBJEJournal implements Journal { //
CHECKSTYLE IGNORE THIS LINE: B
// Write the journals to bdb.
for (int i = 0; i < RETRY_TIME; i++) {
Transaction txn = null;
+ StopWatch watch = StopWatch.createStarted();
try {
// The default config is constructed from the configs of
environment.
txn =
bdbEnvironment.getReplicatedEnvironment().beginTransaction(null, null);
+ dataSize = 0;
for (int j = 0; j < entitySize; ++j) {
JournalBatch.Entity entity = entities.get(j);
DatabaseEntry theKey = idToKey(firstId + j);
@@ -152,6 +156,18 @@ public class BDBJEJournal implements Journal { //
CHECKSTYLE IGNORE THIS LINE: B
if (MetricRepo.isInit) {
MetricRepo.COUNTER_EDIT_LOG_SIZE_BYTES.increase(dataSize);
MetricRepo.COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.increase(dataSize);
+ MetricRepo.HISTO_JOURNAL_BATCH_SIZE.update(entitySize);
+ MetricRepo.HISTO_JOURNAL_BATCH_DATA_SIZE.update(dataSize);
+ }
+
+ if (entitySize > 32) {
+ LOG.warn("write bdb journal batch is too large, batch size
{}, the first journal id {}, "
+ + "data size {}", entitySize, firstId, dataSize);
+ }
+
+ if (dataSize > 640 * 1024) { // 640KB
+ LOG.warn("write bdb journal batch data is too large, data
size {}, the first journal id {}, "
+ + "batch size {}", dataSize, firstId, entitySize);
}
return firstId;
@@ -188,6 +204,11 @@ public class BDBJEJournal implements Journal { //
CHECKSTYLE IGNORE THIS LINE: B
if (txn != null) {
txn.abort();
}
+ watch.stop();
+ if (watch.getTime() > 100000) { // 100ms
+ LOG.warn("write bdb is too slow, cost {}ms, the first
journal id, batch size {}, data size{}",
+ watch.getTime(), firstId, entitySize, dataSize);
+ }
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
index 667290132aa..91e38b8cdf1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
@@ -101,6 +101,8 @@ public final class MetricRepo {
public static LongCounterMetric COUNTER_EDIT_LOG_CLEAN_SUCCESS;
public static LongCounterMetric COUNTER_EDIT_LOG_CLEAN_FAILED;
public static Histogram HISTO_EDIT_LOG_WRITE_LATENCY;
+ public static Histogram HISTO_JOURNAL_BATCH_SIZE;
+ public static Histogram HISTO_JOURNAL_BATCH_DATA_SIZE;
public static LongCounterMetric COUNTER_IMAGE_WRITE_SUCCESS;
public static LongCounterMetric COUNTER_IMAGE_WRITE_FAILED;
@@ -486,7 +488,11 @@ public final class MetricRepo {
COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type",
"current_bytes"));
DORIS_METRIC_REGISTER.addMetrics(COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES);
HISTO_EDIT_LOG_WRITE_LATENCY = METRIC_REGISTER.histogram(
- MetricRegistry.name("editlog", "write", "latency", "ms"));
+ MetricRegistry.name("editlog", "write", "latency", "ms"));
+ HISTO_JOURNAL_BATCH_SIZE = METRIC_REGISTER.histogram(
+ MetricRegistry.name("journal", "write", "batch_size"));
+ HISTO_JOURNAL_BATCH_DATA_SIZE = METRIC_REGISTER.histogram(
+ MetricRegistry.name("journal", "write", "batch_data_size"));
// edit log clean
COUNTER_EDIT_LOG_CLEAN_SUCCESS = new
LongCounterMetric("edit_log_clean", MetricUnit.OPERATIONS,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]