This is an automated email from the ASF dual-hosted git repository.
wusheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/skywalking.git
The following commit(s) were added to refs/heads/master by this push:
new 6227db1 Save error trace segment even that segment will abandoned by
sampling mechanism (#5407)
6227db1 is described below
commit 6227db19cd057512e3a97312cdfd0ebf688b74d5
Author: zifeihan <[email protected]>
AuthorDate: Fri Aug 28 20:37:19 2020 +0800
Save error trace segment even that segment will abandoned by sampling
mechanism (#5407)
* Save error trace segments even if the segment is abandoned by the
server-side trace sampling mechanism.
* Support the forceSampleErrorSegment config to control whether error
segments are force-saved.
Co-authored-by: echo <[email protected]>
Co-authored-by: 吴晟 Wu Sheng <[email protected]>
---
docs/en/setup/backend/trace-sampling.md | 7 +++++-
.../analyzer/provider/AnalyzerModuleConfig.java | 7 ++++++
.../parser/listener/SegmentAnalysisListener.java | 26 ++++++++++++----------
.../src/main/resources/application.yml | 1 +
4 files changed, 28 insertions(+), 13 deletions(-)
diff --git a/docs/en/setup/backend/trace-sampling.md
b/docs/en/setup/backend/trace-sampling.md
index 6a8ace6..d54df83 100644
--- a/docs/en/setup/backend/trace-sampling.md
+++ b/docs/en/setup/backend/trace-sampling.md
@@ -30,4 +30,9 @@ When you set the rate different, let's say
And we assume the agents reported all trace segments to backend,
Then the 35% traces in the global will be collected and saved in storage
consistent/complete, with all spans.
20% trace segments, which reported to Backend-Instance**B**, will saved in
storage, maybe miss some trace segments,
-because they are reported to Backend-Instance**A** and ignored.
\ No newline at end of file
+because they are reported to Backend-Instance**A** and ignored.
+
+# Note
+When sampling is enabled, the actual sample rate could be higher than sampleRate,
because currently all error segments are saved, while their upstream
and downstream segments may not be sampled. This feature makes sure you
have the error stacks and segments, but doesn't guarantee that you have the
whole trace.
+
+A side effect is that, if most of the accesses fail, the sampling
rate would be close to 100%, which could crash the backend or storage
clusters.
diff --git
a/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/AnalyzerModuleConfig.java
b/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/AnalyzerModuleConfig.java
index 84d3b6d..4a658d0 100644
---
a/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/AnalyzerModuleConfig.java
+++
b/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/AnalyzerModuleConfig.java
@@ -78,4 +78,11 @@ public class AnalyzerModuleConfig extends ModuleConfig {
@Getter
private final String configPath = "meter-receive-config";
+
+ /**
+ * Sample the trace segment if the segment has span(s) tagged as error
status, and ignore the sampleRate configuration.
+ */
+ @Setter
+ @Getter
+ private boolean forceSampleErrorSegment = true;
}
diff --git
a/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/trace/parser/listener/SegmentAnalysisListener.java
b/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/trace/parser/listener/SegmentAnalysisListener.java
index 9f4afdc..d46b427 100644
---
a/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/trace/parser/listener/SegmentAnalysisListener.java
+++
b/oap-server/analyzer/agent-analyzer/src/main/java/org/apache/skywalking/oap/server/analyzer/provider/trace/parser/listener/SegmentAnalysisListener.java
@@ -47,6 +47,7 @@ import
org.apache.skywalking.oap.server.library.util.BooleanUtils;
public class SegmentAnalysisListener implements FirstAnalysisListener,
EntryAnalysisListener, SegmentListener {
private final SourceReceiver sourceReceiver;
private final TraceSegmentSampler sampler;
+ private final boolean forceSampleErrorSegment;
private final NamingControl namingControl;
private final List<String> searchableTagKeys;
@@ -125,18 +126,6 @@ public class SegmentAnalysisListener implements
FirstAnalysisListener, EntryAnal
@Override
public void parseSegment(SegmentObject segmentObject) {
- if (sampleStatus.equals(SAMPLE_STATUS.UNKNOWN) ||
sampleStatus.equals(SAMPLE_STATUS.IGNORE)) {
- if (sampler.shouldSample(segmentObject.getTraceId())) {
- sampleStatus = SAMPLE_STATUS.SAMPLED;
- } else {
- sampleStatus = SAMPLE_STATUS.IGNORE;
- }
- }
-
- if (sampleStatus.equals(SAMPLE_STATUS.IGNORE)) {
- return;
- }
-
segment.setTraceId(segmentObject.getTraceId());
segmentObject.getSpansList().forEach(span -> {
if (startTimestamp == 0 || startTimestamp > span.getStartTime()) {
@@ -153,6 +142,16 @@ public class SegmentAnalysisListener implements
FirstAnalysisListener, EntryAnal
});
final long accurateDuration = endTimestamp - startTimestamp;
duration = accurateDuration > Integer.MAX_VALUE ? Integer.MAX_VALUE :
(int) accurateDuration;
+
+ if (sampleStatus.equals(SAMPLE_STATUS.UNKNOWN) ||
sampleStatus.equals(SAMPLE_STATUS.IGNORE)) {
+ if (sampler.shouldSample(segmentObject.getTraceId())) {
+ sampleStatus = SAMPLE_STATUS.SAMPLED;
+ } else if (isError && forceSampleErrorSegment) {
+ sampleStatus = SAMPLE_STATUS.SAMPLED;
+ } else {
+ sampleStatus = SAMPLE_STATUS.IGNORE;
+ }
+ }
}
private void appendSearchableTags(SpanObject span) {
@@ -186,6 +185,7 @@ public class SegmentAnalysisListener implements
FirstAnalysisListener, EntryAnal
public static class Factory implements AnalysisListenerFactory {
private final SourceReceiver sourceReceiver;
private final TraceSegmentSampler sampler;
+ private final boolean forceSampleErrorSegment;
private final NamingControl namingControl;
private final List<String> searchTagKeys;
@@ -196,6 +196,7 @@ public class SegmentAnalysisListener implements
FirstAnalysisListener, EntryAnal
.getService(ConfigService.class);
this.searchTagKeys =
Arrays.asList(configService.getSearchableTracesTags().split(Const.COMMA));
this.sampler = new
TraceSegmentSampler(config.getTraceSampleRateWatcher());
+ this.forceSampleErrorSegment = config.isForceSampleErrorSegment();
this.namingControl = moduleManager.find(CoreModule.NAME)
.provider()
.getService(NamingControl.class);
@@ -206,6 +207,7 @@ public class SegmentAnalysisListener implements
FirstAnalysisListener, EntryAnal
return new SegmentAnalysisListener(
sourceReceiver,
sampler,
+ forceSampleErrorSegment,
namingControl,
searchTagKeys
);
diff --git a/oap-server/server-bootstrap/src/main/resources/application.yml
b/oap-server/server-bootstrap/src/main/resources/application.yml
index d774945..a74ee81 100755
--- a/oap-server/server-bootstrap/src/main/resources/application.yml
+++ b/oap-server/server-bootstrap/src/main/resources/application.yml
@@ -175,6 +175,7 @@ agent-analyzer:
default:
sampleRate: ${SW_TRACE_SAMPLE_RATE:10000} # The sample rate precision is
1/10000. 10000 means 100% sample in default.
slowDBAccessThreshold: ${SW_SLOW_DB_THRESHOLD:default:200,mongodb:100} #
The slow database access thresholds. Unit ms.
+ forceSampleErrorSegment: ${SW_FORCE_SAMPLE_ERROR_SEGMENT:true} # When the
sampling mechanism is active, setting this to true forces error segments to be
saved. Default is true.
receiver-sharing-server:
selector: ${SW_RECEIVER_SHARING_SERVER:default}