This is an automated email from the ASF dual-hosted git repository.
duansg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hertzbeat.git
The following commit(s) were added to refs/heads/master by this push:
new 67c3727362 [Refactor] tighten resource constraints for XML/XPath
collector (#3999)
67c3727362 is described below
commit 67c372736265aee641f279dfc217abcb7455cf1a
Author: Logic <[email protected]>
AuthorDate: Thu Jan 22 18:51:01 2026 +0800
[Refactor] tighten resource constraints for XML/XPath collector (#3999)
---
.../collector/collect/http/HttpCollectImpl.java | 100 +++++++++++++++++++--
.../collector/constants/CollectorConstants.java | 24 +++++
2 files changed, 118 insertions(+), 6 deletions(-)
diff --git
a/hertzbeat-collector/hertzbeat-collector-basic/src/main/java/org/apache/hertzbeat/collector/collect/http/HttpCollectImpl.java
b/hertzbeat-collector/hertzbeat-collector-basic/src/main/java/org/apache/hertzbeat/collector/collect/http/HttpCollectImpl.java
index 2bd9132d22..03e2d0dc4c 100644
---
a/hertzbeat-collector/hertzbeat-collector-basic/src/main/java/org/apache/hertzbeat/collector/collect/http/HttpCollectImpl.java
+++
b/hertzbeat-collector/hertzbeat-collector-basic/src/main/java/org/apache/hertzbeat/collector/collect/http/HttpCollectImpl.java
@@ -39,6 +39,7 @@ import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import javax.net.ssl.SSLException;
+import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
@@ -106,6 +107,22 @@ import java.util.Collections;
*/
@Slf4j
public class HttpCollectImpl extends AbstractCollect {
+
+ /**
+ * Pre-compiled regex patterns for dangerous Xpath detection.
+ * Compiled once at class load for performance.
+ */
+ private static final List<Pattern> DANGEROUS_XPATH_PATTERNS;
+
+ static {
+ List<Pattern> patterns = new ArrayList<>();
+ for (String pattern : CollectorConstants.DANGEROUS_XPATH_PATTERNS) {
+ // Use CASE_INSENSITIVE to prevent bypass via case variations
(e.g., //TEXT() vs //text())
+ patterns.add(Pattern.compile(pattern, Pattern.CASE_INSENSITIVE |
Pattern.DOTALL));
+ }
+ DANGEROUS_XPATH_PATTERNS = Collections.unmodifiableList(patterns);
+ }
+
private final Set<Integer> defaultSuccessStatusCodes = Set.of(
HttpStatus.SC_OK,
HttpStatus.SC_CREATED,
@@ -357,21 +374,83 @@ public class HttpCollectImpl extends AbstractCollect {
}
}
+ /**
+ * Validates the Xpath expression to prevent DoS attacks.
+ * Checks for dangerous patterns that could traverse the entire XML
document.
+ * Uses pre-compiled patterns for performance and case-insensitive
matching for security.
+ *
+ * @param xpathExpression the Xpath expression to validate
+ * @throws IllegalArgumentException if the expression contains dangerous
patterns
+ */
+ private void validateXpathExpression(String xpathExpression) throws
IllegalArgumentException {
+ if (!StringUtils.hasText(xpathExpression)) {
+ return;
+ }
+
+ // Check against dangerous patterns using pre-compiled regex
+ for (Pattern pattern : DANGEROUS_XPATH_PATTERNS) {
+ Matcher matcher = pattern.matcher(xpathExpression);
+ if (matcher.find()) {
+ throw new IllegalArgumentException(
+ "Xpath expression contains dangerous pattern that may
cause DoS: " + pattern.pattern()
+ );
+ }
+ }
+
+ // Check for excessive path or wildcard usage (more than 10 '/' characters or more than 5 '*' characters)
+ long descendantAxisCount = xpathExpression.chars().filter(ch -> ch ==
'/').count();
+ long wildcardCount = xpathExpression.chars().filter(ch -> ch ==
'*').count();
+
+ if (descendantAxisCount > 10 || wildcardCount > 5) {
+ throw new IllegalArgumentException(
+ "Xpath expression contains too many wildcards or descendant
axes, potential DoS risk"
+ );
+ }
+
+ log.debug("Xpath expression validation passed: {}", xpathExpression);
+ }
+
private void parseResponseByXmlPath(String resp, Metrics metrics,
CollectRep.MetricsData.Builder
builder, Long responseTime) {
HttpProtocol http = metrics.getHttp();
List<String> aliasFields = metrics.getAliasFields();
String xpathExpression = http.getParseScript();
+
+ // Layer 1: Validate Xpath expression is not empty
if (!StringUtils.hasText(xpathExpression)) {
log.warn("Http collect parse type is xmlPath, but the xpath
expression is empty.");
builder.setCode(CollectRep.Code.FAIL);
- builder.setMsg("XPath expression is empty");
+ builder.setMsg("Xpath expression is empty");
+ return;
+ }
+
+ // Layer 2: Check XML response size to prevent memory exhaustion
+ if (resp != null && resp.length() >
CollectorConstants.MAX_XML_RESPONSE_SIZE) {
+ log.warn("XML response size {} bytes exceeds maximum allowed size
{} bytes",
+ resp.length(), CollectorConstants.MAX_XML_RESPONSE_SIZE);
+ builder.setCode(CollectRep.Code.FAIL);
+ builder.setMsg("XML response exceeds maximum allowed size of "
+ + (CollectorConstants.MAX_XML_RESPONSE_SIZE / 1024 / 1024) +
"MB");
+ return;
+ }
+
+ // Layer 3: Validate Xpath expression for dangerous patterns
+ try {
+ validateXpathExpression(xpathExpression);
+ } catch (IllegalArgumentException e) {
+ log.warn("Xpath expression validation failed: {}", e.getMessage());
+ builder.setCode(CollectRep.Code.FAIL);
+ builder.setMsg(e.getMessage());
return;
}
+
int keywordNum = CollectUtil.countMatchKeyword(resp,
http.getKeyword());
try {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+
+ // Layer 4: Enable XML secure processing and XXE protection
+ dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
dbf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
dbf.setFeature("http://xml.org/sax/features/external-parameter-entities",
false);
@@ -387,7 +466,7 @@ public class HttpCollectImpl extends AbstractCollect {
NodeList nodeList = (NodeList) xpath.evaluate(xpathExpression,
document, XPathConstants.NODESET);
if (nodeList == null || nodeList.getLength() == 0) {
- log.debug("XPath expression '{}' returned no nodes.",
xpathExpression);
+ log.debug("Xpath expression '{}' returned no nodes.",
xpathExpression);
boolean requestedSummaryFields = aliasFields.stream()
.anyMatch(alias ->
NetworkConstants.RESPONSE_TIME.equalsIgnoreCase(alias)
||
CollectorConstants.KEYWORD.equalsIgnoreCase(alias));
@@ -408,7 +487,16 @@ public class HttpCollectImpl extends AbstractCollect {
return;
}
- for (int i = 0; i < nodeList.getLength(); i++) {
+ // Layer 5: Limit the number of results to prevent excessive
resource consumption
+ int resultSize = nodeList.getLength();
+ int maxResults = CollectorConstants.MAX_XPATH_RESULT_NODES;
+ if (resultSize > maxResults) {
+ log.warn("Xpath expression returned {} nodes, exceeding limit
of {}. Processing first {} nodes only.",
+ resultSize, maxResults, maxResults);
+ resultSize = maxResults;
+ }
+
+ for (int i = 0; i < resultSize; i++) {
Node node = nodeList.item(i);
CollectRep.ValueRow.Builder valueRowBuilder =
CollectRep.ValueRow.newBuilder();
@@ -422,7 +510,7 @@ public class HttpCollectImpl extends AbstractCollect {
String value = (String) xpath.evaluate(alias,
node, XPathConstants.STRING);
valueRowBuilder.addColumn(StringUtils.hasText(value) ? value :
CommonConstants.NULL_VALUE);
} catch (XPathExpressionException e) {
- log.warn("Failed to evaluate XPath '{}' for node
[{}]: {}", alias, node.getNodeName(), e.getMessage());
+ log.warn("Failed to evaluate Xpath '{}' for node
[{}]: {}", alias, node.getNodeName(), e.getMessage());
valueRowBuilder.addColumn(CommonConstants.NULL_VALUE);
}
}
@@ -431,7 +519,7 @@ public class HttpCollectImpl extends AbstractCollect {
}
} catch (Exception e) {
- log.warn("Failed to parse XML response with XPath '{}': {}",
xpathExpression, e.getMessage(), e);
+ log.warn("Failed to parse XML response with Xpath '{}': {}",
xpathExpression, e.getMessage(), e);
builder.setCode(CollectRep.Code.FAIL);
builder.setMsg("Failed to parse XML response: " + e.getMessage());
}
@@ -882,4 +970,4 @@ public class HttpCollectImpl extends AbstractCollect {
}
return successCodeSet.contains(statusCode);
}
-}
\ No newline at end of file
+}
diff --git
a/hertzbeat-collector/hertzbeat-collector-common/src/main/java/org/apache/hertzbeat/collector/constants/CollectorConstants.java
b/hertzbeat-collector/hertzbeat-collector-common/src/main/java/org/apache/hertzbeat/collector/constants/CollectorConstants.java
index a52092e2f0..32d7b29de3 100644
---
a/hertzbeat-collector/hertzbeat-collector-common/src/main/java/org/apache/hertzbeat/collector/constants/CollectorConstants.java
+++
b/hertzbeat-collector/hertzbeat-collector-common/src/main/java/org/apache/hertzbeat/collector/constants/CollectorConstants.java
@@ -49,4 +49,28 @@ public interface CollectorConstants extends NetworkConstants
{
String STATUS_CODE = "statusCode";
+ /**
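+ * Maximum XML response size (10 * 1024 * 1024, nominally 10MB) to prevent DoS attacks.
+ * NOTE(review): the HTTP collector compares this against String.length(), which counts chars rather than bytes.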
+ */
+ int MAX_XML_RESPONSE_SIZE = 10 * 1024 * 1024;
+
+ /**
+ * Maximum number of nodes returned by Xpath query to prevent excessive
resource consumption
+ */
+ int MAX_XPATH_RESULT_NODES = 1000;
+
+ /**
+ * Dangerous Xpath expression patterns that could cause DoS attacks
+ * These patterns match expressions that traverse the entire XML document
+ */
+ String[] DANGEROUS_XPATH_PATTERNS = {
+ "//\\*\\s*\\|\\s*//@\\*\\s*\\|\\s*//text\\(\\)", // //* | //@* |
//text()
+ "//\\*\\s*\\|", // //* | ...
+ "//@\\*\\s*\\|", // //@* | ...
+ "//node\\(\\)\\s*\\|", // //node() | ...
+ "descendant-or-self::node\\(\\)\\s*\\|", //
descendant-or-self::node() | ...
+ "/descendant-or-self::node\\(\\)", //
/descendant-or-self::node()
+ "//\\*[\\s\\S]*//\\*" // //* ... //* (two or more descendant wildcards anywhere in the expression)
+ };
+
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]