This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 64aa475b8d3 branch-4.0: [fix](fe) Reject lone UTF-16 surrogates in
JSONB literals (RFC 8259 §8.2) #63255 (#63346)
64aa475b8d3 is described below
commit 64aa475b8d38fe7f7322592482c5cd6701d0bbd9
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri May 22 23:55:53 2026 -0700
branch-4.0: [fix](fe) Reject lone UTF-16 surrogates in JSONB literals (RFC
8259 §8.2) #63255 (#63346)
Cherry-picked from #63255
Co-authored-by: morrySnow <[email protected]>
Co-authored-by: Copilot <[email protected]>
---
.../org/apache/doris/analysis/JsonLiteral.java | 39 ++++++-
.../trees/expressions/literal/JsonLiteral.java | 39 ++++++-
.../trees/expressions/literal/JsonLiteralTest.java | 124 +++++++++++++++++++++
3 files changed, 200 insertions(+), 2 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/JsonLiteral.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/JsonLiteral.java
index e617697d34f..b88dc8c8638 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/JsonLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/JsonLiteral.java
@@ -26,10 +26,12 @@ import org.apache.doris.thrift.TExprNode;
import org.apache.doris.thrift.TExprNodeType;
import org.apache.doris.thrift.TJsonLiteral;
+import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import com.google.gson.JsonSyntaxException;
import com.google.gson.annotations.SerializedName;
+import java.util.Map;
import java.util.Objects;
public class JsonLiteral extends LiteralExpr {
@@ -46,7 +48,8 @@ public class JsonLiteral extends LiteralExpr {
public JsonLiteral(String value) throws AnalysisException {
try {
- parser.parse(value);
+ JsonElement element = parser.parse(value);
+ validateNoLoneSurrogate(element);
} catch (JsonSyntaxException e) {
throw new AnalysisException("Invalid jsonb literal: " +
e.getMessage());
}
@@ -55,6 +58,40 @@ public class JsonLiteral extends LiteralExpr {
analysisDone();
}
+ // RFC 8259 §8.2: the grammar permits lone UTF-16 surrogates, but they encode no Unicode character
+ // and receivers treat them unpredictably. Gson accepts them, so we reject them after parsing.
+ // Checked recursively: string values AND object field names; other element kinds are skipped.
+ private static void validateNoLoneSurrogate(JsonElement element) throws
AnalysisException {
+ if (element.isJsonPrimitive() &&
element.getAsJsonPrimitive().isString()) {
+ validateNoLoneSurrogateInString(element.getAsString());
+ } else if (element.isJsonObject()) {
+ for (Map.Entry<String, JsonElement> entry :
element.getAsJsonObject().entrySet()) {
+ validateNoLoneSurrogateInString(entry.getKey());
+ validateNoLoneSurrogate(entry.getValue());
+ }
+ } else if (element.isJsonArray()) {
+ for (JsonElement child : element.getAsJsonArray()) {
+ validateNoLoneSurrogate(child);
+ }
+ }
+ }
+
+ private static void validateNoLoneSurrogateInString(String s) throws
AnalysisException { // rejects s if it holds any surrogate code unit that is not part of a high+low pair
+ for (int i = 0; i < s.length(); i++) { // walk the UTF-16 code units of s
+ char c = s.charAt(i);
+ if (Character.isHighSurrogate(c)) { // only valid when the very next unit is a low surrogate
+ if (i + 1 >= s.length() ||
!Character.isLowSurrogate(s.charAt(i + 1))) {
+ throw new AnalysisException(
+ "Invalid jsonb literal: JSON string contains lone
high surrogate");
+ }
+ i++; // well-formed pair: step over its low half so it is not reported as lone below
+ } else if (Character.isLowSurrogate(c)) { // low surrogate that was not consumed as part of a pair
+ throw new AnalysisException(
+ "Invalid jsonb literal: JSON string contains lone low
surrogate");
+ }
+ }
+ }
+
protected JsonLiteral(JsonLiteral other) {
super(other);
value = other.value;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/JsonLiteral.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/JsonLiteral.java
index 4c4c7dced4c..b563b430893 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/JsonLiteral.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/JsonLiteral.java
@@ -27,6 +27,9 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
+import java.util.Iterator;
+import java.util.Map;
+
/**
* literal for json type.
*/
@@ -49,8 +52,42 @@ public class JsonLiteral extends Literal {
}
if (jsonNode == null || jsonNode.isMissingNode()) {
throw new AnalysisException("Invalid jsonb literal: ''");
+ }
+ validateNoLoneSurrogate(jsonNode);
+ this.value = jsonNode.toString();
+ }
+
+ // RFC 8259 §8.2: the grammar permits lone UTF-16 surrogates, but they encode no Unicode character
+ // and receivers treat them unpredictably. Jackson accepts them, so we reject them after parsing.
+ // Checked recursively: string values AND object field names; throws AnalysisException on a hit.
+ private static void validateNoLoneSurrogate(JsonNode node) {
+ if (node.isTextual()) {
+ validateNoLoneSurrogateInString(node.textValue());
+ } else if (node.isObject()) {
+ Iterator<Map.Entry<String, JsonNode>> fields = node.fields();
+ while (fields.hasNext()) {
+ Map.Entry<String, JsonNode> entry = fields.next();
+ validateNoLoneSurrogateInString(entry.getKey());
+ validateNoLoneSurrogate(entry.getValue());
+ }
} else {
- this.value = jsonNode.toString();
+ node.forEach(JsonLiteral::validateNoLoneSurrogate); // arrays: check each element; scalar nodes iterate over nothing
+ }
+ }
+
+ private static void validateNoLoneSurrogateInString(String s) { // rejects s if it holds any unpaired surrogate code unit
+ for (int i = 0; i < s.length(); i++) { // walk the UTF-16 code units of s
+ char c = s.charAt(i);
+ if (Character.isHighSurrogate(c)) { // only valid when the very next unit is a low surrogate
+ if (i + 1 >= s.length() ||
!Character.isLowSurrogate(s.charAt(i + 1))) {
+ throw new AnalysisException(
+ "Invalid jsonb literal: JSON string contains lone
high surrogate");
+ }
+ i++; // well-formed pair: step over its low half so it is not reported as lone below
+ } else if (Character.isLowSurrogate(c)) { // low surrogate that was not consumed as part of a pair
+ throw new AnalysisException(
+ "Invalid jsonb literal: JSON string contains lone low
surrogate");
+ }
}
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/literal/JsonLiteralTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/literal/JsonLiteralTest.java
new file mode 100644
index 00000000000..6e16ea9805e
--- /dev/null
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/literal/JsonLiteralTest.java
@@ -0,0 +1,124 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.literal;
+
+import org.apache.doris.nereids.exceptions.AnalysisException;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests that the Nereids JsonLiteral accepts well-formed JSON and rejects lone UTF-16 surrogates (RFC 8259 §8.2).
+ */
+public class JsonLiteralTest {
+
+ // --- valid inputs ---
+
+ @Test
+ public void testValidAsciiString() {
+ // a plain ASCII JSON string contains no surrogates and must be accepted
+ Assertions.assertDoesNotThrow(() -> new JsonLiteral("\"hello\""));
+ }
+
+ @Test
+ public void testValidObject() {
+ Assertions.assertDoesNotThrow(() -> new
JsonLiteral("{\"key\":\"value\"}"));
+ }
+
+ @Test
+ public void testValidArray() {
+ Assertions.assertDoesNotThrow(() -> new JsonLiteral("[1, \"abc\",
true]"));
+ }
+
+ @Test
+ public void testValidSurrogatePair() {
+ // \uD83D\uDE00 is a valid surrogate pair (U+1F600, 😀)
+ // JSON escape: "\uD83D\uDE00"
+ Assertions.assertDoesNotThrow(() -> new
JsonLiteral("\"\\uD83D\\uDE00\""));
+ }
+
+ @Test
+ public void testValidSurrogatePairInObject() {
+ Assertions.assertDoesNotThrow(() -> new
JsonLiteral("{\"emoji\":\"\\uD83D\\uDE00\"}"));
+ }
+
+ // --- lone high surrogate ---
+
+ @Test
+ public void testLoneHighSurrogateTopLevel() {
+ // "\uD800" — lone high surrogate, no paired low surrogate
+ AnalysisException ex = Assertions.assertThrows(AnalysisException.class,
+ () -> new JsonLiteral("\"\\uD800\""));
+ Assertions.assertTrue(ex.getMessage().contains("lone high surrogate"),
+ "Expected 'lone high surrogate' in: " + ex.getMessage());
+ }
+
+ @Test
+ public void testLoneHighSurrogateInObject() {
+ AnalysisException ex = Assertions.assertThrows(AnalysisException.class,
+ () -> new JsonLiteral("{\"k\":\"\\uD800\"}"));
+ Assertions.assertTrue(ex.getMessage().contains("lone high surrogate"));
+ }
+
+ @Test
+ public void testLoneHighSurrogateInArray() {
+ AnalysisException ex = Assertions.assertThrows(AnalysisException.class,
+ () -> new JsonLiteral("[\"\\uD800\"]"));
+ Assertions.assertTrue(ex.getMessage().contains("lone high surrogate"));
+ }
+
+ @Test
+ public void testHighSurrogateFollowedByNonLow() {
+ // \uD800 followed by a literal 'A' (U+0041): the next code unit is not a low surrogate
+ AnalysisException ex = Assertions.assertThrows(AnalysisException.class,
+ () -> new JsonLiteral("\"\\uD800A\""));
+ Assertions.assertTrue(ex.getMessage().contains("lone high surrogate"));
+ }
+
+ // --- lone low surrogate, and surrogates inside object field names ---
+
+ @Test
+ public void testLoneLowSurrogateTopLevel() {
+ // "\uDC00" — lone low surrogate
+ AnalysisException ex = Assertions.assertThrows(AnalysisException.class,
+ () -> new JsonLiteral("\"\\uDC00\""));
+ Assertions.assertTrue(ex.getMessage().contains("lone low surrogate"),
+ "Expected 'lone low surrogate' in: " + ex.getMessage());
+ }
+
+ @Test
+ public void testLoneHighSurrogateInObjectKey() {
+ // lone surrogate in object field name must also be rejected
+ AnalysisException ex = Assertions.assertThrows(AnalysisException.class,
+ () -> new JsonLiteral("{\"\\uD800\":\"value\"}"));
+ Assertions.assertTrue(ex.getMessage().contains("lone high surrogate"));
+ }
+
+ @Test
+ public void testLoneLowSurrogateInObjectKey() {
+ AnalysisException ex = Assertions.assertThrows(AnalysisException.class,
+ () -> new JsonLiteral("{\"\\uDC00\":\"value\"}"));
+ Assertions.assertTrue(ex.getMessage().contains("lone low surrogate"));
+ }
+
+ @Test
+ public void testValidSurrogatePairInObjectKey() {
+ // valid surrogate pair in key must be accepted
+ Assertions.assertDoesNotThrow(() -> new
JsonLiteral("{\"\\uD83D\\uDE00\":\"ok\"}"));
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]