This is an automated email from the ASF dual-hosted git repository.

mrhhsg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b6d8ef45d7c [chore](be) Document lightweight JSONB validation (#63359)
b6d8ef45d7c is described below

commit b6d8ef45d7c3247515324d3acb9dbfe21a7ef353
Author: Jerry Hu <[email protected]>
AuthorDate: Wed May 20 20:47:26 2026 +0800

    [chore](be) Document lightweight JSONB validation (#63359)
    
    ### What problem does this PR solve?
    
    Issue Number: DORIS-25577
    
    Problem Summary: After evaluating the JSONB INSERT/Stream Load path and
    JSONB query call sites, recursive full validation in
    `JsonbDocument::checkAndCreateDocument` is not appropriate for the
    default helper. INSERT and load inputs are JSON text that BE re-encodes
    through `JsonBinaryValue`/`JsonbWriter`, while `checkAndCreateDocument`
    and `createValue` are also used by JSONB scalar/table functions in
    per-row hot paths.
    
    This PR documents why these helpers intentionally remain lightweight and
    why any future deep validation should be added only at a clearly
    untrusted raw JSONB binary boundary.
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test:
        - Manual test: `git diff --check`
    - Behavior changed: No
    - Does this need documentation: No
---
 be/src/util/jsonb_document.cpp | 7 ++++++-
 be/src/util/jsonb_document.h   | 3 +++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/be/src/util/jsonb_document.cpp b/be/src/util/jsonb_document.cpp
index 05a44f4b30f..e0efc375b96 100644
--- a/be/src/util/jsonb_document.cpp
+++ b/be/src/util/jsonb_document.cpp
@@ -52,6 +52,11 @@ Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size,
     }
 
     const auto* val = (const JsonbValue*)doc_ptr->payload_;
+    // Keep this check lightweight. This API is used by JSONB scalar/table functions on every row,
+    // so recursively validating object/array payloads here would add an O(document size) scan before
+    // the real operation and can regress large JSONB queries. External INSERT/LOAD paths build JSONB
+    // through JsonBinaryValue/JsonbWriter before storage; any untrusted raw binary boundary should
+    // add explicit deep validation there instead of changing this hot-path helper.
     if (val->type < JsonbType::T_Null || val->type >= JsonbType::NUM_TYPES ||
         size != sizeof(JsonbHeader) + val->numPackedBytes()) {
         return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})",
@@ -217,4 +222,4 @@ std::vector<std::pair<StringRef, const JsonbValue*>> ObjectVal::get_ordered_key_
     return kvs;
 }
 
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/util/jsonb_document.h b/be/src/util/jsonb_document.h
index 3ee00709ec0..c141d67c79a 100644
--- a/be/src/util/jsonb_document.h
+++ b/be/src/util/jsonb_document.h
@@ -1037,6 +1037,9 @@ inline const JsonbValue* JsonbDocument::createValue(const char* pb, size_t size)
     }
 
     const auto* val = (const JsonbValue*)doc->payload_;
+    // Same as checkAndCreateDocument(), this is intentionally a lightweight structural check for
+    // hot paths. Do not recursively validate container bodies here unless the caller is a clearly
+    // untrusted raw binary boundary and accepts the O(document size) cost.
     if (size != sizeof(JsonbHeader) + val->numPackedBytes()) {
         return nullptr;
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to