This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new b62273acb45 [refactor](jsonb) Make the checkAndCreateDocument function return a Status (#51673)
b62273acb45 is described below
commit b62273acb4504f1f76723e39161500ea66e3d594
Author: Jerry Hu <[email protected]>
AuthorDate: Tue Jun 17 09:40:57 2025 +0800
[refactor](jsonb) Make the checkAndCreateDocument function return a Status (#51673)
---
be/src/util/jsonb_document.h | 250 ++++++++++-----------
be/src/util/jsonb_utils.h | 24 +-
be/src/util/jsonb_writer.h | 11 +-
.../exprs/table_function/vexplode_json_array.cpp | 5 +-
.../exprs/table_function/vexplode_json_object.cpp | 5 +-
be/src/vec/functions/function_cast.h | 12 +-
be/src/vec/functions/function_jsonb.cpp | 40 ++--
be/src/vec/jsonb/serialize.cpp | 5 +-
be/src/vec/olap/olap_data_convertor.cpp | 16 +-
.../vec/data_types/common_data_type_serder_test.h | 5 +-
.../vec/data_types/serde/data_type_serde_test.cpp | 8 +-
be/test/vec/olap/jsonb_value_test.cpp | 6 +-
12 files changed, 198 insertions(+), 189 deletions(-)
diff --git a/be/src/util/jsonb_document.h b/be/src/util/jsonb_document.h
index fb87114f886..7f50c4012ba 100644
--- a/be/src/util/jsonb_document.h
+++ b/be/src/util/jsonb_document.h
@@ -66,10 +66,6 @@
#ifndef JSONB_JSONBDOCUMENT_H
#define JSONB_JSONBDOCUMENT_H
-#include <assert.h>
-#include <stdint.h>
-#include <stdlib.h>
-
#include <algorithm>
#include <cctype>
#include <charconv>
@@ -78,6 +74,7 @@
#include <type_traits>
#include "common/compiler_util.h" // IWYU pragma: keep
+#include "common/status.h"
// #include "util/string_parser.hpp"
@@ -172,17 +169,14 @@ constexpr unsigned int ARRAY_CODE = 1;
*/
class JsonbDocument {
public:
- // Prepare a document in the buffer
- static JsonbDocument* makeDocument(char* pb, uint32_t size, JsonbType
type);
- static JsonbDocument* makeDocument(char* pb, uint32_t size, const
JsonbValue* rval);
-
// create an JsonbDocument object from JSONB packed bytes
- static JsonbDocument* checkAndCreateDocument(const char* pb, size_t size);
+ [[nodiscard]] static Status checkAndCreateDocument(const char* pb, size_t
size,
+ JsonbDocument** doc);
// create an JsonbValue from JSONB packed bytes
static JsonbValue* createValue(const char* pb, uint32_t size);
- uint8_t version() { return header_.ver_; }
+ uint8_t version() const { return header_.ver_; }
JsonbValue* getValue() { return ((JsonbValue*)payload_); }
@@ -194,7 +188,6 @@ public:
const ObjectVal* operator->() const { return ((const ObjectVal*)payload_);
}
-public:
bool operator==(const JsonbDocument& other) const {
assert(false);
return false;
@@ -409,11 +402,10 @@ private:
template <class Iter_Type, class Cont_Type>
class JsonbFwdIteratorT {
public:
- typedef Iter_Type iterator;
- typedef typename std::iterator_traits<Iter_Type>::pointer pointer;
- typedef typename std::iterator_traits<Iter_Type>::reference reference;
+ using iterator = Iter_Type;
+ using pointer = typename std::iterator_traits<Iter_Type>::pointer;
+ using reference = typename std::iterator_traits<Iter_Type>::reference;
-public:
explicit JsonbFwdIteratorT() : current_(nullptr) {}
explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}
@@ -452,10 +444,10 @@ private:
iterator current_;
};
-typedef int (*hDictInsert)(const char* key, unsigned len);
-typedef int (*hDictFind)(const char* key, unsigned len);
+using hDictInsert = int (*)(const char*, unsigned int);
+using hDictFind = int (*)(const char*, unsigned int);
-typedef std::underlying_type<JsonbType>::type JsonbTypeUnder;
+using JsonbTypeUnder = std::underlying_type_t<JsonbType>;
/*
* JsonbKeyValue class defines JSONB key type, as described below.
@@ -484,7 +476,9 @@ class JsonbKeyValue {
public:
// now we use sMaxKeyId to represent an empty key
static const int sMaxKeyId = 65535;
- typedef uint16_t keyid_type;
+ using keyid_type = uint16_t;
+
+ JsonbKeyValue() = delete;
static const uint8_t sMaxKeyLen = 64;
@@ -512,8 +506,6 @@ private:
keyid_type id_;
char str_[1];
} key_;
-
- JsonbKeyValue();
};
/*
@@ -620,7 +612,7 @@ private:
NumberValT();
};
-typedef NumberValT<int8_t> JsonbInt8Val;
+using JsonbInt8Val = NumberValT<int8_t>;
// override setVal for Int8Val
template <>
@@ -633,7 +625,7 @@ inline bool JsonbInt8Val::setVal(int8_t value) {
return true;
}
-typedef NumberValT<int16_t> JsonbInt16Val;
+using JsonbInt16Val = NumberValT<int16_t>;
// override setVal for Int16Val
template <>
@@ -645,7 +637,7 @@ inline bool JsonbInt16Val::setVal(int16_t value) {
num_ = value;
return true;
}
-typedef NumberValT<int32_t> JsonbInt32Val;
+using JsonbInt32Val = NumberValT<int32_t>;
// override setVal for Int32Val
template <>
@@ -658,7 +650,7 @@ inline bool JsonbInt32Val::setVal(int32_t value) {
return true;
}
-typedef NumberValT<int64_t> JsonbInt64Val;
+using JsonbInt64Val = NumberValT<int64_t>;
// override setVal for Int64Val
template <>
@@ -671,7 +663,7 @@ inline bool JsonbInt64Val::setVal(int64_t value) {
return true;
}
-typedef NumberValT<int128_t> JsonbInt128Val;
+using JsonbInt128Val = NumberValT<int128_t>;
// override setVal for Int128Val
template <>
@@ -684,7 +676,7 @@ inline bool JsonbInt128Val::setVal(int128_t value) {
return true;
}
-typedef NumberValT<double> JsonbDoubleVal;
+using JsonbDoubleVal = NumberValT<double>;
// override setVal for DoubleVal
template <>
@@ -697,7 +689,7 @@ inline bool JsonbDoubleVal::setVal(double value) {
return true;
}
-typedef NumberValT<float> JsonbFloatVal;
+using JsonbFloatVal = NumberValT<float>;
// override setVal for DoubleVal
template <>
@@ -733,18 +725,21 @@ public:
switch (type_) {
case JsonbType::T_Int8:
if (val < std::numeric_limits<int8_t>::min() ||
- val > std::numeric_limits<int8_t>::max())
+ val > std::numeric_limits<int8_t>::max()) {
return false;
+ }
return ((JsonbInt8Val*)this)->setVal((int8_t)val);
case JsonbType::T_Int16:
if (val < std::numeric_limits<int16_t>::min() ||
- val > std::numeric_limits<int16_t>::max())
+ val > std::numeric_limits<int16_t>::max()) {
return false;
+ }
return ((JsonbInt16Val*)this)->setVal((int16_t)val);
case JsonbType::T_Int32:
if (val < std::numeric_limits<int32_t>::min() ||
- val > std::numeric_limits<int32_t>::max())
+ val > std::numeric_limits<int32_t>::max()) {
return false;
+ }
return ((JsonbInt32Val*)this)->setVal((int32_t)val);
case JsonbType::T_Int64:
return ((JsonbInt64Val*)this)->setVal((int64_t)val);
@@ -833,7 +828,9 @@ public:
*/
size_t length() {
// It's an empty string
- if (0 == size_) return size_;
+ if (0 == size_) {
+ return size_;
+ }
// The string stored takes all the spaces in payload_
if (payload_[size_ - 1] != 0) {
return size_;
@@ -847,14 +844,15 @@ public:
// all other strings: -1
int getBoolVal() {
if (size_ == 4 && tolower(payload_[0]) == 't' && tolower(payload_[1])
== 'r' &&
- tolower(payload_[2]) == 'u' && tolower(payload_[3]) == 'e')
+ tolower(payload_[2]) == 'u' && tolower(payload_[3]) == 'e') {
return 1;
- else if (size_ == 5 && tolower(payload_[0]) == 'f' &&
tolower(payload_[1]) == 'a' &&
- tolower(payload_[2]) == 'l' && tolower(payload_[3]) == 's' &&
- tolower(payload_[4]) == 'e')
+ } else if (size_ == 5 && tolower(payload_[0]) == 'f' &&
tolower(payload_[1]) == 'a' &&
+ tolower(payload_[2]) == 'l' && tolower(payload_[3]) == 's'
&&
+ tolower(payload_[4]) == 'e') {
return 0;
- else
+ } else {
return -1;
+ }
}
private:
@@ -889,13 +887,12 @@ protected:
*/
class ObjectVal : public ContainerVal {
public:
- typedef JsonbKeyValue value_type;
- typedef value_type* pointer;
- typedef const value_type* const_pointer;
- typedef JsonbFwdIteratorT<pointer, ObjectVal> iterator;
- typedef JsonbFwdIteratorT<const_pointer, ObjectVal> const_iterator;
+ using value_type = JsonbKeyValue;
+ using pointer = value_type*;
+ using const_pointer = const value_type*;
+ using iterator = JsonbFwdIteratorT<pointer, ObjectVal>;
+ using const_iterator = JsonbFwdIteratorT<const_pointer, ObjectVal>;
-public:
const_iterator search(const char* key, hDictFind handler = nullptr) const {
return const_cast<ObjectVal*>(this)->search(key, handler);
}
@@ -913,7 +910,9 @@ public:
}
iterator search(const char* key, unsigned int klen, hDictFind handler =
nullptr) {
- if (!key || !klen) return end();
+ if (!key || !klen) {
+ return end();
+ }
int key_id = -1;
if (handler && (key_id = handler(key, klen)) >= 0) {
@@ -923,13 +922,15 @@ public:
}
iterator search(int key_id) {
- if (key_id < 0 || key_id > JsonbKeyValue::sMaxKeyId) return end();
+ if (key_id < 0 || key_id > JsonbKeyValue::sMaxKeyId) {
+ return end();
+ }
const char* pch = payload_;
const char* fence = payload_ + size_;
while (pch < fence) {
- JsonbKeyValue* pkey = (JsonbKeyValue*)(pch);
+ auto* pkey = (JsonbKeyValue*)(pch);
if (!pkey->klen() && key_id == pkey->getKeyId()) {
return iterator(pkey);
}
@@ -947,7 +948,7 @@ public:
unsigned int num = 0;
while (pch < fence) {
- JsonbKeyValue* pkey = (JsonbKeyValue*)(pch);
+ auto* pkey = (JsonbKeyValue*)(pch);
++num;
pch += pkey->numPackedBytes();
}
@@ -963,8 +964,10 @@ public:
unsigned int num = 0;
while (pch < fence) {
- JsonbKeyValue* pkey = (JsonbKeyValue*)(pch);
- if (num == i) return pkey;
+ auto* pkey = (JsonbKeyValue*)(pch);
+ if (num == i) {
+ return pkey;
+ }
++num;
pch += pkey->numPackedBytes();
}
@@ -985,21 +988,27 @@ public:
// find the JSONB value by a key string (null terminated)
JsonbValue* find(const char* key, hDictFind handler = nullptr) {
- if (!key) return nullptr;
+ if (!key) {
+ return nullptr;
+ }
return find(key, (unsigned int)strlen(key), handler);
}
// find the JSONB value by a key string (with length)
JsonbValue* find(const char* key, unsigned int klen, hDictFind handler =
nullptr) {
iterator kv = search(key, klen, handler);
- if (end() == kv) return nullptr;
+ if (end() == kv) {
+ return nullptr;
+ }
return kv->value();
}
// find the JSONB value by a key dictionary ID
JsonbValue* find(int key_id) {
iterator kv = search(key_id);
- if (end() == kv) return nullptr;
+ if (end() == kv) {
+ return nullptr;
+ }
return kv->value();
}
@@ -1017,7 +1026,7 @@ private:
const char* fence = payload_ + size_;
while (pch < fence) {
- JsonbKeyValue* pkey = (JsonbKeyValue*)(pch);
+ auto* pkey = (JsonbKeyValue*)(pch);
if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen)
== 0) {
return iterator(pkey);
}
@@ -1038,15 +1047,27 @@ private:
*/
class ArrayVal : public ContainerVal {
public:
+ using value_type = JsonbValue;
+ using pointer = value_type*;
+ using const_pointer = const value_type*;
+ using iterator = JsonbFwdIteratorT<pointer, ArrayVal>;
+ using const_iterator = JsonbFwdIteratorT<const_pointer, ArrayVal>;
+
// get the JSONB value at index
JsonbValue* get(int idx) const {
- if (idx < 0) return nullptr;
+ if (idx < 0) {
+ return nullptr;
+ }
const char* pch = payload_;
const char* fence = payload_ + size_;
- while (pch < fence && idx-- > 0) pch +=
((JsonbValue*)pch)->numPackedBytes();
- if (idx > 0 || pch == fence) return nullptr;
+ while (pch < fence && idx-- > 0) {
+ pch += ((JsonbValue*)pch)->numPackedBytes();
+ }
+ if (idx > 0 || pch == fence) {
+ return nullptr;
+ }
return (JsonbValue*)pch;
}
@@ -1067,12 +1088,6 @@ public:
return num;
}
- typedef JsonbValue value_type;
- typedef value_type* pointer;
- typedef const value_type* const_pointer;
- typedef JsonbFwdIteratorT<pointer, ArrayVal> iterator;
- typedef JsonbFwdIteratorT<const_pointer, ArrayVal> const_iterator;
-
iterator begin() { return iterator((pointer)payload_); }
const_iterator begin() const { return const_iterator((pointer)payload_); }
@@ -1085,76 +1100,29 @@ private:
ArrayVal();
};
-// Prepare an empty document
-// input: pb - buuffer/packed bytes for jsonb document
-// size - size of the buffer
-// type - value type in the document
-inline JsonbDocument* JsonbDocument::makeDocument(char* pb, uint32_t size,
JsonbType type) {
+inline Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t
size,
+ JsonbDocument** doc) {
+ *doc = nullptr;
if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
- return nullptr;
+ return Status::InvalidArgument("Invalid JSONB document: too small
size({}) or null pointer",
+ size);
}
- if (type < JsonbType::T_Null || type >= JsonbType::NUM_TYPES) {
- return nullptr;
- }
- JsonbDocument* doc = (JsonbDocument*)pb;
- // Write header
- doc->header_.ver_ = JSONB_VER;
- JsonbValue* value = doc->getValue();
- // Write type
- value->type_ = type;
-
- // Set empty JsonbValue
- if (type == JsonbType::T_Object || type == JsonbType::T_Array)
- ((ContainerVal*)value)->size_ = 0;
- if (type == JsonbType::T_String || type == JsonbType::T_Binary)
- ((JsonbBlobVal*)value)->size_ = 0;
- return doc;
-}
-
-// Prepare a document from an JsonbValue
-// input: pb - buuffer/packed bytes for jsonb document
-// size - size of the buffer
-// rval - jsonb value to be copied into the document
-inline JsonbDocument* JsonbDocument::makeDocument(char* pb, uint32_t size,
const JsonbValue* rval) {
- // checking if the buffer is big enough to store the value
- if (!pb || !rval || size < sizeof(JsonbHeader) + rval->numPackedBytes()) {
- return nullptr;
- }
-
- JsonbType type = rval->type();
- if (type < JsonbType::T_Null || type >= JsonbType::NUM_TYPES) {
- return nullptr;
+ auto* doc_ptr = (JsonbDocument*)pb;
+ if (doc_ptr->header_.ver_ != JSONB_VER) {
+ return Status::InvalidArgument("Invalid JSONB document: invalid
version({})",
+ doc_ptr->header_.ver_);
}
- JsonbDocument* doc = (JsonbDocument*)pb;
- // Write header
- doc->header_.ver_ = JSONB_VER;
- // get the starting byte of the value
- JsonbValue* value = doc->getValue();
- // binary copy of the rval
- if (value != rval) // copy not necessary if values are the same
- memmove(value, rval, rval->numPackedBytes());
- return doc;
-}
-
-inline JsonbDocument* JsonbDocument::checkAndCreateDocument(const char* pb,
size_t size) {
- if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
- return nullptr;
- }
-
- JsonbDocument* doc = (JsonbDocument*)pb;
- if (doc->header_.ver_ != JSONB_VER) {
- return nullptr;
- }
-
- JsonbValue* val = (JsonbValue*)doc->payload_;
+ auto* val = (JsonbValue*)doc_ptr->payload_;
if (val->type() < JsonbType::T_Null || val->type() >= JsonbType::NUM_TYPES
||
size != sizeof(JsonbHeader) + val->numPackedBytes()) {
- return nullptr;
+ return Status::InvalidArgument("Invalid JSONB document: invalid
type({}) or size({})",
+
static_cast<JsonbTypeUnder>(val->type()), size);
}
- return doc;
+ *doc = doc_ptr;
+ return Status::OK();
}
inline void JsonbDocument::setValue(const JsonbValue* value) {
memcpy(payload_, value, value->numPackedBytes());
@@ -1165,12 +1133,12 @@ inline JsonbValue* JsonbDocument::createValue(const char* pb, uint32_t size) {
return nullptr;
}
- JsonbDocument* doc = (JsonbDocument*)pb;
+ auto* doc = (JsonbDocument*)pb;
if (doc->header_.ver_ != JSONB_VER) {
return nullptr;
}
- JsonbValue* val = (JsonbValue*)doc->payload_;
+ auto* val = (JsonbValue*)doc->payload_;
if (size != sizeof(JsonbHeader) + val->numPackedBytes()) {
return nullptr;
}
@@ -1184,7 +1152,7 @@ inline unsigned int JsonbDocument::numPackedBytes() const
{
inline unsigned int JsonbKeyValue::numPackedBytes() const {
unsigned int ks = keyPackedBytes();
- JsonbValue* val = (JsonbValue*)(((char*)this) + ks);
+ auto* val = (JsonbValue*)(((char*)this) + ks);
return ks + val->numPackedBytes();
}
@@ -1323,8 +1291,8 @@ inline bool JsonbValue::contains(JsonbValue* rhs) const {
case JsonbType::T_String:
case JsonbType::T_Binary: {
if (rhs->isString()) {
- auto str_value1 = (JsonbStringVal*)this;
- auto str_value2 = (JsonbStringVal*)rhs;
+ auto* str_value1 = (JsonbStringVal*)this;
+ auto* str_value2 = (JsonbStringVal*)rhs;
return str_value1->length() == str_value2->length() &&
std::memcmp(str_value1->getBlob(), str_value2->getBlob(),
str_value1->length()) == 0;
@@ -1335,7 +1303,9 @@ inline bool JsonbValue::contains(JsonbValue* rhs) const {
int lhs_num = ((ArrayVal*)this)->numElem();
if (rhs->isArray()) {
int rhs_num = ((ArrayVal*)rhs)->numElem();
- if (rhs_num > lhs_num) return false;
+ if (rhs_num > lhs_num) {
+ return false;
+ }
int contains_num = 0;
for (int i = 0; i < lhs_num; ++i) {
for (int j = 0; j < rhs_num; ++j) {
@@ -1356,13 +1326,14 @@ inline bool JsonbValue::contains(JsonbValue* rhs) const
{
}
case JsonbType::T_Object: {
if (rhs->isObject()) {
- auto str_value1 = (ObjectVal*)this;
- auto str_value2 = (ObjectVal*)rhs;
+ auto* str_value1 = (ObjectVal*)this;
+ auto* str_value2 = (ObjectVal*)rhs;
for (int i = 0; i < str_value2->numElem(); ++i) {
JsonbKeyValue* key = str_value2->getJsonbKeyValue(i);
JsonbValue* value = str_value1->find(key->getKeyStr(),
key->klen());
- if (key != nullptr && value != nullptr &&
!value->contains(key->value()))
+ if (key != nullptr && value != nullptr &&
!value->contains(key->value())) {
return false;
+ }
}
return true;
}
@@ -1411,7 +1382,9 @@ inline const char* JsonbValue::getValuePtr() const {
inline bool JsonbPath::seek(const char* key_path, size_t kp_len) {
//path invalid
- if (!key_path || kp_len == 0) return false;
+ if (!key_path || kp_len == 0) {
+ return false;
+ }
Stream stream(key_path, kp_len);
stream.skip_whitespace();
if (stream.exhausted() || stream.read() != SCOPE) {
@@ -1447,7 +1420,9 @@ inline JsonbValue* JsonbValue::findValue(JsonbPath& path,
hDictFind handler) {
->find(path.get_leg_from_leg_vector(i)->leg_ptr,
path.get_leg_from_leg_vector(i)->leg_len, handler);
- if (!pval) return nullptr;
+ if (!pval) {
+ return nullptr;
+ }
continue;
} else {
return nullptr;
@@ -1470,8 +1445,9 @@ inline JsonbValue* JsonbValue::findValue(JsonbPath& path,
hDictFind handler) {
if (pval->type_ != JsonbType::T_Array ||
path.get_leg_from_leg_vector(i)->leg_ptr != nullptr ||
- path.get_leg_from_leg_vector(i)->leg_len != 0)
+ path.get_leg_from_leg_vector(i)->leg_len != 0) {
return nullptr;
+ }
if (path.get_leg_from_leg_vector(i)->array_index >= 0) {
pval =
((ArrayVal*)pval)->get(path.get_leg_from_leg_vector(i)->array_index);
@@ -1481,7 +1457,9 @@ inline JsonbValue* JsonbValue::findValue(JsonbPath& path,
hDictFind handler) {
path.get_leg_from_leg_vector(i)->array_index);
}
- if (!pval) return nullptr;
+ if (!pval) {
+ return nullptr;
+ }
continue;
}
}
diff --git a/be/src/util/jsonb_utils.h b/be/src/util/jsonb_utils.h
index c32588e2610..a16d8ba69d2 100644
--- a/be/src/util/jsonb_utils.h
+++ b/be/src/util/jsonb_utils.h
@@ -21,8 +21,7 @@
#ifndef JSONB_JSONBUTIL_H
#define JSONB_JSONBUTIL_H
-#include <sstream>
-
+#include "common/exception.h"
#include "jsonb_document.h"
#include "jsonb_stream.h"
#include "jsonb_writer.h"
@@ -39,15 +38,17 @@ public:
JsonbToJson() : os_(buffer_, OUT_BUF_SIZE) {}
// get json string
- const std::string to_json_string(const char* data, size_t size) {
- JsonbDocument* pdoc =
doris::JsonbDocument::checkAndCreateDocument(data, size);
- if (!pdoc) {
- LOG(FATAL) << "invalid json binary value: " <<
std::string_view(data, size);
+
+ std::string to_json_string(const char* data, size_t size) {
+ JsonbDocument* pdoc;
+ auto st = doris::JsonbDocument::checkAndCreateDocument(data, size,
&pdoc);
+ if (!st.ok()) {
+ throw Exception(st);
}
return to_json_string(pdoc->getValue());
}
- const std::string to_json_string(const JsonbValue* val) {
+ std::string to_json_string(const JsonbValue* val) {
os_.clear();
os_.seekp(0);
@@ -61,7 +62,7 @@ public:
return json_string;
}
- static const std::string jsonb_to_json_string(const char* data, size_t
size) {
+ static std::string jsonb_to_json_string(const char* data, size_t size) {
JsonbToJson jsonb_to_json;
return jsonb_to_json.to_json_string(data, size);
}
@@ -141,9 +142,9 @@ private:
}
char char_buffer[16];
for (const char* ptr = str; ptr != str + len && *ptr; ++ptr) {
- if ((unsigned char)*ptr > 31 && *ptr != '\"' && *ptr != '\\')
+ if ((unsigned char)*ptr > 31 && *ptr != '\"' && *ptr != '\\') {
os_.put(*ptr);
- else {
+ } else {
os_.put('\\');
unsigned char token;
switch (token = *ptr) {
@@ -235,7 +236,6 @@ private:
os_.put(']');
}
-private:
JsonbOutStream os_;
char buffer_[OUT_BUF_SIZE];
};
@@ -294,7 +294,7 @@ private:
}
JsonbWriterT<OS_TYPE> writer_;
};
-typedef JsonbValueCreaterT<JsonbOutStream> JsonbValueCreater;
+using JsonbValueCreater = JsonbValueCreaterT<JsonbOutStream>;
} // namespace doris
#endif // JSONB_JSONBUTIL_H
diff --git a/be/src/util/jsonb_writer.h b/be/src/util/jsonb_writer.h
index f92d8a4096b..b71b6ef6aa4 100644
--- a/be/src/util/jsonb_writer.h
+++ b/be/src/util/jsonb_writer.h
@@ -40,6 +40,7 @@
#include <stack>
#include <string>
+#include "common/exception.h"
#include "jsonb_document.h"
#include "jsonb_stream.h"
@@ -479,8 +480,14 @@ public:
OS_TYPE* getOutput() { return os_; }
JsonbDocument* getDocument() {
- return JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(),
- getOutput()->getSize());
+ JsonbDocument* doc = nullptr;
+ auto st =
JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(),
+
getOutput()->getSize(), &doc);
+ if (st.ok()) {
+ return doc;
+ } else {
+ throw doris::Exception(st);
+ }
}
JsonbValue* getValue() {
diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp b/be/src/vec/exprs/table_function/vexplode_json_array.cpp
index 7594d9a5cc6..0742d497c7b 100644
--- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp
@@ -63,8 +63,9 @@ void VExplodeJsonArrayTableFunction<DataImpl>::process_row(size_t row_idx) {
StringRef text = _text_column->get_data_at(row_idx);
if (text.data != nullptr) {
if (WhichDataType(_text_datatype).is_json()) {
- JsonbDocument* doc =
JsonbDocument::checkAndCreateDocument(text.data, text.size);
- if (doc && doc->getValue() && doc->getValue()->isArray()) {
+ JsonbDocument* doc = nullptr;
+ auto st = JsonbDocument::checkAndCreateDocument(text.data,
text.size, &doc);
+ if (st.ok() && doc && doc->getValue() &&
doc->getValue()->isArray()) {
auto* a = (ArrayVal*)doc->getValue();
if (a->numElem() > 0) {
_cur_size = _parsed_data.set_output(*a, a->numElem());
diff --git a/be/src/vec/exprs/table_function/vexplode_json_object.cpp b/be/src/vec/exprs/table_function/vexplode_json_object.cpp
index 38a00d60b19..aa92d8238ae 100644
--- a/be/src/vec/exprs/table_function/vexplode_json_object.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_json_object.cpp
@@ -54,8 +54,9 @@ void VExplodeJsonObjectTableFunction::process_row(size_t row_idx) {
StringRef text = _json_object_column->get_data_at(row_idx);
if (text.data != nullptr) {
- JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(text.data,
text.size);
- if (!doc || !doc->getValue()) [[unlikely]] {
+ JsonbDocument* doc = nullptr;
+ auto st = JsonbDocument::checkAndCreateDocument(text.data, text.size,
&doc);
+ if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
// error jsonb, put null into output, cur_size = 0 , we will
insert_default
return;
}
diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h
index 251a711e64c..99f5f625971 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -718,8 +718,9 @@ struct ConvertImplGenericFromJsonb {
const bool is_dst_string = is_string_or_fixed_string(data_type_to);
for (size_t i = 0; i < size; ++i) {
const auto& val = col_from_string->get_data_at(i);
- JsonbDocument* doc =
JsonbDocument::checkAndCreateDocument(val.data, val.size);
- if (UNLIKELY(!doc || !doc->getValue())) {
+ JsonbDocument* doc = nullptr;
+ auto st = JsonbDocument::checkAndCreateDocument(val.data,
val.size, &doc);
+ if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
(*vec_null_map_to)[i] = 1;
col_to->insert_default();
continue;
@@ -762,7 +763,7 @@ struct ConvertImplGenericFromJsonb {
continue;
}
ReadBuffer read_buffer((char*)(input_str.data()),
input_str.size());
- Status st = data_type_to->from_string(read_buffer, col_to);
+ st = data_type_to->from_string(read_buffer, col_to);
// if parsing failed, will return null
(*vec_null_map_to)[i] = !st.ok();
if (!st.ok()) {
@@ -881,8 +882,9 @@ struct ConvertImplFromJsonb {
}
// doc is NOT necessary to be deleted since JsonbDocument will
not allocate memory
- JsonbDocument* doc =
JsonbDocument::checkAndCreateDocument(val.data, val.size);
- if (UNLIKELY(!doc || !doc->getValue())) {
+ JsonbDocument* doc = nullptr;
+ auto st = JsonbDocument::checkAndCreateDocument(val.data,
val.size, &doc);
+ if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
null_map[i] = 1;
res[i] = 0;
continue;
diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp
index 08d2c974db0..af4aeed9e5d 100644
--- a/be/src/vec/functions/function_jsonb.cpp
+++ b/be/src/vec/functions/function_jsonb.cpp
@@ -557,8 +557,9 @@ private:
continue;
}
const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
- JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw,
l_size);
- if (UNLIKELY(!doc || !doc->getValue())) {
+ JsonbDocument* doc = nullptr;
+ auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size,
&doc);
+ if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
dst_arr.clear();
return Status::InvalidArgument("jsonb data is invalid");
}
@@ -665,8 +666,9 @@ private:
static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const
char* l_raw_str,
int l_str_size, JsonbPath& path)
{
// doc is NOT necessary to be deleted since JsonbDocument will not
allocate memory
- JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str,
l_str_size);
- if (UNLIKELY(!doc || !doc->getValue())) {
+ JsonbDocument* doc = nullptr;
+ auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size,
&doc);
+ if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
return;
}
@@ -760,8 +762,9 @@ private:
}
// doc is NOT necessary to be deleted since JsonbDocument will not
allocate memory
- JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw,
l_size);
- if (UNLIKELY(!doc || !doc->getValue())) {
+ JsonbDocument* doc = nullptr;
+ auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
+ if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
StringOP::push_null_string(i, res_data, res_offsets, null_map);
return;
}
@@ -886,10 +889,11 @@ public:
writer->writeStartArray();
// doc is NOT necessary to be deleted since JsonbDocument will
not allocate memory
- JsonbDocument* doc =
JsonbDocument::checkAndCreateDocument(l_raw, l_size);
+ JsonbDocument* doc = nullptr;
+ auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size,
&doc);
for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
- if (UNLIKELY(!doc || !doc->getValue())) {
+ if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
writer->writeNull();
continue;
}
@@ -1027,8 +1031,9 @@ private:
}
// doc is NOT necessary to be deleted since JsonbDocument will not
allocate memory
- JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str,
l_str_size);
- if (UNLIKELY(!doc || !doc->getValue())) {
+ JsonbDocument* doc = nullptr;
+ auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size,
&doc);
+ if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
null_map[i] = 1;
res[i] = 0;
return;
@@ -1406,8 +1411,9 @@ struct JsonbLengthUtil {
}
auto jsonb_value = jsonb_data_column->get_data_at(i);
// doc is NOT necessary to be deleted since JsonbDocument will not
allocate memory
- JsonbDocument* doc =
- JsonbDocument::checkAndCreateDocument(jsonb_value.data,
jsonb_value.size);
+ JsonbDocument* doc = nullptr;
+
RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value.data,
+
jsonb_value.size, &doc));
JsonbValue* value = doc->getValue()->findValue(path, nullptr);
if (UNLIKELY(!value)) {
null_map->get_data()[i] = 1;
@@ -1541,10 +1547,12 @@ struct JsonbContainsUtil {
continue;
}
// doc is NOT necessary to be deleted since JsonbDocument will not
allocate memory
- JsonbDocument* doc1 =
- JsonbDocument::checkAndCreateDocument(jsonb_value1.data,
jsonb_value1.size);
- JsonbDocument* doc2 =
- JsonbDocument::checkAndCreateDocument(jsonb_value2.data,
jsonb_value2.size);
+ JsonbDocument* doc1 = nullptr;
+
RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value1.data,
+
jsonb_value1.size, &doc1));
+ JsonbDocument* doc2 = nullptr;
+
RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value2.data,
+
jsonb_value2.size, &doc2));
JsonbValue* value1 = doc1->getValue()->findValue(path, nullptr);
JsonbValue* value2 = doc2->getValue();
diff --git a/be/src/vec/jsonb/serialize.cpp b/be/src/vec/jsonb/serialize.cpp
index d75d332f40c..cd97e3bf5ca 100644
--- a/be/src/vec/jsonb/serialize.cpp
+++ b/be/src/vec/jsonb/serialize.cpp
@@ -24,6 +24,7 @@
#include <unordered_set>
#include <vector>
+#include "common/status.h"
#include "olap/tablet_schema.h"
#include "runtime/descriptors.h"
#include "runtime/jsonb_value.h"
@@ -91,7 +92,9 @@ void JsonbSerializeUtil::jsonb_to_block(const
DataTypeSerDeSPtrs& serdes, const
const std::unordered_map<uint32_t,
uint32_t>& col_id_to_idx,
Block& dst, const
std::vector<std::string>& default_values,
const std::unordered_set<int>&
include_cids) {
- auto pdoc = JsonbDocument::checkAndCreateDocument(data, size);
+ JsonbDocument* pdoc = nullptr;
+ THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(data, size, &pdoc));
+
JsonbDocument& doc = *pdoc;
size_t num_rows = dst.rows();
size_t filled_columns = 0;
diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp
index db441b671e9..79b0866bc77 100644
--- a/be/src/vec/olap/olap_data_convertor.cpp
+++ b/be/src/vec/olap/olap_data_convertor.cpp
@@ -668,10 +668,10 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap(
"`string_type_length_soft_limit_bytes` in vec
engine.");
}
// Make sure that the json binary data written in is the
correct jsonb value.
- if (_is_jsonb &&
- !doris::JsonbDocument::checkAndCreateDocument(slice->data,
slice->size)) {
- return Status::InvalidArgument("invalid json binary value:
{}",
-
std::string_view(slice->data, slice->size));
+ if (_is_jsonb) {
+ JsonbDocument* doc = nullptr;
+
RETURN_IF_ERROR(doris::JsonbDocument::checkAndCreateDocument(
+ slice->data, slice->size, &doc));
}
} else {
// TODO: this may not be necessary, check and remove later
@@ -695,10 +695,10 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap(
" in vec engine.");
}
// Make sure that the json binary data written in is the correct
jsonb value.
- if (_is_jsonb &&
- !doris::JsonbDocument::checkAndCreateDocument(slice->data,
slice->size)) {
- return Status::InvalidArgument("invalid json binary value: {}",
- std::string_view(slice->data,
slice->size));
+ if (_is_jsonb) {
+ JsonbDocument* doc = nullptr;
+
RETURN_IF_ERROR(doris::JsonbDocument::checkAndCreateDocument(slice->data,
+
slice->size, &doc));
}
string_offset = *offset_cur;
++slice;
diff --git a/be/test/vec/data_types/common_data_type_serder_test.h b/be/test/vec/data_types/common_data_type_serder_test.h
index ef8d07323df..f8c3488e684 100644
--- a/be/test/vec/data_types/common_data_type_serder_test.h
+++ b/be/test/vec/data_types/common_data_type_serder_test.h
@@ -292,7 +292,10 @@ public:
EXPECT_EQ(jsonb_column->size(), load_cols[0]->size());
for (size_t r = 0; r < jsonb_column->size(); ++r) {
StringRef jsonb_data = jsonb_column->get_data_at(r);
- auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data,
jsonb_data.size);
+ JsonbDocument* pdoc = nullptr;
+ auto st =
+ JsonbDocument::checkAndCreateDocument(jsonb_data.data,
jsonb_data.size, &pdoc);
+ ASSERT_TRUE(st.ok()) << "checkAndCreateDocument failed: " <<
st.to_string();
JsonbDocument& doc = *pdoc;
size_t cIdx = 0;
for (auto it = doc->begin(); it != doc->end(); ++it) {
diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp b/be/test/vec/data_types/serde/data_type_serde_test.cpp
index 3c9498f1d6d..24d89953459 100644
--- a/be/test/vec/data_types/serde/data_type_serde_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp
@@ -240,7 +240,9 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
jsonb_writer.getOutput()->getSize());
StringRef jsonb_data = jsonb_column->get_data_at(0);
- auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data,
jsonb_data.size);
+ JsonbDocument* pdoc = nullptr;
+ auto st = JsonbDocument::checkAndCreateDocument(jsonb_data.data,
jsonb_data.size, &pdoc);
+ ASSERT_TRUE(st.ok()) << "checkAndCreateDocument failed: " <<
st.to_string();
JsonbDocument& doc = *pdoc;
for (auto it = doc->begin(); it != doc->end(); ++it) {
serde->read_one_cell_from_jsonb(*vec, it->value());
@@ -270,7 +272,9 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
jsonb_writer.getOutput()->getSize());
StringRef jsonb_data = jsonb_column->get_data_at(0);
- auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data,
jsonb_data.size);
+ JsonbDocument* pdoc = nullptr;
+ auto st = JsonbDocument::checkAndCreateDocument(jsonb_data.data,
jsonb_data.size, &pdoc);
+ ASSERT_TRUE(st.ok()) << "checkAndCreateDocument failed: " <<
st.to_string();
JsonbDocument& doc = *pdoc;
for (auto it = doc->begin(); it != doc->end(); ++it) {
serde->read_one_cell_from_jsonb(*vec, it->value());
diff --git a/be/test/vec/olap/jsonb_value_test.cpp b/be/test/vec/olap/jsonb_value_test.cpp
index 3111163c0be..d6b5db784e2 100644
--- a/be/test/vec/olap/jsonb_value_test.cpp
+++ b/be/test/vec/olap/jsonb_value_test.cpp
@@ -189,7 +189,8 @@ TEST(JsonbValueConvertorTest, JsonbValueInvalid) {
auto [status, column] = _olap_data_convertor->convert_column_data(0);
// invalid will make error
ASSERT_FALSE(status.ok());
- ASSERT_TRUE(status.to_string().find("invalid json binary value") !=
std::string::npos);
+ ASSERT_TRUE(status.to_string().find("Invalid JSONB document") !=
std::string::npos)
+ << status.to_string();
ASSERT_NE(column, nullptr);
// test with null map
@@ -235,7 +236,8 @@ TEST(JsonbValueConvertorTest, JsonbValueInvalid) {
_olap_data_convertor->set_source_content(&block, 0, 5);
auto [status1, column1] = _olap_data_convertor->convert_column_data(0);
ASSERT_FALSE(status.ok());
- ASSERT_TRUE(status.to_string().find("invalid json binary value") !=
std::string::npos);
+ ASSERT_TRUE(status.to_string().find("Invalid JSONB document") !=
std::string::npos)
+ << status.to_string();
ASSERT_NE(column, nullptr);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]