This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 753fddb2a29 [chore](json) remove jsonb_parser.h file (#51284)
753fddb2a29 is described below
commit 753fddb2a29899136b88dbeee1b5b43a36d6166f
Author: Mryange <[email protected]>
AuthorDate: Wed May 28 14:15:32 2025 +0800
[chore](json) remove jsonb_parser.h file (#51284)
### What problem does this PR solve?
Delete jsonb_parser.h, since simdjson already supports multiple CPU
architectures.
---
be/src/runtime/jsonb_value.h | 4 -
be/src/util/jsonb_parser.h | 1060 --------------------
be/src/vec/columns/column_object.cpp | 7 +-
.../vec/data_types/serde/data_type_jsonb_serde.cpp | 5 -
.../data_types/serde/data_type_object_serde.cpp | 7 +-
be/src/vec/functions/function_json.cpp | 8 +-
be/src/vec/functions/function_jsonb.cpp | 4 -
7 files changed, 4 insertions(+), 1091 deletions(-)
diff --git a/be/src/runtime/jsonb_value.h b/be/src/runtime/jsonb_value.h
index e743a143547..a46a7ac4570 100644
--- a/be/src/runtime/jsonb_value.h
+++ b/be/src/runtime/jsonb_value.h
@@ -26,11 +26,7 @@
#include "common/status.h"
#include "util/hash_util.hpp"
-#ifdef __AVX2__
#include "util/jsonb_parser_simd.h"
-#else
-#include "util/jsonb_parser.h"
-#endif
namespace doris {
diff --git a/be/src/util/jsonb_parser.h b/be/src/util/jsonb_parser.h
deleted file mode 100644
index 4192e36ea5c..00000000000
--- a/be/src/util/jsonb_parser.h
+++ /dev/null
@@ -1,1060 +0,0 @@
-/*
- * Copyright (c) 2014, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
- *
- */
-
-/*
- * This file defines JsonbParserT (template) and JsonbParser.
- *
- * JsonbParserT is a template class which implements a JSON parser.
- * JsonbParserT parses JSON text, and serialize it to JSONB binary format
- * by using JsonbWriterT object. By default, JsonbParserT creates a new
- * JsonbWriterT object with an output stream object. However, you can also
- * pass in your JsonbWriterT or any stream object that implements some basic
- * interface of std::ostream (see JsonbStream.h).
- *
- * JsonbParser specializes JsonbParserT with JsonbOutStream type (see
- * JsonbStream.h). So unless you want to provide own a different output stream
- * type, use JsonbParser object.
- *
- * ** Parsing JSON **
- * JsonbParserT parses JSON string, and directly serializes into JSONB
- * packed bytes. There are three ways to parse a JSON string: (1) using
- * c-string, (2) using string with len, (3) using std::istream object. You can
- * use custom streambuf to redirect output. JsonbOutBuffer is a streambuf used
- * internally if the input is raw character buffer.
- *
- * You can reuse an JsonbParserT object to parse/serialize multiple JSON
- * strings, and the previous JSONB will be overwritten.
- *
- * If parsing fails (returned false), the error code will be set to one of
- * JsonbErrType, and can be retrieved by calling getErrorCode().
- *
- * ** External dictionary **
- * During parsing a JSON string, you can pass a call-back function to map a key
- * string to an id, and store the dictionary id in JSONB to save space. The
- * purpose of using an external dictionary is more towards a collection of
- * documents (which has common keys) rather than a single document, so that
- * space saving will be significant.
- *
- * ** Endianness **
- * Note: JSONB serialization doesn't assume endianness of the server. However
- * you will need to ensure that the endianness at the reader side is the same
- * as that at the writer side (if they are on different machines). Otherwise,
- * proper conversion is needed when a number value is returned to the
- * caller/writer.
- *
- * @author Tian Xia <[email protected]>
- *
- * this file is copied from
- * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonJsonParser.h
- * and modified by Doris
- */
-
-#ifndef JSONB_JSONBJSONPARSER_H
-#define JSONB_JSONBJSONPARSER_H
-
-#include <cmath>
-#include <limits>
-
-#include "jsonb_document.h"
-#include "jsonb_error.h"
-#include "jsonb_writer.h"
-#include "string_parser.hpp"
-
-namespace doris {
-
-const char* const kJsonDelim = " ,]}\t\r\n";
-const char* const kWhiteSpace = " \t\n\r";
-
-/*
- * Template JsonbParserT
- */
-template <class OS_TYPE>
-class JsonbParserT {
-public:
- JsonbParserT() : stream_pos_(0), err_(JsonbErrType::E_NONE) {}
-
- explicit JsonbParserT(OS_TYPE& os) : writer_(os), stream_pos_(0),
err_(JsonbErrType::E_NONE) {}
-
- // parse a UTF-8 JSON string
- bool parse(const std::string& str, hDictInsert handler = nullptr) {
- return parse(str.c_str(), str.size(), handler);
- }
-
- // parse a UTF-8 JSON c-style string (NULL terminated)
- bool parse(const char* c_str, hDictInsert handler = nullptr) {
- return parse(c_str, strlen(c_str), handler);
- }
-
- // parse a UTF-8 JSON string with length
- bool parse(const char* pch, size_t len, hDictInsert handler = nullptr) {
- if (!pch || len == 0) {
- err_ = JsonbErrType::E_EMPTY_DOCUMENT;
- return false;
- }
-
- JsonbInBuffer sb(pch, len);
- std::istream in(&sb);
- return parse(in, handler);
- }
-
- // parse UTF-8 JSON text from an input stream
- bool parse(std::istream& in, hDictInsert handler = nullptr) {
- bool res = false;
- err_ = JsonbErrType::E_NONE;
- stream_pos_ = 0;
-
- // reset output stream
- writer_.reset();
-
- trim(in);
-
- // TODO(wzy): parsePrimitive should be implemented
- if (in.peek() == '{') {
- skipChar(in);
- res = parseObject(in, handler);
- } else if (in.peek() == '[') {
- skipChar(in);
- res = parseArray(in, handler);
- } else {
- res = parsePrimitive(in, handler);
- if (!res) err_ = handle_parse_failure(in);
- }
-
- trim(in);
- if (res && !in.eof()) {
- err_ = JsonbErrType::E_INVALID_DOCU;
- return false;
- }
-
- return res;
- }
-
- JsonbWriterT<OS_TYPE>& getWriter() { return writer_; }
-
- JsonbErrType getErrorCode() { return err_; }
-
- JsonbErrInfo getErrorInfo() {
- assert(err_ < JsonbErrType::E_NUM_ERRORS);
-
- JsonbErrInfo err_info;
-
- // stream_pos_ always points to the next char, so err_pos is 1-based
- err_info.err_pos = stream_pos_;
- err_info.err_msg = JsonbErrMsg::getErrMsg(err_);
-
- return err_info;
- }
-
- // clear error code
- void clearErr() { err_ = JsonbErrType::E_NONE; }
-
-private:
- JsonbErrType handle_parse_value_failure(bool parse_res, std::istream& in) {
- if (parse_res) {
- trim(in);
- if (!in.good()) {
- return JsonbErrType::E_INVALID_DOCU_COMPAT;
- }
- }
- return JsonbErrType::E_INVALID_DOCU;
- ;
- }
-
- // In case json is determined to be invalid at top level,
- // try to parse literal values.
- // We return a different error code E_INVALID_DOCU_COMPAT
- // in case the input json contains these values.
- // Returning a different error code will cause an
- // auditing on the caller.
- // This is mainly done because 8.0 JSON_VALID considers
- // this as a valid input.
- JsonbErrType handle_parse_failure(std::istream& in) {
- JsonbErrType error = JsonbErrType::E_INVALID_DOCU;
- if (!writer_.writeStartArray()) {
- return error;
- }
-
- switch (in.peek()) {
- case 'n':
- skipChar(in);
- error = handle_parse_value_failure(parseNull(in), in);
- break;
- case 't':
- skipChar(in);
- error = handle_parse_value_failure(parseTrue(in), in);
- break;
- case 'f':
- skipChar(in);
- error = handle_parse_value_failure(parseFalse(in), in);
- break;
- case '"':
- skipChar(in);
- error = handle_parse_value_failure(parseString(in), in);
- break;
- default:
- if (parseNumber(in)) {
- trim(in);
- if (in.eof()) {
- error = JsonbErrType::E_INVALID_DOCU_COMPAT;
- }
- }
- }
- if (!writer_.writeEndArray()) {
- return error;
- }
-
- return error;
- }
-
- // parse primitive
- bool parsePrimitive(std::istream& in, hDictInsert handler) {
- bool res = false;
- switch (in.peek()) {
- case 'n':
- skipChar(in);
- res = parseNull(in);
- break;
- case 't':
- skipChar(in);
- res = parseTrue(in);
- break;
- case 'f':
- skipChar(in);
- res = parseFalse(in);
- break;
- case '"':
- skipChar(in);
- res = parseString(in);
- break;
- default:
- res = parseNumber(in);
- }
-
- return res;
- }
-
- // parse a JSON object (comma-separated list of key-value pairs)
- bool parseObject(std::istream& in, hDictInsert handler) {
- if (!writer_.writeStartObject()) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
-
- trim(in);
-
- if (in.peek() == '}') {
- skipChar(in);
- // empty object
- if (!writer_.writeEndObject()) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
- return true;
- }
-
- while (in.good()) {
- if (nextChar(in) != '"') {
- err_ = JsonbErrType::E_INVALID_OBJ;
- return false;
- }
-
- if (!parseKVPair(in, handler)) {
- return false;
- }
-
- trim(in);
-
- char ch = nextChar(in);
- if (ch == '}') {
- // end of the object
- if (!writer_.writeEndObject()) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
- return true;
- } else if (ch != ',') {
- err_ = JsonbErrType::E_INVALID_OBJ;
- return false;
- }
-
- trim(in);
- }
-
- err_ = JsonbErrType::E_INVALID_OBJ;
- return false;
- }
-
- // parse a JSON array (comma-separated list of values)
- bool parseArray(std::istream& in, hDictInsert handler) {
- if (!writer_.writeStartArray()) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
-
- trim(in);
-
- if (in.peek() == ']') {
- skipChar(in);
- // empty array
- if (!writer_.writeEndArray()) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
- return true;
- }
-
- while (in.good()) {
- if (!parseValue(in, handler)) {
- return false;
- }
-
- trim(in);
-
- char ch = nextChar(in);
- if (ch == ']') {
- // end of the array
- if (!writer_.writeEndArray()) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
- return true;
- } else if (ch != ',') {
- err_ = JsonbErrType::E_INVALID_ARR;
- return false;
- }
-
- trim(in);
- }
-
- err_ = JsonbErrType::E_INVALID_ARR;
- return false;
- }
-
- // parse a key-value pair, separated by ":"
- bool parseKVPair(std::istream& in, hDictInsert handler) {
- if (parseKey(in, handler) && parseValue(in, handler)) {
- return true;
- }
-
- return false;
- }
-
- // parse a key (must be string)
- bool parseKey(std::istream& in, hDictInsert handler) {
- char key[JsonbKeyValue::sMaxKeyLen];
- int key_len = 0;
- while (in.good() && in.peek() != '"' && key_len <
JsonbKeyValue::sMaxKeyLen) {
- char ch = nextChar(in);
- if (ch == '\\') {
- char escape_buffer[5]; // buffer for escape
- int len;
- if (!parseEscape(in, escape_buffer, len)) {
- err_ = JsonbErrType::E_INVALID_KEY_STRING;
- return false;
- }
- if (key_len + len >= JsonbKeyValue::sMaxKeyLen) {
- err_ = JsonbErrType::E_INVALID_KEY_LENGTH;
- return false;
- }
- memcpy(key + key_len, escape_buffer, len);
- key_len += len;
- } else {
- key[key_len++] = ch;
- }
- }
- // The JSON key can be an empty string.
- if (!in.good() || in.peek() != '"') {
- if (key_len == JsonbKeyValue::sMaxKeyLen)
- err_ = JsonbErrType::E_INVALID_KEY_LENGTH;
- else
- err_ = JsonbErrType::E_INVALID_KEY_STRING;
- return false;
- }
-
- skipChar(in); // discard '"'
-
- int key_id = -1;
- if (handler) {
- key_id = handler(key, key_len);
- }
-
- if (key_id < 0) {
- writer_.writeKey(key, key_len);
- } else {
- writer_.writeKey(key_id);
- }
-
- trim(in);
-
- if (nextChar(in) != ':') {
- err_ = JsonbErrType::E_INVALID_OBJ;
- return false;
- }
-
- trim(in);
- if (!in.good()) {
- err_ = JsonbErrType::E_INVALID_OBJ;
- return false;
- }
-
- return true;
- }
-
- // parse a value
- bool parseValue(std::istream& in, hDictInsert handler) {
- bool res = false;
-
- switch (in.peek()) {
- case 'N':
- case 'n': {
- skipChar(in);
- res = parseNull(in);
- break;
- }
- case 'T':
- case 't': {
- skipChar(in);
- res = parseTrue(in);
- break;
- }
- case 'F':
- case 'f': {
- skipChar(in);
- res = parseFalse(in);
- break;
- }
- case '"': {
- skipChar(in);
- res = parseString(in);
- break;
- }
- case '{': {
- skipChar(in);
- ++nesting_lvl_;
- if (nesting_lvl_ >= MaxNestingLevel) {
- err_ = JsonbErrType::E_NESTING_LVL_OVERFLOW;
- return false;
- }
- res = parseObject(in, handler);
- if (res) {
- --nesting_lvl_;
- }
- break;
- }
- case '[': {
- skipChar(in);
- ++nesting_lvl_;
- if (nesting_lvl_ >= MaxNestingLevel) {
- err_ = JsonbErrType::E_NESTING_LVL_OVERFLOW;
- return false;
- }
- res = parseArray(in, handler);
- if (res) {
- --nesting_lvl_;
- }
- break;
- }
- default: {
- res = parseNumber(in);
- break;
- }
- }
-
- return res;
- }
-
- // parse NULL value
- bool parseNull(std::istream& in) {
- if (tolower(nextChar(in)) == 'u' && tolower(nextChar(in)) == 'l' &&
- tolower(nextChar(in)) == 'l') {
- writer_.writeNull();
- return true;
- }
-
- err_ = JsonbErrType::E_INVALID_SCALAR_VALUE;
- return false;
- }
-
- // parse TRUE value
- bool parseTrue(std::istream& in) {
- if (tolower(nextChar(in)) == 'r' && tolower(nextChar(in)) == 'u' &&
- tolower(nextChar(in)) == 'e') {
- writer_.writeBool(true);
- return true;
- }
-
- err_ = JsonbErrType::E_INVALID_SCALAR_VALUE;
- return false;
- }
-
- // parse FALSE value
- bool parseFalse(std::istream& in) {
- if (tolower(nextChar(in)) == 'a' && tolower(nextChar(in)) == 'l' &&
- tolower(nextChar(in)) == 's' && tolower(nextChar(in)) == 'e') {
- writer_.writeBool(false);
- return true;
- }
-
- err_ = JsonbErrType::E_INVALID_SCALAR_VALUE;
- return false;
- }
-
- /*
- This is a helper function to parse the hex value. hex_num means the
- number of digits needed to be parsed. If less than zero, then it will
- consider all the characters between current and any character in JsonDelim.
- */
- unsigned parseHexHelper(std::istream& in, uint64_t& val, unsigned hex_num
= 17) {
- // We can't read more than 17 digits, so when read 17 digits, it's overflow
- val = 0;
- unsigned num_digits = 0;
- char ch = tolower(in.peek());
- while (in.good() && !strchr(kJsonDelim, ch) && num_digits != hex_num) {
- if (ch >= '0' && ch <= '9') {
- val = (val << 4) + (ch - '0');
- } else if (ch >= 'a' && ch <= 'f') {
- val = (val << 4) + (ch - 'a' + 10);
- } else {
- // unrecognized hex digit
- return 0;
- }
- skipChar(in);
- ch = tolower(in.peek());
- ++num_digits;
- }
- return num_digits;
- }
-
- // parse HEX value
- bool parseHex4(std::istream& in, unsigned& h) {
- uint64_t val;
- if (4 == parseHexHelper(in, val, 4)) {
- h = (unsigned)val;
- return true;
- }
- return false;
- }
-
- /*
- parse Escape char.
- */
- bool parseEscape(std::istream& in, char* out, int& len) {
- /*
- This is extracted from cJSON implementation.
- This is about the mask of the first byte in UTF-8.
- The mask is defined in:
- http://en.wikipedia.org/wiki/UTF-8#Description
- */
- const unsigned char firstByteMark[6] = {0x00, 0xC0, 0xE0, 0xF0, 0xF8,
0xFC};
- if (!in.good()) {
- return false;
- }
- char c = nextChar(in);
- len = 1;
- switch (c) {
- // \" \\ \/ \b \f \n \r \t
- case '"':
- *out = '"';
- return true;
- case '\\':
- *out = '\\';
- return true;
- case '/':
- *out = '/';
- return true;
- case 'b':
- *out = '\b';
- return true;
- case 'f':
- *out = '\f';
- return true;
- case 'n':
- *out = '\n';
- return true;
- case 'r':
- *out = '\r';
- return true;
- case 't':
- *out = '\t';
- return true;
- case 'u': {
- unsigned uc;
- if (!parseHex4(in, uc)) {
- return false;
- }
- /*
- For DC00 to DFFF, it should be low surrogates for UTF16.
- So if it display in the high bits, it's invalid.
- */
- if (uc >= 0xDC00 && uc <= 0xDFFF) {
- return false;
- }
-
- /*
- For D800 to DBFF, it's the high surrogates for UTF16.
- So it's utf-16, there must be another one between 0xDC00
- and 0xDFFF.
- */
- if (uc >= 0xD800 && uc <= 0xDBFF) {
- unsigned uc2;
-
- if (!in.good()) {
- return false;
- }
- c = nextChar(in);
- if (c != '\\') {
- return false;
- }
-
- if (!in.good()) {
- return false;
- }
- c = nextChar(in);
- if (c != 'u') {
- return false;
- }
-
- if (!parseHex4(in, uc2)) {
- return false;
- }
- /*
- Now we need the low surrogates for UTF16. It should be
- within 0xDC00 and 0xDFFF.
- */
- if (uc2 < 0xDC00 || uc2 > 0xDFFF) return false;
- /*
- For the character that not in the Basic Multilingual Plan,
- it's represented as twelve-character, encoding the UTF-16
- surrogate pair.
- UTF16 is between 0x10000 and 0x10FFFF. The high surrogate
- present the high bits and the low surrogate present the
- lower 10 bits.
- For detailed explanation, please refer to:
- http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
- Then it will be converted to UTF8.
- */
- uc = 0x10000 + (((uc & 0x3FF) << 10) | (uc2 & 0x3FF));
- }
-
- /*
- Get the length of the unicode.
- Please refer to http://en.wikipedia.org/wiki/UTF-8#Description.
- */
- if (uc < 0x80)
- len = 1;
- else if (uc < 0x800)
- len = 2;
- else if (uc < 0x10000)
- len = 3;
- else
- len = 4;
- out += len;
- /*
- Encode it.
- Please refer to http://en.wikipedia.org/wiki/UTF-8#Description.
- This part of code has a reference to cJSON.
- */
- switch (len) {
- case 4:
- *--out = ((uc | 0x80) & 0xBF);
- uc >>= 6;
- [[fallthrough]];
- case 3:
- *--out = ((uc | 0x80) & 0xBF);
- uc >>= 6;
- [[fallthrough]];
- case 2:
- *--out = ((uc | 0x80) & 0xBF);
- uc >>= 6;
- [[fallthrough]];
- case 1:
- // Mask the first byte according to the standard.
- *--out = (uc | firstByteMark[len - 1]);
- }
- return true;
- break;
- }
- default:
- return false;
- break;
- }
- }
-
- // parse a string
- bool parseString(std::istream& in) {
- const int BUFFER_LEN = 4096;
- if (!writer_.writeStartString()) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
-
- // write 4KB at a time
- char buffer[BUFFER_LEN];
- int nread = 0;
- while (in.good()) {
- char ch = nextChar(in);
- if (ch == '"') {
- // write all remaining bytes in the buffer
- if (nread > 0) {
- if (!writer_.writeString(buffer, nread)) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
- }
- // end writing string
- if (!writer_.writeEndString()) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
- return true;
- } else if (ch == '\\') {
- // this is a escape char
- char escape_buffer[5]; // buffer for escape
- int len;
- if (!parseEscape(in, escape_buffer, len)) {
- err_ = JsonbErrType::E_INVALID_STR;
- return false;
- }
-
- // Write each char to the buffer
- for (int i = 0; i != len; ++i) {
- buffer[nread++] = escape_buffer[i];
- if (nread == BUFFER_LEN) {
- if (!writer_.writeString(buffer, nread)) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
- nread = 0;
- }
- }
- } else {
- // just a char
- buffer[nread++] = ch;
- if (nread == BUFFER_LEN) {
- // flush buffer
- if (!writer_.writeString(buffer, nread)) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
- nread = 0;
- }
- }
- }
-
- err_ = JsonbErrType::E_INVALID_STR;
- return false;
- }
-
- // parse a number
- // Number format can be hex, octal, or decimal (including float).
- // Only decimal can have (+/-) sign prefix.
- bool parseNumber(std::istream& in) {
- bool ret = false;
- switch (in.peek()) {
- case '0': {
- skipChar(in);
-
- if (in.peek() == 'x' || in.peek() == 'X') {
- skipChar(in);
- ret = parseHex(in);
- } else if (in.peek() == '.') {
- skipChar(in); // remove '.'
- num_buf_[0] = '.';
- ret = parseDouble(in, num_buf_ + 1);
- } else {
- ret = parseOctal(in);
- }
-
- break;
- }
- case '-': {
- skipChar(in);
- ret = parseDecimal(in, true);
- break;
- }
- case '+':
- skipChar(in);
- // fall through
- default:
- ret = parseDecimal(in);
- break;
- }
-
- return ret;
- }
-
- // parse a number in hex format
- bool parseHex(std::istream& in) {
- uint64_t val = 0;
- int num_digits;
- if (0 == (num_digits = parseHexHelper(in, val))) {
- err_ = JsonbErrType::E_INVALID_HEX;
- return false;
- }
-
- int size = 0;
- if (num_digits <= 2) {
- size = writer_.writeInt8((int8_t)val);
- } else if (num_digits <= 4) {
- size = writer_.writeInt16((int16_t)val);
- } else if (num_digits <= 8) {
- size = writer_.writeInt32((int32_t)val);
- } else if (num_digits <= 16) {
- size = writer_.writeInt64(val);
- } else {
- err_ = JsonbErrType::E_HEX_OVERFLOW;
- return false;
- }
-
- if (size == 0) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
-
- return true;
- }
-
- // parse a number in octal format
- bool parseOctal(std::istream& in) {
- int64_t val = 0;
- char ch = in.peek();
- while (in.good() && !strchr(kJsonDelim, ch)) {
- if (ch >= '0' && ch <= '7') {
- val = val * 8 + (ch - '0');
- } else {
- err_ = JsonbErrType::E_INVALID_OCTAL;
- return false;
- }
-
- // check if the number overflows
- if (val < 0) {
- err_ = JsonbErrType::E_OCTAL_OVERFLOW;
- return false;
- }
-
- skipChar(in);
- ch = in.peek();
- }
-
- int size = 0;
- if (val <= std::numeric_limits<int8_t>::max()) {
- size = writer_.writeInt8((int8_t)val);
- } else if (val <= std::numeric_limits<int16_t>::max()) {
- size = writer_.writeInt16((int16_t)val);
- } else if (val <= std::numeric_limits<int32_t>::max()) {
- size = writer_.writeInt32((int32_t)val);
- } else { // val <= INT64_MAX
- size = writer_.writeInt64(val);
- }
-
- if (size == 0) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
-
- return true;
- }
-
- // parse a number in decimal (including float)
- bool parseDecimal(std::istream& in, bool neg = false) {
- char ch = 0;
- while (in.good() && (ch = in.peek()) == '0') skipChar(in);
-
- char* pbuf = num_buf_;
- if (neg) *(pbuf++) = '-';
-
- char* save_pos = pbuf;
- while (in.good() && !strchr(kJsonDelim, ch)) {
- *(pbuf++) = ch;
- if (pbuf == end_buf_) {
- err_ = JsonbErrType::E_DECIMAL_OVERFLOW;
- return false;
- }
-
- if (ch == '.') {
- skipChar(in); // remove '.'
- return parseDouble(in, pbuf);
- } else if (ch == 'E' || ch == 'e') {
- skipChar(in); // remove 'E'
- return parseExponent(in, pbuf);
- } else if (ch < '0' || ch > '9') {
- err_ = JsonbErrType::E_INVALID_DECIMAL;
- return false;
- }
-
- skipChar(in);
- ch = in.peek();
- }
- if (save_pos == pbuf) {
- err_ = JsonbErrType::E_INVALID_DECIMAL; // empty input
- return false;
- }
-
- *pbuf = 0; // set null-terminator
- StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
- int128_t val =
- StringParser::string_to_int<int128_t>(num_buf_, pbuf -
num_buf_, &parse_result);
- if (parse_result != StringParser::PARSE_SUCCESS) {
- VLOG_ROW << "debug string_to_int error for " << num_buf_ << "
val=" << val
- << " parse_result=" << parse_result;
- err_ = JsonbErrType::E_DECIMAL_OVERFLOW;
- return false;
- }
-
- int size = 0;
- if (val >= std::numeric_limits<int8_t>::min() &&
- val <= std::numeric_limits<int8_t>::max()) {
- size = writer_.writeInt8((int8_t)val);
- } else if (val >= std::numeric_limits<int16_t>::min() &&
- val <= std::numeric_limits<int16_t>::max()) {
- size = writer_.writeInt16((int16_t)val);
- } else if (val >= std::numeric_limits<int32_t>::min() &&
- val <= std::numeric_limits<int32_t>::max()) {
- size = writer_.writeInt32((int32_t)val);
- } else if (val >= std::numeric_limits<int64_t>::min() &&
- val <= std::numeric_limits<int64_t>::max()) {
- size = writer_.writeInt64((int64_t)val);
- } else { // INT128
- size = writer_.writeInt128(val);
- }
-
- if (size == 0) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
-
- return true;
- }
-
- // parse IEEE745 double precision
- bool parseDouble(std::istream& in, char* pbuf) {
- char* save_pos = pbuf;
- char ch = in.peek();
- while (in.good() && !strchr(kJsonDelim, ch)) {
- *(pbuf++) = ch;
- if (pbuf == end_buf_) {
- err_ = JsonbErrType::E_DOUBLE_OVERFLOW;
- return false;
- }
-
- if (ch == 'e' || ch == 'E') {
- skipChar(in); // remove 'E'
- return parseExponent(in, pbuf);
- } else if (ch < '0' || ch > '9') {
- err_ = JsonbErrType::E_INVALID_DECIMAL;
- return false;
- }
-
- skipChar(in);
- ch = in.peek();
- }
- if (save_pos == pbuf) {
- err_ = JsonbErrType::E_INVALID_DECIMAL; // empty input
- return false;
- }
-
- *pbuf = 0; // set null-terminator
- return internConvertBufferToDouble(num_buf_, pbuf - num_buf_);
- }
-
- // parse the exponent part of a double number
- bool parseExponent(std::istream& in, char* pbuf) {
- char ch = in.peek();
- if (in.good()) {
- if (ch == '+' || ch == '-') {
- *(pbuf++) = ch;
- if (pbuf == end_buf_) {
- err_ = JsonbErrType::E_DOUBLE_OVERFLOW;
- return false;
- }
- skipChar(in);
- ch = in.peek();
- }
- }
-
- char* save_pos = pbuf;
- while (in.good() && !strchr(kJsonDelim, ch)) {
- *(pbuf++) = ch;
- if (pbuf == end_buf_) {
- err_ = JsonbErrType::E_DOUBLE_OVERFLOW;
- return false;
- }
-
- if (ch < '0' || ch > '9') {
- err_ = JsonbErrType::E_INVALID_EXPONENT;
- return false;
- }
-
- skipChar(in);
- ch = in.peek();
- }
- if (save_pos == pbuf) {
- err_ = JsonbErrType::E_INVALID_EXPONENT; // empty input
- return false;
- }
-
- *pbuf = 0; // set null-terminator
- return internConvertBufferToDouble(num_buf_, pbuf - num_buf_);
- }
-
- // call system function to parse double to string
- bool internConvertBufferToDouble(char* num_buf_, int len) {
- StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
- double val = StringParser::string_to_float<double>(num_buf_, len,
&parse_result);
- if (parse_result != StringParser::PARSE_SUCCESS) {
- VLOG_ROW << "debug string_to_float error for " << num_buf_ << "
val=" << val
- << " parse_result=" << parse_result;
- err_ = JsonbErrType::E_DECIMAL_OVERFLOW;
- return false;
- }
-
- if (writer_.writeDouble(val) == 0) {
- err_ = JsonbErrType::E_OUTPUT_FAIL;
- return false;
- }
-
- return true;
- }
-
- void trim(std::istream& in) {
- while (in.good() && strchr(kWhiteSpace, in.peek())) {
- skipChar(in);
- }
- }
-
- /*
- * Helper functions to keep track of characters read.
- * Do not rely on std::istream's tellg() which may not be implemented.
- */
-
- char nextChar(std::istream& in) {
- ++stream_pos_;
- return in.get();
- }
-
- void skipChar(std::istream& in) {
- ++stream_pos_;
- in.ignore();
- }
-
-private:
- JsonbWriterT<OS_TYPE> writer_;
- uint32_t stream_pos_;
- JsonbErrType err_;
- char num_buf_[512]; // buffer to hold number string
- const char* end_buf_ = num_buf_ + sizeof(num_buf_) - 1;
- uint32_t nesting_lvl_ = 0;
-};
-
-typedef JsonbParserT<JsonbOutStream> JsonbParser;
-
-} // namespace doris
-
-#endif // JSONB_JSONBJSONPARSER_H
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index ae19d5b20df..ab289925593 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -46,6 +46,7 @@
#include "olap/olap_common.h"
#include "runtime/primitive_type.h"
#include "util/defer_op.h"
+#include "util/jsonb_parser_simd.h"
#include "util/simd/bits.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/aggregate_functions/helpers.h"
@@ -76,12 +77,6 @@
#include "vec/data_types/get_least_supertype.h"
#include "vec/json/path_in_data.h"
-#ifdef __AVX2__
-#include "util/jsonb_parser_simd.h"
-#else
-#include "util/jsonb_parser.h"
-#endif
-
namespace doris::vectorized {
#include "common/compile_check_begin.h"
namespace {
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
index 88e8eb779ad..79df9656a9f 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
@@ -30,12 +30,7 @@
#include "common/status.h"
#include "exprs/json_functions.h"
#include "runtime/jsonb_value.h"
-
-#ifdef __AVX2__
#include "util/jsonb_parser_simd.h"
-#else
-#include "util/jsonb_parser.h"
-#endif
namespace doris {
namespace vectorized {
#include "common/compile_check_begin.h"
diff --git a/be/src/vec/data_types/serde/data_type_object_serde.cpp b/be/src/vec/data_types/serde/data_type_object_serde.cpp
index 758528c2cec..d1e0084ecd8 100644
--- a/be/src/vec/data_types/serde/data_type_object_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_object_serde.cpp
@@ -24,6 +24,7 @@
#include "common/exception.h"
#include "common/status.h"
+#include "util/jsonb_parser_simd.h"
#include "vec/columns/column.h"
#include "vec/columns/column_object.h"
#include "vec/common/assert_cast.h"
@@ -31,12 +32,6 @@
#include "vec/core/field.h"
#include "vec/core/types.h"
-#ifdef __AVX2__
-#include "util/jsonb_parser_simd.h"
-#else
-#include "util/jsonb_parser.h"
-#endif
-
namespace doris {
namespace vectorized {
diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp
index 4d27b549996..e41fb26cd7a 100644
--- a/be/src/vec/functions/function_json.cpp
+++ b/be/src/vec/functions/function_json.cpp
@@ -38,17 +38,12 @@
#include <utility>
#include <vector>
+#include "common/cast_set.h"
#include "common/compiler_util.h" // IWYU pragma: keep
#include "common/exception.h"
#include "common/status.h"
#include "exprs/json_functions.h"
-#include "vec/io/io_helper.h"
-#ifdef __AVX2__
#include "util/jsonb_parser_simd.h"
-#else
-#include "util/jsonb_parser.h"
-#endif
-#include "common/cast_set.h"
#include "util/string_parser.hpp"
#include "util/string_util.h"
#include "vec/aggregate_functions/aggregate_function.h"
@@ -70,6 +65,7 @@
#include "vec/functions/function.h"
#include "vec/functions/function_totype.h"
#include "vec/functions/simple_function_factory.h"
+#include "vec/io/io_helper.h"
#include "vec/utils/stringop_substring.h"
#include "vec/utils/template_helpers.hpp"
diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp
index 37a2095e86c..364072ca05e 100644
--- a/be/src/vec/functions/function_jsonb.cpp
+++ b/be/src/vec/functions/function_jsonb.cpp
@@ -33,11 +33,7 @@
#include "udf/udf.h"
#include "util/jsonb_document.h"
#include "util/jsonb_error.h"
-#ifdef __AVX2__
#include "util/jsonb_parser_simd.h"
-#else
-#include "util/jsonb_parser.h"
-#endif
#include "util/jsonb_stream.h"
#include "util/jsonb_utils.h"
#include "util/jsonb_writer.h"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]