This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 3c41631a053dd70276b7f52ae37ebb5dd0e84eac Author: Alexey Serbin <[email protected]> AuthorDate: Tue Jun 17 18:34:18 2025 -0700 KUDU-1261 introduce Flatbuffers into thirdparty This changelist adds flatbuffers-25.2.10 into Kudu's 3rd-party dependencies. I'm planning to use Flatbuffers for serializing and de-serializing of array cells' data in the RowOperationsPB.indirect_data field in follow-up patches. In the future, we can use it for serdes-ing of arbitrary nested types, but switching to the Arrow IPC format and importing the corresponding code from the Arrow project seems to be the best option in the long run. Using the Arrow IPC format for serdes-ing data of nested type cells looks like a natural next step once switching to a columnar on-the-wire format for data exchanged between Kudu clients and servers for write operations. At the time of writing, Kudu has a columnar on-the-wire format only for scanned data when COLUMNAR_LAYOUT_FEATURE is supported by both the server and client sides. Using Flatbuffers for serdes-ing nested type cells' data looks like a good option because of multi-language support [1], performance [2], ability to re-use the buffer memory without reallocation and copying, no temporary serdes objects, and a small run-time footprint. It's also inter-operable between versions and platforms [3], and licensed under Apache 2.0 license [4]. After quick research, the choice was between Protobuf, Flatbuffers, and Cap'n Proto [5]. I found a few reports of Cap'n Proto serdes performance being very close to Flatbuffers', and I didn't need Cap'n Proto's RPC and other very cool features, so the choice eventually became simple: Flatbuffers vs Protobuf. To choose between them, I implemented a small benchmark to assess the performance of each in a serdes use case for a particular schema (arrays.fbs, arrays.proto). 
The results of the benchmark show that Flatbuffers' serdes is about 7x-8x faster than Protobuf when comparing user CPU times, and that's with buffer contents verification enabled: RELEASE build, Ubuntu 24.04, x86_64, GCC/G++ 13 Flatbuffers serialize : ElemNum= 1024 Iterations= 100000 real 0.164s user 0.101s sys 0.053s Protobuf serialize : ElemNum= 1024 Iterations= 100000 real 0.883s user 0.785s sys 0.059s Flatbuffers deserialize: ElemNum= 1024 Iterations= 100000 real 0.136s user 0.092s sys 0.056s Protobuf deserialize: ElemNum= 1024 Iterations= 100000 real 0.825s user 0.707s sys 0.059s Flatbuffers serialize : ElemNum= 1024 Iterations= 500000 real 0.798s user 0.544s sys 0.246s Protobuf serialize : ElemNum= 1024 Iterations= 500000 real 4.437s user 4.190s sys 0.272s Flatbuffers deserialize: ElemNum= 1024 Iterations= 500000 real 0.675s user 0.469s sys 0.260s Protobuf deserialize: ElemNum= 1024 Iterations= 500000 real 4.119s user 3.827s sys 0.262s [1] https://flatbuffers.dev/support/ [2] https://flatbuffers.dev/benchmarks [3] https://flatbuffers.dev/white_paper/ [4] https://github.com/google/flatbuffers/blob/master/LICENSE [5] https://capnproto.org/ Change-Id: I89c697b8d80cbbd2af4233d16806a230cedaa81a Reviewed-on: http://gerrit.cloudera.org:8080/23056 Reviewed-by: Abhishek Chennaka <[email protected]> Tested-by: Alexey Serbin <[email protected]> --- CMakeLists.txt | 16 +- cmake_modules/FindFlatbuffers.cmake | 140 +++++ java/kudu-proto/build.gradle | 4 +- src/kudu/benchmarks/CMakeLists.txt | 26 + src/kudu/benchmarks/serdes/arrays.fbs | 67 +++ src/kudu/benchmarks/serdes/arrays.proto | 87 +++ src/kudu/benchmarks/serdes/serdes-test.cc | 640 +++++++++++++++++++++ thirdparty/build-definitions.sh | 22 + thirdparty/build-thirdparty.sh | 8 + thirdparty/download-thirdparty.sh | 7 + .../flatbuffers-length-to-size-uint8-ptr.patch | 58 ++ thirdparty/vars.sh | 4 + 12 files changed, 1077 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 
c7a63310d..9b93e7351 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1086,6 +1086,13 @@ ADD_THIRDPARTY_LIB(gtest STATIC_LIB "${GTEST_STATIC_LIBRARY}" SHARED_LIB "${GTEST_SHARED_LIBRARY}") +## Flatbuffers +find_package(Flatbuffers REQUIRED) +include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR}) +ADD_THIRDPARTY_LIB(flatbuffers + STATIC_LIB "${FLATBUFFERS_STATIC_LIBRARY}" + SHARED_LIB "${FLATBUFFERS_SHARED_LIBRARY}") + ## Protobuf find_package(Protobuf REQUIRED) include_directories(SYSTEM ${PROTOBUF_INCLUDE_DIR}) @@ -1467,12 +1474,19 @@ if (UNIX) add_custom_target(krpc-gen) endif (UNIX) +############################################################ +# "make fb-gen": generate all Flatbuffers source/header files +############################################################ +if (UNIX) + add_custom_target(fb-gen) +endif (UNIX) + ############################################################ # "generated-headers" target ############################################################ if (UNIX) add_custom_target(generated-headers - DEPENDS pb-gen krpc-gen hms_thrift gen_version_info) + DEPENDS pb-gen krpc-gen fb-gen hms_thrift gen_version_info) endif (UNIX) ############################################################ diff --git a/cmake_modules/FindFlatbuffers.cmake b/cmake_modules/FindFlatbuffers.cmake new file mode 100644 index 000000000..022ad5bcd --- /dev/null +++ b/cmake_modules/FindFlatbuffers.cmake @@ -0,0 +1,140 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Find Flatbuffers +# +# This module defines +# FLATBUFFERS_INCLUDE_DIR, directory containing headers +# FLATBUFFERS_SHARED_LIBRARY, path to Flatbuffers shared library +# FLATBUFFERS_STATIC_LIBRARY, path to Flatbuffers static library +# FLATBUFFERS_FLATC_EXECUTABLE, the Flatbuffers compiler (flatc) +# FLATBUFFERS_FOUND, whether flatbuffers has been found + +find_path(FLATBUFFERS_INCLUDE_DIR flatbuffers/flatbuffers.h + NO_CMAKE_SYSTEM_PATH + NO_SYSTEM_ENVIRONMENT_PATH) +find_library(FLATBUFFERS_SHARED_LIBRARY flatbuffers + NO_CMAKE_SYSTEM_PATH + NO_SYSTEM_ENVIRONMENT_PATH) +find_library(FLATBUFFERS_STATIC_LIBRARY libflatbuffers.a + NO_CMAKE_SYSTEM_PATH + NO_SYSTEM_ENVIRONMENT_PATH) + +find_program(FLATBUFFERS_FLATC_EXECUTABLE flatc + DOC "The Google Flatbuffers Compiler" + NO_CMAKE_SYSTEM_PATH + NO_SYSTEM_ENVIRONMENT_PATH) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Flatbuffers REQUIRED_VARS + FLATBUFFERS_SHARED_LIBRARY + FLATBUFFERS_STATIC_LIBRARY + FLATBUFFERS_INCLUDE_DIR + FLATBUFFERS_FLATC_EXECUTABLE) + +# +# FLATBUFFERS_GENERATE_CPP(FBS_HDRS FBS_TGTS +# [SOURCE_ROOT <root from which source is found>] +# [BINARY_ROOT <root into which binaries are built>] +# FBS_FILES foo.fbs) +# add_executable(bar bar.cc ${FBS_HDRS}) +# target_link_libraries(bar ${FLATBUFFERS_SHARED_LIBRARY}) +# +# ==================================================================== +# +# FLATBUFFERS_GENERATE_CPP (public function) +# HDRS = Variable to define with autogenerated +# header files +# TGTS = Variable to define with autogenerated +# custom targets; 
if HDRS need to be used in multiple libraries, +# those libraries should depend on these targets +# in order to "serialize" the flatc invocations +# ==================================================================== + +function(FLATBUFFERS_GENERATE_CPP HDRS TGTS) + if(NOT ARGN) + message(SEND_ERROR "Error: FLATBUFFERS_GENERATE_CPP() called without any .fbs files") + return() + endif(NOT ARGN) + + set(options) + set(one_value_args SOURCE_ROOT BINARY_ROOT) + set(multi_value_args FBS_FILES) + cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) + if(ARG_UNPARSED_ARGUMENTS) + message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") + endif() + + set(${HDRS}) + set(${TGTS}) + + if("${ARG_SOURCE_ROOT}" STREQUAL "") + SET(ARG_SOURCE_ROOT "${CMAKE_CURRENT_SOURCE_DIR}") + endif() + GET_FILENAME_COMPONENT(ARG_SOURCE_ROOT ${ARG_SOURCE_ROOT} ABSOLUTE) + + if("${ARG_BINARY_ROOT}" STREQUAL "") + SET(ARG_BINARY_ROOT "${CMAKE_CURRENT_BINARY_DIR}") + endif() + GET_FILENAME_COMPONENT(ARG_BINARY_ROOT ${ARG_BINARY_ROOT} ABSOLUTE) + + foreach(FIL ${ARG_FBS_FILES}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(FIL_WE ${FIL} NAME_WE) + + # Ensure that the Flatbuffers file is within the source root. 
+ FILE(RELATIVE_PATH FBS_REL_TO_ROOT "${ARG_SOURCE_ROOT}" "${ABS_FIL}") + + GET_FILENAME_COMPONENT(REL_DIR "${FBS_REL_TO_ROOT}" PATH) + + if(NOT REL_DIR STREQUAL "") + SET(REL_DIR "${REL_DIR}/") + endif() + + set(FBS_H_OUT "${ARG_BINARY_ROOT}/${REL_DIR}${FIL_WE}.fb.h") + list(APPEND ${HDRS} "${FBS_H_OUT}") + + add_custom_command( + OUTPUT "${FBS_H_OUT}" + COMMAND ${FLATBUFFERS_FLATC_EXECUTABLE} + ARGS + --cpp + --cpp-std c++17 + --filename-suffix .fb + -o ${ARG_BINARY_ROOT}/${REL_DIR} + ${ABS_FIL} + DEPENDS ${ABS_FIL} + COMMENT "Running C++ Flatbuffers compiler on ${FIL}" + VERBATIM) + + # This custom target enforces that there's just one invocation of flatc + # when there are multiple consumers of the generated files. The target name + # must be unique; adding parts of the filename helps ensure this. + string(MAKE_C_IDENTIFIER "${REL_DIR}${FIL}" TGT_NAME) + add_custom_target(${TGT_NAME} + DEPENDS "${FBS_H_OUT}") + list(APPEND ${TGTS} "${TGT_NAME}") + endforeach() + + set_source_files_properties(${${HDRS}} PROPERTIES GENERATED TRUE) + set(${HDRS} ${${HDRS}} PARENT_SCOPE) + set(${TGTS} ${${TGTS}} PARENT_SCOPE) + + # The 'fb-gen' is a high-level target in $KUDU_ROOT/CMakeLists.txt to + # process Flatbuffers definitions and generate corresponding files. 
+ add_dependencies(fb-gen ${${TGTS}}) +endfunction() diff --git a/java/kudu-proto/build.gradle b/java/kudu-proto/build.gradle index 42b9dac7b..98ee30346 100644 --- a/java/kudu-proto/build.gradle +++ b/java/kudu-proto/build.gradle @@ -26,8 +26,10 @@ sourceSets { main { proto { srcDir "${project.rootDir}/../src" - // Excluded any test proto files + // Exclude any test proto files exclude "**/*test*.proto" + // Exclude specific .proto files under src/kudu/benchmarks + exclude "**/benchmarks/serdes/arrays.proto" } } } diff --git a/src/kudu/benchmarks/CMakeLists.txt b/src/kudu/benchmarks/CMakeLists.txt index f33a21dd3..e900e5366 100644 --- a/src/kudu/benchmarks/CMakeLists.txt +++ b/src/kudu/benchmarks/CMakeLists.txt @@ -73,3 +73,29 @@ if(NOT NO_ROCKSDB) rocksdb) endif() ADD_KUDU_TEST(tpch/rpc_line_item_dao-test) + +####################################### +# serdes-test +####################################### +PROTOBUF_GENERATE_CPP( + ARRAYS_PROTO_SRCS ARRAYS_PROTO_HDRS ARRAYS_PROTO_TGTS + SOURCE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../.. + BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR}/../.. + PROTO_FILES serdes/arrays.proto) +ADD_EXPORTABLE_LIBRARY(serdes_arrays + SRCS ${ARRAYS_PROTO_SRCS} + DEPS protobuf + NONLINK_DEPS ${ARRAYS_PROTO_TGTS}) + +FLATBUFFERS_GENERATE_CPP(ARRAYS_FBS_HDRS ARRAYS_FBS_TGTS + SOURCE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../.. + BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR}/../.. + FBS_FILES serdes/arrays.fbs) + +ADD_KUDU_TEST(serdes/serdes-test) +ADD_KUDU_TEST_DEPENDENCIES(serdes/serdes-test ${ARRAYS_FBS_TGTS}) +if(NOT NO_TESTS) + target_link_libraries(serdes-test + flatbuffers + serdes_arrays) +endif() diff --git a/src/kudu/benchmarks/serdes/arrays.fbs b/src/kudu/benchmarks/serdes/arrays.fbs new file mode 100644 index 000000000..a0f8070f0 --- /dev/null +++ b/src/kudu/benchmarks/serdes/arrays.fbs @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This is a pair IDL file for arrays.proto in the same directory. + +namespace arrays.test; + +// These represent one-dimensional arrays of a particular scalar type +// containing an arbitrary number of elements. +table Int8 { values:[int8]; } +table UInt8 { values:[uint8]; } +table Int16 { values:[int16]; } +table UInt16 { values:[uint16]; } +table Int32 { values:[int32]; } +table UInt32 { values:[uint32]; } +table Int64 { values:[int64]; } +table UInt64 { values:[uint64]; } +table Float { values:[float]; } +table Double { values:[double]; } +table String { values:[string]; } +table Binary { values:[ubyte]; } + +// A wrapper to represent one-dimensional arrays of various scalar types +// under the umbrella of a single field. +union ScalarArray { + Int8, + UInt8, + Int16, + UInt16, + Int32, + UInt32, + Int64, + UInt64, + Float, + Double, + String, + Binary +} + +// This is to represent a one-dimensional array of a particular scalar type +// where some of the array's elements might be null/non-valid. The 'data' +// field is a placeholder for the array's elements, and the 'validity' field +// contains information on their validity/non-nullness: 'true' means 'valid', +// 'false' means 'non-valid' (a.k.a. 'null'). 
+// +// TODO(aserbin): consider storing 1 bit per data element in the 'validity' +// field instead of using whole 'ubyte/uint8' +table Content { + data:ScalarArray; + validity:[bool]; +} + +root_type Content; diff --git a/src/kudu/benchmarks/serdes/arrays.proto b/src/kudu/benchmarks/serdes/arrays.proto new file mode 100644 index 000000000..93076e022 --- /dev/null +++ b/src/kudu/benchmarks/serdes/arrays.proto @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This is a pair IDL file for arrays.fbs in the same directory. + +syntax = "proto2"; +package arrays.test; + +// This message is to represent a one-dimensional array of the same +// scalar type where some of the array's elements might be null/non-valid. +// +// It's mirroring the 'Content' Flatbuffers table from the arrays.fbs file. 
+message ArrayPB { + message Bool { + repeated bool values = 1; + }; + message Int8 { + repeated bytes values = 1; + }; + message UInt8 { + repeated bytes values = 1; + }; + message Int32 { + repeated int32 values = 1; + }; + message UInt32 { + repeated uint32 values = 1; + }; + message Int64 { + repeated int64 values = 1; + }; + message UInt64 { + repeated uint64 values = 1; + }; + message Float { + repeated float values = 1; + }; + message Double { + repeated double values = 1; + }; + message String { + repeated string values = 1; + }; + message Binary { + repeated bytes values = 1; + }; + + // The 'validity' field contains information on the validity/non-nullness + // of the elements in the 'value' union field below: 'true' means 'valid', + // 'false' means 'non-valid' (a.k.a. 'null'). + // + // TODO(aserbin): consider storing 1 bit per data element in the 'validity' + // field instead of using 'bool' + repeated bool validity = 1; + + // The 'value' union field is the placeholder for the array's elements. For + // the sake of simplicity of the logic that uses serialized/de-serialized + // data, non-valid/null data elements are always present, but their value + // might be arbitrary and should be discarded. + oneof value { + Bool val_bool = 2; + Int8 val_int8 = 3; + UInt8 val_uint8 = 4; + Int32 val_int32 = 5; + UInt32 val_uint32 = 6; + Int64 val_int64 = 7; + UInt64 val_uint64 = 8; + Float val_float = 9; + Double val_double = 10; + String val_string = 11; + Binary val_binary = 12; + } +}; diff --git a/src/kudu/benchmarks/serdes/serdes-test.cc b/src/kudu/benchmarks/serdes/serdes-test.cc new file mode 100644 index 000000000..98422371c --- /dev/null +++ b/src/kudu/benchmarks/serdes/serdes-test.cc @@ -0,0 +1,640 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <limits> +#include <memory> +#include <numeric> +#include <ostream> +#include <random> +#include <string> +#include <string_view> +#include <utility> +#include <vector> + +#include <flatbuffers/buffer.h> +#include <flatbuffers/flatbuffer_builder.h> +#include <flatbuffers/string.h> +#include <flatbuffers/vector.h> +#include <flatbuffers/verifier.h> +#include <glog/logging.h> +#include <gtest/gtest.h> + +#include "kudu/benchmarks/serdes/arrays.fb.h" +#include "kudu/benchmarks/serdes/arrays.pb.h" +#include "kudu/gutil/stringprintf.h" +#include "kudu/gutil/strings/substitute.h" +#include "kudu/util/slice.h" +#include "kudu/util/stopwatch.h" +#include "kudu/util/test_util.h" + +using arrays::test::ArrayPB; +using arrays::test::CreateContentDirect; +using arrays::test::CreateInt32Direct; +using arrays::test::CreateUInt64Direct; +using arrays::test::Content; +using arrays::test::Int16; +using arrays::test::Int32; +using arrays::test::Int64; +using arrays::test::GetContent; +using arrays::test::String; +using arrays::test::ScalarArray; +using arrays::test::UInt64; +using arrays::test::VerifyContentBuffer; +using flatbuffers::FlatBufferBuilder; +using flatbuffers::Verifier; +using std::iota; +using std::string; +using std::string_view; +using std::unique_ptr; +using std::vector; +using 
strings::Substitute; + +namespace kudu { + +// Various serialization/de-serialization test scenarios using Flatbuffers +// and Protobuf for one-dimensional arrays based on data structures defined +// in the 'arrays.fbs' and 'arrays.proto' IDL files. For Flatbuffers-specific +// C++ bindings and reference, see https://flatbuffers.dev/languages/cpp/ +class SerDesTest : public KuduTest { +}; + +TEST_F(SerDesTest, FlatbuffersBasic) { + constexpr const size_t kBufSize = 1024; + const vector<int32_t> values_src{ 1, 2, 3, 4, 5 }; + const vector<uint8_t> validity_src{ 0b00010111 }; + + // Add the data using FlatBufferBuilder. + FlatBufferBuilder builder(kBufSize); + { + auto values = CreateInt32Direct(builder, &values_src); + builder.Finish(CreateContentDirect(builder, + ScalarArray::Int32, + values.Union(), + &validity_src)); + } + + // Extract the data from the buffer. The instance of FlatBufferBuilder + // needs to stay alive since it owns the buffer. Alternatively, it's possible + // to use FlatBufferBuilder::Release() to get DetachedBuffer, passing the + // ownership to the instance of DetachedBuffer. + const uint8_t* buf = builder.GetBufferPointer(); + { + const Content* content = GetContent(buf); + ASSERT_NE(nullptr, content); + + Verifier verifier(buf, kBufSize); + ASSERT_TRUE(VerifyContentBuffer(verifier)); + + const auto array_type = content->data_type(); + ASSERT_EQ(ScalarArray::Int32, array_type); + + const auto* values = content->data_as<Int32>()->values(); + ASSERT_NE(nullptr, values); + ASSERT_EQ(5, values->size()); + for (size_t i = 0; i < values->size(); ++i) { + ASSERT_EQ(i + 1, values->Get(i)); + } + + const auto* validity = content->validity(); + ASSERT_EQ(validity_src.size(), validity->size()); + ASSERT_EQ(0, memcmp(validity_src.data(), validity->data(), validity_src.size())); + + // Verify raw data access. 
+ const int32_t* data_src = values_src.data(); + const int32_t* data = values->data(); + for (size_t i = 0; i < values_src.size(); ++i) { + SCOPED_TRACE(Substitute("array index: $0", i)); + ASSERT_TRUE(*data_src++ == *data++); + } + } +} + +TEST_F(SerDesTest, FlatbuffersPlainSrcBuffer) { + constexpr const size_t kBufSize = 1024; + const vector<int16_t> values_src{ 1, 2, 3, 4, 5, 6, 7 }; + const vector<uint8_t> validity_src{ 0b01010111 }; + + FlatBufferBuilder builder(kBufSize); + { + // Create helper objects accessing the source data via raw pointer, + // relying on the C-style array memory layout. + auto values_vec = builder.CreateVector(values_src.data(), values_src.size()); + auto values = Int16::Traits::Create(builder, values_vec); + builder.Finish(CreateContentDirect(builder, + ScalarArray::Int16, + values.Union(), + &validity_src)); + } + + // Extract data from the buffer and verify it matches the source. + const uint8_t* buf = builder.GetBufferPointer(); + { + Verifier verifier(buf, kBufSize); + ASSERT_TRUE(VerifyContentBuffer(verifier)); + + const Content* content = GetContent(buf); + ASSERT_NE(nullptr, content); + const auto array_type = content->data_type(); + ASSERT_EQ(ScalarArray::Int16, array_type); + + const auto* values = content->data_as<Int16>()->values(); + ASSERT_NE(nullptr, values); + ASSERT_EQ(7, values->size()); + for (size_t i = 0; i < values->size(); ++i) { + ASSERT_EQ(i + 1, values->Get(i)); + } + + const auto* validity = content->validity(); + ASSERT_EQ(validity_src.size(), validity->size()); + ASSERT_EQ(0, memcmp(validity_src.data(), validity->data(), validity_src.size())); + + // Verify raw data access. 
+ const int16_t* data_src = values_src.data(); + const int16_t* data = values->data(); + for (size_t i = 0; i < 7; ++i) { + SCOPED_TRACE(Substitute("array index: $0", i)); + ASSERT_TRUE(*data_src++ == *data++); + } + } +} + +TEST_F(SerDesTest, FlatbuffersStringSrcVector) { + constexpr const size_t kBufSize = 1024; + const vector<string> values_src{ "", "1", "02", "003", "0004", "00005" }; + const vector<uint8_t> validity_src{ 0b00111110 }; + + FlatBufferBuilder builder(kBufSize); + { + // Create helper objects accessing the source data via raw pointer, + // relying on the C-style array memory layout. + auto values_vec = builder.CreateVectorOfStrings(values_src); + auto values = String::Traits::Create(builder, values_vec); + builder.Finish(CreateContentDirect(builder, + ScalarArray::String, + values.Union(), + &validity_src)); + } + + // Extract data from the buffer and verify it matches the source. + const uint8_t* buf = builder.GetBufferPointer(); + { + Verifier verifier(buf, kBufSize); + ASSERT_TRUE(VerifyContentBuffer(verifier)); + + const Content* content = GetContent(buf); + ASSERT_NE(nullptr, content); + const auto array_type = content->data_type(); + ASSERT_EQ(ScalarArray::String, array_type); + + const auto* values = content->data_as<String>()->values(); + ASSERT_NE(nullptr, values); + ASSERT_EQ(values_src.size(), values->size()); + for (size_t i = 0; i < values->size(); ++i) { + ASSERT_EQ(values_src[i], values->Get(i)->string_view()); + + // Do explicit memory comparison using raw data accessors. 
+ const auto ref_size = values_src[i].size(); + ASSERT_EQ(ref_size, values->Get(i)->size()); + ASSERT_EQ(0, memcmp(values->Get(i)->Data(), values_src[i].data(), ref_size)); + } + + const auto* validity = content->validity(); + ASSERT_EQ(validity_src.size(), validity->size()); + ASSERT_EQ(0, memcmp(validity_src.data(), validity->data(), validity_src.size())); + } +} + +TEST_F(SerDesTest, FlatbuffersStringViewSrcVector) { + constexpr const size_t kBufSize = 1024; + const vector<string_view> values_src{ "1", "02", "003" }; + const vector<uint8_t> validity_src{ 0b00000111 }; + + FlatBufferBuilder builder(kBufSize); + { + // Create helper objects accessing the source data via raw pointer, + // relying on the C-style array memory layout. + auto values_vec = builder.CreateVectorOfStrings(values_src); + auto values = String::Traits::Create(builder, values_vec); + builder.Finish(CreateContentDirect(builder, + ScalarArray::String, + values.Union(), + &validity_src)); + } + + // Extract data from the buffer and verify it matches the source. + const uint8_t* buf = builder.GetBufferPointer(); + { + Verifier verifier(buf, kBufSize); + ASSERT_TRUE(VerifyContentBuffer(verifier)); + + const Content* content = GetContent(buf); + ASSERT_NE(nullptr, content); + const auto array_type = content->data_type(); + ASSERT_EQ(ScalarArray::String, array_type); + + const auto* values = content->data_as<String>()->values(); + ASSERT_NE(nullptr, values); + ASSERT_EQ(values_src.size(), values->size()); + for (size_t i = 0; i < values->size(); ++i) { + ASSERT_EQ(values_src[i], values->Get(i)->string_view()); + + // Do explicit memory comparison using raw data accessors. 
+ const auto ref_size = values_src[i].size(); + ASSERT_EQ(ref_size, values->Get(i)->size()); + ASSERT_EQ(0, memcmp(values->Get(i)->Data(), values_src[i].data(), ref_size)); + } + + const auto* validity = content->validity(); + ASSERT_EQ(validity_src.size(), validity->size()); + ASSERT_EQ(0, memcmp(validity_src.data(), validity->data(), validity_src.size())); + } +} + +TEST_F(SerDesTest, FlatbuffersSliceSrcVector) { + constexpr const size_t kBufSize = 1024; + const vector<Slice> values_src{ "-1", "0", "1", "02", "003", "100000000000" }; + const vector<uint8_t> validity_src{ 0b00101111 }; + + FlatBufferBuilder builder(kBufSize); + { + // Create helper objects accessing the source data via raw pointer, + // relying on the C-style array memory layout. + auto values_vec = builder.CreateVectorOfStrings(values_src); + auto values = String::Traits::Create(builder, values_vec); + builder.Finish(CreateContentDirect(builder, + ScalarArray::String, + values.Union(), + &validity_src)); + } + + // Extract data from the buffer and verify it matches the source. + const uint8_t* buf = builder.GetBufferPointer(); + { + Verifier verifier(buf, kBufSize); + ASSERT_TRUE(VerifyContentBuffer(verifier)); + + const Content* content = GetContent(buf); + ASSERT_NE(nullptr, content); + const auto array_type = content->data_type(); + ASSERT_EQ(ScalarArray::String, array_type); + + const auto* values = content->data_as<String>()->values(); + ASSERT_NE(nullptr, values); + ASSERT_EQ(values_src.size(), values->size()); + for (size_t i = 0; i < values->size(); ++i) { + // Do explicit memory comparison using raw data accessors. 
+ const auto ref_size = values_src[i].size(); + ASSERT_EQ(ref_size, values->Get(i)->size()); + ASSERT_EQ(0, memcmp(values->Get(i)->Data(), values_src[i].data(), ref_size)); + } + + const auto* validity = content->validity(); + ASSERT_EQ(validity_src.size(), validity->size()); + ASSERT_EQ(0, memcmp(validity_src.data(), validity->data(), validity_src.size())); + } +} + +TEST_F(SerDesTest, FlatbuffersBuilderUninitializedVector) { + constexpr const size_t kBufSize = 1024; + const int64_t values_src[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + const vector<uint8_t> validity_src{ 0b00000010, 0b11010111 }; + + FlatBufferBuilder builder(kBufSize); + { + int64_t* buf = nullptr; + auto v = builder.CreateUninitializedVector(11, &buf); + ASSERT_NE(nullptr, buf); + // Copy the data into the allocated, but uninitialized vector. + memcpy(buf, values_src, 11 * sizeof(int64_t)); + auto values = Int64::Traits::Create(builder, v); + builder.Finish(CreateContentDirect(builder, + ScalarArray::Int64, + values.Union(), + &validity_src)); + } + + // Extract data from the buffer and verify it matches the source. + const uint8_t* buf = builder.GetBufferPointer(); + { + Verifier verifier(buf, kBufSize); + ASSERT_TRUE(VerifyContentBuffer(verifier)); + + const Content* content = GetContent(buf); + ASSERT_NE(nullptr, content); + const auto array_type = content->data_type(); + ASSERT_EQ(ScalarArray::Int64, array_type); + + const auto* values = content->data_as<Int64>()->values(); + ASSERT_NE(nullptr, values); + ASSERT_EQ(11, values->size()); + + const auto* validity = content->validity(); + ASSERT_EQ(validity_src.size(), validity->size()); + ASSERT_EQ(0, memcmp(validity_src.data(), validity->data(), 2)); + + // Verify raw data access. 
+    const int64_t* data_src = values_src;
+    const int64_t* data = values->data();
+    ASSERT_EQ(0, memcmp(data, data_src, 11 * sizeof(int64_t)));
+  }
+}
+
+// Serialize a small Int32 array with validity bits, detach the raw buffer
+// from the builder via ReleaseRaw(), and verify that the data read back from
+// the detached buffer matches the source.
+TEST_F(SerDesTest, FlatbuffersReleaseRaw) {
+  constexpr const size_t kBufSize = 1024;
+  const vector<int32_t> values_src{ 1, 2, 3, 4, 5 };
+  const vector<uint8_t> validity_src{ 0b00010111 };
+
+  size_t buf_size = 0;
+  size_t buf_offset = 0;
+  unique_ptr<uint8_t[]> buf_data;
+
+  {
+    // Add the data using FlatBufferBuilder.
+    FlatBufferBuilder builder(kBufSize);
+    auto values = CreateInt32Direct(builder, &values_src);
+    builder.Finish(CreateContentDirect(builder,
+                                       ScalarArray::Int32,
+                                       values.Union(),
+                                       &validity_src));
+    buf_data.reset(builder.ReleaseRaw(buf_size, buf_offset));
+  }
+  ASSERT_NE(nullptr, buf_data.get());
+  ASSERT_GT(buf_size, 0);
+  ASSERT_GT(buf_size, buf_offset);
+
+  {
+    // The serialized data starts at 'buf_offset'.
+    Verifier verifier(buf_data.get() + buf_offset, buf_size - buf_offset);
+    ASSERT_TRUE(VerifyContentBuffer(verifier));
+
+    const Content* content = GetContent(buf_data.get() + buf_offset);
+    ASSERT_NE(nullptr, content);
+    const auto array_type = content->data_type();
+    ASSERT_EQ(ScalarArray::Int32, array_type);
+
+    const auto* values = content->data_as<Int32>()->values();
+    ASSERT_NE(nullptr, values);
+    ASSERT_EQ(5, values->size());
+    for (size_t i = 0; i < values->size(); ++i) {
+      ASSERT_EQ(i + 1, values->Get(i));
+    }
+
+    const auto* validity = content->validity();
+    ASSERT_NE(nullptr, validity);
+    ASSERT_EQ(1, validity->size());
+    ASSERT_EQ(validity_src[0], validity->Get(0));
+
+    // Verify raw data access for the data, element by element.
+    const int32_t* data_src = values_src.data();
+    const int32_t* data = values->data();
+    for (size_t i = 0; i < values_src.size(); ++i) {
+      SCOPED_TRACE(Substitute("array index: $0", i));
+      ASSERT_TRUE(*data_src++ == *data++);
+    }
+
+    // Verify raw data access for the validity bits, 8 bits at a time.
+    const uint8_t* validity_bits_src = validity_src.data();
+    const uint8_t* validity_bits_dst = validity->data();
+    for (size_t i = 0; i < validity_src.size(); ++i) {
+      SCOPED_TRACE(Substitute("validity index: $0", i));
+      ASSERT_TRUE(*validity_bits_src++ == *validity_bits_dst++);
+    }
+  }
+}
+
+// Time Flatbuffers serialization and verification/access of a shuffled
+// UInt64 array of kElemNum elements (plus validity bytes) over kIterNum
+// iterations, then log the elapsed times and serialized buffer size stats.
+TEST_F(SerDesTest, FlatbuffersBenchmark) {
+  SKIP_IF_SLOW_NOT_ALLOWED();
+
+  constexpr const size_t kIterNum = 100000;
+  constexpr const size_t kElemNum = 1024;
+  constexpr const size_t kBufSize = kElemNum * (8 + 1 + 1);
+  static_assert(kElemNum % 8 == 0);
+
+  vector<uint64_t> data_src(kElemNum);
+  iota(data_src.begin(), data_src.end(), 0);
+
+  vector<uint8_t> validity_src(kElemNum / 8);
+  iota(validity_src.begin(), validity_src.end(), 0);
+
+  Stopwatch timer_s;
+  Stopwatch timer_d;
+
+  Verifier::Options opt;
+  opt.check_nested_flatbuffers = true;
+  opt.max_depth = 2;
+  opt.max_tables = 2;
+  opt.max_size = 2 * kBufSize;
+
+  size_t buf_min_size = std::numeric_limits<size_t>::max();
+  size_t buf_max_size = std::numeric_limits<size_t>::min();
+  size_t buf_sum_size = 0;
+
+  std::mt19937 gen(SeedRandom());
+  for (size_t iter = 0; iter < kIterNum; ++iter) {
+    uint8_t* buf = nullptr;
+    std::shuffle(data_src.begin(), data_src.end(), gen);
+    std::shuffle(validity_src.begin(), validity_src.end(), gen);
+
+    timer_s.resume();
+    FlatBufferBuilder builder(kBufSize);
+    auto values = CreateUInt64Direct(builder, &data_src);
+    builder.Finish(CreateContentDirect(builder,
+                                       ScalarArray::UInt64,
+                                       values.Union(),
+                                       &validity_src));
+
+    buf = builder.GetBufferPointer();
+    timer_s.stop();
+
+    ASSERT_NE(nullptr, buf);
+
+    timer_d.resume();
+    // The verifier must be given the number of bytes actually serialized:
+    // 'buf' is where the finished data starts, not the start of a region of
+    // kBufSize bytes, so passing kBufSize would overstate the valid length.
+    Verifier verifier(buf, builder.GetSize(), opt);
+    const auto verification_result = VerifyContentBuffer(verifier);
+    const Content* content = GetContent(buf);
+    const auto* result_data = content->data_as<UInt64>()->values();
+    timer_d.stop();
+
+    ASSERT_TRUE(verification_result);
+    ASSERT_EQ(data_src.size(), result_data->size());
+    for (size_t i = 0; i < data_src.size(); ++i) {
+      ASSERT_EQ(data_src[i], result_data->Get(i));
+    }
+
+    // Verify raw data access.
+    const uint64_t* raw_data_src = data_src.data();
+    const uint64_t* raw_data = result_data->data();
+    ASSERT_EQ(0, memcmp(raw_data_src,
+                        raw_data,
+                        sizeof(uint64_t) * data_src.size()));
+    // Collect stats on the serialized data size.
+    const size_t buf_size = builder.GetSize();
+    buf_sum_size += buf_size;
+    if (buf_size > buf_max_size) {
+      buf_max_size = buf_size;
+    }
+    if (buf_size < buf_min_size) {
+      buf_min_size = buf_size;
+    }
+  }
+
+  // All the printed values are size_t, hence the %zu conversions.
+  const auto& timer_s_desc = StringPrintf(
+      "ElemNum=%5zu Iterations=%8zu", kElemNum, kIterNum);
+  LOG(INFO) << Substitute("Flatbuffers serialize : $0 $1",
+                          timer_s_desc, timer_s.elapsed().ToString());
+  const auto& timer_d_desc = StringPrintf(
+      "ElemNum=%5zu Iterations=%8zu", kElemNum, kIterNum);
+  LOG(INFO) << Substitute("Flatbuffers deserialize: $0 $1",
+                          timer_d_desc, timer_d.elapsed().ToString());
+  const auto& buffer_size_desc = StringPrintf(
+      "ElemNum=%5zu Iterations=%8zu min=%5zu max=%5zu average=%5zu",
+      kElemNum, kIterNum, buf_min_size, buf_max_size, buf_sum_size / kIterNum);
+  LOG(INFO) << Substitute("Flatbuffers buffer size: $0",
+                          buffer_size_desc);
+}
+
+// Round-trip a small Int32 array and its validity flags through an ArrayPB
+// message serialized to a string, and verify the parsed contents.
+TEST_F(SerDesTest, ProtobufBasic) {
+  const vector<int32_t> values_src{ 1, 2, 3, 4, 5 };
+  const vector<bool> validity_src{ true, true, true, false, true };
+
+  string message_str;
+  {
+    ArrayPB message;
+    message.mutable_validity()->Add(validity_src.begin(), validity_src.end());
+    auto* data = message.mutable_val_int32();
+    data->mutable_values()->Add(values_src.begin(), values_src.end());
+
+    ASSERT_TRUE(message.SerializeToString(&message_str));
+  }
+
+  {
+    ArrayPB message;
+    ASSERT_TRUE(message.ParseFromString(message_str));
+
+    const auto& validity = message.validity();
+    ASSERT_EQ(validity_src.size(), validity.size());
+    for (auto i = 0; i < validity.size(); ++i) {
+      ASSERT_EQ(validity_src[i], validity[i]);
+    }
+
+    ASSERT_TRUE(message.has_val_int32());
+    ASSERT_FALSE(message.has_val_int64());
+    const auto& data = message.val_int32();
+    ASSERT_EQ(5, data.values_size());
+    ASSERT_EQ(values_src.size(), data.values_size());
+
+    for (auto i = 0; i < data.values_size(); ++i) {
+      ASSERT_EQ(values_src[i], data.values(i));
+    }
+  }
+}
+
+// Time Protobuf serialization and parsing of a shuffled UInt64 array of
+// kElemNum elements (plus per-element validity flags) over kIterNum
+// iterations, then log the elapsed times and serialized message size stats.
+TEST_F(SerDesTest, ProtobufBenchmark) {
+  SKIP_IF_SLOW_NOT_ALLOWED();
+
+  constexpr const size_t kIterNum = 100000;
+  constexpr const size_t kElemNum = 1024;
+  constexpr const size_t kDataSeqStart =
+      std::numeric_limits<uint64_t>::max() - 2 * kElemNum;
+
+  static_assert(kElemNum % 8 == 0);
+
+  vector<uint64_t> data_src(kElemNum);
+  iota(data_src.begin(), data_src.end(), kDataSeqStart);
+
+  vector<bool> validity_src(kElemNum);
+  for (size_t i = 0; i < kElemNum; ++i) {
+    validity_src[i] = (i % 8 != 0);
+  }
+
+  Stopwatch timer_s;
+  Stopwatch timer_d;
+
+  size_t buf_min_size = std::numeric_limits<size_t>::max();
+  size_t buf_max_size = std::numeric_limits<size_t>::min();
+  size_t buf_sum_size = 0;
+
+  std::mt19937 gen(SeedRandom());
+  // The counter is size_t to match kIterNum and avoid a signed/unsigned
+  // comparison.
+  for (size_t iter = 0; iter < kIterNum; ++iter) {
+    std::shuffle(data_src.begin(), data_src.end(), gen);
+    std::shuffle(validity_src.begin(), validity_src.end(), gen);
+    string message_str;
+    {
+      timer_s.resume();
+      ArrayPB message;
+      auto* validity = message.mutable_validity();
+      validity->Add(validity_src.begin(), validity_src.end());
+      auto* data = message.mutable_val_uint64();
+      data->mutable_values()->Add(data_src.begin(), data_src.end());
+
+      const auto status = message.SerializeToString(&message_str);
+      timer_s.stop();
+      ASSERT_TRUE(status);
+    }
+
+    {
+      timer_d.resume();
+      ArrayPB message;
+      const auto status = message.ParseFromString(message_str);
+      const auto is_uint64 = message.has_val_uint64();
+      const auto& data = message.val_uint64();
+      const auto& validity = message.validity();
+      timer_d.stop();
+
+      // Do verification outside of the performance timer section.
+      ASSERT_TRUE(status);
+      ASSERT_TRUE(is_uint64);
+      ASSERT_EQ(data_src.size(), data.values_size());
+      ASSERT_EQ(0, memcmp(data_src.data(),
+                          data.values().data(),
+                          data_src.size() * sizeof(uint64_t)));
+      ASSERT_EQ(validity_src.size(), validity.size());
+      for (auto i = 0; i < validity.size(); ++i) {
+        ASSERT_EQ(validity_src[i], validity[i]);
+      }
+    }
+    // Collect stats on the serialized data size.
+    const size_t buf_size = message_str.size();
+    buf_sum_size += buf_size;
+    if (buf_size > buf_max_size) {
+      buf_max_size = buf_size;
+    }
+    if (buf_size < buf_min_size) {
+      buf_min_size = buf_size;
+    }
+  }
+
+  // All the printed values are size_t, hence the %zu conversions.
+  const auto& timer_s_desc = StringPrintf(
+      "ElemNum=%5zu Iterations=%8zu", kElemNum, kIterNum);
+  LOG(INFO) << Substitute("Protobuf serialize : $0 $1",
+                          timer_s_desc, timer_s.elapsed().ToString());
+  const auto& timer_d_desc = StringPrintf(
+      "ElemNum=%5zu Iterations=%8zu", kElemNum, kIterNum);
+  LOG(INFO) << Substitute("Protobuf deserialize: $0 $1",
+                          timer_d_desc, timer_d.elapsed().ToString());
+  const auto& buffer_size_desc = StringPrintf(
+      "ElemNum=%5zu Iterations=%8zu min=%5zu max=%5zu average=%5zu",
+      kElemNum, kIterNum, buf_min_size, buf_max_size, buf_sum_size / kIterNum);
+  // These are the Protobuf message sizes, so label them as such.
+  LOG(INFO) << Substitute("Protobuf buffer size: $0",
+                          buffer_size_desc);
+}
+
+} // namespace kudu
diff --git a/thirdparty/build-definitions.sh b/thirdparty/build-definitions.sh
index b0b4307f3..42fea9a20 100644
--- a/thirdparty/build-definitions.sh
+++ b/thirdparty/build-definitions.sh
@@ -566,6 +566,28 @@ build_gmock_gtest() {
   rsync -av $GMOCK_SOURCE/googletest/include/ $PREFIX/include/
 }
 
+build_flatbuffers() {
+  FLATBUFFERS_BDIR=$TP_BUILD_DIR/$FLATBUFFERS_NAME$MODE_SUFFIX
+  mkdir -p $FLATBUFFERS_BDIR
+  pushd $FLATBUFFERS_BDIR
+  rm -rf CMakeCache.txt CMakeFiles/
+  cmake \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_POSITION_INDEPENDENT_CODE=On \
+    -DCMAKE_INSTALL_PREFIX=$PREFIX \
+    -DCMAKE_CXX_FLAGS="$EXTRA_CXXFLAGS" \
+    -DCMAKE_EXE_LINKER_FLAGS="$EXTRA_LDFLAGS $EXTRA_LIBS" \
+    -DCMAKE_MODULE_LINKER_FLAGS="$EXTRA_LDFLAGS $EXTRA_LIBS" \
+    -DCMAKE_SHARED_LINKER_FLAGS="$EXTRA_LDFLAGS $EXTRA_LIBS" \
+    -DFLATBUFFERS_BUILD_SHAREDLIB=On \
+    -DFLATBUFFERS_BUILD_TESTS=Off \
+    -DFLATBUFFERS_CPP_STD=17 \
+    $EXTRA_CMAKE_FLAGS \
+    $FLATBUFFERS_SOURCE
+  ${NINJA:-make} -j$PARALLEL $EXTRA_MAKEFLAGS install
+  popd
+}
+
 build_protobuf() {
   PROTOBUF_BDIR=$TP_BUILD_DIR/$PROTOBUF_NAME$MODE_SUFFIX
   mkdir -p $PROTOBUF_BDIR
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index 2809fa241..ecd08f256 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -398,6 +398,10 @@ if [ -n "$F_UNINSTRUMENTED" -o -n "$F_GMOCK" ]; then
   build_gmock_gtest
 fi
 
+if [ -n "$F_UNINSTRUMENTED" -o -n "$F_FLATBUFFERS" ]; then
+  build_flatbuffers
+fi
+
 if [ -n "$F_UNINSTRUMENTED" -o -n "$F_PROTOBUF" ]; then
   build_protobuf
 fi
@@ -576,6 +580,10 @@ EXTRA_LDFLAGS="-stdlib=libc++ $EXTRA_LDFLAGS"
 EXTRA_CFLAGS="-g $EXTRA_CFLAGS"
 EXTRA_CXXFLAGS="-g $EXTRA_CXXFLAGS"
 
+if [ -n "$F_TSAN" -o -n "$F_FLATBUFFERS" ]; then
+  build_flatbuffers
+fi
+
 if [ -n "$F_TSAN" -o -n "$F_PROTOBUF" ]; then
   build_protobuf
 fi
diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh
index cd3e3b78e..3ad9256cf 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -199,6 +199,13 @@ fetch_and_patch \
  "patch -p1 < $TP_DIR/patches/gperftools-Replace-namespace-base-with-namespace-tcmalloc.patch" \
  "autoreconf -fvi"
 
+FLATBUFFERS_PATCHLEVEL=1
+fetch_and_patch \
+ flatbuffers-${FLATBUFFERS_VERSION}.tar.gz \
+ $FLATBUFFERS_SOURCE \
+ $FLATBUFFERS_PATCHLEVEL \
+ "patch -p1 < $TP_DIR/patches/flatbuffers-length-to-size-uint8-ptr.patch"
+
 # NOTE: creating an empty 'third_party/googletest/m4' subdir is a recipe from
 # the $PROTOBUF_SOURCE/autogen.sh file:
 #
diff --git a/thirdparty/patches/flatbuffers-length-to-size-uint8-ptr.patch b/thirdparty/patches/flatbuffers-length-to-size-uint8-ptr.patch
new file mode 100644
index 000000000..1c9d464fc
--- /dev/null
+++ b/thirdparty/patches/flatbuffers-length-to-size-uint8-ptr.patch
@@ -0,0 +1,58 @@
+--- a/include/flatbuffers/flatbuffer_builder.h 2025-06-18 11:24:45
++++ b/include/flatbuffers/flatbuffer_builder.h 2025-08-22 21:06:24
+@@ -538,6 +538,13 @@
+         CalculateOffset<typename OffsetT<String>::offset_type>());
+   }
+ 
++  template<template<typename> class OffsetT = Offset>
++  OffsetT<String> CreateString(const uint8_t *str, size_t len) {
++    CreateStringImpl(str, len);
++    return OffsetT<String>(
++        CalculateOffset<typename OffsetT<String>::offset_type>());
++  }
++
+   /// @brief Store a string in the buffer, which is null-terminated.
+   /// @param[in] str A const char pointer to a C-string to add to the buffer.
+   /// @return Returns the offset in the buffer where the string starts.
+@@ -559,7 +566,7 @@
+   /// @return Returns the offset in the buffer where the string starts.
+   template<template<typename> class OffsetT = Offset>
+   OffsetT<String> CreateString(const std::string &str) {
+-    return CreateString<OffsetT>(str.c_str(), str.length());
++    return CreateString<OffsetT>(str.c_str(), str.size());
+   }
+ 
+   // clang-format off
+@@ -591,7 +598,7 @@
+            // it.
+            int &...ExplicitArgumentBarrier, typename T>
+   OffsetT<String> CreateString(const T &str) {
+-    return CreateString<OffsetT>(str.data(), str.length());
++    return CreateString<OffsetT>(str.data(), str.size());
+   }
+ 
+   /// @brief Store a string in the buffer, which can contain any binary data.
+@@ -651,7 +658,7 @@
+   /// @param[in] str A const reference to a std::string to store in the buffer.
+   /// @return Returns the offset in the buffer where the string starts.
+   Offset<String> CreateSharedString(const std::string &str) {
+-    return CreateSharedString(str.c_str(), str.length());
++    return CreateSharedString(str.c_str(), str.size());
+   }
+   #endif
+ 
+@@ -1377,6 +1384,14 @@
+     PreAlign<uoffset_t>(len + 1);  // Always 0-terminated.
+     buf_.fill(1);
+     PushBytes(reinterpret_cast<const uint8_t *>(str), len);
++    PushElement(static_cast<uoffset_t>(len));
++  }
++
++  void CreateStringImpl(const uint8_t *str, size_t len) {
++    NotNested();
++    PreAlign<uoffset_t>(len + 1);  // Always 0-terminated.
++    buf_.fill(1);
++    PushBytes(str, len);
+     PushElement(static_cast<uoffset_t>(len));
+   }
+ 
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 3bcf0d7f2..2a4524ec8 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -54,6 +54,10 @@ GPERFTOOLS_VERSION=2.13
 GPERFTOOLS_NAME=gperftools-$GPERFTOOLS_VERSION
 GPERFTOOLS_SOURCE=$TP_SOURCE_DIR/$GPERFTOOLS_NAME
 
+FLATBUFFERS_VERSION=25.2.10
+FLATBUFFERS_NAME=flatbuffers-$FLATBUFFERS_VERSION
+FLATBUFFERS_SOURCE=$TP_SOURCE_DIR/$FLATBUFFERS_NAME
+
 PROTOBUF_VERSION=3.21.9
 PROTOBUF_NAME=protobuf-$PROTOBUF_VERSION
 PROTOBUF_SOURCE=$TP_SOURCE_DIR/$PROTOBUF_NAME
