This is an automated email from the ASF dual-hosted git repository. mgreber pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 2c7757b26a649864d64be0494820ae3678a8663b Author: Alexey Serbin <[email protected]> AuthorDate: Thu Jan 22 23:21:49 2026 -0800 KUDU-3736 add CodegenTest.CodegenRandomSchemas scenario This new test scenario reproduces SIGSEGV crash on RedHat9 with libgcc versions 11.5.0-10 and newer. At least, without KUDU-3736 fix in place, running the scenario with --codegen_test_random_schemas_runtime_sec=-1 crashes every time on x86_64 RHEL9 when the following libgcc package is installed: libgcc-11.5.0-11.el9.x86_64 Change-Id: Ic51e8fec02f74ecc11fa740f05ffeb9a7f41d8d9 Reviewed-on: http://gerrit.cloudera.org:8080/23893 Tested-by: Alexey Serbin <[email protected]> Reviewed-by: Zoltan Martonka <[email protected]> Reviewed-by: Marton Greber <[email protected]> --- src/kudu/codegen/codegen-test.cc | 276 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 270 insertions(+), 6 deletions(-) diff --git a/src/kudu/codegen/codegen-test.cc b/src/kudu/codegen/codegen-test.cc index a1eda8c8b..b6c545540 100644 --- a/src/kudu/codegen/codegen-test.cc +++ b/src/kudu/codegen/codegen-test.cc @@ -16,16 +16,21 @@ // under the License. 
#include <algorithm> +#include <array> +#include <atomic> #include <cstddef> #include <cstdint> +#include <iterator> +#include <list> #include <memory> #include <ostream> +#include <random> #include <string> #include <thread> #include <vector> // IWYU pragma: no_include "testing/base/public/gunit.h" -#include <gflags/gflags_declare.h> +#include <gflags/gflags.h> #include <glog/logging.h> #include <glog/stl_logging.h> // IWYU pragma: keep #include <gmock/gmock.h> @@ -39,11 +44,14 @@ #include "kudu/common/rowblock.h" #include "kudu/common/rowblock_memory.h" #include "kudu/common/schema.h" +#include "kudu/gutil/integral_types.h" #include "kudu/gutil/ref_counted.h" #include "kudu/gutil/singleton.h" +#include "kudu/gutil/stringprintf.h" #include "kudu/util/countdown_latch.h" #include "kudu/util/logging_test_util.h" #include "kudu/util/memory/arena.h" +#include "kudu/util/monotime.h" #include "kudu/util/random.h" #include "kudu/util/random_util.h" #include "kudu/util/slice.h" @@ -51,13 +59,23 @@ #include "kudu/util/test_macros.h" #include "kudu/util/test_util.h" +using std::array; +using std::atomic; +using std::back_inserter; +using std::list; +using std::sample; using std::string; using std::unique_ptr; using std::thread; using std::vector; +DEFINE_int32(codegen_test_random_schemas_runtime_sec, 60, + "number of seconds to run the CodegenTest.CodegenRandomSchemas " + "scenario; a negative number means 'unlimited'"); + DECLARE_bool(codegen_dump_mc); DECLARE_int32(codegen_cache_capacity); +DECLARE_int32(codegen_queue_capacity); DECLARE_int32(codegen_compiler_manager_pool_max_threads_num); namespace kudu { @@ -129,8 +147,21 @@ class CodegenTest : public KuduTest { } protected: + typedef const void* DefaultValueType; + static const DefaultValueType kI8R; + static const DefaultValueType kI8W; + static const DefaultValueType kI16R; + static const DefaultValueType kI16W; + static const DefaultValueType kI32R; + static const DefaultValueType kI32W; + static const 
DefaultValueType kI64R; + static const DefaultValueType kI64W; + static const DefaultValueType kStrR; + static const DefaultValueType kStrW; + Schema base_; Schema defaults_; + Random random_; // Compares the projection-for-read and projection-for-write results // of the codegen projection and the non-codegen projection @@ -169,11 +200,8 @@ class CodegenTest : public KuduTest { static const int kNumTestRows = 10; static const size_t kIndirectPerRow = 4 * kRandomStringMaxLength; static const size_t kIndirectPerProjection = kIndirectPerRow * kNumTestRows; - typedef const void* DefaultValueType; - static const DefaultValueType kI32R, kI32W, kStrR, kStrW; codegen::CodeGenerator generator_; - Random random_; unique_ptr<ConstContiguousRow> test_rows_[kNumTestRows]; RowBlockMemory projections_mem_; unique_ptr<Arena> test_rows_arena_; @@ -181,10 +209,16 @@ class CodegenTest : public KuduTest { namespace { +const int8_t kI8RValue = 0xBE; +const int8_t kI8WValue = 0xEB; +const int16_t kI16RValue = 0xA5A5; +const int16_t kI16WValue = 0x5A5A; const int32_t kI32RValue = 0xFFFF0000; const int32_t kI32WValue = 0x0000FFFF; -const Slice kStrRValue = "RRRRR STRING DEFAULT READ"; -const Slice kStrWValue = "WWWWW STRING DEFAULT WRITE"; +const int64_t kI64RValue = 0xF0F0F0F0F0F0F0F0; +const int64_t kI64WValue = 0x0F0F0F0F0F0F0F0F; +const Slice kStrRValue = "RRRRR STRING DEFAULT READ"; +const Slice kStrWValue = "WWWWW STRING DEFAULT WRITE"; // Assumes all rows are selected // Also assumes schemas are the same. 
@@ -204,8 +238,14 @@ void CheckRowBlocksEqual(const RowBlock* rb1, const RowBlock* rb2, } // anonymous namespace +const CodegenTest::DefaultValueType CodegenTest::kI8R = &kI8RValue; +const CodegenTest::DefaultValueType CodegenTest::kI8W = &kI8WValue; +const CodegenTest::DefaultValueType CodegenTest::kI16R = &kI16RValue; +const CodegenTest::DefaultValueType CodegenTest::kI16W = &kI16WValue; const CodegenTest::DefaultValueType CodegenTest::kI32R = &kI32RValue; const CodegenTest::DefaultValueType CodegenTest::kI32W = &kI32WValue; +const CodegenTest::DefaultValueType CodegenTest::kI64R = &kI64RValue; +const CodegenTest::DefaultValueType CodegenTest::kI64W = &kI64WValue; const CodegenTest::DefaultValueType CodegenTest::kStrR = &kStrRValue; const CodegenTest::DefaultValueType CodegenTest::kStrW = &kStrWValue; @@ -510,4 +550,228 @@ TEST_F(CodegenTest, CodegenEHFrameRace) { } } +// This is a scenario to stress-test the generation of row projections' code. +// The generated code isn't run, but just compiled and put into the codegen +// cache. References to the generated code are being retained not only in the +// cache but also kept around for time intervals of randomized durations. +// The number and the order of columns in schemas and corresponding projections +// are randomized as well. +TEST_F(CodegenTest, CodegenRandomSchemas) { + SKIP_IF_SLOW_NOT_ALLOWED(); + + constexpr const size_t kNumThreads = 2; + constexpr const size_t kNumProjectionsPerSchema = 32; + constexpr const size_t kNumShufflesPerProjection = 16; + + // Create 'library' of columns to build schemas for the test scenario. 
+ const array<ColumnSchema, 31> cs_library{ + ColumnSchema("key", INT64, ColumnSchema::NOT_NULL), + ColumnSchema("c_bool", BOOL, ColumnSchema::NOT_NULL), + ColumnSchema("c_bool_n", BOOL, ColumnSchema::NULLABLE), + ColumnSchema("c_int8", INT8, ColumnSchema::NOT_NULL), + ColumnSchema("c_int8_n", INT8, ColumnSchema::NULLABLE), + ColumnSchema("c_int16", INT16, ColumnSchema::NOT_NULL), + ColumnSchema("c_int16_n", INT16, ColumnSchema::NULLABLE), + ColumnSchema("c_int32", INT32, ColumnSchema::NOT_NULL), + ColumnSchema("c_int32_n", INT32, ColumnSchema::NULLABLE), + ColumnSchema("c_int64", INT64, ColumnSchema::NOT_NULL), + ColumnSchema("c_int64_n", INT64, ColumnSchema::NULLABLE), + ColumnSchema("c_str", STRING, ColumnSchema::NOT_NULL), + ColumnSchema("c_str_n", STRING, ColumnSchema::NULLABLE), + ColumnSchema("c_bin", BINARY, ColumnSchema::NOT_NULL), + ColumnSchema("c_bin_n", BINARY, ColumnSchema::NULLABLE), + ColumnSchemaBuilder() + .name("c_int32_r") + .type(INT32) + .read_default(kI32R), + ColumnSchemaBuilder() + .name("c_int32_nr") + .type(INT32) + .nullable(true) + .read_default(kI32R), + ColumnSchemaBuilder() + .name("c_int32_rw") + .type(INT32) + .read_default(kI32R) + .write_default(kI32W), + ColumnSchemaBuilder() + .name("c_int32_nrw") + .type(INT32) + .nullable(true) + .read_default(kI32R) + .write_default(kI32W), + ColumnSchemaBuilder() + .name("c_int64_r") + .type(INT64) + .read_default(kI64R), + ColumnSchemaBuilder() + .name("c_int64_nr") + .type(INT64) + .nullable(true) + .read_default(kI64R), + ColumnSchemaBuilder() + .name("c_int64_rw") + .type(INT64) + .read_default(kI64R) + .write_default(kI64W), + ColumnSchemaBuilder() + .name("c_int64_nrw") + .type(INT64) + .nullable(true) + .read_default(kI64R) + .write_default(kI64W), + ColumnSchemaBuilder() + .name("c_str_r") + .type(STRING) + .read_default(kStrR), + ColumnSchemaBuilder() + .name("c_str_nr") + .type(STRING) + .nullable(true) + .read_default(kStrR), + ColumnSchemaBuilder() + .name("c_str_rw") + 
.type(STRING) + .read_default(kStrR) + .write_default(kStrW), + ColumnSchemaBuilder() + .name("c_str_nrw") + .type(STRING) + .nullable(true) + .read_default(kStrR) + .write_default(kStrW), + ColumnSchemaBuilder() + .name("c_bin_r") + .type(BINARY) + .read_default(kStrR), + ColumnSchemaBuilder() + .name("c_bin_nr") + .type(BINARY) + .nullable(true) + .read_default(kStrR), + ColumnSchemaBuilder() + .name("c_bin_rw") + .type(BINARY) + .read_default(kStrR) + .write_default(kStrW), + ColumnSchemaBuilder() + .name("c_bin_nrw") + .type(BINARY) + .nullable(true) + .read_default(kStrR) + .write_default(kStrW), + }; + + // A part of the codegenned projection code should fit into the cache, but + // make sure the elements are purged out of the cache from time to time. + FLAGS_codegen_cache_capacity = + kNumThreads * kNumProjectionsPerSchema * kNumShufflesPerProjection / 5; + + // Make sure there is enough space in the codegen compilation queue to + // accommodate requests from all the running threads. Even if each of them + // waits for all currently running compilations to complete, they might + // race to submit their tasks, so add significant extra margin. + FLAGS_codegen_queue_capacity = 3 * kNumThreads; + + Singleton<CompilationManager>::UnsafeReset(); + CompilationManager* cm = CompilationManager::GetSingleton(); + + atomic<bool> stop = false; + vector<thread> threads; + threads.reserve(kNumThreads); + for (size_t thread_idx = 0; thread_idx < kNumThreads; ++thread_idx) { + threads.emplace_back([&, thread_idx = thread_idx]() { + std::mt19937 gen(SeedRandom()); + list<unique_ptr<CodegenRP>> projectors; + while (!stop) { + // Choose a random number: it's the number of columns in the new schema. 
+ const size_t num_columns = 1 + gen() % cs_library.size(); + VLOG(1) << StringPrintf("thread %2zd: %2zd-column schema", + thread_idx, num_columns); + auto cs_library_first_non_key = cs_library.begin(); + ++cs_library_first_non_key; + vector<ColumnSchema> column_schemas; + column_schemas.reserve(num_columns); + column_schemas.push_back(cs_library[0]); // the 'key' column is always present + sample(cs_library_first_non_key, + cs_library.end(), + back_inserter(column_schemas), + num_columns - 1, + gen); + + // Create a schema with the given number of columns, picking columns + // from the 'column schema library' in random order. + SchemaBuilder sb; + for (const auto& cs : column_schemas) { + CHECK_OK(sb.AddColumn(cs)); + } + const Schema schema = sb.Build(); + + // Generate various projections, using random subsets of columns + // in the schema. + for (size_t iter = 0; iter < kNumProjectionsPerSchema && !stop; ++iter) { + const size_t proj_col_num = 1 + gen() % column_schemas.size(); + VLOG(2) << StringPrintf("thread %2zd: %2zd-column projection", + thread_idx, proj_col_num); + vector<ColumnId> col_ids; + col_ids.reserve(proj_col_num); + + const auto& all_col_ids = schema.column_ids(); + sample(all_col_ids.begin(), + all_col_ids.end(), + back_inserter(col_ids), + proj_col_num, + gen); + for (size_t s_idx = 0; s_idx < kNumShufflesPerProjection; ++s_idx) { + Schema projection; + // Shuffle the contents since std::sample is stable with the given + // vector's forward iterator. + if (s_idx != 0) { + std::shuffle(col_ids.begin(), col_ids.end(), gen); + } + + // Create a projection with the columns that have the specified IDs. + CHECK_OK(schema.CreateProjectionByIdsIgnoreMissing(col_ids, &projection)); + + // Request code generation. + unique_ptr<CodegenRP> projector; + if (cm->RequestRowProjector(&schema, &projection, &projector)) { + // If that's a codegenned projector, store it to keep a reference. 
+ projectors.push_back(std::move(projector)); + } else { + // Let the codegen compilation complete before going to the next cycle + // to avoid overflowing the codegen queue. + cm->Wait(); + } + } + + // Randomly purge some of the references to keep at most + // kNumProjectionsPerSchema / 2 of them between cycles in the + // 'projectors' container. + size_t count = 0; + while (projectors.size() > kNumProjectionsPerSchema / 2) { + const size_t offset = gen() % projectors.size(); + auto it = projectors.cbegin(); + std::advance(it, offset); + projectors.erase(it); + ++count; + } + VLOG(2) << StringPrintf("thread %2zd: %4zd references dropped", + thread_idx, count); + } + } + }); + } + + // Let them run for at least the specified time. + if (const int32 runtime_sec = FLAGS_codegen_test_random_schemas_runtime_sec; + runtime_sec >= 0) { + SleepFor(MonoDelta::FromSeconds(runtime_sec)); + stop = true; + } + for (auto& t : threads) { + t.join(); + } +} + } // namespace kudu
