This is an automated email from the ASF dual-hosted git repository.
smp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new aa1cabea2e GH-49441: [C++][Gandiva] Add rand_integer function (#49442)
aa1cabea2e is described below
commit aa1cabea2e53a82cc7bb250a39a0c90c08809909
Author: Dmitry Chirkov <[email protected]>
AuthorDate: Fri Mar 6 09:52:31 2026 -0800
GH-49441: [C++][Gandiva] Add rand_integer function (#49442)
### Rationale for this change
Add `rand_integer` function to Gandiva to generate random integers,
complementing the existing `rand`/`random` functions that generate
random doubles. This provides native integer random number generation
and offers a more efficient alternative to `CAST(rand() * range AS
INT)`.
### What changes are included in this PR?
- Add `RandomIntegerGeneratorHolder` class following the existing
`RandomGeneratorHolder` pattern
- Implement three function signatures:
- `rand_integer()` → int32 in range [INT32_MIN, INT32_MAX]
- `rand_integer(int32 range)` → int32 in range [0, range-1]
- `rand_integer(int32 min, int32 max)` → int32 in range [min, max]
inclusive
- Add parameter validation (range > 0, min <= max) at expression
compilation time
- Add 8 unit tests covering all signatures and edge cases
- Use `std::uniform_int_distribution<int32_t>` with Mersenne Twister
engine
### Are these changes tested?
Yes, unit tests were added in `random_generator_holder_test.cc` (plus
end-to-end projector tests in `tests/projector_test.cc`), including:
- `NoParams` - verifies full int32 range
- `WithRange` - verifies [0, range-1] bounds
- `WithMinMax` - verifies [min, max] inclusive bounds
- `WithNegativeMinMax` - verifies negative range handling
- `InvalidRangeZero` - verifies range=0 is rejected
- `InvalidRangeNegative` - verifies negative range is rejected
- `InvalidMinGreaterThanMax` - verifies min > max is rejected
- `NullRangeDefaultsToMaxInt` - verifies a NULL range defaults to INT32_MAX
### Are there any user-facing changes?
Yes, this adds a new `rand_integer` function to Gandiva with three
signatures as described above.
* GitHub Issue: #49441
---
cpp/src/gandiva/function_holder_maker_registry.cc | 1 +
cpp/src/gandiva/function_registry_math_ops.cc | 8 ++
cpp/src/gandiva/gdv_function_stubs.cc | 39 +++++-
cpp/src/gandiva/random_generator_holder.cc | 61 +++++++++
cpp/src/gandiva/random_generator_holder.h | 33 +++++
cpp/src/gandiva/random_generator_holder_test.cc | 159 ++++++++++++++++++++++
cpp/src/gandiva/tests/projector_test.cc | 157 +++++++++++++++++++++
7 files changed, 457 insertions(+), 1 deletion(-)
diff --git a/cpp/src/gandiva/function_holder_maker_registry.cc
b/cpp/src/gandiva/function_holder_maker_registry.cc
index 2d96574896..f45cf2b820 100644
--- a/cpp/src/gandiva/function_holder_maker_registry.cc
+++ b/cpp/src/gandiva/function_holder_maker_registry.cc
@@ -62,6 +62,7 @@ FunctionHolderMakerRegistry::MakerMap
FunctionHolderMakerRegistry::DefaultHolder
{"to_date", HolderMaker<ToDateHolder>},
{"random", HolderMaker<RandomGeneratorHolder>},
{"rand", HolderMaker<RandomGeneratorHolder>},
+ {"rand_integer", HolderMaker<RandomIntegerGeneratorHolder>},
{"regexp_replace", HolderMaker<ReplaceHolder>},
{"regexp_extract", HolderMaker<ExtractHolder>},
{"castintervalday", HolderMaker<IntervalDaysHolder>},
diff --git a/cpp/src/gandiva/function_registry_math_ops.cc
b/cpp/src/gandiva/function_registry_math_ops.cc
index 232c7c5326..3bfcfc180e 100644
--- a/cpp/src/gandiva/function_registry_math_ops.cc
+++ b/cpp/src/gandiva/function_registry_math_ops.cc
@@ -103,6 +103,14 @@ std::vector<NativeFunction> GetMathOpsFunctionRegistry() {
"gdv_fn_random", NativeFunction::kNeedsFunctionHolder),
NativeFunction("random", {"rand"}, DataTypeVector{int32()}, float64(),
kResultNullNever, "gdv_fn_random_with_seed",
+ NativeFunction::kNeedsFunctionHolder),
+ NativeFunction("rand_integer", {}, DataTypeVector{}, int32(),
kResultNullNever,
+ "gdv_fn_rand_integer",
NativeFunction::kNeedsFunctionHolder),
+ NativeFunction("rand_integer", {}, DataTypeVector{int32()}, int32(),
+ kResultNullNever, "gdv_fn_rand_integer_with_range",
+ NativeFunction::kNeedsFunctionHolder),
+ NativeFunction("rand_integer", {}, DataTypeVector{int32(), int32()},
int32(),
+ kResultNullNever, "gdv_fn_rand_integer_with_min_max",
NativeFunction::kNeedsFunctionHolder)};
return math_fn_registry_;
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc
b/cpp/src/gandiva/gdv_function_stubs.cc
index dff15e6fd2..3eda4afadb 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -67,12 +67,33 @@ double gdv_fn_random(int64_t ptr) {
return (*holder)();
}
-double gdv_fn_random_with_seed(int64_t ptr, int32_t seed, bool seed_validity) {
+double gdv_fn_random_with_seed(int64_t ptr, int32_t /*seed*/, bool
/*seed_validity*/) {
gandiva::RandomGeneratorHolder* holder =
reinterpret_cast<gandiva::RandomGeneratorHolder*>(ptr);
return (*holder)();
}
+int32_t gdv_fn_rand_integer(int64_t ptr) {
+ gandiva::RandomIntegerGeneratorHolder* holder =
+ reinterpret_cast<gandiva::RandomIntegerGeneratorHolder*>(ptr);
+ return (*holder)();
+}
+
+int32_t gdv_fn_rand_integer_with_range(int64_t ptr, int32_t /*range*/,
+ bool /*range_validity*/) {
+ gandiva::RandomIntegerGeneratorHolder* holder =
+ reinterpret_cast<gandiva::RandomIntegerGeneratorHolder*>(ptr);
+ return (*holder)();
+}
+
+int32_t gdv_fn_rand_integer_with_min_max(int64_t ptr, int32_t /*min*/,
+ bool /*min_validity*/, int32_t
/*max*/,
+ bool /*max_validity*/) {
+ gandiva::RandomIntegerGeneratorHolder* holder =
+ reinterpret_cast<gandiva::RandomIntegerGeneratorHolder*>(ptr);
+ return (*holder)();
+}
+
bool gdv_fn_in_expr_lookup_int32(int64_t ptr, int32_t value, bool in_validity)
{
if (!in_validity) {
return false;
@@ -864,6 +885,22 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine*
engine) const {
engine->AddGlobalMappingForFunc("gdv_fn_random_with_seed",
types->double_type(), args,
reinterpret_cast<void*>(gdv_fn_random_with_seed));
+ // gdv_fn_rand_integer
+ args = {types->i64_type()};
+ engine->AddGlobalMappingForFunc("gdv_fn_rand_integer", types->i32_type(),
args,
+
reinterpret_cast<void*>(gdv_fn_rand_integer));
+
+ args = {types->i64_type(), types->i32_type(), types->i1_type()};
+ engine->AddGlobalMappingForFunc(
+ "gdv_fn_rand_integer_with_range", types->i32_type(), args,
+ reinterpret_cast<void*>(gdv_fn_rand_integer_with_range));
+
+ args = {types->i64_type(), types->i32_type(), types->i1_type(),
types->i32_type(),
+ types->i1_type()};
+ engine->AddGlobalMappingForFunc(
+ "gdv_fn_rand_integer_with_min_max", types->i32_type(), args,
+ reinterpret_cast<void*>(gdv_fn_rand_integer_with_min_max));
+
// gdv_fn_dec_from_string
args = {
types->i64_type(), // context
diff --git a/cpp/src/gandiva/random_generator_holder.cc
b/cpp/src/gandiva/random_generator_holder.cc
index 8f80c5826d..2729c2875a 100644
--- a/cpp/src/gandiva/random_generator_holder.cc
+++ b/cpp/src/gandiva/random_generator_holder.cc
@@ -16,6 +16,9 @@
// under the License.
#include "gandiva/random_generator_holder.h"
+
+#include <limits>
+
#include "gandiva/node.h"
namespace gandiva {
@@ -40,4 +43,62 @@ Result<std::shared_ptr<RandomGeneratorHolder>>
RandomGeneratorHolder::Make(
return std::shared_ptr<RandomGeneratorHolder>(new RandomGeneratorHolder(
literal->is_null() ? 0 : std::get<int32_t>(literal->holder())));
}
+
+Result<std::shared_ptr<RandomIntegerGeneratorHolder>>
RandomIntegerGeneratorHolder::Make(
+ const FunctionNode& node) {
+ ARROW_RETURN_IF(
+ node.children().size() > 2,
+ Status::Invalid("'rand_integer' function requires at most two
parameters"));
+
+ // No params: full int32 range [INT32_MIN, INT32_MAX]
+ if (node.children().empty()) {
+ return std::shared_ptr<RandomIntegerGeneratorHolder>(
+ new RandomIntegerGeneratorHolder());
+ }
+
+ // One param: range [0, range - 1]
+ if (node.children().size() == 1) {
+ auto literal = dynamic_cast<LiteralNode*>(node.children().at(0).get());
+ ARROW_RETURN_IF(
+ literal == nullptr,
+ Status::Invalid("'rand_integer' function requires a literal as
parameter"));
+ ARROW_RETURN_IF(
+ literal->return_type()->id() != arrow::Type::INT32,
+ Status::Invalid(
+ "'rand_integer' function requires an int32 literal as parameter"));
+
+ // NULL range defaults to INT32_MAX (full positive range)
+ int32_t range = literal->is_null() ? std::numeric_limits<int32_t>::max()
+ : std::get<int32_t>(literal->holder());
+ ARROW_RETURN_IF(range <= 0,
+ Status::Invalid("'rand_integer' function range must be
positive"));
+
+ return std::shared_ptr<RandomIntegerGeneratorHolder>(
+ new RandomIntegerGeneratorHolder(range));
+ }
+
+ // Two params: min, max [min, max] inclusive
+ auto min_literal = dynamic_cast<LiteralNode*>(node.children().at(0).get());
+ auto max_literal = dynamic_cast<LiteralNode*>(node.children().at(1).get());
+
+ ARROW_RETURN_IF(
+ min_literal == nullptr || max_literal == nullptr,
+ Status::Invalid("'rand_integer' function requires literals as
parameters"));
+ ARROW_RETURN_IF(
+ min_literal->return_type()->id() != arrow::Type::INT32 ||
+ max_literal->return_type()->id() != arrow::Type::INT32,
+ Status::Invalid("'rand_integer' function requires int32 literals as
parameters"));
+
+ // NULL min defaults to 0, NULL max defaults to INT32_MAX
+ int32_t min_val = min_literal->is_null() ? 0 :
std::get<int32_t>(min_literal->holder());
+ int32_t max_val = max_literal->is_null() ?
std::numeric_limits<int32_t>::max()
+ :
std::get<int32_t>(max_literal->holder());
+
+ ARROW_RETURN_IF(min_val > max_val,
+ Status::Invalid("'rand_integer' function min must be <=
max"));
+
+ return std::shared_ptr<RandomIntegerGeneratorHolder>(
+ new RandomIntegerGeneratorHolder(min_val, max_val));
+}
+
} // namespace gandiva
diff --git a/cpp/src/gandiva/random_generator_holder.h
b/cpp/src/gandiva/random_generator_holder.h
index ffab725aa7..752e8d2420 100644
--- a/cpp/src/gandiva/random_generator_holder.h
+++ b/cpp/src/gandiva/random_generator_holder.h
@@ -17,6 +17,7 @@
#pragma once
+#include <limits>
#include <memory>
#include <random>
@@ -53,4 +54,36 @@ class GANDIVA_EXPORT RandomGeneratorHolder : public
FunctionHolder {
std::uniform_real_distribution<> distribution_;
};
+/// Function Holder for 'rand_integer'
+class GANDIVA_EXPORT RandomIntegerGeneratorHolder : public FunctionHolder {
+ public:
+ ~RandomIntegerGeneratorHolder() override = default;
+
+ static Result<std::shared_ptr<RandomIntegerGeneratorHolder>> Make(
+ const FunctionNode& node);
+
+ int32_t operator()() { return distribution_(generator_); }
+
+ private:
+ // Full range: [INT32_MIN, INT32_MAX]
+ RandomIntegerGeneratorHolder()
+ : distribution_(std::numeric_limits<int32_t>::min(),
+ std::numeric_limits<int32_t>::max()) {
+ generator_.seed(::arrow::internal::GetRandomSeed());
+ }
+
+ // Range: [0, range - 1]
+ explicit RandomIntegerGeneratorHolder(int32_t range) : distribution_(0,
range - 1) {
+ generator_.seed(::arrow::internal::GetRandomSeed());
+ }
+
+ // Min/Max: [min, max] inclusive
+ RandomIntegerGeneratorHolder(int32_t min, int32_t max) : distribution_(min,
max) {
+ generator_.seed(::arrow::internal::GetRandomSeed());
+ }
+
+ std::mt19937_64 generator_;
+ std::uniform_int_distribution<int32_t> distribution_;
+};
+
} // namespace gandiva
diff --git a/cpp/src/gandiva/random_generator_holder_test.cc
b/cpp/src/gandiva/random_generator_holder_test.cc
index 77b2750f2e..26677515c2 100644
--- a/cpp/src/gandiva/random_generator_holder_test.cc
+++ b/cpp/src/gandiva/random_generator_holder_test.cc
@@ -17,8 +17,10 @@
#include "gandiva/random_generator_holder.h"
+#include <limits>
#include <memory>
+#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "arrow/testing/gtest_util.h"
@@ -87,4 +89,161 @@ TEST_F(TestRandGenHolder, WithInValidSeed) {
EXPECT_EQ(random_1(), random_2());
}
+// Test that non-literal seed argument is rejected
+TEST_F(TestRandGenHolder, NonLiteralSeedRejected) {
+ auto field_node = std::make_shared<FieldNode>(arrow::field("seed",
arrow::int32()));
+ FunctionNode rand_func = {"rand", {field_node}, arrow::float64()};
+
+ EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+ ::testing::HasSubstr("requires a literal as
parameter"),
+
RandomGeneratorHolder::Make(rand_func).status());
+}
+
+class TestRandIntGenHolder : public ::testing::Test {
+ public:
+ FunctionNode BuildRandIntFunc() { return {"rand_integer", {},
arrow::int32()}; }
+
+ FunctionNode BuildRandIntWithRangeFunc(int32_t range, bool range_is_null) {
+ auto range_node = std::make_shared<LiteralNode>(arrow::int32(),
LiteralHolder(range),
+ range_is_null);
+ return {"rand_integer", {range_node}, arrow::int32()};
+ }
+
+ FunctionNode BuildRandIntWithMinMaxFunc(int32_t min, bool min_is_null,
int32_t max,
+ bool max_is_null) {
+ auto min_node =
+ std::make_shared<LiteralNode>(arrow::int32(), LiteralHolder(min),
min_is_null);
+ auto max_node =
+ std::make_shared<LiteralNode>(arrow::int32(), LiteralHolder(max),
max_is_null);
+ return {"rand_integer", {min_node, max_node}, arrow::int32()};
+ }
+};
+
+TEST_F(TestRandIntGenHolder, NoParams) {
+ FunctionNode rand_func = BuildRandIntFunc();
+ EXPECT_OK_AND_ASSIGN(auto rand_gen_holder,
+ RandomIntegerGeneratorHolder::Make(rand_func));
+
+ auto& random = *rand_gen_holder;
+ // Generate multiple values and verify they are integers
+ for (int i = 0; i < 10; i++) {
+ int32_t val = random();
+ EXPECT_GE(val, std::numeric_limits<int32_t>::min());
+ EXPECT_LE(val, std::numeric_limits<int32_t>::max());
+ }
+}
+
+TEST_F(TestRandIntGenHolder, WithRange) {
+ FunctionNode rand_func = BuildRandIntWithRangeFunc(100, false);
+ EXPECT_OK_AND_ASSIGN(auto rand_gen_holder,
+ RandomIntegerGeneratorHolder::Make(rand_func));
+
+ auto& random = *rand_gen_holder;
+ // Generate multiple values and verify they are in range [0, 99]
+ for (int i = 0; i < 100; i++) {
+ int32_t val = random();
+ EXPECT_GE(val, 0);
+ EXPECT_LT(val, 100);
+ }
+}
+
+TEST_F(TestRandIntGenHolder, WithMinMax) {
+ FunctionNode rand_func = BuildRandIntWithMinMaxFunc(10, false, 20, false);
+ EXPECT_OK_AND_ASSIGN(auto rand_gen_holder,
+ RandomIntegerGeneratorHolder::Make(rand_func));
+
+ auto& random = *rand_gen_holder;
+ // Generate multiple values and verify they are in range [10, 20]
+ for (int i = 0; i < 100; i++) {
+ int32_t val = random();
+ EXPECT_GE(val, 10);
+ EXPECT_LE(val, 20);
+ }
+}
+
+TEST_F(TestRandIntGenHolder, WithNegativeMinMax) {
+ FunctionNode rand_func = BuildRandIntWithMinMaxFunc(-50, false, -10, false);
+ EXPECT_OK_AND_ASSIGN(auto rand_gen_holder,
+ RandomIntegerGeneratorHolder::Make(rand_func));
+
+ auto& random = *rand_gen_holder;
+ // Generate multiple values and verify they are in range [-50, -10]
+ for (int i = 0; i < 100; i++) {
+ int32_t val = random();
+ EXPECT_GE(val, -50);
+ EXPECT_LE(val, -10);
+ }
+}
+
+TEST_F(TestRandIntGenHolder, InvalidRangeZero) {
+ FunctionNode rand_func = BuildRandIntWithRangeFunc(0, false);
+ EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("range must be
positive"),
+
RandomIntegerGeneratorHolder::Make(rand_func).status());
+}
+
+TEST_F(TestRandIntGenHolder, InvalidRangeNegative) {
+ FunctionNode rand_func = BuildRandIntWithRangeFunc(-5, false);
+ EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("range must be
positive"),
+
RandomIntegerGeneratorHolder::Make(rand_func).status());
+}
+
+TEST_F(TestRandIntGenHolder, InvalidMinGreaterThanMax) {
+ FunctionNode rand_func = BuildRandIntWithMinMaxFunc(20, false, 10, false);
+ EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("min must be
<= max"),
+
RandomIntegerGeneratorHolder::Make(rand_func).status());
+}
+
+TEST_F(TestRandIntGenHolder, NullRangeDefaultsToMaxInt) {
+ FunctionNode rand_func = BuildRandIntWithRangeFunc(0, true); // null range
+ EXPECT_OK_AND_ASSIGN(auto rand_gen_holder,
+ RandomIntegerGeneratorHolder::Make(rand_func));
+
+ auto& random = *rand_gen_holder;
+ // With NULL range defaulting to INT32_MAX, values should be in [0,
INT32_MAX-1]
+ for (int i = 0; i < 100; i++) {
+ int32_t val = random();
+ EXPECT_GE(val, 0);
+ EXPECT_LT(val, std::numeric_limits<int32_t>::max());
+ }
+}
+
+// Test that non-literal arguments are rejected
+TEST_F(TestRandIntGenHolder, NonLiteralRangeRejected) {
+ // Create a FieldNode instead of LiteralNode for the range parameter
+ auto field_node = std::make_shared<FieldNode>(arrow::field("range",
arrow::int32()));
+ FunctionNode rand_func = {"rand_integer", {field_node}, arrow::int32()};
+
+ EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+ ::testing::HasSubstr("requires a literal as
parameter"),
+
RandomIntegerGeneratorHolder::Make(rand_func).status());
+}
+
+TEST_F(TestRandIntGenHolder, NonLiteralMinMaxRejected) {
+ // Create FieldNodes instead of LiteralNodes for min/max parameters
+ auto min_field = std::make_shared<FieldNode>(arrow::field("min",
arrow::int32()));
+ auto max_literal =
+ std::make_shared<LiteralNode>(arrow::int32(), LiteralHolder(100), false);
+ FunctionNode rand_func = {"rand_integer", {min_field, max_literal},
arrow::int32()};
+
+ EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+ ::testing::HasSubstr("requires literals as
parameters"),
+
RandomIntegerGeneratorHolder::Make(rand_func).status());
+}
+
+TEST_F(TestRandIntGenHolder, NullMinMaxDefaults) {
+ // Test null handling for 2-arg form: NULL min defaults to 0, NULL max
defaults to
+ // INT32_MAX
+ FunctionNode rand_func = BuildRandIntWithMinMaxFunc(0, true, 0, true); //
both null
+ EXPECT_OK_AND_ASSIGN(auto rand_gen_holder,
+ RandomIntegerGeneratorHolder::Make(rand_func));
+
+ auto& random = *rand_gen_holder;
+ // With NULL min=0, NULL max=INT32_MAX, values should be in [0, INT32_MAX]
+ for (int i = 0; i < 100; i++) {
+ int32_t val = random();
+ EXPECT_GE(val, 0);
+ EXPECT_LE(val, std::numeric_limits<int32_t>::max());
+ }
+}
+
} // namespace gandiva
diff --git a/cpp/src/gandiva/tests/projector_test.cc
b/cpp/src/gandiva/tests/projector_test.cc
index dc1ac9dfd2..268cb55a64 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -3678,4 +3678,161 @@ TEST_F(TestProjector,
TestExtendedCFunctionThatNeedsContext) {
EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0));
}
+TEST_F(TestProjector, TestRandomNoArgs) {
+ // Test random() with no arguments - returns double in [0, 1)
+ auto dummy_field = field("dummy", arrow::int32());
+ auto schema = arrow::schema({dummy_field});
+ auto out_field = field("out", arrow::float64());
+
+ auto rand_node = TreeExprBuilder::MakeFunction("random", {},
arrow::float64());
+ auto expr = TreeExprBuilder::MakeExpression(rand_node, out_field);
+
+ std::shared_ptr<Projector> projector;
+ ARROW_EXPECT_OK(Projector::Make(schema, {expr}, TestConfiguration(),
&projector));
+
+ int num_records = 100;
+ auto dummy_array = MakeArrowArrayInt32(std::vector<int32_t>(num_records, 0),
+ std::vector<bool>(num_records, true));
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {dummy_array});
+
+ arrow::ArrayVector outs;
+ ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs));
+
+ // Verify all values are in range [0, 1)
+ auto result = std::dynamic_pointer_cast<arrow::DoubleArray>(outs.at(0));
+ EXPECT_EQ(result->length(), num_records);
+ EXPECT_EQ(result->null_count(), 0);
+ for (int i = 0; i < num_records; i++) {
+ double value = result->Value(i);
+ EXPECT_GE(value, 0.0);
+ EXPECT_LT(value, 1.0);
+ }
+}
+
+TEST_F(TestProjector, TestRandomWithSeed) {
+ // Test rand(seed) - with seed literal, returns double in [0, 1)
+ auto dummy_field = field("dummy", arrow::int32());
+ auto schema = arrow::schema({dummy_field});
+ auto out_field = field("out", arrow::float64());
+
+ auto seed_literal =
TreeExprBuilder::MakeLiteral(static_cast<int32_t>(12345));
+ auto rand_node =
+ TreeExprBuilder::MakeFunction("rand", {seed_literal}, arrow::float64());
+ auto expr = TreeExprBuilder::MakeExpression(rand_node, out_field);
+
+ std::shared_ptr<Projector> projector;
+ ARROW_EXPECT_OK(Projector::Make(schema, {expr}, TestConfiguration(),
&projector));
+
+ int num_records = 100;
+ auto dummy_array = MakeArrowArrayInt32(std::vector<int32_t>(num_records, 0),
+ std::vector<bool>(num_records, true));
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {dummy_array});
+
+ arrow::ArrayVector outs;
+ ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs));
+
+ // Verify all values are in range [0, 1)
+ auto result = std::dynamic_pointer_cast<arrow::DoubleArray>(outs.at(0));
+ EXPECT_EQ(result->length(), num_records);
+ EXPECT_EQ(result->null_count(), 0);
+ for (int i = 0; i < num_records; i++) {
+ double value = result->Value(i);
+ EXPECT_GE(value, 0.0);
+ EXPECT_LT(value, 1.0);
+ }
+}
+
+TEST_F(TestProjector, TestRandIntegerNoArgs) {
+ // Test rand_integer() with no arguments - full int32 range
+ auto dummy_field = field("dummy", arrow::int32());
+ auto schema = arrow::schema({dummy_field});
+ auto out_field = field("out", arrow::int32());
+
+ auto rand_int_node = TreeExprBuilder::MakeFunction("rand_integer", {},
arrow::int32());
+ auto expr = TreeExprBuilder::MakeExpression(rand_int_node, out_field);
+
+ std::shared_ptr<Projector> projector;
+ ARROW_EXPECT_OK(Projector::Make(schema, {expr}, TestConfiguration(),
&projector));
+
+ int num_records = 100;
+ auto dummy_array = MakeArrowArrayInt32(std::vector<int32_t>(num_records, 0),
+ std::vector<bool>(num_records, true));
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {dummy_array});
+
+ arrow::ArrayVector outs;
+ ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs));
+
+ // Verify all values are valid int32 (no specific range check for full range)
+ auto result = std::dynamic_pointer_cast<arrow::Int32Array>(outs.at(0));
+ EXPECT_EQ(result->length(), num_records);
+ EXPECT_EQ(result->null_count(), 0);
+}
+
+TEST_F(TestProjector, TestRandIntegerWithRange) {
+ // Test rand_integer(10) - range [0, 9]
+ auto dummy_field = field("dummy", arrow::int32());
+ auto schema = arrow::schema({dummy_field});
+ auto out_field = field("out", arrow::int32());
+
+ auto range_literal = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(10));
+ auto rand_int_node =
+ TreeExprBuilder::MakeFunction("rand_integer", {range_literal},
arrow::int32());
+ auto expr = TreeExprBuilder::MakeExpression(rand_int_node, out_field);
+
+ std::shared_ptr<Projector> projector;
+ ARROW_EXPECT_OK(Projector::Make(schema, {expr}, TestConfiguration(),
&projector));
+
+ int num_records = 100;
+ auto dummy_array = MakeArrowArrayInt32(std::vector<int32_t>(num_records, 0),
+ std::vector<bool>(num_records, true));
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {dummy_array});
+
+ arrow::ArrayVector outs;
+ ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs));
+
+ // Verify all values are in range [0, 9]
+ auto result = std::dynamic_pointer_cast<arrow::Int32Array>(outs.at(0));
+ EXPECT_EQ(result->length(), num_records);
+ EXPECT_EQ(result->null_count(), 0);
+ for (int i = 0; i < num_records; i++) {
+ int32_t value = result->Value(i);
+ EXPECT_GE(value, 0);
+ EXPECT_LT(value, 10);
+ }
+}
+
+TEST_F(TestProjector, TestRandIntegerWithMinMax) {
+ // Test rand_integer(5, 15) - range [5, 15] inclusive
+ auto dummy_field = field("dummy", arrow::int32());
+ auto schema = arrow::schema({dummy_field});
+ auto out_field = field("out", arrow::int32());
+
+ auto min_literal = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(5));
+ auto max_literal = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(15));
+ auto rand_int_node = TreeExprBuilder::MakeFunction(
+ "rand_integer", {min_literal, max_literal}, arrow::int32());
+ auto expr = TreeExprBuilder::MakeExpression(rand_int_node, out_field);
+
+ std::shared_ptr<Projector> projector;
+ ARROW_EXPECT_OK(Projector::Make(schema, {expr}, TestConfiguration(),
&projector));
+
+ int num_records = 100;
+ auto dummy_array = MakeArrowArrayInt32(std::vector<int32_t>(num_records, 0),
+ std::vector<bool>(num_records, true));
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {dummy_array});
+
+ arrow::ArrayVector outs;
+ ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs));
+
+ // Verify all values are in range [5, 15] inclusive
+ auto result = std::dynamic_pointer_cast<arrow::Int32Array>(outs.at(0));
+ EXPECT_EQ(result->length(), num_records);
+ EXPECT_EQ(result->null_count(), 0);
+ for (int i = 0; i < num_records; i++) {
+ int32_t value = result->Value(i);
+ EXPECT_GE(value, 5);
+ EXPECT_LE(value, 15);
+ }
+}
+
} // namespace gandiva