pitrou commented on a change in pull request #10177:
URL: https://github.com/apache/arrow/pull/10177#discussion_r689592888
##########
File path: cpp/src/arrow/type.h
##########
@@ -1317,6 +1317,40 @@ class ARROW_EXPORT DayTimeIntervalType : public
IntervalType {
std::string name() const override { return "day_time_interval"; }
};
+/// \brief Represents a number of days and milliseconds (fraction of day).
Review comment:
Update docstring? (this looks like a paste of the `DayTimeIntervalType`
docstring)
##########
File path: cpp/src/arrow/array/array_primitive.h
##########
@@ -132,4 +136,33 @@ class ARROW_EXPORT DayTimeIntervalArray : public
PrimitiveArray {
const uint8_t* raw_values() const { return raw_values_ + data_->offset *
byte_width(); }
};
+/// \brief Array of Month, Day and nanosecond values.
+class ARROW_EXPORT MonthDayNanoIntervalArray : public PrimitiveArray {
+ public:
+ using TypeClass = MonthDayNanoIntervalType;
+
+ explicit MonthDayNanoIntervalArray(const std::shared_ptr<ArrayData>& data);
+
+ MonthDayNanoIntervalArray(const std::shared_ptr<DataType>& type, int64_t
length,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap =
NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t
offset = 0);
+
+ MonthDayNanoIntervalArray(int64_t length, const std::shared_ptr<Buffer>&
data,
+ const std::shared_ptr<Buffer>& null_bitmap =
NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t
offset = 0);
+
+ TypeClass::MonthDayNanos GetValue(int64_t i) const;
+ TypeClass::MonthDayNanos Value(int64_t i) const { return GetValue(i); }
+
+ // For compatibility with Take kernel.
+ TypeClass::MonthDayNanos GetView(int64_t i) const { return GetValue(i); }
+
+ int32_t byte_width() const { return sizeof(TypeClass::MonthDayNanos); }
+ static_assert(sizeof(TypeClass::MonthDayNanos) == 16,
+ "MonthDayNanos should only take 16 bytes");
Review comment:
This is already in `type.h`, no need to repeat it here IMHO.
##########
File path: cpp/src/arrow/testing/random.cc
##########
@@ -867,6 +869,7 @@ std::shared_ptr<Array> RandomArrayGenerator::ArrayOf(const
Field& field, int64_t
// This isn't as flexible as it could be, but the array-of-structs
layout of this
// type means it's not a (useful) composition of other generators
GENERATE_INTEGRAL_CASE_VIEW(Int64Type, DayTimeIntervalType);
+ GENERATE_INTEGRAL_CASE_VIEW(Int64Type, MonthDayNanoIntervalType);
Review comment:
Hmm... it seems you should view a `fixed_size_binary(16)` instead.
Also, can you add some tests in `random_test.cc`?
##########
File path: cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc
##########
@@ -803,6 +803,24 @@ TEST_F(TestIndexInKernel, FixedSizeBinary) {
CheckIndexIn(fixed_size_binary(0), R"([])", R"([])", R"([])");
}
+TEST_F(TestIndexInKernel, MonthDayNanoInterval) {
+ auto type = month_day_nano_interval();
+
+ CheckIndexIn(type,
+ /*input=*/R"([[5, -1, 5], null, [4, 5, 6], [5, -1, 5], [1, 2,
3]])",
+ /*value_set=*/R"([null, [4, 5, 6], [5, -1, 5]])",
+ /*expected=*/R"([2, 0, 1, 2, null])",
+ /*skip_nulls=*/false);
+
+ // Duplicates in value_set
+ CheckIndexIn(
+ type,
+ /*input=*/R"([[7, 8, 0], null, [0, 0, 0], [7, 8, 0], [0, 0, 1]])",
+ /*value_set=*/R"([null, null, [0, 0, 0], [0, 0, 0], [7, 8, 0], [7, 8,
0]])",
+ /*expected=*/R"([4, 0, 2, 4, null])",
+ /*skip_nulls=*/false);
+}
+
Review comment:
Interesting! Do you think it's useful to add implementation for other
interval types or not?
##########
File path: cpp/src/arrow/type.h
##########
@@ -1317,6 +1317,40 @@ class ARROW_EXPORT DayTimeIntervalType : public
IntervalType {
std::string name() const override { return "day_time_interval"; }
};
+/// \brief Represents a number of days and milliseconds (fraction of day).
+class ARROW_EXPORT MonthDayNanoIntervalType : public IntervalType {
+ public:
+ struct MonthDayNanos {
+ int32_t months;
+ int32_t days;
+ int64_t nanoseconds;
+ bool operator==(MonthDayNanos other) const {
+ return this->months == other.months && this->days == other.days &&
+ this->nanoseconds == other.nanoseconds;
+ }
+ bool operator!=(MonthDayNanos other) const { return !(*this == other); }
+ };
+ using c_type = MonthDayNanos;
+ using PhysicalType = MonthDayNanoIntervalType;
+
+ static_assert(sizeof(MonthDayNanos) == 16,
+ "MonthDayNanos struct assumed to be of size 8 bytes");
Review comment:
Fix message?
##########
File path: cpp/src/arrow/testing/random.h
##########
@@ -454,6 +454,37 @@ std::shared_ptr<arrow::Array> GenerateArray(const Field&
field, int64_t size,
// Assorted functions
//
+inline void rand_day_millis(int64_t N,
+ std::vector<DayTimeIntervalType::DayMilliseconds>*
out) {
+ const int random_seed = 0;
+ std::default_random_engine gen(random_seed);
+ std::uniform_int_distribution<int32_t> d(std::numeric_limits<int32_t>::min(),
+
std::numeric_limits<int32_t>::max());
+ out->resize(N, {});
+ std::generate(out->begin(), out->end(), [&d, &gen] {
+ DayTimeIntervalType::DayMilliseconds tmp;
+ tmp.days = d(gen);
+ tmp.milliseconds = d(gen);
+ return tmp;
+ });
+}
+
+inline void rand_month_day_nanos(
+ int64_t N, std::vector<MonthDayNanoIntervalType::MonthDayNanos>* out) {
+ const int random_seed = 0;
+ std::default_random_engine gen(random_seed);
+ std::uniform_int_distribution<int64_t> d(std::numeric_limits<int64_t>::min(),
+
std::numeric_limits<int64_t>::max());
+ out->resize(N, {});
+ std::generate(out->begin(), out->end(), [&d, &gen] {
+ MonthDayNanoIntervalType::MonthDayNanos tmp;
+ tmp.months = static_cast<int32_t>(d(gen));
+ tmp.days = static_cast<int32_t>(d(gen));
+ tmp.nanoseconds = d(gen);
+ return tmp;
+ });
+}
+
Review comment:
Please don't inline functions which don't need to be inlined. Only put
the declarations here.
(also, the pointer-out signature seems unnecessary)
##########
File path: cpp/src/arrow/array/array_test.cc
##########
@@ -3060,4 +3098,14 @@ TEST(TestSwapEndianArrayData, ExtensionType) {
AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
}
+TEST(TestSwapEndianArrayData, MonthDayNanoInterval) {
+ auto array = ArrayFromJSON(month_day_nano_interval(), R"([[0, 1, 2],
+ [5000, 200,
3000000000]])");
+ auto swap_array =
MakeArray(*::arrow::internal::SwapEndianArrayData(array->data()));
+ EXPECT_TRUE(!swap_array->Equals(array));
+ ASSERT_ARRAYS_EQUAL(
+ *MakeArray(*::arrow::internal::SwapEndianArrayData(swap_array->data())),
*array);
+ ASSERT_OK(swap_array->ValidateFull());
+}
Review comment:
Hmm... it should be pretty easy to check the expected result here.
##########
File path: cpp/src/arrow/array/array_test.cc
##########
@@ -1332,12 +1366,16 @@ TYPED_TEST(TestPrimitiveBuilder,
TestAppendValuesLazyIter) {
}
TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterConverted) {
- DECL_T();
+ typedef typename TestFixture::CType T;
// find type we can safely convert the tested values to and from
- using conversion_type =
- typename std::conditional<std::is_floating_point<T>::value, double,
- typename
std::conditional<std::is_unsigned<T>::value,
- uint64_t,
int64_t>::type>::type;
+ using conversion_type = typename std::conditional<
+ std::is_floating_point<T>::value, double,
+ typename std::conditional<
+ std::is_same<T, DayTimeIntervalType::DayMilliseconds>::value ||
+ std::is_same<T, MonthDayNanoIntervalType::MonthDayNanos>::value,
+ T,
+ typename std::conditional<std::is_unsigned<T>::value, uint64_t,
+ int64_t>::type>::type>::type;
Review comment:
Hmm... can you put something on the various P* classes instead?
##########
File path: cpp/src/arrow/testing/random.h
##########
@@ -454,6 +454,37 @@ std::shared_ptr<arrow::Array> GenerateArray(const Field&
field, int64_t size,
// Assorted functions
//
+inline void rand_day_millis(int64_t N,
+ std::vector<DayTimeIntervalType::DayMilliseconds>*
out) {
+ const int random_seed = 0;
+ std::default_random_engine gen(random_seed);
+ std::uniform_int_distribution<int32_t> d(std::numeric_limits<int32_t>::min(),
+
std::numeric_limits<int32_t>::max());
+ out->resize(N, {});
+ std::generate(out->begin(), out->end(), [&d, &gen] {
+ DayTimeIntervalType::DayMilliseconds tmp;
+ tmp.days = d(gen);
+ tmp.milliseconds = d(gen);
+ return tmp;
+ });
+}
+
+inline void rand_month_day_nanos(
+ int64_t N, std::vector<MonthDayNanoIntervalType::MonthDayNanos>* out) {
+ const int random_seed = 0;
+ std::default_random_engine gen(random_seed);
+ std::uniform_int_distribution<int64_t> d(std::numeric_limits<int64_t>::min(),
+
std::numeric_limits<int64_t>::max());
+ out->resize(N, {});
+ std::generate(out->begin(), out->end(), [&d, &gen] {
+ MonthDayNanoIntervalType::MonthDayNanos tmp;
+ tmp.months = static_cast<int32_t>(d(gen));
+ tmp.days = static_cast<int32_t>(d(gen));
+ tmp.nanoseconds = d(gen);
+ return tmp;
+ });
+}
+
Review comment:
Actually, I'm not even sure where these functions are used?
##########
File path: cpp/src/arrow/ipc/json_simple.cc
##########
@@ -410,6 +410,41 @@ class DayTimeIntervalConverter final
std::shared_ptr<DayTimeIntervalBuilder> builder_;
};
+class MonthDayNanoIntervalConverter final
+ : public ConcreteConverter<MonthDayNanoIntervalConverter> {
+ public:
+ explicit MonthDayNanoIntervalConverter(const std::shared_ptr<DataType>&
type) {
+ this->type_ = type;
+ builder_ =
std::make_shared<MonthDayNanoIntervalBuilder>(default_memory_pool());
+ }
+
+ Status AppendValue(const rj::Value& json_obj) override {
+ if (json_obj.IsNull()) {
+ return this->AppendNull();
+ }
+ MonthDayNanoIntervalType::MonthDayNanos value;
+ if (!json_obj.IsArray()) {
+ return JSONTypeError("array", json_obj.GetType());
+ }
+ if (json_obj.Size() != 3) {
+ return Status::Invalid(
+ "month day nanos interval pair must have exactly two elements, had ",
Review comment:
Hmm, I think the error message is slightly off.
Also, I think we need to settle on a spelling of the type name. Apparently
it's "month_day_nano_interval". Let's not vary gratuitously :-)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]