pitrou commented on a change in pull request #10177:
URL: https://github.com/apache/arrow/pull/10177#discussion_r689592888



##########
File path: cpp/src/arrow/type.h
##########
@@ -1317,6 +1317,40 @@ class ARROW_EXPORT DayTimeIntervalType : public 
IntervalType {
   std::string name() const override { return "day_time_interval"; }
 };
 
+/// \brief Represents a number of days and milliseconds (fraction of day).

Review comment:
       Update docstring? (this looks like a paste of the `DayTimeIntervalType` 
docstring)

##########
File path: cpp/src/arrow/array/array_primitive.h
##########
@@ -132,4 +136,33 @@ class ARROW_EXPORT DayTimeIntervalArray : public 
PrimitiveArray {
   const uint8_t* raw_values() const { return raw_values_ + data_->offset * 
byte_width(); }
 };
 
+/// \brief Array of Month, Day and nanosecond values.
+class ARROW_EXPORT MonthDayNanoIntervalArray : public PrimitiveArray {
+ public:
+  using TypeClass = MonthDayNanoIntervalType;
+
+  explicit MonthDayNanoIntervalArray(const std::shared_ptr<ArrayData>& data);
+
+  MonthDayNanoIntervalArray(const std::shared_ptr<DataType>& type, int64_t 
length,
+                            const std::shared_ptr<Buffer>& data,
+                            const std::shared_ptr<Buffer>& null_bitmap = 
NULLPTR,
+                            int64_t null_count = kUnknownNullCount, int64_t 
offset = 0);
+
+  MonthDayNanoIntervalArray(int64_t length, const std::shared_ptr<Buffer>& 
data,
+                            const std::shared_ptr<Buffer>& null_bitmap = 
NULLPTR,
+                            int64_t null_count = kUnknownNullCount, int64_t 
offset = 0);
+
+  TypeClass::MonthDayNanos GetValue(int64_t i) const;
+  TypeClass::MonthDayNanos Value(int64_t i) const { return GetValue(i); }
+
+  // For compatibility with Take kernel.
+  TypeClass::MonthDayNanos GetView(int64_t i) const { return GetValue(i); }
+
+  int32_t byte_width() const { return sizeof(TypeClass::MonthDayNanos); }
+  static_assert(sizeof(TypeClass::MonthDayNanos) == 16,
+                "MonthDayNanos should only take 16 bytes");

Review comment:
       This is already in `type.h`, no need to repeat it here IMHO.

##########
File path: cpp/src/arrow/testing/random.cc
##########
@@ -867,6 +869,7 @@ std::shared_ptr<Array> RandomArrayGenerator::ArrayOf(const 
Field& field, int64_t
       // This isn't as flexible as it could be, but the array-of-structs 
layout of this
       // type means it's not a (useful) composition of other generators
       GENERATE_INTEGRAL_CASE_VIEW(Int64Type, DayTimeIntervalType);
+      GENERATE_INTEGRAL_CASE_VIEW(Int64Type, MonthDayNanoIntervalType);

Review comment:
       Hmm... it seems you should view a `fixed_size_binary(16)` instead.
   Also, can you add some tests in `random_test.cc`?

##########
File path: cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc
##########
@@ -803,6 +803,24 @@ TEST_F(TestIndexInKernel, FixedSizeBinary) {
   CheckIndexIn(fixed_size_binary(0), R"([])", R"([])", R"([])");
 }
 
+TEST_F(TestIndexInKernel, MonthDayNanoInterval) {
+  auto type = month_day_nano_interval();
+
+  CheckIndexIn(type,
+               /*input=*/R"([[5, -1, 5], null, [4, 5, 6], [5, -1, 5], [1, 2, 
3]])",
+               /*value_set=*/R"([null, [4, 5, 6], [5, -1, 5]])",
+               /*expected=*/R"([2, 0, 1, 2, null])",
+               /*skip_nulls=*/false);
+
+  // Duplicates in value_set
+  CheckIndexIn(
+      type,
+      /*input=*/R"([[7, 8, 0], null, [0, 0, 0], [7, 8, 0], [0, 0, 1]])",
+      /*value_set=*/R"([null, null, [0, 0, 0], [0, 0, 0], [7, 8, 0], [7, 8, 
0]])",
+      /*expected=*/R"([4, 0, 2, 4, null])",
+      /*skip_nulls=*/false);
+}
+

Review comment:
       Interesting! Do you think it's useful to add implementation for other 
interval types or not?

##########
File path: cpp/src/arrow/type.h
##########
@@ -1317,6 +1317,40 @@ class ARROW_EXPORT DayTimeIntervalType : public 
IntervalType {
   std::string name() const override { return "day_time_interval"; }
 };
 
+/// \brief Represents a number of days and milliseconds (fraction of day).
+class ARROW_EXPORT MonthDayNanoIntervalType : public IntervalType {
+ public:
+  struct MonthDayNanos {
+    int32_t months;
+    int32_t days;
+    int64_t nanoseconds;
+    bool operator==(MonthDayNanos other) const {
+      return this->months == other.months && this->days == other.days &&
+             this->nanoseconds == other.nanoseconds;
+    }
+    bool operator!=(MonthDayNanos other) const { return !(*this == other); }
+  };
+  using c_type = MonthDayNanos;
+  using PhysicalType = MonthDayNanoIntervalType;
+
+  static_assert(sizeof(MonthDayNanos) == 16,
+                "MonthDayNanos  struct assumed to be of size 8 bytes");

Review comment:
       Fix message?

##########
File path: cpp/src/arrow/testing/random.h
##########
@@ -454,6 +454,37 @@ std::shared_ptr<arrow::Array> GenerateArray(const Field& 
field, int64_t size,
 // Assorted functions
 //
 
+inline void rand_day_millis(int64_t N,
+                            std::vector<DayTimeIntervalType::DayMilliseconds>* 
out) {
+  const int random_seed = 0;
+  std::default_random_engine gen(random_seed);
+  std::uniform_int_distribution<int32_t> d(std::numeric_limits<int32_t>::min(),
+                                           
std::numeric_limits<int32_t>::max());
+  out->resize(N, {});
+  std::generate(out->begin(), out->end(), [&d, &gen] {
+    DayTimeIntervalType::DayMilliseconds tmp;
+    tmp.days = d(gen);
+    tmp.milliseconds = d(gen);
+    return tmp;
+  });
+}
+
+inline void rand_month_day_nanos(
+    int64_t N, std::vector<MonthDayNanoIntervalType::MonthDayNanos>* out) {
+  const int random_seed = 0;
+  std::default_random_engine gen(random_seed);
+  std::uniform_int_distribution<int64_t> d(std::numeric_limits<int64_t>::min(),
+                                           
std::numeric_limits<int64_t>::max());
+  out->resize(N, {});
+  std::generate(out->begin(), out->end(), [&d, &gen] {
+    MonthDayNanoIntervalType::MonthDayNanos tmp;
+    tmp.months = static_cast<int32_t>(d(gen));
+    tmp.days = static_cast<int32_t>(d(gen));
+    tmp.nanoseconds = d(gen);
+    return tmp;
+  });
+}
+

Review comment:
       Please don't inline functions which don't need to be inlined. Only put 
the declarations here.
   (also, the pointer-out signature seems unnecessary)

##########
File path: cpp/src/arrow/array/array_test.cc
##########
@@ -3060,4 +3098,14 @@ TEST(TestSwapEndianArrayData, ExtensionType) {
   AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
 }
 
+TEST(TestSwapEndianArrayData, MonthDayNanoInterval) {
+  auto array = ArrayFromJSON(month_day_nano_interval(), R"([[0, 1, 2],
+                                                          [5000, 200, 
3000000000]])");
+  auto swap_array = 
MakeArray(*::arrow::internal::SwapEndianArrayData(array->data()));
+  EXPECT_TRUE(!swap_array->Equals(array));
+  ASSERT_ARRAYS_EQUAL(
+      *MakeArray(*::arrow::internal::SwapEndianArrayData(swap_array->data())), 
*array);
+  ASSERT_OK(swap_array->ValidateFull());
+}

Review comment:
       Hmm... it should be pretty easy to check the expected result here.

##########
File path: cpp/src/arrow/array/array_test.cc
##########
@@ -1332,12 +1366,16 @@ TYPED_TEST(TestPrimitiveBuilder, 
TestAppendValuesLazyIter) {
 }
 
 TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterConverted) {
-  DECL_T();
+  typedef typename TestFixture::CType T;
   // find type we can safely convert the tested values to and from
-  using conversion_type =
-      typename std::conditional<std::is_floating_point<T>::value, double,
-                                typename 
std::conditional<std::is_unsigned<T>::value,
-                                                          uint64_t, 
int64_t>::type>::type;
+  using conversion_type = typename std::conditional<
+      std::is_floating_point<T>::value, double,
+      typename std::conditional<
+          std::is_same<T, DayTimeIntervalType::DayMilliseconds>::value ||
+              std::is_same<T, MonthDayNanoIntervalType::MonthDayNanos>::value,
+          T,
+          typename std::conditional<std::is_unsigned<T>::value, uint64_t,
+                                    int64_t>::type>::type>::type;

Review comment:
       Hmm... can you put something on the various P* classes instead?

##########
File path: cpp/src/arrow/testing/random.h
##########
@@ -454,6 +454,37 @@ std::shared_ptr<arrow::Array> GenerateArray(const Field& 
field, int64_t size,
 // Assorted functions
 //
 
+inline void rand_day_millis(int64_t N,
+                            std::vector<DayTimeIntervalType::DayMilliseconds>* 
out) {
+  const int random_seed = 0;
+  std::default_random_engine gen(random_seed);
+  std::uniform_int_distribution<int32_t> d(std::numeric_limits<int32_t>::min(),
+                                           
std::numeric_limits<int32_t>::max());
+  out->resize(N, {});
+  std::generate(out->begin(), out->end(), [&d, &gen] {
+    DayTimeIntervalType::DayMilliseconds tmp;
+    tmp.days = d(gen);
+    tmp.milliseconds = d(gen);
+    return tmp;
+  });
+}
+
+inline void rand_month_day_nanos(
+    int64_t N, std::vector<MonthDayNanoIntervalType::MonthDayNanos>* out) {
+  const int random_seed = 0;
+  std::default_random_engine gen(random_seed);
+  std::uniform_int_distribution<int64_t> d(std::numeric_limits<int64_t>::min(),
+                                           
std::numeric_limits<int64_t>::max());
+  out->resize(N, {});
+  std::generate(out->begin(), out->end(), [&d, &gen] {
+    MonthDayNanoIntervalType::MonthDayNanos tmp;
+    tmp.months = static_cast<int32_t>(d(gen));
+    tmp.days = static_cast<int32_t>(d(gen));
+    tmp.nanoseconds = d(gen);
+    return tmp;
+  });
+}
+

Review comment:
       Actually, I'm not even sure where these functions are used?

##########
File path: cpp/src/arrow/ipc/json_simple.cc
##########
@@ -410,6 +410,41 @@ class DayTimeIntervalConverter final
   std::shared_ptr<DayTimeIntervalBuilder> builder_;
 };
 
+class MonthDayNanoIntervalConverter final
+    : public ConcreteConverter<MonthDayNanoIntervalConverter> {
+ public:
+  explicit MonthDayNanoIntervalConverter(const std::shared_ptr<DataType>& 
type) {
+    this->type_ = type;
+    builder_ = 
std::make_shared<MonthDayNanoIntervalBuilder>(default_memory_pool());
+  }
+
+  Status AppendValue(const rj::Value& json_obj) override {
+    if (json_obj.IsNull()) {
+      return this->AppendNull();
+    }
+    MonthDayNanoIntervalType::MonthDayNanos value;
+    if (!json_obj.IsArray()) {
+      return JSONTypeError("array", json_obj.GetType());
+    }
+    if (json_obj.Size() != 3) {
+      return Status::Invalid(
+          "month day nanos interval pair must have exactly two elements, had ",

Review comment:
       Hmm, I think the error message is slightly off.
   Also, I think we need to settle on a spelling of the type name. Apparently 
it's "month_day_nano_interval". Let's not vary gratuitously :-)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to