wgtmac commented on code in PR #49615:
URL: https://github.com/apache/arrow/pull/49615#discussion_r3013242986
##########
cpp/src/parquet/arrow/arrow_reader_writer_test.cc:
##########
@@ -2178,6 +2178,45 @@ TEST(TestArrowReadWrite,
ImplicitSecondToMillisecondTimestampCoercion) {
ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*tx, *to));
}
+TEST(TestArrowReadWrite, TimestampCoercionOverflow) {
+ using ::arrow::ArrayFromVector;
+ using ::arrow::field;
+ using ::arrow::schema;
+
+ auto t_s = ::arrow::timestamp(TimeUnit::SECOND);
+
+ std::vector<int64_t> overflow_values = {9223372036854776LL};
+ std::vector<bool> is_valid = {true};
+
+ std::shared_ptr<Array> a_s;
+ ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, is_valid,
overflow_values, &a_s);
+
+ auto s = schema({field("timestamp", t_s)});
+ auto table = Table::Make(s, {a_s});
+
+ ASSERT_RAISES(Invalid, WriteTable(*table, default_memory_pool(),
Review Comment:
It seems that this check can also be merged to the for loop by checking if
the TimeUint is `TimeUnit::SECOND` and then skips `coerce_timestamps`?
##########
cpp/src/parquet/arrow/arrow_reader_writer_test.cc:
##########
@@ -2178,6 +2178,45 @@ TEST(TestArrowReadWrite,
ImplicitSecondToMillisecondTimestampCoercion) {
ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*tx, *to));
}
+TEST(TestArrowReadWrite, TimestampCoercionOverflow) {
+ using ::arrow::ArrayFromVector;
+ using ::arrow::field;
+ using ::arrow::schema;
+
+ auto t_s = ::arrow::timestamp(TimeUnit::SECOND);
+
+ std::vector<int64_t> overflow_values = {9223372036854776LL};
+ std::vector<bool> is_valid = {true};
+
+ std::shared_ptr<Array> a_s;
+ ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, is_valid,
overflow_values, &a_s);
+
+ auto s = schema({field("timestamp", t_s)});
+ auto table = Table::Make(s, {a_s});
+
+ ASSERT_RAISES(Invalid, WriteTable(*table, default_memory_pool(),
+ CreateOutputStream(), table->num_rows()));
+
+ for (auto unit : {TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}) {
+ auto coerce_props =
+ ArrowWriterProperties::Builder().coerce_timestamps(unit)->build();
+ ASSERT_RAISES(
+ Invalid,
+ WriteTable(*table, default_memory_pool(), CreateOutputStream(),
+ table->num_rows(), default_writer_properties(),
+ coerce_props));
+ }
+
+ std::vector<int64_t> null_overflow_values = {9223372036854776LL};
+ std::vector<bool> null_valid = {false};
+ std::shared_ptr<Array> a_null;
+ ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, null_valid,
null_overflow_values,
Review Comment:
```suggestion
std::vector<bool> null_valid = {false};
std::shared_ptr<Array> a_null;
ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, null_valid,
overflow_values,
```
##########
cpp/src/parquet/arrow/arrow_reader_writer_test.cc:
##########
@@ -2178,6 +2178,45 @@ TEST(TestArrowReadWrite,
ImplicitSecondToMillisecondTimestampCoercion) {
ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*tx, *to));
}
+TEST(TestArrowReadWrite, TimestampCoercionOverflow) {
+ using ::arrow::ArrayFromVector;
+ using ::arrow::field;
+ using ::arrow::schema;
+
+ auto t_s = ::arrow::timestamp(TimeUnit::SECOND);
+
+ std::vector<int64_t> overflow_values = {9223372036854776LL};
+ std::vector<bool> is_valid = {true};
+
+ std::shared_ptr<Array> a_s;
+ ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, is_valid,
overflow_values, &a_s);
+
+ auto s = schema({field("timestamp", t_s)});
+ auto table = Table::Make(s, {a_s});
+
+ ASSERT_RAISES(Invalid, WriteTable(*table, default_memory_pool(),
+ CreateOutputStream(), table->num_rows()));
+
+ for (auto unit : {TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}) {
+ auto coerce_props =
+ ArrowWriterProperties::Builder().coerce_timestamps(unit)->build();
+ ASSERT_RAISES(
+ Invalid,
+ WriteTable(*table, default_memory_pool(), CreateOutputStream(),
+ table->num_rows(), default_writer_properties(),
+ coerce_props));
+ }
+
+ std::vector<int64_t> null_overflow_values = {9223372036854776LL};
+ std::vector<bool> null_valid = {false};
+ std::shared_ptr<Array> a_null;
+ ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, null_valid,
null_overflow_values,
Review Comment:
It looks unnecessary to use `null_overflow_values`
##########
cpp/src/parquet/arrow/arrow_reader_writer_test.cc:
##########
@@ -2178,6 +2178,45 @@ TEST(TestArrowReadWrite,
ImplicitSecondToMillisecondTimestampCoercion) {
ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*tx, *to));
}
+TEST(TestArrowReadWrite, TimestampCoercionOverflow) {
+ using ::arrow::ArrayFromVector;
+ using ::arrow::field;
+ using ::arrow::schema;
+
+ auto t_s = ::arrow::timestamp(TimeUnit::SECOND);
+
+ std::vector<int64_t> overflow_values = {9223372036854776LL};
+ std::vector<bool> is_valid = {true};
+
+ std::shared_ptr<Array> a_s;
+ ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, is_valid,
overflow_values, &a_s);
Review Comment:
```suggestion
auto t_s = ::arrow::timestamp(TimeUnit::SECOND);
std::vector<int64_t> overflow_values = {9223372036854776LL};
std::vector<bool> is_valid = {true};
std::shared_ptr<Array> a_s;
ArrayFromVector<::arrow::TimestampType, int64_t>(t_s, is_valid,
overflow_values, &a_s);
```
Let's remove these blank lines to be consistent with lines after 2210?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]