pitrou commented on code in PR #13531:
URL: https://github.com/apache/arrow/pull/13531#discussion_r917906321
##########
cpp/src/arrow/adapters/orc/adapter_test.cc:
##########
@@ -487,6 +487,36 @@ TEST_F(TestORCWriterTrivialWithConversion, writeChunkless)
{
kDefaultSmallMemStreamSize / 16);
}
+// Invalid Types
+class TestORCWriterInvalidTypes : public ::testing::Test {
+ public:
+ TestORCWriterInvalidTypes() {
+ table_schema = schema({field("uint8", uint8()), field("uint16", uint16()),
+ field("uint32", uint32()), field("uint64",
uint64())});
+ auto write_options = adapters::orc::WriteOptions();
+#ifdef ARROW_WITH_SNAPPY
+ write_options.compression = Compression::SNAPPY;
+#else
+ write_options.compression = Compression::UNCOMPRESSED;
+#endif
+ write_options.file_version = adapters::orc::FileVersion(0, 11);
+ write_options.compression_block_size = 32768;
+ write_options.row_index_stride = 5000;
+ }
+
+ protected:
+ std::shared_ptr<Schema> table_schema;
+ adapters::orc::WriteOptions write_options;
+};
+TEST_F(TestORCWriterInvalidTypes, noWriteInvalidTypes) {
Review Comment:
The test name is cryptic. How are the types invalid? Why "noWrite"?
##########
cpp/src/arrow/adapters/orc/adapter_test.cc:
##########
@@ -487,6 +487,36 @@ TEST_F(TestORCWriterTrivialWithConversion, writeChunkless)
{
kDefaultSmallMemStreamSize / 16);
}
+// Invalid Types
+class TestORCWriterInvalidTypes : public ::testing::Test {
+ public:
+ TestORCWriterInvalidTypes() {
+ table_schema = schema({field("uint8", uint8()), field("uint16", uint16()),
+ field("uint32", uint32()), field("uint64",
uint64())});
+ auto write_options = adapters::orc::WriteOptions();
+#ifdef ARROW_WITH_SNAPPY
+ write_options.compression = Compression::SNAPPY;
+#else
+ write_options.compression = Compression::UNCOMPRESSED;
+#endif
+ write_options.file_version = adapters::orc::FileVersion(0, 11);
+ write_options.compression_block_size = 32768;
+ write_options.row_index_stride = 5000;
+ }
+
+ protected:
+ std::shared_ptr<Schema> table_schema;
+ adapters::orc::WriteOptions write_options;
+};
+TEST_F(TestORCWriterInvalidTypes, noWriteInvalidTypes) {
+ const std::shared_ptr<Table> table = GenerateRandomTable(table_schema, 5332,
5, 7, 0);
Review Comment:
Why 5332 rows? I would expect the error to show up even with a small number
of rows. There's no reason to generate a lot of data here.
##########
cpp/src/arrow/adapters/orc/adapter_test.cc:
##########
@@ -487,6 +487,36 @@ TEST_F(TestORCWriterTrivialWithConversion, writeChunkless)
{
kDefaultSmallMemStreamSize / 16);
}
+// Invalid Types
+class TestORCWriterInvalidTypes : public ::testing::Test {
+ public:
+ TestORCWriterInvalidTypes() {
+ table_schema = schema({field("uint8", uint8()), field("uint16", uint16()),
+ field("uint32", uint32()), field("uint64",
uint64())});
+ auto write_options = adapters::orc::WriteOptions();
Review Comment:
`auto` declares a local variable, so this won't set the member variable.
##########
cpp/src/arrow/adapters/orc/adapter_test.cc:
##########
@@ -487,6 +487,36 @@ TEST_F(TestORCWriterTrivialWithConversion, writeChunkless)
{
kDefaultSmallMemStreamSize / 16);
}
+// Invalid Types
+class TestORCWriterInvalidTypes : public ::testing::Test {
+ public:
+ TestORCWriterInvalidTypes() {
Review Comment:
Usually this would be a `SetUp()` method instead.
##########
cpp/src/arrow/adapters/orc/adapter_test.cc:
##########
@@ -487,6 +487,36 @@ TEST_F(TestORCWriterTrivialWithConversion, writeChunkless)
{
kDefaultSmallMemStreamSize / 16);
}
+// Invalid Types
+class TestORCWriterInvalidTypes : public ::testing::Test {
+ public:
+ TestORCWriterInvalidTypes() {
+ table_schema = schema({field("uint8", uint8()), field("uint16", uint16()),
+ field("uint32", uint32()), field("uint64",
uint64())});
+ auto write_options = adapters::orc::WriteOptions();
+#ifdef ARROW_WITH_SNAPPY
+ write_options.compression = Compression::SNAPPY;
+#else
+ write_options.compression = Compression::UNCOMPRESSED;
+#endif
+ write_options.file_version = adapters::orc::FileVersion(0, 11);
+ write_options.compression_block_size = 32768;
+ write_options.row_index_stride = 5000;
+ }
+
+ protected:
+ std::shared_ptr<Schema> table_schema;
+ adapters::orc::WriteOptions write_options;
Review Comment:
```suggestion
std::shared_ptr<Schema> table_schema_;
adapters::orc::WriteOptions write_options_;
```
##########
cpp/src/arrow/adapters/orc/adapter_test.cc:
##########
@@ -487,6 +487,36 @@ TEST_F(TestORCWriterTrivialWithConversion, writeChunkless)
{
kDefaultSmallMemStreamSize / 16);
}
+// Invalid Types
+class TestORCWriterInvalidTypes : public ::testing::Test {
+ public:
+ TestORCWriterInvalidTypes() {
+ table_schema = schema({field("uint8", uint8()), field("uint16", uint16()),
+ field("uint32", uint32()), field("uint64",
uint64())});
+ auto write_options = adapters::orc::WriteOptions();
+#ifdef ARROW_WITH_SNAPPY
+ write_options.compression = Compression::SNAPPY;
+#else
+ write_options.compression = Compression::UNCOMPRESSED;
+#endif
Review Comment:
This doesn't seem necessary, is it?
##########
cpp/src/arrow/adapters/orc/adapter_test.cc:
##########
@@ -487,6 +487,36 @@ TEST_F(TestORCWriterTrivialWithConversion, writeChunkless)
{
kDefaultSmallMemStreamSize / 16);
}
+// Invalid Types
+class TestORCWriterInvalidTypes : public ::testing::Test {
+ public:
+ TestORCWriterInvalidTypes() {
+ table_schema = schema({field("uint8", uint8()), field("uint16", uint16()),
+ field("uint32", uint32()), field("uint64",
uint64())});
+ auto write_options = adapters::orc::WriteOptions();
+#ifdef ARROW_WITH_SNAPPY
+ write_options.compression = Compression::SNAPPY;
+#else
+ write_options.compression = Compression::UNCOMPRESSED;
+#endif
+ write_options.file_version = adapters::orc::FileVersion(0, 11);
+ write_options.compression_block_size = 32768;
+ write_options.row_index_stride = 5000;
+ }
+
+ protected:
+ std::shared_ptr<Schema> table_schema;
+ adapters::orc::WriteOptions write_options;
+};
+TEST_F(TestORCWriterInvalidTypes, noWriteInvalidTypes) {
+ const std::shared_ptr<Table> table = GenerateRandomTable(table_schema, 5332,
5, 7, 0);
+ EXPECT_OK_AND_ASSIGN(auto buffer_output_stream,
+
io::BufferOutputStream::Create(kDefaultSmallMemStreamSize / 16));
+ EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open(
+ buffer_output_stream.get(),
write_options));
+ ASSERT_RAISES(NotImplemented, writer->Write(*table));
Review Comment:
Also, for clarity, this should also check the message error (or some
significant part thereof). You can look for instances of
`EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT` for examples.
##########
cpp/src/arrow/adapters/orc/adapter_test.cc:
##########
@@ -487,6 +487,36 @@ TEST_F(TestORCWriterTrivialWithConversion, writeChunkless)
{
kDefaultSmallMemStreamSize / 16);
}
+// Invalid Types
+class TestORCWriterInvalidTypes : public ::testing::Test {
+ public:
+ TestORCWriterInvalidTypes() {
Review Comment:
```suggestion
void SetUp() override {
```
##########
python/pyarrow/tests/test_orc.py:
##########
@@ -626,11 +626,20 @@ def test_wrong_usage_orc_writer(tempdir):
def test_orc_writer_with_null_arrays(tempdir):
from pyarrow import orc
- import pyarrow as pa
path = str(tempdir / 'test.orc')
a = pa.array([1, None, 3, None])
b = pa.array([None, None, None, None])
table = pa.table({"int64": a, "utf8": b})
with pytest.raises(pa.ArrowNotImplementedError):
orc.write_table(table, path)
+
+
+def test_orc_writer_with_arrays_with_unsupported_types():
Review Comment:
There's a test on the C++ side, I don't think it's worth checking it a
second time here. These tests should probably focus on exercising the Python
APIs, not reproduce the existing C++ tests.
##########
cpp/src/arrow/adapters/orc/util.cc:
##########
@@ -897,20 +897,6 @@ Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const
DataType& type) {
ARROW_ASSIGN_OR_RAISE(auto item_orc_type, GetOrcType(*item_arrow_type));
return liborc::createMapType(std::move(key_orc_type),
std::move(item_orc_type));
}
- case Type::type::DENSE_UNION:
- case Type::type::SPARSE_UNION: {
Review Comment:
Why are these being removed? Is this addressing the same issue?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]