jnwan opened a new issue, #1296:
URL: https://github.com/apache/orc/issues/1296
Below is the code to reproduce the issue. It works if I remove the empty
struct column "col2", write a smaller number of rows, or change the value to
"rand() % 100".
Am I doing anything wrong?
Code
```
// Reproduction: write a single batch to a local ORC file, then read it back
// with a search argument. Per the stack trace in this report, the read side
// throws orc::ParseError ("bad read in nextBuffer") from RowReader::next().

// ---- Write side ----
WriterOptions options;
auto stream = writeLocalFile("orc_file_test");
// NOTE(review): `pool` is never referenced again in this snippet.
MemoryPool* pool = getDefaultPool();
// Schema: two struct columns (col0, col2), each wrapping one int child.
std::unique_ptr<Type> type(Type::buildTypeFromString(
"struct<col0:struct<col1:int>,col2:struct<col3:int>>"));
// Row count for the single batch; it is also reused as the read batch size.
size_t num = 50000;
std::unique_ptr<Writer> writer = createWriter(*type, stream.get(),
options);
std::unique_ptr<ColumnVectorBatch> batch = writer->createRowBatch(num);
// Walk down the batch tree: root -> col0 -> col1 and root -> col2 -> col3.
// The dynamic_cast results are used without a null check.
StructVectorBatch* structBatch =
dynamic_cast<StructVectorBatch*>(batch.get());
// structBatch2 is col0; intBatch is its child col1.
StructVectorBatch* structBatch2 =
dynamic_cast<StructVectorBatch*>(structBatch->fields[0]);
LongVectorBatch* intBatch =
dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]);
// structBatch3 is col2; intBatch2 is its child col3.
StructVectorBatch* structBatch3 =
dynamic_cast<StructVectorBatch*>(structBatch->fields[1]);
LongVectorBatch* intBatch2 =
dynamic_cast<LongVectorBatch*>(structBatch3->fields[0]);
// numElements is set on the three struct batches only; this snippet never
// sets it on the leaf batches intBatch / intBatch2.
structBatch->numElements = num;
structBatch2->numElements = num;
structBatch3->numElements = num;
structBatch3->hasNulls = true;
// Fill every row: col1 gets a non-null value in [0, 150000); col2 and its
// child col3 are marked null.
// NOTE(review): `i` is int64_t while `num` is size_t, so `i < num` is a
// signed/unsigned comparison.
for (int64_t i = 0; i < num; ++i) {
intBatch->data.data()[i] = rand() % 150000;
intBatch->notNull[i] = 1;
intBatch2->notNull[i] = 0;
// Loop-invariant: assigns the same flag on every iteration.
intBatch2->hasNulls = true;
structBatch3->notNull[i] = 0;
}
intBatch->hasNulls = false;
writer->add(*batch);
writer->close();

// ---- Read side ----
ReaderOptions readOptions;
readOptions.setMemoryPool(*getDefaultPool());
auto reader = createReader(readLocalFile("orc_file_test"), readOptions);
orc::RowReaderOptions rowOptions;
// Search argument: AND( column 2 == 5 ) with a LONG literal.
// NOTE(review): presumably 2 is the column (type) id of col1 under pre-order
// numbering of the schema above - confirm against the ORC C++ API.
rowOptions.searchArgument(
SearchArgumentFactory::newBuilder()
->startAnd()
.equals(2, PredicateDataType::LONG, Literal((int64_t)5))
.end()
.build());
std::unique_ptr<RowReader> rowReader = reader->createRowReader(rowOptions);
// Reuse the writer-side variables for the reader's batch tree.
batch = rowReader->createRowBatch(num);
structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
structBatch2 = dynamic_cast<StructVectorBatch*>(structBatch->fields[0]);
intBatch = dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]);
structBatch3 = dynamic_cast<StructVectorBatch*>(structBatch->fields[1]);
// Drain the reader. The inner loop body is intentionally empty: no row values
// are inspected, and the stack trace shows the failure occurring inside
// next() itself.
while (rowReader->next(*batch)) {
for (size_t i = 0; i < batch->numElements; i++) {
}
}
```
Stack trace:
```
terminate called after throwing an instance of 'orc::ParseError'
what(): bad read in nextBuffer
*** Aborted at 1666816640 (Unix time, try 'date -d @1666816640') ***
*** Signal 6 (SIGABRT) (0x2035c0002b7ad) received by PID 178093 (pthread TID
0x7ffb12545a80) (linux TID 178093) (maybe from PID 178093, UID 131932) (code:
-6), stack trace: ***
@ 0000000000000000 (unknown)
@ 000000000009c9d3 __GI___pthread_kill
@ 00000000000444ec __GI_raise
@ 000000000002c432 __GI_abort
@ 00000000000a3fd4 __gnu_cxx::__verbose_terminate_handler()
@ 00000000000a1b39 __cxxabiv1::__terminate(void (*)())
@ 00000000000a1ba4 std::terminate()
@ 00000000000a1e6f __cxa_throw
@ 0000000001efcd55 __cxa_throw
@ 00000000075b676c
orc::BooleanRleDecoderImpl::seek(orc::PositionProvider&)
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ByteRLE.cc:526
@ 00000000075af711
orc::IntegerColumnReader::seekToRowGroup(std::unordered_map<unsigned long,
orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>,
std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:120
@ 00000000075af67f
orc::StructColumnReader::seekToRowGroup(std::unordered_map<unsigned long,
orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>,
std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:965
@ 00000000075af67f
orc::StructColumnReader::seekToRowGroup(std::unordered_map<unsigned long,
orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>,
std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:965
@ 0000000007598179 orc::RowReaderImpl::seekToRowGroup(unsigned int)
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:440
@ 000000000759d700 orc::RowReaderImpl::startNextStripe()
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:1037
@ 000000000759daf4 orc::RowReaderImpl::next(orc::ColumnVectorBatch&)
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:1055
@ 0000000002fba9bc main
@ 000000000002c656 __libc_start_call_main
@ 000000000002c717 __libc_start_main_alias_2
@ 0000000002fb2780 _start
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]