This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new 40abb9aa4 ORC-1551: Use `orc-format` `1.0.0-beta`
40abb9aa4 is described below
commit 40abb9aa4b52a013e544f169f82ef719ff767ebb
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Fri Dec 8 21:47:04 2023 -0800
ORC-1551: Use `orc-format` `1.0.0-beta`
### What changes were proposed in this pull request?
This PR aims to use `orc-format` `1.0.0-beta`.
### Why are the changes needed?
`1.0.0-beta` has the following changes.
- https://github.com/apache/orc-format/pull/5
- https://github.com/apache/orc-format/pull/4
- https://github.com/apache/orc-format/pull/7
- https://github.com/apache/orc-format/pull/9
### How was this patch tested?
Pass the CIs.
Closes #1688 from dongjoon-hyun/ORC-1551.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
c++/src/BloomFilter.cc | 8 +-
c++/src/CMakeLists.txt | 4 +-
c++/src/ColumnReader.cc | 2 +-
c++/src/ColumnWriter.cc | 50 +++---
c++/src/Reader.cc | 168 +++++++++++----------
c++/src/Statistics.cc | 142 ++++++++---------
c++/src/Statistics.hh | 86 +++++------
c++/src/StripeStream.cc | 6 +-
c++/src/StripeStream.hh | 4 +-
c++/src/TypeImpl.cc | 8 +-
c++/src/Writer.cc | 50 +++---
c++/src/sargs/PredicateLeaf.cc | 84 ++++++-----
c++/src/sargs/SargsApplier.cc | 4 +-
c++/test/CreateTestFiles.cc | 20 +--
c++/test/TestBloomFilter.cc | 2 +-
c++/test/TestBufferedOutputStream.cc | 12 +-
c++/test/TestColumnReader.cc | 10 +-
c++/test/TestColumnStatistics.cc | 20 +--
c++/test/TestCompression.cc | 12 +-
c++/test/TestPredicateLeaf.cc | 62 ++++----
c++/test/TestSargsApplier.cc | 26 ++--
c++/test/TestType.cc | 28 ++--
cmake_modules/ThirdpartyToolchain.cmake | 4 +-
.../src/test/org/apache/orc/TestVectorOrcFile.java | 6 +-
tools/test/TestFileMetadata.cc | 86 +++++------
25 files changed, 455 insertions(+), 449 deletions(-)
diff --git a/c++/src/BloomFilter.cc b/c++/src/BloomFilter.cc
index e7ef6575e..882c6f425 100644
--- a/c++/src/BloomFilter.cc
+++ b/c++/src/BloomFilter.cc
@@ -175,7 +175,7 @@ namespace orc {
// caller should make sure input proto::BloomFilter is valid since
// no check will be performed in the following constructor
BloomFilterImpl::BloomFilterImpl(const proto::BloomFilter& bloomFilter) {
- mNumHashFunctions = static_cast<int32_t>(bloomFilter.numhashfunctions());
+ mNumHashFunctions = static_cast<int32_t>(bloomFilter.num_hash_functions());
const std::string& bitsetStr = bloomFilter.utf8bitset();
mNumBits = bitsetStr.size() << SHIFT_3_BITS;
@@ -263,7 +263,7 @@ namespace orc {
}
void BloomFilterImpl::serialize(proto::BloomFilter& bloomFilter) const {
- bloomFilter.set_numhashfunctions(static_cast<uint32_t>(mNumHashFunctions));
+ bloomFilter.set_num_hash_functions(static_cast<uint32_t>(mNumHashFunctions));
// According to ORC standard, the encoding is a sequence of bytes with
// a little endian encoding in the utf8bitset field.
@@ -304,12 +304,12 @@ namespace orc {
}
// make sure we don't use unknown encodings or original timestamp encodings
- if (!encoding.has_bloomencoding() || encoding.bloomencoding() != 1) {
+ if (!encoding.has_bloom_encoding() || encoding.bloom_encoding() != 1) {
return nullptr;
}
// make sure all required fields exist
- if (!bloomFilter.has_numhashfunctions() || !bloomFilter.has_utf8bitset()) {
+ if (!bloomFilter.has_num_hash_functions() || !bloomFilter.has_utf8bitset()) {
return nullptr;
}
diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
index 63c2043af..90f3ed87a 100644
--- a/c++/src/CMakeLists.txt
+++ b/c++/src/CMakeLists.txt
@@ -146,9 +146,9 @@ include_directories (
add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
COMMAND ${PROTOBUF_EXECUTABLE}
- -I ../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto
+ -I ../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto/orc/proto
--cpp_out="${CMAKE_CURRENT_BINARY_DIR}"
- ../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto/orc_proto.proto
+ ../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto/orc/proto/orc_proto.proto
)
set(SOURCE_FILES
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index 7fdcd530f..dc0ecb147 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -567,7 +567,7 @@ namespace orc {
StripeStreams&
stripe)
: ColumnReader(type, stripe), dictionary(new
StringDictionary(stripe.getMemoryPool())) {
RleVersion rleVersion =
convertRleVersion(stripe.getEncoding(columnId).kind());
- uint32_t dictSize = stripe.getEncoding(columnId).dictionarysize();
+ uint32_t dictSize = stripe.getEncoding(columnId).dictionary_size();
std::unique_ptr<SeekableInputStream> stream =
stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
if (stream == nullptr) {
diff --git a/c++/src/ColumnWriter.cc b/c++/src/ColumnWriter.cc
index a7412c0e4..5c7ad2179 100644
--- a/c++/src/ColumnWriter.cc
+++ b/c++/src/ColumnWriter.cc
@@ -184,7 +184,7 @@ namespace orc {
void ColumnWriter::addBloomFilterEntry() {
if (enableBloomFilter) {
- BloomFilterUTF8Utils::serialize(*bloomFilter, *bloomFilterIndex->add_bloomfilter());
+ BloomFilterUTF8Utils::serialize(*bloomFilter, *bloomFilterIndex->add_bloom_filter());
bloomFilter->reset();
}
}
@@ -244,7 +244,7 @@ namespace orc {
if (enableBloomFilter) {
bloomFilter->reset();
- bloomFilterIndex->clear_bloomfilter();
+ bloomFilterIndex->clear_bloom_filter();
}
}
@@ -353,7 +353,7 @@ namespace orc {
void
StructColumnWriter::getColumnEncoding(std::vector<proto::ColumnEncoding>&
encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
- encoding.set_dictionarysize(0);
+ encoding.set_dictionary_size(0);
encodings.push_back(encoding);
for (uint32_t i = 0; i < children.size(); ++i) {
children[i]->getColumnEncoding(encodings);
@@ -513,9 +513,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(RleVersionMapper(rleVersion));
- encoding.set_dictionarysize(0);
+ encoding.set_dictionary_size(0);
if (enableBloomFilter) {
- encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+ encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
@@ -622,9 +622,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
- encoding.set_dictionarysize(0);
+ encoding.set_dictionary_size(0);
if (enableBloomFilter) {
- encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+ encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
@@ -735,9 +735,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
- encoding.set_dictionarysize(0);
+ encoding.set_dictionary_size(0);
if (enableBloomFilter) {
- encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+ encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
@@ -863,9 +863,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
- encoding.set_dictionarysize(0);
+ encoding.set_dictionary_size(0);
if (enableBloomFilter) {
- encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+ encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
@@ -1201,9 +1201,9 @@ namespace orc {
encoding.set_kind(rleVersion == RleVersion_1 ?
proto::ColumnEncoding_Kind_DICTIONARY
:
proto::ColumnEncoding_Kind_DICTIONARY_V2);
}
- encoding.set_dictionarysize(static_cast<uint32_t>(dictionary.size()));
+ encoding.set_dictionary_size(static_cast<uint32_t>(dictionary.size()));
if (enableBloomFilter) {
- encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+ encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
@@ -1765,9 +1765,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(RleVersionMapper(rleVersion));
- encoding.set_dictionarysize(0);
+ encoding.set_dictionary_size(0);
if (enableBloomFilter) {
- encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+ encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
@@ -1952,9 +1952,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(RleVersionMapper(rleVersion));
- encoding.set_dictionarysize(0);
+ encoding.set_dictionary_size(0);
if (enableBloomFilter) {
- encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+ encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
@@ -2059,9 +2059,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(RleVersionMapper(RleVersion_2));
- encoding.set_dictionarysize(0);
+ encoding.set_dictionary_size(0);
if (enableBloomFilter) {
- encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+ encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
@@ -2299,9 +2299,9 @@ namespace orc {
void ListColumnWriter::getColumnEncoding(std::vector<proto::ColumnEncoding>&
encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(RleVersionMapper(rleVersion));
- encoding.set_dictionarysize(0);
+ encoding.set_dictionary_size(0);
if (enableBloomFilter) {
- encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+ encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
if (child.get()) {
@@ -2525,9 +2525,9 @@ namespace orc {
void MapColumnWriter::getColumnEncoding(std::vector<proto::ColumnEncoding>&
encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(RleVersionMapper(rleVersion));
- encoding.set_dictionarysize(0);
+ encoding.set_dictionary_size(0);
if (enableBloomFilter) {
- encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+ encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
if (keyWriter.get()) {
@@ -2752,9 +2752,9 @@ namespace orc {
void
UnionColumnWriter::getColumnEncoding(std::vector<proto::ColumnEncoding>&
encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
- encoding.set_dictionarysize(0);
+ encoding.set_dictionary_size(0);
if (enableBloomFilter) {
- encoding.set_bloomencoding(BloomFilterVersion::UTF8);
+ encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
for (uint32_t i = 0; i < children.size(); ++i) {
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 386793f0b..162f69e10 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -57,8 +57,8 @@ namespace orc {
}
uint64_t getCompressionBlockSize(const proto::PostScript& ps) {
- if (ps.has_compressionblocksize()) {
- return ps.compressionblocksize();
+ if (ps.has_compression_block_size()) {
+ return ps.compression_block_size();
} else {
return 256 * 1024;
}
@@ -84,10 +84,10 @@ namespace orc {
}
WriterVersion getWriterVersionImpl(const FileContents* contents) {
- if (!contents->postscript->has_writerversion()) {
+ if (!contents->postscript->has_writer_version()) {
return WriterVersion_ORIGINAL;
}
- return static_cast<WriterVersion>(contents->postscript->writerversion());
+ return static_cast<WriterVersion>(contents->postscript->writer_version());
}
void ColumnSelector::selectChildren(std::vector<bool>& selectedColumns,
const Type& type) {
@@ -272,7 +272,7 @@ namespace orc {
for (size_t i = 0; i < numberOfStripes; ++i) {
firstRowOfStripe[i] = rowTotal;
proto::StripeInformation stripeInfo =
footer->stripes(static_cast<int>(i));
- rowTotal += stripeInfo.numberofrows();
+ rowTotal += stripeInfo.number_of_rows();
bool isStripeInRange = stripeInfo.offset() >= opts.getOffset() &&
stripeInfo.offset() < opts.getOffset() +
opts.getLength();
if (isStripeInRange) {
@@ -282,9 +282,10 @@ namespace orc {
if (i >= lastStripe) {
lastStripe = i + 1;
}
- if (footer->rowindexstride() > 0) {
+ if (footer->row_index_stride() > 0) {
numRowGroupsInStripeRange +=
- (stripeInfo.numberofrows() + footer->rowindexstride() - 1) /
footer->rowindexstride();
+ (stripeInfo.number_of_rows() + footer->row_index_stride() - 1) /
+ footer->row_index_stride();
}
}
}
@@ -294,7 +295,7 @@ namespace orc {
if (currentStripe == 0) {
previousRow = (std::numeric_limits<uint64_t>::max)();
} else if (currentStripe == numberOfStripes) {
- previousRow = footer->numberofrows();
+ previousRow = footer->number_of_rows();
} else {
previousRow = firstRowOfStripe[firstStripe] - 1;
}
@@ -303,11 +304,11 @@ namespace orc {
column_selector.updateSelected(selectedColumns, opts);
// prepare SargsApplier if SearchArgument is available
- if (opts.getSearchArgument() && footer->rowindexstride() > 0) {
+ if (opts.getSearchArgument() && footer->row_index_stride() > 0) {
sargs = opts.getSearchArgument();
- sargsApplier.reset(new SargsApplier(*contents->schema, sargs.get(), footer->rowindexstride(),
- getWriterVersionImpl(_contents.get()),
- contents->readerMetrics));
+ sargsApplier.reset(
+ new SargsApplier(*contents->schema, sargs.get(), footer->row_index_stride(),
+ getWriterVersionImpl(_contents.get()), contents->readerMetrics));
}
skipBloomFilters = hasBadBloomFilters();
@@ -321,9 +322,9 @@ namespace orc {
// 1.6.x releases before 1.6.11 won't have it. On the other side, the C++
writer
// supports writing bloom filters since 1.6.0. So files written by the C++
writer
// and with 'softwareVersion' unset would have bad bloom filters.
- if (!footer->has_softwareversion()) return true;
+ if (!footer->has_software_version()) return true;
- const std::string& fullVersion = footer->softwareversion();
+ const std::string& fullVersion = footer->software_version();
std::string version;
// Deal with snapshot versions, e.g. 1.6.12-SNAPSHOT.
if (fullVersion.find('-') != std::string::npos) {
@@ -375,10 +376,10 @@ namespace orc {
// seeking past lastStripe
uint64_t num_stripes = static_cast<uint64_t>(footer->stripes_size());
- if ((lastStripe == num_stripes && rowNumber >= footer->numberofrows()) ||
+ if ((lastStripe == num_stripes && rowNumber >= footer->number_of_rows()) ||
(lastStripe < num_stripes && rowNumber >=
firstRowOfStripe[lastStripe])) {
currentStripe = num_stripes;
- previousRow = footer->numberofrows();
+ previousRow = footer->number_of_rows();
return;
}
@@ -390,14 +391,14 @@ namespace orc {
// seeking before the first stripe
if (seekToStripe < firstStripe) {
currentStripe = num_stripes;
- previousRow = footer->numberofrows();
+ previousRow = footer->number_of_rows();
return;
}
previousRow = rowNumber;
- auto rowIndexStride = footer->rowindexstride();
+ auto rowIndexStride = footer->row_index_stride();
if (!isCurrentStripeInited() || currentStripe != seekToStripe ||
rowIndexStride == 0 ||
- currentStripeInfo.indexlength() == 0) {
+ currentStripeInfo.index_length() == 0) {
// current stripe is not initialized or
// target stripe is not current stripe or
// current stripe doesn't have row indexes
@@ -412,14 +413,14 @@ namespace orc {
if (sargsApplier) {
// advance to selected row group if predicate pushdown is enabled
currentRowInStripe =
- advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe,
footer->rowindexstride(),
- sargsApplier->getNextSkippedRows());
+ advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe,
+ footer->row_index_stride(),
sargsApplier->getNextSkippedRows());
}
}
uint64_t rowsToSkip = currentRowInStripe;
// seek to the target row group if row indexes exists
- if (rowIndexStride > 0 && currentStripeInfo.indexlength() > 0) {
+ if (rowIndexStride > 0 && currentStripeInfo.index_length() > 0) {
if (rowIndexes.empty()) {
loadStripeIndex();
}
@@ -466,10 +467,10 @@ namespace orc {
throw ParseError("Failed to parse bloom filter index");
}
BloomFilterIndex bfIndex;
- for (int j = 0; j < pbBFIndex.bloomfilter_size(); j++) {
+ for (int j = 0; j < pbBFIndex.bloom_filter_size(); j++) {
bfIndex.entries.push_back(BloomFilterUTF8Utils::deserialize(
pbStream.kind(),
currentStripeFooter.columns(static_cast<int>(pbStream.column())),
- pbBFIndex.bloomfilter(j)));
+ pbBFIndex.bloom_filter(j)));
}
// add bloom filters to result for one column
bloomFilterIndex[pbStream.column()] = bfIndex;
@@ -520,8 +521,8 @@ namespace orc {
proto::StripeFooter getStripeFooter(const proto::StripeInformation& info,
const FileContents& contents) {
- uint64_t stripeFooterStart = info.offset() + info.indexlength() +
info.datalength();
- uint64_t stripeFooterLength = info.footerlength();
+ uint64_t stripeFooterStart = info.offset() + info.index_length() +
info.data_length();
+ uint64_t stripeFooterLength = info.footer_length();
std::unique_ptr<SeekableInputStream> pbStream = createDecompressor(
contents.compression,
std::make_unique<SeekableFileInputStream>(contents.stream.get(),
stripeFooterStart,
@@ -562,8 +563,8 @@ namespace orc {
mutable_ps->CopyFrom(*contents->postscript);
proto::Footer* mutableFooter = tail.mutable_footer();
mutableFooter->CopyFrom(*footer);
- tail.set_filelength(fileLength);
- tail.set_postscriptlength(postscriptLength);
+ tail.set_file_length(fileLength);
+ tail.set_postscript_length(postscriptLength);
std::string result;
if (!tail.SerializeToString(&result)) {
throw ParseError("Failed to serialize file tail");
@@ -593,7 +594,7 @@ namespace orc {
}
return contents->metadata == nullptr
? 0
- : static_cast<uint64_t>(contents->metadata->stripestats_size());
+ : static_cast<uint64_t>(contents->metadata->stripe_stats_size());
}
std::unique_ptr<StripeInformation> ReaderImpl::getStripe(uint64_t
stripeIndex) const {
@@ -603,8 +604,8 @@ namespace orc {
proto::StripeInformation stripeInfo =
footer->stripes(static_cast<int>(stripeIndex));
return std::unique_ptr<StripeInformation>(new StripeInformationImpl(
- stripeInfo.offset(), stripeInfo.indexlength(), stripeInfo.datalength(),
- stripeInfo.footerlength(), stripeInfo.numberofrows(),
contents->stream.get(),
+ stripeInfo.offset(), stripeInfo.index_length(),
stripeInfo.data_length(),
+ stripeInfo.footer_length(), stripeInfo.number_of_rows(),
contents->stream.get(),
*contents->pool, contents->compression, contents->blockSize,
contents->readerMetrics));
}
@@ -616,7 +617,7 @@ namespace orc {
}
uint64_t ReaderImpl::getNumberOfRows() const {
- return footer->numberofrows();
+ return footer->number_of_rows();
}
WriterId ReaderImpl::getWriterId() const {
@@ -642,8 +643,8 @@ namespace orc {
std::string ReaderImpl::getSoftwareVersion() const {
std::ostringstream buffer;
buffer << writerIdToString(getWriterIdValue());
- if (footer->has_softwareversion()) {
- buffer << " " << footer->softwareversion();
+ if (footer->has_software_version()) {
+ buffer << " " << footer->software_version();
}
return buffer.str();
}
@@ -653,15 +654,15 @@ namespace orc {
}
uint64_t ReaderImpl::getContentLength() const {
- return footer->contentlength();
+ return footer->content_length();
}
uint64_t ReaderImpl::getStripeStatisticsLength() const {
- return contents->postscript->metadatalength();
+ return contents->postscript->metadata_length();
}
uint64_t ReaderImpl::getFileFooterLength() const {
- return contents->postscript->footerlength();
+ return contents->postscript->footer_length();
}
uint64_t ReaderImpl::getFilePostscriptLength() const {
@@ -673,7 +674,7 @@ namespace orc {
}
uint64_t ReaderImpl::getRowIndexStride() const {
- return footer->rowindexstride();
+ return footer->row_index_stride();
}
const std::string& ReaderImpl::getStreamName() const {
@@ -703,7 +704,7 @@ namespace orc {
std::vector<std::vector<proto::ColumnStatistics>>* indexStats) const {
int num_streams = currentStripeFooter.streams_size();
uint64_t offset = stripeInfo.offset();
- uint64_t indexEnd = stripeInfo.offset() + stripeInfo.indexlength();
+ uint64_t indexEnd = stripeInfo.offset() + stripeInfo.index_length();
for (int i = 0; i < num_streams; i++) {
const proto::Stream& stream = currentStripeFooter.streams(i);
StreamKind streamKind = static_cast<StreamKind>(stream.kind());
@@ -714,7 +715,7 @@ namespace orc {
msg << "Malformed RowIndex stream meta in stripe " << stripeIndex
<< ": streamOffset=" << offset << ", streamLength=" << length
<< ", stripeOffset=" << stripeInfo.offset()
- << ", stripeIndexLength=" << stripeInfo.indexlength();
+ << ", stripeIndexLength=" << stripeInfo.index_length();
throw ParseError(msg.str());
}
std::unique_ptr<SeekableInputStream> pbStream =
@@ -759,7 +760,7 @@ namespace orc {
throw std::logic_error("No stripe statistics in file");
}
size_t num_cols = static_cast<size_t>(
- contents->metadata->stripestats(static_cast<int>(stripeIndex)).colstats_size());
+ contents->metadata->stripe_stats(static_cast<int>(stripeIndex)).col_stats_size());
std::vector<std::vector<proto::ColumnStatistics>> indexStats(num_cols);
proto::StripeInformation currentStripeInfo =
footer->stripes(static_cast<int>(stripeIndex));
@@ -767,12 +768,12 @@ namespace orc {
getRowIndexStatistics(currentStripeInfo, stripeIndex, currentStripeFooter,
&indexStats);
- const Timezone& writerTZ = currentStripeFooter.has_writertimezone()
- ?
getTimezoneByName(currentStripeFooter.writertimezone())
+ const Timezone& writerTZ = currentStripeFooter.has_writer_timezone()
+ ?
getTimezoneByName(currentStripeFooter.writer_timezone())
: getLocalTimezone();
StatContext statContext(hasCorrectStatistics(), &writerTZ);
return std::make_unique<StripeStatisticsImpl>(
- contents->metadata->stripestats(static_cast<int>(stripeIndex)),
indexStats, statContext);
+ contents->metadata->stripe_stats(static_cast<int>(stripeIndex)),
indexStats, statContext);
}
std::unique_ptr<Statistics> ReaderImpl::getStatistics() const {
@@ -791,8 +792,8 @@ namespace orc {
}
void ReaderImpl::readMetadata() const {
- uint64_t metadataSize = contents->postscript->metadatalength();
- uint64_t footerLength = contents->postscript->footerlength();
+ uint64_t metadataSize = contents->postscript->metadata_length();
+ uint64_t footerLength = contents->postscript->footer_length();
if (fileLength < metadataSize + footerLength + postscriptLength + 1) {
std::stringstream msg;
msg << "Invalid Metadata length: fileLength=" << fileLength
@@ -935,13 +936,13 @@ namespace orc {
uint64_t maxDataLength = 0;
if (stripeIx >= 0 && stripeIx < footer->stripes_size()) {
- uint64_t stripe = footer->stripes(stripeIx).datalength();
+ uint64_t stripe = footer->stripes(stripeIx).data_length();
if (maxDataLength < stripe) {
maxDataLength = stripe;
}
} else {
for (int i = 0; i < footer->stripes_size(); i++) {
- uint64_t stripe = footer->stripes(i).datalength();
+ uint64_t stripe = footer->stripes(i).data_length();
if (maxDataLength < stripe) {
maxDataLength = stripe;
}
@@ -969,7 +970,7 @@ namespace orc {
}
}
- /* If a string column is read, use stripe datalength as a memory estimate
+ /* If a string column is read, use stripe data_length as a memory estimate
* because we don't know the dictionary size. Multiply by 2 because
* a string column requires two buffers:
* in the input stream and in the seekable input stream.
@@ -981,11 +982,11 @@ namespace orc {
nSelectedStreams *
contents->stream->getNaturalReadSize());
// Do we need even more memory to read the footer or the metadata?
- if (memory < contents->postscript->footerlength() + DIRECTORY_SIZE_GUESS) {
- memory = contents->postscript->footerlength() + DIRECTORY_SIZE_GUESS;
+ if (memory < contents->postscript->footer_length() + DIRECTORY_SIZE_GUESS) {
+ memory = contents->postscript->footer_length() + DIRECTORY_SIZE_GUESS;
}
- if (memory < contents->postscript->metadatalength()) {
- memory = contents->postscript->metadatalength();
+ if (memory < contents->postscript->metadata_length()) {
+ memory = contents->postscript->metadata_length();
}
// Account for firstRowOfStripe.
@@ -1018,7 +1019,7 @@ namespace orc {
previousRow = 0;
} else {
previousRow = firstRowOfStripe[lastStripe - 1] +
- footer->stripes(static_cast<int>(lastStripe -
1)).numberofrows();
+ footer->stripes(static_cast<int>(lastStripe -
1)).number_of_rows();
}
}
@@ -1037,30 +1038,30 @@ namespace orc {
do {
currentStripeInfo = footer->stripes(static_cast<int>(currentStripe));
uint64_t fileLength = contents->stream->getLength();
- if (currentStripeInfo.offset() + currentStripeInfo.indexlength() +
- currentStripeInfo.datalength() +
currentStripeInfo.footerlength() >=
+ if (currentStripeInfo.offset() + currentStripeInfo.index_length() +
+ currentStripeInfo.data_length() +
currentStripeInfo.footer_length() >=
fileLength) {
std::stringstream msg;
msg << "Malformed StripeInformation at stripe index " << currentStripe
<< ": fileLength=" << fileLength
<< ", StripeInfo=(offset=" << currentStripeInfo.offset()
- << ", indexLength=" << currentStripeInfo.indexlength()
- << ", dataLength=" << currentStripeInfo.datalength()
- << ", footerLength=" << currentStripeInfo.footerlength() << ")";
+ << ", indexLength=" << currentStripeInfo.index_length()
+ << ", dataLength=" << currentStripeInfo.data_length()
+ << ", footerLength=" << currentStripeInfo.footer_length() << ")";
throw ParseError(msg.str());
}
currentStripeFooter = getStripeFooter(currentStripeInfo,
*contents.get());
- rowsInCurrentStripe = currentStripeInfo.numberofrows();
+ rowsInCurrentStripe = currentStripeInfo.number_of_rows();
processingStripe = currentStripe;
if (sargsApplier) {
bool isStripeNeeded = true;
if (contents->metadata) {
const auto& currentStripeStats =
- contents->metadata->stripestats(static_cast<int>(currentStripe));
+ contents->metadata->stripe_stats(static_cast<int>(currentStripe));
// skip this stripe after stats fail to satisfy sargs
uint64_t stripeRowGroupCount =
- (rowsInCurrentStripe + footer->rowindexstride() - 1) /
footer->rowindexstride();
+ (rowsInCurrentStripe + footer->row_index_stride() - 1) /
footer->row_index_stride();
isStripeNeeded =
sargsApplier->evaluateStripeStatistics(currentStripeStats,
stripeRowGroupCount);
}
@@ -1087,9 +1088,10 @@ namespace orc {
if (currentStripe < lastStripe) {
// get writer timezone info from stripe footer to help understand
timestamp values.
- const Timezone& writerTimezone = currentStripeFooter.has_writertimezone()
- ?
getTimezoneByName(currentStripeFooter.writertimezone())
- : localTimezone;
+ const Timezone& writerTimezone =
+ currentStripeFooter.has_writer_timezone()
+ ? getTimezoneByName(currentStripeFooter.writer_timezone())
+ : localTimezone;
StripeStreamsImpl stripeStreams(*this, currentStripe, currentStripeInfo,
currentStripeFooter,
currentStripeInfo.offset(),
*contents->stream, writerTimezone,
readerTimezone);
@@ -1099,11 +1101,11 @@ namespace orc {
if (sargsApplier) {
// move to the 1st selected row group when PPD is enabled.
currentRowInStripe =
- advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe,
footer->rowindexstride(),
- sargsApplier->getNextSkippedRows());
+ advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe,
+ footer->row_index_stride(),
sargsApplier->getNextSkippedRows());
previousRow = firstRowOfStripe[currentStripe] + currentRowInStripe - 1;
if (currentRowInStripe > 0) {
- seekToRowGroup(static_cast<uint32_t>(currentRowInStripe /
footer->rowindexstride()));
+ seekToRowGroup(static_cast<uint32_t>(currentRowInStripe /
footer->row_index_stride()));
}
}
} else {
@@ -1126,7 +1128,7 @@ namespace orc {
std::min(static_cast<uint64_t>(data.capacity), rowsInCurrentStripe -
currentRowInStripe);
if (sargsApplier && rowsToRead > 0) {
rowsToRead = computeBatchSize(rowsToRead, currentRowInStripe,
rowsInCurrentStripe,
- footer->rowindexstride(),
sargsApplier->getNextSkippedRows());
+ footer->row_index_stride(),
sargsApplier->getNextSkippedRows());
}
data.numElements = rowsToRead;
if (rowsToRead == 0) {
@@ -1145,13 +1147,13 @@ namespace orc {
// check if we need to advance to next selected row group
if (sargsApplier) {
uint64_t nextRowToRead =
- advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe,
footer->rowindexstride(),
+ advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe,
footer->row_index_stride(),
sargsApplier->getNextSkippedRows());
if (currentRowInStripe != nextRowToRead) {
// it is guaranteed to be at start of a row group
currentRowInStripe = nextRowToRead;
if (currentRowInStripe < rowsInCurrentStripe) {
- seekToRowGroup(static_cast<uint32_t>(currentRowInStripe /
footer->rowindexstride()));
+ seekToRowGroup(static_cast<uint32_t>(currentRowInStripe /
footer->row_index_stride()));
}
}
}
@@ -1302,9 +1304,9 @@ namespace orc {
for (int i = 0; i < maxId; ++i) {
const proto::Type& type = footer.types(i);
if (type.kind() == proto::Type_Kind_STRUCT &&
- type.subtypes_size() != type.fieldnames_size()) {
+ type.subtypes_size() != type.field_names_size()) {
msg << "Footer is corrupt: STRUCT type " << i << " has " << type.subtypes_size()
- << " subTypes, but has " << type.fieldnames_size() << " fieldNames";
+ << " subTypes, but has " << type.field_names_size() << " fieldNames";
throw ParseError(msg.str());
}
for (int j = 0; j < type.subtypes_size(); ++j) {
@@ -1339,10 +1341,10 @@ namespace orc {
MemoryPool& memoryPool,
ReaderMetrics* readerMetrics) {
const char* footerPtr = buffer->data() + footerOffset;
- std::unique_ptr<SeekableInputStream> pbStream =
- createDecompressor(convertCompressionKind(ps),
-
std::make_unique<SeekableArrayInputStream>(footerPtr, ps.footerlength()),
- getCompressionBlockSize(ps), memoryPool,
readerMetrics);
+ std::unique_ptr<SeekableInputStream> pbStream = createDecompressor(
+ convertCompressionKind(ps),
+ std::make_unique<SeekableArrayInputStream>(footerPtr,
ps.footer_length()),
+ getCompressionBlockSize(ps), memoryPool, readerMetrics);
auto footer = std::make_unique<proto::Footer>();
if (!footer->ParseFromZeroCopyStream(pbStream.get())) {
@@ -1370,8 +1372,8 @@ namespace orc {
}
contents->postscript =
std::make_unique<proto::PostScript>(tail.postscript());
contents->footer = std::make_unique<proto::Footer>(tail.footer());
- fileLength = tail.filelength();
- postscriptLength = tail.postscriptlength();
+ fileLength = tail.file_length();
+ postscriptLength = tail.postscript_length();
} else {
// figure out the size of the file using the option or filesystem
fileLength = std::min(options.getTailLocation(),
static_cast<uint64_t>(stream->getLength()));
@@ -1386,7 +1388,7 @@ namespace orc {
postscriptLength = buffer->data()[readSize - 1] & 0xff;
contents->postscript = readPostscript(stream.get(), buffer.get(),
postscriptLength);
- uint64_t footerSize = contents->postscript->footerlength();
+ uint64_t footerSize = contents->postscript->footer_length();
uint64_t tailSize = 1 + postscriptLength + footerSize;
if (tailSize >= fileLength) {
std::stringstream msg;
@@ -1430,7 +1432,7 @@ namespace orc {
footer->stripes(static_cast<int>(stripeIndex));
const proto::StripeFooter currentStripeFooter =
getStripeFooter(currentStripeInfo, *contents);
- // iterate stripe footer to get stream of bloomfilter
+ // iterate stripe footer to get stream of bloom_filter
uint64_t offset = static_cast<uint64_t>(currentStripeInfo.offset());
for (int i = 0; i < currentStripeFooter.streams_size(); i++) {
const proto::Stream& stream = currentStripeFooter.streams(i);
@@ -1452,10 +1454,10 @@ namespace orc {
}
BloomFilterIndex bfIndex;
- for (int j = 0; j < pbBFIndex.bloomfilter_size(); j++) {
+ for (int j = 0; j < pbBFIndex.bloom_filter_size(); j++) {
std::unique_ptr<BloomFilter> entry =
BloomFilterUTF8Utils::deserialize(
stream.kind(),
currentStripeFooter.columns(static_cast<int>(stream.column())),
- pbBFIndex.bloomfilter(j));
+ pbBFIndex.bloom_filter(j));
bfIndex.entries.push_back(std::shared_ptr<BloomFilter>(std::move(entry)));
}
diff --git a/c++/src/Statistics.cc b/c++/src/Statistics.cc
index 7b6487396..8ed29d0e7 100644
--- a/c++/src/Statistics.cc
+++ b/c++/src/Statistics.cc
@@ -26,23 +26,23 @@ namespace orc {
ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s,
const StatContext& statContext) {
- if (s.has_intstatistics()) {
+ if (s.has_int_statistics()) {
return new IntegerColumnStatisticsImpl(s);
- } else if (s.has_doublestatistics()) {
+ } else if (s.has_double_statistics()) {
return new DoubleColumnStatisticsImpl(s);
- } else if (s.has_collectionstatistics()) {
+ } else if (s.has_collection_statistics()) {
return new CollectionColumnStatisticsImpl(s);
- } else if (s.has_stringstatistics()) {
+ } else if (s.has_string_statistics()) {
return new StringColumnStatisticsImpl(s, statContext);
- } else if (s.has_bucketstatistics()) {
+ } else if (s.has_bucket_statistics()) {
return new BooleanColumnStatisticsImpl(s, statContext);
- } else if (s.has_decimalstatistics()) {
+ } else if (s.has_decimal_statistics()) {
return new DecimalColumnStatisticsImpl(s, statContext);
- } else if (s.has_timestampstatistics()) {
+ } else if (s.has_timestamp_statistics()) {
return new TimestampColumnStatisticsImpl(s, statContext);
- } else if (s.has_datestatistics()) {
+ } else if (s.has_date_statistics()) {
return new DateColumnStatisticsImpl(s, statContext);
- } else if (s.has_binarystatistics()) {
+ } else if (s.has_binary_statistics()) {
return new BinaryColumnStatisticsImpl(s, statContext);
} else {
return new ColumnStatisticsImpl(s);
@@ -51,8 +51,8 @@ namespace orc {
StatisticsImpl::StatisticsImpl(const proto::StripeStatistics& stripeStats,
const StatContext& statContext) {
- for (int i = 0; i < stripeStats.colstats_size(); i++) {
- colStats.push_back(convertColumnStatistics(stripeStats.colstats(i),
statContext));
+ for (int i = 0; i < stripeStats.col_stats_size(); i++) {
+ colStats.push_back(convertColumnStatistics(stripeStats.col_stats(i),
statContext));
}
}
@@ -180,27 +180,27 @@ namespace orc {
}
ColumnStatisticsImpl::ColumnStatisticsImpl(const proto::ColumnStatistics&
pb) {
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
+ _stats.setNumberOfValues(pb.number_of_values());
+ _stats.setHasNull(pb.has_null());
}
BinaryColumnStatisticsImpl::BinaryColumnStatisticsImpl(const
proto::ColumnStatistics& pb,
const StatContext&
statContext) {
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (pb.has_binarystatistics() && statContext.correctStats) {
- _stats.setHasTotalLength(pb.binarystatistics().has_sum());
-
_stats.setTotalLength(static_cast<uint64_t>(pb.binarystatistics().sum()));
+ _stats.setNumberOfValues(pb.number_of_values());
+ _stats.setHasNull(pb.has_null());
+ if (pb.has_binary_statistics() && statContext.correctStats) {
+ _stats.setHasTotalLength(pb.binary_statistics().has_sum());
+
_stats.setTotalLength(static_cast<uint64_t>(pb.binary_statistics().sum()));
}
}
BooleanColumnStatisticsImpl::BooleanColumnStatisticsImpl(const
proto::ColumnStatistics& pb,
const StatContext&
statContext) {
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (pb.has_bucketstatistics() && statContext.correctStats) {
+ _stats.setNumberOfValues(pb.number_of_values());
+ _stats.setHasNull(pb.has_null());
+ if (pb.has_bucket_statistics() && statContext.correctStats) {
_hasCount = true;
- _trueCount = pb.bucketstatistics().count(0);
+ _trueCount = pb.bucket_statistics().count(0);
} else {
_hasCount = false;
_trueCount = 0;
@@ -209,27 +209,27 @@ namespace orc {
DateColumnStatisticsImpl::DateColumnStatisticsImpl(const
proto::ColumnStatistics& pb,
const StatContext&
statContext) {
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (!pb.has_datestatistics() || !statContext.correctStats) {
+ _stats.setNumberOfValues(pb.number_of_values());
+ _stats.setHasNull(pb.has_null());
+ if (!pb.has_date_statistics() || !statContext.correctStats) {
// hasMinimum_ is false by default;
// hasMaximum_ is false by default;
_stats.setMinimum(0);
_stats.setMaximum(0);
} else {
- _stats.setHasMinimum(pb.datestatistics().has_minimum());
- _stats.setHasMaximum(pb.datestatistics().has_maximum());
- _stats.setMinimum(pb.datestatistics().minimum());
- _stats.setMaximum(pb.datestatistics().maximum());
+ _stats.setHasMinimum(pb.date_statistics().has_minimum());
+ _stats.setHasMaximum(pb.date_statistics().has_maximum());
+ _stats.setMinimum(pb.date_statistics().minimum());
+ _stats.setMaximum(pb.date_statistics().maximum());
}
}
DecimalColumnStatisticsImpl::DecimalColumnStatisticsImpl(const
proto::ColumnStatistics& pb,
const StatContext&
statContext) {
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (pb.has_decimalstatistics() && statContext.correctStats) {
- const proto::DecimalStatistics& stats = pb.decimalstatistics();
+ _stats.setNumberOfValues(pb.number_of_values());
+ _stats.setHasNull(pb.has_null());
+ if (pb.has_decimal_statistics() && statContext.correctStats) {
+ const proto::DecimalStatistics& stats = pb.decimal_statistics();
_stats.setHasMinimum(stats.has_minimum());
_stats.setHasMaximum(stats.has_maximum());
_stats.setHasSum(stats.has_sum());
@@ -241,14 +241,14 @@ namespace orc {
}
DoubleColumnStatisticsImpl::DoubleColumnStatisticsImpl(const
proto::ColumnStatistics& pb) {
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (!pb.has_doublestatistics()) {
+ _stats.setNumberOfValues(pb.number_of_values());
+ _stats.setHasNull(pb.has_null());
+ if (!pb.has_double_statistics()) {
_stats.setMinimum(0);
_stats.setMaximum(0);
_stats.setSum(0);
} else {
- const proto::DoubleStatistics& stats = pb.doublestatistics();
+ const proto::DoubleStatistics& stats = pb.double_statistics();
_stats.setHasMinimum(stats.has_minimum());
_stats.setHasMaximum(stats.has_maximum());
_stats.setHasSum(stats.has_sum());
@@ -260,14 +260,14 @@ namespace orc {
}
IntegerColumnStatisticsImpl::IntegerColumnStatisticsImpl(const
proto::ColumnStatistics& pb) {
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (!pb.has_intstatistics()) {
+ _stats.setNumberOfValues(pb.number_of_values());
+ _stats.setHasNull(pb.has_null());
+ if (!pb.has_int_statistics()) {
_stats.setMinimum(0);
_stats.setMaximum(0);
_stats.setSum(0);
} else {
- const proto::IntegerStatistics& stats = pb.intstatistics();
+ const proto::IntegerStatistics& stats = pb.int_statistics();
_stats.setHasMinimum(stats.has_minimum());
_stats.setHasMaximum(stats.has_maximum());
_stats.setHasSum(stats.has_sum());
@@ -280,12 +280,12 @@ namespace orc {
StringColumnStatisticsImpl::StringColumnStatisticsImpl(const
proto::ColumnStatistics& pb,
const StatContext&
statContext) {
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (!pb.has_stringstatistics() || !statContext.correctStats) {
+ _stats.setNumberOfValues(pb.number_of_values());
+ _stats.setHasNull(pb.has_null());
+ if (!pb.has_string_statistics() || !statContext.correctStats) {
_stats.setTotalLength(0);
} else {
- const proto::StringStatistics& stats = pb.stringstatistics();
+ const proto::StringStatistics& stats = pb.string_statistics();
_stats.setHasMinimum(stats.has_minimum());
_stats.setHasMaximum(stats.has_maximum());
_stats.setHasTotalLength(stats.has_sum());
@@ -298,9 +298,9 @@ namespace orc {
TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl(const
proto::ColumnStatistics& pb,
const
StatContext& statContext) {
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (!pb.has_timestampstatistics() || !statContext.correctStats) {
+ _stats.setNumberOfValues(pb.number_of_values());
+ _stats.setHasNull(pb.has_null());
+ if (!pb.has_timestamp_statistics() || !statContext.correctStats) {
_stats.setMinimum(0);
_stats.setMaximum(0);
_lowerBound = 0;
@@ -308,21 +308,21 @@ namespace orc {
_minimumNanos = DEFAULT_MIN_NANOS;
_maximumNanos = DEFAULT_MAX_NANOS;
} else {
- const proto::TimestampStatistics& stats = pb.timestampstatistics();
- _stats.setHasMinimum(stats.has_minimumutc() ||
+ const proto::TimestampStatistics& stats = pb.timestamp_statistics();
+ _stats.setHasMinimum(stats.has_minimum_utc() ||
(stats.has_minimum() && (statContext.writerTimezone
!= nullptr)));
- _stats.setHasMaximum(stats.has_maximumutc() ||
+ _stats.setHasMaximum(stats.has_maximum_utc() ||
(stats.has_maximum() && (statContext.writerTimezone
!= nullptr)));
- _hasLowerBound = stats.has_minimumutc() || stats.has_minimum();
- _hasUpperBound = stats.has_maximumutc() || stats.has_maximum();
- // to be consistent with java side, non-default minimumnanos and
maximumnanos
+ _hasLowerBound = stats.has_minimum_utc() || stats.has_minimum();
+ _hasUpperBound = stats.has_maximum_utc() || stats.has_maximum();
+ // to be consistent with java side, non-default minimum_nanos and
maximum_nanos
// are added by one in their serialized form.
- _minimumNanos = stats.has_minimumnanos() ? stats.minimumnanos() - 1 :
DEFAULT_MIN_NANOS;
- _maximumNanos = stats.has_maximumnanos() ? stats.maximumnanos() - 1 :
DEFAULT_MAX_NANOS;
+ _minimumNanos = stats.has_minimum_nanos() ? stats.minimum_nanos() - 1 :
DEFAULT_MIN_NANOS;
+ _maximumNanos = stats.has_maximum_nanos() ? stats.maximum_nanos() - 1 :
DEFAULT_MAX_NANOS;
// Timestamp stats are stored in milliseconds
- if (stats.has_minimumutc()) {
- int64_t minimum = stats.minimumutc();
+ if (stats.has_minimum_utc()) {
+ int64_t minimum = stats.minimum_utc();
_stats.setMinimum(minimum);
_lowerBound = minimum;
} else if (statContext.writerTimezone) {
@@ -340,8 +340,8 @@ namespace orc {
}
// Timestamp stats are stored in milliseconds
- if (stats.has_maximumutc()) {
- int64_t maximum = stats.maximumutc();
+ if (stats.has_maximum_utc()) {
+ int64_t maximum = stats.maximum_utc();
_stats.setMaximum(maximum);
_upperBound = maximum;
} else if (statContext.writerTimezone) {
@@ -364,21 +364,21 @@ namespace orc {
CollectionColumnStatisticsImpl::CollectionColumnStatisticsImpl(
const proto::ColumnStatistics& pb) {
- _stats.setNumberOfValues(pb.numberofvalues());
- _stats.setHasNull(pb.hasnull());
- if (!pb.has_collectionstatistics()) {
+ _stats.setNumberOfValues(pb.number_of_values());
+ _stats.setHasNull(pb.has_null());
+ if (!pb.has_collection_statistics()) {
_stats.setMinimum(0);
_stats.setMaximum(0);
_stats.setSum(0);
} else {
- const proto::CollectionStatistics& stats = pb.collectionstatistics();
- _stats.setHasMinimum(stats.has_minchildren());
- _stats.setHasMaximum(stats.has_maxchildren());
- _stats.setHasSum(stats.has_totalchildren());
-
- _stats.setMinimum(stats.minchildren());
- _stats.setMaximum(stats.maxchildren());
- _stats.setSum(stats.totalchildren());
+ const proto::CollectionStatistics& stats = pb.collection_statistics();
+ _stats.setHasMinimum(stats.has_min_children());
+ _stats.setHasMaximum(stats.has_max_children());
+ _stats.setHasSum(stats.has_total_children());
+
+ _stats.setMinimum(stats.min_children());
+ _stats.setMaximum(stats.max_children());
+ _stats.setSum(stats.total_children());
}
}
diff --git a/c++/src/Statistics.hh b/c++/src/Statistics.hh
index a1aafa7db..e585bf971 100644
--- a/c++/src/Statistics.hh
+++ b/c++/src/Statistics.hh
@@ -278,8 +278,8 @@ namespace orc {
}
void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
+ pbStats.set_has_null(_stats.hasNull());
+ pbStats.set_number_of_values(_stats.getNumberOfValues());
}
std::string toString() const override {
@@ -355,10 +355,10 @@ namespace orc {
}
void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
+ pbStats.set_has_null(_stats.hasNull());
+ pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::BinaryStatistics* binStats = pbStats.mutable_binarystatistics();
+ proto::BinaryStatistics* binStats = pbStats.mutable_binary_statistics();
binStats->set_sum(static_cast<int64_t>(_stats.getTotalLength()));
}
@@ -457,10 +457,10 @@ namespace orc {
}
void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
+ pbStats.set_has_null(_stats.hasNull());
+ pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::BucketStatistics* bucketStats =
pbStats.mutable_bucketstatistics();
+ proto::BucketStatistics* bucketStats =
pbStats.mutable_bucket_statistics();
if (_hasCount) {
bucketStats->add_count(_trueCount);
} else {
@@ -563,10 +563,10 @@ namespace orc {
}
void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
+ pbStats.set_has_null(_stats.hasNull());
+ pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::DateStatistics* dateStatistics = pbStats.mutable_datestatistics();
+ proto::DateStatistics* dateStatistics =
pbStats.mutable_date_statistics();
if (_stats.hasMinimum()) {
dateStatistics->set_maximum(_stats.getMaximum());
dateStatistics->set_minimum(_stats.getMinimum());
@@ -706,10 +706,10 @@ namespace orc {
}
void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
+ pbStats.set_has_null(_stats.hasNull());
+ pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::DecimalStatistics* decStats = pbStats.mutable_decimalstatistics();
+ proto::DecimalStatistics* decStats =
pbStats.mutable_decimal_statistics();
if (_stats.hasMinimum()) {
decStats->set_minimum(_stats.getMinimum().toString(true));
decStats->set_maximum(_stats.getMaximum().toString(true));
@@ -883,10 +883,10 @@ namespace orc {
}
void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
+ pbStats.set_has_null(_stats.hasNull());
+ pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::DoubleStatistics* doubleStats =
pbStats.mutable_doublestatistics();
+ proto::DoubleStatistics* doubleStats =
pbStats.mutable_double_statistics();
if (_stats.hasMinimum()) {
doubleStats->set_minimum(_stats.getMinimum());
doubleStats->set_maximum(_stats.getMaximum());
@@ -1051,10 +1051,10 @@ namespace orc {
}
void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
+ pbStats.set_has_null(_stats.hasNull());
+ pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::IntegerStatistics* intStats = pbStats.mutable_intstatistics();
+ proto::IntegerStatistics* intStats = pbStats.mutable_int_statistics();
if (_stats.hasMinimum()) {
intStats->set_minimum(_stats.getMinimum());
intStats->set_maximum(_stats.getMaximum());
@@ -1220,10 +1220,10 @@ namespace orc {
}
void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
+ pbStats.set_has_null(_stats.hasNull());
+ pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::StringStatistics* strStats = pbStats.mutable_stringstatistics();
+ proto::StringStatistics* strStats = pbStats.mutable_string_statistics();
if (_stats.hasMinimum()) {
strStats->set_minimum(_stats.getMinimum());
strStats->set_maximum(_stats.getMaximum());
@@ -1408,24 +1408,24 @@ namespace orc {
}
void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
+ pbStats.set_has_null(_stats.hasNull());
+ pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::TimestampStatistics* tsStats =
pbStats.mutable_timestampstatistics();
+ proto::TimestampStatistics* tsStats =
pbStats.mutable_timestamp_statistics();
if (_stats.hasMinimum()) {
- tsStats->set_minimumutc(_stats.getMinimum());
- tsStats->set_maximumutc(_stats.getMaximum());
+ tsStats->set_minimum_utc(_stats.getMinimum());
+ tsStats->set_maximum_utc(_stats.getMaximum());
if (_minimumNanos != DEFAULT_MIN_NANOS) {
- tsStats->set_minimumnanos(_minimumNanos + 1);
+ tsStats->set_minimum_nanos(_minimumNanos + 1);
}
if (_maximumNanos != DEFAULT_MAX_NANOS) {
- tsStats->set_maximumnanos(_maximumNanos + 1);
+ tsStats->set_maximum_nanos(_maximumNanos + 1);
}
} else {
- tsStats->clear_minimumutc();
- tsStats->clear_maximumutc();
- tsStats->clear_minimumnanos();
- tsStats->clear_maximumnanos();
+ tsStats->clear_minimum_utc();
+ tsStats->clear_maximum_utc();
+ tsStats->clear_minimum_nanos();
+ tsStats->clear_maximum_nanos();
}
}
@@ -1639,21 +1639,21 @@ namespace orc {
}
void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_hasnull(_stats.hasNull());
- pbStats.set_numberofvalues(_stats.getNumberOfValues());
+ pbStats.set_has_null(_stats.hasNull());
+ pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::CollectionStatistics* collectionStats =
pbStats.mutable_collectionstatistics();
+ proto::CollectionStatistics* collectionStats =
pbStats.mutable_collection_statistics();
if (_stats.hasMinimum()) {
- collectionStats->set_minchildren(_stats.getMinimum());
- collectionStats->set_maxchildren(_stats.getMaximum());
+ collectionStats->set_min_children(_stats.getMinimum());
+ collectionStats->set_max_children(_stats.getMaximum());
} else {
- collectionStats->clear_minchildren();
- collectionStats->clear_maxchildren();
+ collectionStats->clear_min_children();
+ collectionStats->clear_max_children();
}
if (_stats.hasSum()) {
- collectionStats->set_totalchildren(_stats.getSum());
+ collectionStats->set_total_children(_stats.getSum());
} else {
- collectionStats->clear_totalchildren();
+ collectionStats->clear_total_children();
}
}
diff --git a/c++/src/StripeStream.cc b/c++/src/StripeStream.cc
index 6b95a4dc4..8507e9576 100644
--- a/c++/src/StripeStream.cc
+++ b/c++/src/StripeStream.cc
@@ -81,7 +81,7 @@ namespace orc {
proto::Stream_Kind kind,
bool
shouldStream) const {
uint64_t offset = stripeStart;
- uint64_t dataEnd = stripeInfo.offset() + stripeInfo.indexlength() +
stripeInfo.datalength();
+ uint64_t dataEnd = stripeInfo.offset() + stripeInfo.index_length() +
stripeInfo.data_length();
MemoryPool* pool = reader.getFileContents().pool;
for (int i = 0; i < footer.streams_size(); ++i) {
const proto::Stream& stream = footer.streams(i);
@@ -94,8 +94,8 @@ namespace orc {
msg << "Malformed stream meta at stream index " << i << " in stripe
" << stripeIndex
<< ": streamOffset=" << offset << ", streamLength=" <<
streamLength
<< ", stripeOffset=" << stripeInfo.offset()
- << ", stripeIndexLength=" << stripeInfo.indexlength()
- << ", stripeDataLength=" << stripeInfo.datalength();
+ << ", stripeIndexLength=" << stripeInfo.index_length()
+ << ", stripeDataLength=" << stripeInfo.data_length();
throw ParseError(msg.str());
}
return createDecompressor(reader.getCompression(),
diff --git a/c++/src/StripeStream.hh b/c++/src/StripeStream.hh
index a3b748c6e..eae6ce0c3 100644
--- a/c++/src/StripeStream.hh
+++ b/c++/src/StripeStream.hh
@@ -198,12 +198,12 @@ namespace orc {
uint64_t getDictionarySize(uint64_t colId) const override {
ensureStripeFooterLoaded();
return static_cast<ColumnEncodingKind>(
- stripeFooter->columns(static_cast<int>(colId)).dictionarysize());
+ stripeFooter->columns(static_cast<int>(colId)).dictionary_size());
}
const std::string& getWriterTimezone() const override {
ensureStripeFooterLoaded();
- return stripeFooter->writertimezone();
+ return stripeFooter->writer_timezone();
}
};
diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc
index c914d84f4..cf8aa0ad7 100644
--- a/c++/src/TypeImpl.cc
+++ b/c++/src/TypeImpl.cc
@@ -440,7 +440,7 @@ namespace orc {
case proto::Type_Kind_CHAR:
case proto::Type_Kind_VARCHAR:
- ret = std::make_unique<TypeImpl>(static_cast<TypeKind>(type.kind()),
type.maximumlength());
+ ret = std::make_unique<TypeImpl>(static_cast<TypeKind>(type.kind()),
type.maximum_length());
break;
case proto::Type_Kind_DECIMAL:
@@ -465,11 +465,11 @@ namespace orc {
case proto::Type_Kind_STRUCT: {
ret = std::make_unique<TypeImpl>(STRUCT);
- if (type.subtypes_size() > type.fieldnames_size())
- throw ParseError("Illegal STRUCT type that contains less fieldnames
than subtypes");
+ if (type.subtypes_size() > type.field_names_size())
+ throw ParseError("Illegal STRUCT type that contains less field_names
than subtypes");
for (int i = 0; i < type.subtypes_size(); ++i) {
ret->addStructField(
- type.fieldnames(i),
+ type.field_names(i),
convertType(footer.types(static_cast<int>(type.subtypes(i))),
footer));
}
break;
diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc
index 84c8a502e..e478fc7ac 100644
--- a/c++/src/Writer.cc
+++ b/c++/src/Writer.cc
@@ -440,25 +440,25 @@ namespace orc {
currentOffset += magicIdLength;
// Initialize file footer
- fileFooter.set_headerlength(currentOffset);
- fileFooter.set_contentlength(0);
- fileFooter.set_numberofrows(0);
-
fileFooter.set_rowindexstride(static_cast<uint32_t>(options.getRowIndexStride()));
+ fileFooter.set_header_length(currentOffset);
+ fileFooter.set_content_length(0);
+ fileFooter.set_number_of_rows(0);
+
fileFooter.set_row_index_stride(static_cast<uint32_t>(options.getRowIndexStride()));
fileFooter.set_writer(writerId);
- fileFooter.set_softwareversion(ORC_VERSION);
+ fileFooter.set_software_version(ORC_VERSION);
uint32_t index = 0;
buildFooterType(type, fileFooter, index);
// Initialize post script
- postScript.set_footerlength(0);
+ postScript.set_footer_length(0);
postScript.set_compression(WriterImpl::convertCompressionKind(options.getCompression()));
- postScript.set_compressionblocksize(options.getCompressionBlockSize());
+ postScript.set_compression_block_size(options.getCompressionBlockSize());
postScript.add_version(options.getFileVersion().getMajor());
postScript.add_version(options.getFileVersion().getMinor());
- postScript.set_writerversion(WriterVersion_ORC_135);
+ postScript.set_writer_version(WriterVersion_ORC_135);
postScript.set_magic("ORC");
// Initialize first stripe
@@ -467,10 +467,10 @@ namespace orc {
void WriterImpl::initStripe() {
stripeInfo.set_offset(currentOffset);
- stripeInfo.set_indexlength(0);
- stripeInfo.set_datalength(0);
- stripeInfo.set_footerlength(0);
- stripeInfo.set_numberofrows(0);
+ stripeInfo.set_index_length(0);
+ stripeInfo.set_data_length(0);
+ stripeInfo.set_footer_length(0);
+ stripeInfo.set_number_of_rows(0);
stripeRows = indexRows = 0;
}
@@ -507,14 +507,14 @@ namespace orc {
*stripeFooter.add_columns() = encodings[i];
}
- stripeFooter.set_writertimezone(options.getTimezoneName());
+ stripeFooter.set_writer_timezone(options.getTimezoneName());
// add stripe statistics to metadata
- proto::StripeStatistics* stripeStats = metadata.add_stripestats();
+ proto::StripeStatistics* stripeStats = metadata.add_stripe_stats();
std::vector<proto::ColumnStatistics> colStats;
columnWriter->getStripeStatistics(colStats);
for (uint32_t i = 0; i != colStats.size(); ++i) {
- *stripeStats->add_colstats() = colStats[i];
+ *stripeStats->add_col_stats() = colStats[i];
}
// merge stripe stats into file stats and clear stripe stats
columnWriter->mergeStripeStatsIntoFileStats();
@@ -537,10 +537,10 @@ namespace orc {
}
// update stripe info
- stripeInfo.set_indexlength(indexLength);
- stripeInfo.set_datalength(dataLength);
- stripeInfo.set_footerlength(footerLength);
- stripeInfo.set_numberofrows(stripeRows);
+ stripeInfo.set_index_length(indexLength);
+ stripeInfo.set_data_length(dataLength);
+ stripeInfo.set_footer_length(footerLength);
+ stripeInfo.set_number_of_rows(stripeRows);
*fileFooter.add_stripes() = stripeInfo;
@@ -556,12 +556,12 @@ namespace orc {
if (!metadata.SerializeToZeroCopyStream(compressionStream.get())) {
throw std::logic_error("Failed to write metadata.");
}
- postScript.set_metadatalength(compressionStream.get()->flush());
+ postScript.set_metadata_length(compressionStream.get()->flush());
}
void WriterImpl::writeFileFooter() {
- fileFooter.set_contentlength(currentOffset - fileFooter.headerlength());
- fileFooter.set_numberofrows(totalRows);
+ fileFooter.set_content_length(currentOffset - fileFooter.header_length());
+ fileFooter.set_number_of_rows(totalRows);
// update file statistics
std::vector<proto::ColumnStatistics> colStats;
@@ -574,7 +574,7 @@ namespace orc {
if (!fileFooter.SerializeToZeroCopyStream(compressionStream.get())) {
throw std::logic_error("Failed to write file footer.");
}
- postScript.set_footerlength(compressionStream->flush());
+ postScript.set_footer_length(compressionStream->flush());
}
void WriterImpl::writePostscript() {
@@ -588,7 +588,7 @@ namespace orc {
void WriterImpl::buildFooterType(const Type& t, proto::Footer& footer,
uint32_t& index) {
proto::Type protoType;
- protoType.set_maximumlength(static_cast<uint32_t>(t.getMaximumLength()));
+ protoType.set_maximum_length(static_cast<uint32_t>(t.getMaximumLength()));
protoType.set_precision(static_cast<uint32_t>(t.getPrecision()));
protoType.set_scale(static_cast<uint32_t>(t.getScale()));
@@ -686,7 +686,7 @@ namespace orc {
for (uint64_t i = 0; i < t.getSubtypeCount(); ++i) {
// only add subtypes' field names if this type is STRUCT
if (t.getKind() == STRUCT) {
- footer.mutable_types(pos)->add_fieldnames(t.getFieldName(i));
+ footer.mutable_types(pos)->add_field_names(t.getFieldName(i));
}
footer.mutable_types(pos)->add_subtypes(++index);
buildFooterType(*t.getSubtype(i), footer, index);
diff --git a/c++/src/sargs/PredicateLeaf.cc b/c++/src/sargs/PredicateLeaf.cc
index 9e9f41c33..525901b1f 100644
--- a/c++/src/sargs/PredicateLeaf.cc
+++ b/c++/src/sargs/PredicateLeaf.cc
@@ -393,14 +393,14 @@ namespace orc {
static TruthValue evaluateBoolPredicate(const PredicateLeaf::Operator op,
const std::vector<Literal>& literals,
const proto::ColumnStatistics&
stats) {
- bool hasNull = stats.hasnull();
- if (!stats.has_bucketstatistics() || stats.bucketstatistics().count_size()
== 0) {
+ bool hasNull = stats.has_null();
+ if (!stats.has_bucket_statistics() ||
stats.bucket_statistics().count_size() == 0) {
// does not have bool stats
return hasNull ? TruthValue::YES_NO_NULL : TruthValue::YES_NO;
}
- auto trueCount = stats.bucketstatistics().count(0);
- auto falseCount = stats.numberofvalues() - trueCount;
+ auto trueCount = stats.bucket_statistics().count(0);
+ auto falseCount = stats.number_of_values() - trueCount;
switch (op) {
case PredicateLeaf::Operator::IS_NULL:
return hasNull ? TruthValue::YES_NO : TruthValue::NO;
@@ -509,77 +509,80 @@ namespace orc {
TruthValue result = TruthValue::YES_NO_NULL;
switch (mType) {
case PredicateDataType::LONG: {
- if (colStats.has_intstatistics() &&
colStats.intstatistics().has_minimum() &&
- colStats.intstatistics().has_maximum()) {
- const auto& stats = colStats.intstatistics();
+ if (colStats.has_int_statistics() &&
colStats.int_statistics().has_minimum() &&
+ colStats.int_statistics().has_maximum()) {
+ const auto& stats = colStats.int_statistics();
result = evaluatePredicateRange(mOperator, literal2Long(mLiterals),
stats.minimum(),
- stats.maximum(), colStats.hasnull());
+ stats.maximum(),
colStats.has_null());
}
break;
}
case PredicateDataType::FLOAT: {
- if (colStats.has_doublestatistics() &&
colStats.doublestatistics().has_minimum() &&
- colStats.doublestatistics().has_maximum()) {
- const auto& stats = colStats.doublestatistics();
+ if (colStats.has_double_statistics() &&
colStats.double_statistics().has_minimum() &&
+ colStats.double_statistics().has_maximum()) {
+ const auto& stats = colStats.double_statistics();
if (!std::isfinite(stats.sum())) {
- result = colStats.hasnull() ? TruthValue::YES_NO_NULL :
TruthValue::YES_NO;
+ result = colStats.has_null() ? TruthValue::YES_NO_NULL :
TruthValue::YES_NO;
} else {
result = evaluatePredicateRange(mOperator,
literal2Double(mLiterals), stats.minimum(),
- stats.maximum(),
colStats.hasnull());
+ stats.maximum(),
colStats.has_null());
}
}
break;
}
case PredicateDataType::STRING: {
/// TODO: check lowerBound and upperBound as well
- if (colStats.has_stringstatistics() &&
colStats.stringstatistics().has_minimum() &&
- colStats.stringstatistics().has_maximum()) {
- const auto& stats = colStats.stringstatistics();
+ if (colStats.has_string_statistics() &&
colStats.string_statistics().has_minimum() &&
+ colStats.string_statistics().has_maximum()) {
+ const auto& stats = colStats.string_statistics();
result = evaluatePredicateRange(mOperator,
literal2String(mLiterals), stats.minimum(),
- stats.maximum(), colStats.hasnull());
+ stats.maximum(),
colStats.has_null());
}
break;
}
case PredicateDataType::DATE: {
- if (colStats.has_datestatistics() &&
colStats.datestatistics().has_minimum() &&
- colStats.datestatistics().has_maximum()) {
- const auto& stats = colStats.datestatistics();
+ if (colStats.has_date_statistics() &&
colStats.date_statistics().has_minimum() &&
+ colStats.date_statistics().has_maximum()) {
+ const auto& stats = colStats.date_statistics();
result = evaluatePredicateRange(mOperator, literal2Date(mLiterals),
stats.minimum(),
- stats.maximum(), colStats.hasnull());
+ stats.maximum(),
colStats.has_null());
}
break;
}
case PredicateDataType::TIMESTAMP: {
- if (colStats.has_timestampstatistics() &&
colStats.timestampstatistics().has_minimumutc() &&
- colStats.timestampstatistics().has_maximumutc()) {
- const auto& stats = colStats.timestampstatistics();
+ if (colStats.has_timestamp_statistics() &&
+ colStats.timestamp_statistics().has_minimum_utc() &&
+ colStats.timestamp_statistics().has_maximum_utc()) {
+ const auto& stats = colStats.timestamp_statistics();
constexpr int32_t DEFAULT_MIN_NANOS = 0;
constexpr int32_t DEFAULT_MAX_NANOS = 999999;
- int32_t minNano = stats.has_minimumnanos() ? stats.minimumnanos() -
1 : DEFAULT_MIN_NANOS;
- int32_t maxNano = stats.has_maximumnanos() ? stats.maximumnanos() -
1 : DEFAULT_MAX_NANOS;
+ int32_t minNano =
+ stats.has_minimum_nanos() ? stats.minimum_nanos() - 1 :
DEFAULT_MIN_NANOS;
+ int32_t maxNano =
+ stats.has_maximum_nanos() ? stats.maximum_nanos() - 1 :
DEFAULT_MAX_NANOS;
Literal::Timestamp minTimestamp(
- stats.minimumutc() / 1000,
- static_cast<int32_t>((stats.minimumutc() % 1000) * 1000000) +
minNano);
+ stats.minimum_utc() / 1000,
+ static_cast<int32_t>((stats.minimum_utc() % 1000) * 1000000) +
minNano);
Literal::Timestamp maxTimestamp(
- stats.maximumutc() / 1000,
- static_cast<int32_t>((stats.maximumutc() % 1000) * 1000000) +
maxNano);
+ stats.maximum_utc() / 1000,
+ static_cast<int32_t>((stats.maximum_utc() % 1000) * 1000000) +
maxNano);
result = evaluatePredicateRange(mOperator,
literal2Timestamp(mLiterals), minTimestamp,
- maxTimestamp, colStats.hasnull());
+ maxTimestamp, colStats.has_null());
}
break;
}
case PredicateDataType::DECIMAL: {
- if (colStats.has_decimalstatistics() &&
colStats.decimalstatistics().has_minimum() &&
- colStats.decimalstatistics().has_maximum()) {
- const auto& stats = colStats.decimalstatistics();
+ if (colStats.has_decimal_statistics() &&
colStats.decimal_statistics().has_minimum() &&
+ colStats.decimal_statistics().has_maximum()) {
+ const auto& stats = colStats.decimal_statistics();
result = evaluatePredicateRange(mOperator,
literal2Decimal(mLiterals),
Decimal(stats.minimum()),
Decimal(stats.maximum()),
- colStats.hasnull());
+ colStats.has_null());
}
break;
}
case PredicateDataType::BOOLEAN: {
- if (colStats.has_bucketstatistics()) {
+ if (colStats.has_bucket_statistics()) {
result = evaluateBoolPredicate(mOperator, mLiterals, colStats);
}
break;
@@ -589,7 +592,7 @@ namespace orc {
}
// make sure null literal is respected for IN operator
- if (mOperator == Operator::IN && colStats.hasnull()) {
+ if (mOperator == Operator::IN && colStats.has_null()) {
for (const auto& literal : mLiterals) {
if (literal.isNull()) {
result = TruthValue::YES_NO_NULL;
@@ -698,12 +701,13 @@ namespace orc {
}
}
- bool allNull = colStats.hasnull() && colStats.numberofvalues() == 0;
+ bool allNull = colStats.has_null() && colStats.number_of_values() == 0;
if (mOperator == Operator::IS_NULL ||
((mOperator == Operator::EQUALS || mOperator ==
Operator::NULL_SAFE_EQUALS) &&
mLiterals.at(0).isNull())) {
// IS_NULL operator does not need to check min/max stats and bloom filter
- return allNull ? TruthValue::YES : (colStats.hasnull() ?
TruthValue::YES_NO : TruthValue::NO);
+ return allNull ? TruthValue::YES
+ : (colStats.has_null() ? TruthValue::YES_NO :
TruthValue::NO);
} else if (allNull) {
// if we don't have any value, everything must have been null
return TruthValue::IS_NULL;
@@ -711,7 +715,7 @@ namespace orc {
TruthValue result = evaluatePredicateMinMax(colStats);
if (shouldEvaluateBloomFilter(mOperator, result, bloomFilter)) {
- return evaluatePredicateBloomFiter(bloomFilter, colStats.hasnull());
+ return evaluatePredicateBloomFiter(bloomFilter, colStats.has_null());
} else {
return result;
}
diff --git a/c++/src/sargs/SargsApplier.cc b/c++/src/sargs/SargsApplier.cc
index 2cc3a7cf4..7032a8812 100644
--- a/c++/src/sargs/SargsApplier.cc
+++ b/c++/src/sargs/SargsApplier.cc
@@ -155,11 +155,11 @@ namespace orc {
bool SargsApplier::evaluateStripeStatistics(const proto::StripeStatistics&
stripeStats,
uint64_t stripeRowGroupCount) {
- if (stripeStats.colstats_size() == 0) {
+ if (stripeStats.col_stats_size() == 0) {
return true;
}
- bool ret = evaluateColumnStatistics(stripeStats.colstats());
+ bool ret = evaluateColumnStatistics(stripeStats.col_stats());
if (!ret) {
// reset mNextSkippedRows when the current stripe does not satisfy the
PPD
mNextSkippedRows.clear();
diff --git a/c++/test/CreateTestFiles.cc b/c++/test/CreateTestFiles.cc
index 56f86fd77..19956afbe 100644
--- a/c++/test/CreateTestFiles.cc
+++ b/c++/test/CreateTestFiles.cc
@@ -42,14 +42,14 @@ void writeCustomOrcFile(const std::string& filename, const
orc::proto::Metadata&
exit(1);
}
orc::proto::PostScript ps;
- ps.set_footerlength(static_cast<uint64_t>(footer.ByteSizeLong()));
+ ps.set_footer_length(static_cast<uint64_t>(footer.ByteSizeLong()));
ps.set_compression(orc::proto::NONE);
- ps.set_compressionblocksize(64 * 1024);
+ ps.set_compression_block_size(64 * 1024);
for (size_t i = 0; i < version.size(); ++i) {
ps.add_version(version[i]);
}
- ps.set_metadatalength(static_cast<uint64_t>(metadata.ByteSizeLong()));
- ps.set_writerversion(writerVersion);
+ ps.set_metadata_length(static_cast<uint64_t>(metadata.ByteSizeLong()));
+ ps.set_writer_version(writerVersion);
ps.set_magic("ORC");
if (!ps.SerializeToOstream(&output)) {
std::cerr << "Failed to write postscript for " << filename << "\n";
@@ -64,15 +64,15 @@ void writeCustomOrcFile(const std::string& filename, const
orc::proto::Metadata&
void writeVersion1999() {
orc::proto::Metadata meta;
orc::proto::Footer footer;
- footer.set_headerlength(3);
- footer.set_contentlength(3);
+ footer.set_header_length(3);
+ footer.set_content_length(3);
orc::proto::Type* type = footer.add_types();
type->set_kind(orc::proto::Type_Kind_STRUCT);
- footer.set_numberofrows(0);
- footer.set_rowindexstride(10000);
+ footer.set_number_of_rows(0);
+ footer.set_row_index_stride(10000);
orc::proto::ColumnStatistics* stats = footer.add_statistics();
- stats->set_numberofvalues(0);
- stats->set_hasnull(false);
+ stats->set_number_of_values(0);
+ stats->set_has_null(false);
std::vector<std::uint32_t> version;
version.push_back(19);
version.push_back(99);
diff --git a/c++/test/TestBloomFilter.cc b/c++/test/TestBloomFilter.cc
index 47ab717de..0b6cc9ebe 100644
--- a/c++/test/TestBloomFilter.cc
+++ b/c++/test/TestBloomFilter.cc
@@ -242,7 +242,7 @@ namespace orc {
proto::BloomFilter pbBloomFilter;
proto::ColumnEncoding encoding;
- encoding.set_bloomencoding(1);
+ encoding.set_bloom_encoding(1);
// serialize
BloomFilterUTF8Utils::serialize(srcBloomFilter, pbBloomFilter);
diff --git a/c++/test/TestBufferedOutputStream.cc
b/c++/test/TestBufferedOutputStream.cc
index 6735ac43d..98c492867 100644
--- a/c++/test/TestBufferedOutputStream.cc
+++ b/c++/test/TestBufferedOutputStream.cc
@@ -109,12 +109,12 @@ namespace orc {
BufferedOutputStream bufStream(*pool, &memStream, capacity, block,
&metrics);
proto::PostScript ps;
- ps.set_footerlength(197934);
+ ps.set_footer_length(197934);
ps.set_compression(proto::ZLIB);
ps.add_version(6);
ps.add_version(20);
- ps.set_metadatalength(100);
- ps.set_writerversion(789);
+ ps.set_metadata_length(100);
+ ps.set_writer_version(789);
ps.set_magic("protobuff_serialization");
EXPECT_TRUE(ps.SerializeToZeroCopyStream(&bufStream));
@@ -124,12 +124,12 @@ namespace orc {
proto::PostScript ps2;
ps2.ParseFromArray(memStream.getData(),
static_cast<int>(memStream.getLength()));
- EXPECT_EQ(ps.footerlength(), ps2.footerlength());
+ EXPECT_EQ(ps.footer_length(), ps2.footer_length());
EXPECT_EQ(ps.compression(), ps2.compression());
EXPECT_EQ(ps.version(0), ps2.version(0));
EXPECT_EQ(ps.version(1), ps2.version(1));
- EXPECT_EQ(ps.metadatalength(), ps2.metadatalength());
- EXPECT_EQ(ps.writerversion(), ps2.writerversion());
+ EXPECT_EQ(ps.metadata_length(), ps2.metadata_length());
+ EXPECT_EQ(ps.writer_version(), ps2.writer_version());
EXPECT_EQ(ps.magic(), ps2.magic());
}
} // namespace orc
diff --git a/c++/test/TestColumnReader.cc b/c++/test/TestColumnReader.cc
index 98f2d86bd..fcbf00763 100644
--- a/c++/test/TestColumnReader.cc
+++ b/c++/test/TestColumnReader.cc
@@ -339,7 +339,7 @@ namespace orc {
EXPECT_CALL(streams,
getEncoding(0)).WillRepeatedly(testing::Return(directEncoding));
proto::ColumnEncoding dictionaryEncoding;
dictionaryEncoding.set_kind(proto::ColumnEncoding_Kind_DICTIONARY);
- dictionaryEncoding.set_dictionarysize(2);
+ dictionaryEncoding.set_dictionary_size(2);
EXPECT_CALL(streams,
getEncoding(1)).WillRepeatedly(testing::Return(dictionaryEncoding));
// set getStream
@@ -438,12 +438,12 @@ namespace orc {
proto::ColumnEncoding dictionary2Encoding;
dictionary2Encoding.set_kind(proto::ColumnEncoding_Kind_DICTIONARY);
- dictionary2Encoding.set_dictionarysize(2);
+ dictionary2Encoding.set_dictionary_size(2);
EXPECT_CALL(streams,
getEncoding(1)).WillRepeatedly(testing::Return(dictionary2Encoding));
proto::ColumnEncoding dictionary0Encoding;
dictionary0Encoding.set_kind(proto::ColumnEncoding_Kind_DICTIONARY);
- dictionary0Encoding.set_dictionarysize(0);
+ dictionary0Encoding.set_dictionary_size(0);
EXPECT_CALL(streams, getEncoding(testing::Ge(2)))
.WillRepeatedly(testing::Return(dictionary0Encoding));
@@ -645,7 +645,7 @@ namespace orc {
EXPECT_CALL(streams,
getEncoding(testing::_)).WillRepeatedly(testing::Return(directEncoding));
proto::ColumnEncoding dictionaryEncoding;
dictionaryEncoding.set_kind(proto::ColumnEncoding_Kind_DICTIONARY);
- dictionaryEncoding.set_dictionarysize(100);
+ dictionaryEncoding.set_dictionary_size(100);
EXPECT_CALL(streams,
getEncoding(2)).WillRepeatedly(testing::Return(dictionaryEncoding));
// set getStream
@@ -4091,7 +4091,7 @@ namespace orc {
EXPECT_CALL(streams,
getEncoding(0)).WillRepeatedly(testing::Return(directEncoding));
proto::ColumnEncoding dictionaryEncoding;
dictionaryEncoding.set_kind(proto::ColumnEncoding_Kind_DICTIONARY);
- dictionaryEncoding.set_dictionarysize(2);
+ dictionaryEncoding.set_dictionary_size(2);
EXPECT_CALL(streams,
getEncoding(1)).WillRepeatedly(testing::Return(dictionaryEncoding));
// set getStream
diff --git a/c++/test/TestColumnStatistics.cc b/c++/test/TestColumnStatistics.cc
index 8a4c59b14..5cf2d9e41 100644
--- a/c++/test/TestColumnStatistics.cc
+++ b/c++/test/TestColumnStatistics.cc
@@ -449,10 +449,10 @@ namespace orc {
proto::ColumnStatistics pbStats;
tsStats->toProtoBuf(pbStats);
- EXPECT_EQ(100, pbStats.timestampstatistics().minimumutc());
- EXPECT_EQ(200, pbStats.timestampstatistics().maximumutc());
- EXPECT_FALSE(pbStats.timestampstatistics().has_minimumnanos());
- EXPECT_FALSE(pbStats.timestampstatistics().has_maximumnanos());
+ EXPECT_EQ(100, pbStats.timestamp_statistics().minimum_utc());
+ EXPECT_EQ(200, pbStats.timestamp_statistics().maximum_utc());
+ EXPECT_FALSE(pbStats.timestamp_statistics().has_minimum_nanos());
+ EXPECT_FALSE(pbStats.timestamp_statistics().has_maximum_nanos());
StatContext ctx(true, nullptr);
auto tsStatsFromPb =
std::make_unique<TimestampColumnStatisticsImpl>(pbStats, ctx);
@@ -465,12 +465,12 @@ namespace orc {
tsStats->update(500, 9999);
pbStats.Clear();
tsStats->toProtoBuf(pbStats);
- EXPECT_EQ(50, pbStats.timestampstatistics().minimumutc());
- EXPECT_EQ(500, pbStats.timestampstatistics().maximumutc());
- EXPECT_TRUE(pbStats.timestampstatistics().has_minimumnanos());
- EXPECT_TRUE(pbStats.timestampstatistics().has_maximumnanos());
- EXPECT_EQ(5555 + 1, pbStats.timestampstatistics().minimumnanos());
- EXPECT_EQ(9999 + 1, pbStats.timestampstatistics().maximumnanos());
+ EXPECT_EQ(50, pbStats.timestamp_statistics().minimum_utc());
+ EXPECT_EQ(500, pbStats.timestamp_statistics().maximum_utc());
+ EXPECT_TRUE(pbStats.timestamp_statistics().has_minimum_nanos());
+ EXPECT_TRUE(pbStats.timestamp_statistics().has_maximum_nanos());
+ EXPECT_EQ(5555 + 1, pbStats.timestamp_statistics().minimum_nanos());
+ EXPECT_EQ(9999 + 1, pbStats.timestamp_statistics().maximum_nanos());
tsStatsFromPb.reset(new TimestampColumnStatisticsImpl(pbStats, ctx));
EXPECT_EQ(50, tsStatsFromPb->getMinimum());
diff --git a/c++/test/TestCompression.cc b/c++/test/TestCompression.cc
index 2dba7b9a3..a77800a3d 100644
--- a/c++/test/TestCompression.cc
+++ b/c++/test/TestCompression.cc
@@ -195,10 +195,10 @@ namespace orc {
uint64_t block = 256;
proto::PostScript ps;
- ps.set_footerlength(197934);
+ ps.set_footer_length(197934);
ps.set_compression(protoKind);
- ps.set_metadatalength(100);
- ps.set_writerversion(789);
+ ps.set_metadata_length(100);
+ ps.set_writer_version(789);
ps.set_magic("protobuff_serialization");
for (uint32_t i = 0; i < 1024; ++i) {
ps.add_version(static_cast<uint32_t>(std::rand()));
@@ -219,10 +219,10 @@ namespace orc {
proto::PostScript ps2;
ps2.ParseFromZeroCopyStream(decompressStream.get());
- EXPECT_EQ(ps.footerlength(), ps2.footerlength());
+ EXPECT_EQ(ps.footer_length(), ps2.footer_length());
EXPECT_EQ(ps.compression(), ps2.compression());
- EXPECT_EQ(ps.metadatalength(), ps2.metadatalength());
- EXPECT_EQ(ps.writerversion(), ps2.writerversion());
+ EXPECT_EQ(ps.metadata_length(), ps2.metadata_length());
+ EXPECT_EQ(ps.writer_version(), ps2.writer_version());
EXPECT_EQ(ps.magic(), ps2.magic());
for (int i = 0; i < 1024; ++i) {
EXPECT_EQ(ps.version(i), ps2.version(i));
diff --git a/c++/test/TestPredicateLeaf.cc b/c++/test/TestPredicateLeaf.cc
index e0ab293d3..2703776e3 100644
--- a/c++/test/TestPredicateLeaf.cc
+++ b/c++/test/TestPredicateLeaf.cc
@@ -74,20 +74,20 @@ namespace orc {
static proto::ColumnStatistics createBooleanStats(uint64_t n, uint64_t
trueCount,
bool hasNull = false) {
proto::ColumnStatistics colStats;
- colStats.set_hasnull(hasNull);
- colStats.set_numberofvalues(n);
+ colStats.set_has_null(hasNull);
+ colStats.set_number_of_values(n);
- proto::BucketStatistics* boolStats = colStats.mutable_bucketstatistics();
+ proto::BucketStatistics* boolStats = colStats.mutable_bucket_statistics();
boolStats->add_count(trueCount);
return colStats;
}
static proto::ColumnStatistics createIntStats(int64_t min, int64_t max, bool
hasNull = false) {
proto::ColumnStatistics colStats;
- colStats.set_hasnull(hasNull);
- colStats.set_numberofvalues(10);
+ colStats.set_has_null(hasNull);
+ colStats.set_number_of_values(10);
- proto::IntegerStatistics* intStats = colStats.mutable_intstatistics();
+ proto::IntegerStatistics* intStats = colStats.mutable_int_statistics();
intStats->set_minimum(min);
intStats->set_maximum(max);
return colStats;
@@ -95,10 +95,10 @@ namespace orc {
static proto::ColumnStatistics createDoubleStats(double min, double max,
bool hasNull = false) {
proto::ColumnStatistics colStats;
- colStats.set_hasnull(hasNull);
- colStats.set_numberofvalues(10);
+ colStats.set_has_null(hasNull);
+ colStats.set_number_of_values(10);
- proto::DoubleStatistics* doubleStats = colStats.mutable_doublestatistics();
+ proto::DoubleStatistics* doubleStats =
colStats.mutable_double_statistics();
const auto& curr_sum = min + max;
doubleStats->set_minimum(min);
doubleStats->set_maximum(max);
@@ -109,10 +109,10 @@ namespace orc {
static proto::ColumnStatistics createDecimalStats(Decimal min, Decimal max,
bool hasNull = false) {
proto::ColumnStatistics colStats;
- colStats.set_hasnull(hasNull);
- colStats.set_numberofvalues(10);
+ colStats.set_has_null(hasNull);
+ colStats.set_number_of_values(10);
- proto::DecimalStatistics* decimalStats =
colStats.mutable_decimalstatistics();
+ proto::DecimalStatistics* decimalStats =
colStats.mutable_decimal_statistics();
decimalStats->set_minimum(min.toString(true));
decimalStats->set_maximum(max.toString(true));
return colStats;
@@ -120,10 +120,10 @@ namespace orc {
static proto::ColumnStatistics createDateStats(int32_t min, int32_t max,
bool hasNull = false) {
proto::ColumnStatistics colStats;
- colStats.set_hasnull(hasNull);
- colStats.set_numberofvalues(10);
+ colStats.set_has_null(hasNull);
+ colStats.set_number_of_values(10);
- proto::DateStatistics* dateStats = colStats.mutable_datestatistics();
+ proto::DateStatistics* dateStats = colStats.mutable_date_statistics();
dateStats->set_minimum(min);
dateStats->set_maximum(max);
return colStats;
@@ -132,12 +132,12 @@ namespace orc {
static proto::ColumnStatistics createTimestampStats(int64_t min, int64_t max,
bool hasNull = false) {
proto::ColumnStatistics colStats;
- colStats.set_hasnull(hasNull);
- colStats.set_numberofvalues(10);
+ colStats.set_has_null(hasNull);
+ colStats.set_number_of_values(10);
- proto::TimestampStatistics* tsStats =
colStats.mutable_timestampstatistics();
- tsStats->set_minimumutc(min);
- tsStats->set_maximumutc(max);
+ proto::TimestampStatistics* tsStats =
colStats.mutable_timestamp_statistics();
+ tsStats->set_minimum_utc(min);
+ tsStats->set_maximum_utc(max);
return colStats;
}
@@ -145,24 +145,24 @@ namespace orc {
int64_t maxSecond,
int32_t maxNano,
bool hasNull = false) {
proto::ColumnStatistics colStats;
- colStats.set_hasnull(hasNull);
- colStats.set_numberofvalues(10);
-
- proto::TimestampStatistics* tsStats =
colStats.mutable_timestampstatistics();
- tsStats->set_minimumutc(minSecond * 1000 + minNano / 1000000);
- tsStats->set_maximumutc(maxSecond * 1000 + maxNano / 1000000);
- tsStats->set_minimumnanos((minNano % 1000000) + 1);
- tsStats->set_maximumnanos((maxNano % 1000000) + 1);
+ colStats.set_has_null(hasNull);
+ colStats.set_number_of_values(10);
+
+ proto::TimestampStatistics* tsStats =
colStats.mutable_timestamp_statistics();
+ tsStats->set_minimum_utc(minSecond * 1000 + minNano / 1000000);
+ tsStats->set_maximum_utc(maxSecond * 1000 + maxNano / 1000000);
+ tsStats->set_minimum_nanos((minNano % 1000000) + 1);
+ tsStats->set_maximum_nanos((maxNano % 1000000) + 1);
return colStats;
}
static proto::ColumnStatistics createStringStats(std::string min,
std::string max,
bool hasNull = false) {
proto::ColumnStatistics colStats;
- colStats.set_hasnull(hasNull);
- colStats.set_numberofvalues(10);
+ colStats.set_has_null(hasNull);
+ colStats.set_number_of_values(10);
- proto::StringStatistics* strStats = colStats.mutable_stringstatistics();
+ proto::StringStatistics* strStats = colStats.mutable_string_statistics();
strStats->set_minimum(min);
strStats->set_maximum(max);
return colStats;
diff --git a/c++/test/TestSargsApplier.cc b/c++/test/TestSargsApplier.cc
index ebf8f0a64..2ba927a5f 100644
--- a/c++/test/TestSargsApplier.cc
+++ b/c++/test/TestSargsApplier.cc
@@ -56,8 +56,8 @@ namespace orc {
static proto::ColumnStatistics createIntStats(int64_t min, int64_t max, bool
hasNull = false) {
proto::ColumnStatistics statistics;
- statistics.set_hasnull(hasNull);
- auto intStats = statistics.mutable_intstatistics();
+ statistics.set_has_null(hasNull);
+ auto intStats = statistics.mutable_int_statistics();
intStats->set_minimum(min);
intStats->set_maximum(max);
return statistics;
@@ -117,10 +117,10 @@ namespace orc {
{
orc::proto::StripeStatistics stripeStats;
proto::ColumnStatistics structStatistics;
- structStatistics.set_hasnull(false);
- *stripeStats.add_colstats() = structStatistics;
- *stripeStats.add_colstats() = createIntStats(0L, 10L);
- *stripeStats.add_colstats() = createIntStats(0L, 50L);
+ structStatistics.set_has_null(false);
+ *stripeStats.add_col_stats() = structStatistics;
+ *stripeStats.add_col_stats() = createIntStats(0L, 10L);
+ *stripeStats.add_col_stats() = createIntStats(0L, 50L);
ReaderMetrics metrics;
SargsApplier applier(*type, sarg.get(), 1000, WriterVersion_ORC_135,
&metrics);
EXPECT_FALSE(applier.evaluateStripeStatistics(stripeStats, 1));
@@ -131,10 +131,10 @@ namespace orc {
{
orc::proto::StripeStatistics stripeStats;
proto::ColumnStatistics structStatistics;
- structStatistics.set_hasnull(false);
- *stripeStats.add_colstats() = structStatistics;
- *stripeStats.add_colstats() = createIntStats(0L, 50L);
- *stripeStats.add_colstats() = createIntStats(0L, 50L);
+ structStatistics.set_has_null(false);
+ *stripeStats.add_col_stats() = structStatistics;
+ *stripeStats.add_col_stats() = createIntStats(0L, 50L);
+ *stripeStats.add_col_stats() = createIntStats(0L, 50L);
ReaderMetrics metrics;
SargsApplier applier(*type, sarg.get(), 1000, WriterVersion_ORC_135,
&metrics);
EXPECT_TRUE(applier.evaluateStripeStatistics(stripeStats, 1));
@@ -145,7 +145,7 @@ namespace orc {
{
orc::proto::Footer footer;
proto::ColumnStatistics structStatistics;
- structStatistics.set_hasnull(false);
+ structStatistics.set_has_null(false);
*footer.add_statistics() = structStatistics;
*footer.add_statistics() = createIntStats(0L, 10L);
*footer.add_statistics() = createIntStats(0L, 50L);
@@ -159,7 +159,7 @@ namespace orc {
{
orc::proto::Footer footer;
proto::ColumnStatistics structStatistics;
- structStatistics.set_hasnull(false);
+ structStatistics.set_has_null(false);
*footer.add_statistics() = structStatistics;
*footer.add_statistics() = createIntStats(0L, 50L);
*footer.add_statistics() = createIntStats(0L, 30L);
@@ -173,7 +173,7 @@ namespace orc {
{
orc::proto::Footer footer;
proto::ColumnStatistics structStatistics;
- structStatistics.set_hasnull(false);
+ structStatistics.set_has_null(false);
*footer.add_statistics() = structStatistics;
*footer.add_statistics() = createIntStats(0L, 50L);
*footer.add_statistics() = createIntStats(0L, 50L);
diff --git a/c++/test/TestType.cc b/c++/test/TestType.cc
index c1d7e360f..c9ac2f285 100644
--- a/c++/test/TestType.cc
+++ b/c++/test/TestType.cc
@@ -393,14 +393,14 @@ namespace orc {
illStructType.set_kind(proto::Type_Kind_STRUCT);
structType.set_kind(proto::Type_Kind_STRUCT);
structType.add_subtypes(0); // construct a loop back to root
- structType.add_fieldnames("root");
+ structType.add_field_names("root");
illStructType.add_subtypes(1);
- illStructType.add_fieldnames("f1");
+ illStructType.add_field_names("f1");
illStructType.add_subtypes(2);
*(footer.add_types()) = illStructType;
*(footer.add_types()) = structType;
testCorruptHelper(illStructType, footer,
- "Illegal STRUCT type that contains less fieldnames than
subtypes");
+ "Illegal STRUCT type that contains less field_names than
subtypes");
}
void expectParseError(const proto::Footer& footer, const char* errMsg) {
@@ -421,33 +421,33 @@ namespace orc {
rootType.set_kind(proto::Type_Kind_STRUCT);
rootType.add_subtypes(1); // add a non existent type id
- rootType.add_fieldnames("f1");
+ rootType.add_field_names("f1");
*(footer.add_types()) = rootType;
expectParseError(footer, "Footer is corrupt: types(1) not exists");
footer.clear_types();
rootType.clear_subtypes();
- rootType.clear_fieldnames();
+ rootType.clear_field_names();
proto::Type structType;
structType.set_kind(proto::Type_Kind_STRUCT);
structType.add_subtypes(0); // construct a loop back to root
- structType.add_fieldnames("root");
+ structType.add_field_names("root");
rootType.add_subtypes(1);
- rootType.add_fieldnames("f1");
+ rootType.add_field_names("f1");
*(footer.add_types()) = rootType;
*(footer.add_types()) = structType;
expectParseError(footer, "Footer is corrupt: malformed link from type 1 to
0");
footer.clear_types();
rootType.clear_subtypes();
- rootType.clear_fieldnames();
+ rootType.clear_field_names();
proto::Type listType;
listType.set_kind(proto::Type_Kind_LIST);
proto::Type mapType;
mapType.set_kind(proto::Type_Kind_MAP);
proto::Type unionType;
unionType.set_kind(proto::Type_Kind_UNION);
- rootType.add_fieldnames("f1");
+ rootType.add_field_names("f1");
rootType.add_subtypes(1); // 0 -> 1
listType.add_subtypes(2); // 1 -> 2
mapType.add_subtypes(3); // 2 -> 3
@@ -460,15 +460,15 @@ namespace orc {
footer.clear_types();
rootType.clear_subtypes();
- rootType.clear_fieldnames();
+ rootType.clear_field_names();
proto::Type intType;
intType.set_kind(proto::Type_Kind_INT);
proto::Type strType;
strType.set_kind(proto::Type_Kind_STRING);
rootType.add_subtypes(2);
- rootType.add_fieldnames("f2");
+ rootType.add_field_names("f2");
rootType.add_subtypes(1);
- rootType.add_fieldnames("f1");
+ rootType.add_field_names("f1");
*(footer.add_types()) = rootType;
*(footer.add_types()) = intType;
*(footer.add_types()) = strType;
@@ -476,7 +476,7 @@ namespace orc {
footer.clear_types();
rootType.clear_subtypes();
- rootType.clear_fieldnames();
+ rootType.clear_field_names();
rootType.set_kind(proto::Type_Kind_STRUCT);
rootType.add_subtypes(1);
*(footer.add_types()) = rootType;
@@ -485,7 +485,7 @@ namespace orc {
"Footer is corrupt: STRUCT type 0 has 1 subTypes, but has
0 fieldNames");
// Should pass the check after adding the field name
footer.clear_types();
- rootType.add_fieldnames("f1");
+ rootType.add_field_names("f1");
*(footer.add_types()) = rootType;
*(footer.add_types()) = intType;
checkProtoTypes(footer);
diff --git a/cmake_modules/ThirdpartyToolchain.cmake
b/cmake_modules/ThirdpartyToolchain.cmake
index d92c926eb..58ec26221 100644
--- a/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-set(ORC_FORMAT_VERSION "1.0.0-alpha")
+set(ORC_FORMAT_VERSION "1.0.0-beta")
set(LZ4_VERSION "1.9.3")
set(SNAPPY_VERSION "1.1.7")
set(ZLIB_VERSION "1.2.11")
@@ -73,7 +73,7 @@ endif ()
# ORC Format
ExternalProject_Add (orc-format_ep
URL
"https://github.com/apache/orc-format/archive/refs/tags/v${ORC_FORMAT_VERSION}.tar.gz"
- URL_HASH
SHA256=d04e878feec01dd9a3ce20553c0bfc70e856a319fe8693725a699bb077d0d286
+ URL_HASH
SHA256=28184fac3f182be5ead6e31972222b57754532ae94a89c13e9b9733378b60149
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index 8eae7a7cd..bb6bc9e79 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -4157,9 +4157,9 @@ public class TestVectorOrcFile {
String m = e.getMessage();
assertTrue(m.contains("version1999.orc was written by a future ORC
version 19.99."));
assertTrue(m.contains("This file is not readable by this version of
ORC."));
- assertTrue(m.contains("Postscript: footerLength: 19 compression: NONE " +
- "compressionBlockSize: 65536 version: 19 version: 99 metadataLength:
0 " +
- "writerVersion: 1"));
+ assertTrue(m.contains("Postscript: footer_length: 19 compression: NONE "
+
+ "compression_block_size: 65536 version: 19 version: 99
metadata_length: 0 " +
+ "writer_version: 1"));
}
}
diff --git a/tools/test/TestFileMetadata.cc b/tools/test/TestFileMetadata.cc
index 809d6f3b6..bfc444489 100644
--- a/tools/test/TestFileMetadata.cc
+++ b/tools/test/TestFileMetadata.cc
@@ -30,50 +30,50 @@ TEST(TestFileMetadata, testRaw) {
const std::string expected = "Raw file tail: " + file +
"\n"
"postscript {\n"
- " footerLength: 288\n"
+ " footer_length: 288\n"
" compression: NONE\n"
" version: 0\n"
" version: 12\n"
- " metadataLength: 526\n"
+ " metadata_length: 526\n"
" magic: \"ORC\"\n"
"}\n"
"footer {\n"
- " headerLength: 3\n"
- " contentLength: 245568\n"
+ " header_length: 3\n"
+ " content_length: 245568\n"
" stripes {\n"
" offset: 3\n"
- " indexLength: 137\n"
- " dataLength: 45282\n"
- " footerLength: 149\n"
- " numberOfRows: 5000\n"
+ " index_length: 137\n"
+ " data_length: 45282\n"
+ " footer_length: 149\n"
+ " number_of_rows: 5000\n"
" }\n"
" stripes {\n"
" offset: 45571\n"
- " indexLength: 137\n"
- " dataLength: 45282\n"
- " footerLength: 149\n"
- " numberOfRows: 5000\n"
+ " index_length: 137\n"
+ " data_length: 45282\n"
+ " footer_length: 149\n"
+ " number_of_rows: 5000\n"
" }\n"
" stripes {\n"
" offset: 91139\n"
- " indexLength: 137\n"
- " dataLength: 45282\n"
- " footerLength: 149\n"
- " numberOfRows: 5000\n"
+ " index_length: 137\n"
+ " data_length: 45282\n"
+ " footer_length: 149\n"
+ " number_of_rows: 5000\n"
" }\n"
" stripes {\n"
" offset: 136707\n"
- " indexLength: 138\n"
- " dataLength: 45283\n"
- " footerLength: 149\n"
- " numberOfRows: 5000\n"
+ " index_length: 138\n"
+ " data_length: 45283\n"
+ " footer_length: 149\n"
+ " number_of_rows: 5000\n"
" }\n"
" stripes {\n"
" offset: 200000\n"
- " indexLength: 137\n"
- " dataLength: 45282\n"
- " footerLength: 149\n"
- " numberOfRows: 5000\n"
+ " index_length: 137\n"
+ " data_length: 45282\n"
+ " footer_length: 149\n"
+ " number_of_rows: 5000\n"
" }\n"
" types {\n"
" kind: STRUCT\n"
@@ -82,11 +82,11 @@ TEST(TestFileMetadata, testRaw) {
" subtypes: 3\n"
" subtypes: 4\n"
" subtypes: 5\n"
- " fieldNames: \"userid\"\n"
- " fieldNames: \"string1\"\n"
- " fieldNames: \"subtype\"\n"
- " fieldNames: \"decimal1\"\n"
- " fieldNames: \"ts\"\n"
+ " field_names: \"userid\"\n"
+ " field_names: \"string1\"\n"
+ " field_names: \"subtype\"\n"
+ " field_names: \"decimal1\"\n"
+ " field_names: \"ts\"\n"
" }\n"
" types {\n"
" kind: LONG\n"
@@ -103,49 +103,49 @@ TEST(TestFileMetadata, testRaw) {
" types {\n"
" kind: TIMESTAMP\n"
" }\n"
- " numberOfRows: 25000\n"
+ " number_of_rows: 25000\n"
" statistics {\n"
- " numberOfValues: 25000\n"
+ " number_of_values: 25000\n"
" }\n"
" statistics {\n"
- " numberOfValues: 25000\n"
- " intStatistics {\n"
+ " number_of_values: 25000\n"
+ " int_statistics {\n"
" minimum: 2\n"
" maximum: 100\n"
" sum: 2499619\n"
" }\n"
" }\n"
" statistics {\n"
- " numberOfValues: 25000\n"
- " stringStatistics {\n"
+ " number_of_values: 25000\n"
+ " string_statistics {\n"
" minimum: \"bar\"\n"
" maximum: \"zebra\"\n"
" sum: 124990\n"
" }\n"
" }\n"
" statistics {\n"
- " numberOfValues: 25000\n"
- " doubleStatistics {\n"
+ " number_of_values: 25000\n"
+ " double_statistics {\n"
" minimum: 0.8\n"
" maximum: 80\n"
" sum: 200051.40000000002\n"
" }\n"
" }\n"
" statistics {\n"
- " numberOfValues: 25000\n"
- " decimalStatistics {\n"
+ " number_of_values: 25000\n"
+ " decimal_statistics {\n"
" minimum: \"0\"\n"
" maximum: \"5.5\"\n"
" sum: \"16.6\"\n"
" }\n"
" }\n"
" statistics {\n"
- " numberOfValues: 25000\n"
+ " number_of_values: 25000\n"
" }\n"
- " rowIndexStride: 10000\n"
+ " row_index_stride: 10000\n"
"}\n"
- "fileLength: 246402\n"
- "postscriptLength: 19\n";
+ "file_length: 246402\n"
+ "postscript_length: 19\n";
std::string output;
std::string error;