This is an automated email from the ASF dual-hosted git repository. gangwu pushed a commit to branch ORC-611 in repository https://gitbox.apache.org/repos/asf/orc.git
commit 7c863a02e32c4962f2a46d4e722f0b1eae88bffc Author: Gang Wu <[email protected]> AuthorDate: Tue Oct 27 17:05:12 2020 +0800 rename minimumNano to minimumNanos --- c++/include/orc/Statistics.hh | 4 ++-- c++/src/Statistics.cc | 10 +++++---- c++/src/Statistics.hh | 46 ++++++++++++++++++++-------------------- c++/src/sargs/PredicateLeaf.cc | 2 +- c++/test/TestColumnStatistics.cc | 40 +++++++++++++++++----------------- 5 files changed, 52 insertions(+), 50 deletions(-) diff --git a/c++/include/orc/Statistics.hh b/c++/include/orc/Statistics.hh index 7c6c03d..c7b0781 100644 --- a/c++/include/orc/Statistics.hh +++ b/c++/include/orc/Statistics.hh @@ -356,13 +356,13 @@ namespace orc { * Get the last 6 digits of nanosecond of minimum timestamp. * @return last 6 digits of nanosecond of minimum timestamp. */ - virtual int32_t getMinimumNano() const = 0; + virtual int32_t getMinimumNanos() const = 0; /** * Get the last 6 digits of nanosecond of maximum timestamp. * @return last 6 digits of nanosecond of maximum timestamp. */ - virtual int32_t getMaximumNano() const = 0; + virtual int32_t getMaximumNanos() const = 0; }; class Statistics { diff --git a/c++/src/Statistics.cc b/c++/src/Statistics.cc index 5fdacb2..20e2fc9 100644 --- a/c++/src/Statistics.cc +++ b/c++/src/Statistics.cc @@ -317,8 +317,8 @@ namespace orc { _stats.setMaximum(0); _lowerBound = 0; _upperBound = 0; - _minimumNano = DEFAULT_MIN_NANOS; - _maximumNano = DEFAULT_MAX_NANOS; + _minimumNanos = DEFAULT_MIN_NANOS; + _maximumNanos = DEFAULT_MAX_NANOS; }else{ const proto::TimestampStatistics& stats = pb.timestampstatistics(); _stats.setHasMinimum( @@ -329,9 +329,11 @@ namespace orc { (stats.has_maximum() && (statContext.writerTimezone != nullptr))); _hasLowerBound = stats.has_minimumutc() || stats.has_minimum(); _hasUpperBound = stats.has_maximumutc() || stats.has_maximum(); - _minimumNano = stats.has_minimumnanos() ? + // to be consistent with java side, non-default minimumnanos and maximumnanos + // are added by one in their serialized form. + _minimumNanos = stats.has_minimumnanos() ? stats.minimumnanos() - 1 : DEFAULT_MIN_NANOS; - _maximumNano = stats.has_maximumnanos() ? + _maximumNanos = stats.has_maximumnanos() ? stats.maximumnanos() - 1 : DEFAULT_MAX_NANOS; // Timestamp stats are stored in milliseconds diff --git a/c++/src/Statistics.hh b/c++/src/Statistics.hh index 43a48da..434a0ad 100644 --- a/c++/src/Statistics.hh +++ b/c++/src/Statistics.hh @@ -1214,8 +1214,8 @@ namespace orc { bool _hasUpperBound; int64_t _lowerBound; int64_t _upperBound; - int32_t _minimumNano; // last 6 digits of nanosecond of minimum timestamp - int32_t _maximumNano; // last 6 digits of nanosecond of maximum timestamp + int32_t _minimumNanos; // last 6 digits of nanosecond of minimum timestamp + int32_t _maximumNanos; // last 6 digits of nanosecond of maximum timestamp static constexpr int32_t DEFAULT_MIN_NANOS = 0; static constexpr int32_t DEFAULT_MAX_NANOS = 999999; @@ -1289,18 +1289,18 @@ namespace orc { _stats.setHasMaximum(true); _stats.setMinimum(milli); _stats.setMaximum(milli); - _maximumNano = _minimumNano = nano; + _maximumNanos = _minimumNanos = nano; } else { if (milli <= _stats.getMinimum()) { - if (milli < _stats.getMinimum() || nano < _minimumNano) { - _minimumNano = nano; + if (milli < _stats.getMinimum() || nano < _minimumNanos) { + _minimumNanos = nano; } _stats.setMinimum(milli); } if (milli >= _stats.getMaximum()) { - if (milli > _stats.getMaximum() || nano > _maximumNano) { - _maximumNano = nano; + if (milli > _stats.getMaximum() || nano > _maximumNanos) { + _maximumNanos = nano; } _stats.setMaximum(milli); } @@ -1320,20 +1320,20 @@ namespace orc { _stats.setHasMaximum(true); _stats.setMinimum(tsStats.getMinimum()); _stats.setMaximum(tsStats.getMaximum()); - _minimumNano = tsStats.getMinimumNano(); - _maximumNano = tsStats.getMaximumNano(); + _minimumNanos = tsStats.getMinimumNanos(); + _maximumNanos = tsStats.getMaximumNanos(); } else { if (tsStats.getMaximum() >= _stats.getMaximum()) { if (tsStats.getMaximum() > _stats.getMaximum() || - tsStats.getMaximumNano() > _maximumNano) { - _maximumNano = tsStats.getMaximumNano(); + tsStats.getMaximumNanos() > _maximumNanos) { + _maximumNanos = tsStats.getMaximumNanos(); } _stats.setMaximum(tsStats.getMaximum()); } if (tsStats.getMinimum() <= _stats.getMinimum()) { if (tsStats.getMinimum() < _stats.getMinimum() || - tsStats.getMinimumNano() < _minimumNano) { - _minimumNano = tsStats.getMinimumNano(); + tsStats.getMinimumNanos() < _minimumNanos) { + _minimumNanos = tsStats.getMinimumNanos(); } _stats.setMinimum(tsStats.getMinimum()); } @@ -1343,8 +1343,8 @@ namespace orc { void reset() override { _stats.reset(); - _minimumNano = DEFAULT_MIN_NANOS; - _maximumNano = DEFAULT_MAX_NANOS; + _minimumNanos = DEFAULT_MIN_NANOS; + _maximumNanos = DEFAULT_MAX_NANOS; } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { @@ -1356,11 +1356,11 @@ namespace orc { if (_stats.hasMinimum()) { tsStats->set_minimumutc(_stats.getMinimum()); tsStats->set_maximumutc(_stats.getMaximum()); - if (_minimumNano != DEFAULT_MIN_NANOS) { - tsStats->set_minimumnanos(_minimumNano + 1); + if (_minimumNanos != DEFAULT_MIN_NANOS) { + tsStats->set_minimumnanos(_minimumNanos + 1); } - if (_maximumNano != DEFAULT_MAX_NANOS) { - tsStats->set_maximumnanos(_maximumNano + 1); + if (_maximumNanos != DEFAULT_MAX_NANOS) { + tsStats->set_maximumnanos(_maximumNanos + 1); } } else { tsStats->clear_minimumutc(); @@ -1446,17 +1446,17 @@ namespace orc { } } - int32_t getMinimumNano() const override { + int32_t getMinimumNanos() const override { if (hasMinimum()) { - return _minimumNano; + return _minimumNanos; } else { throw ParseError("Minimum is not defined."); } } - int32_t getMaximumNano() const override { + int32_t getMaximumNanos() const override { if (hasMaximum()) { - return _maximumNano; + return _maximumNanos; } else { throw ParseError("Maximum is not defined."); } diff --git a/c++/src/sargs/PredicateLeaf.cc b/c++/src/sargs/PredicateLeaf.cc index 7378f54..cede6d1 100644 --- a/c++/src/sargs/PredicateLeaf.cc +++ b/c++/src/sargs/PredicateLeaf.cc @@ -510,7 +510,7 @@ namespace orc { break; } case PredicateDataType::STRING: { - ///FIXME: check lowerBound and upperBound as well + ///TODO: check lowerBound and upperBound as well if (colStats.has_stringstatistics() && colStats.stringstatistics().has_minimum() && colStats.stringstatistics().has_maximum()) { diff --git a/c++/test/TestColumnStatistics.cc b/c++/test/TestColumnStatistics.cc index 11a5af8..4d187b9 100644 --- a/c++/test/TestColumnStatistics.cc +++ b/c++/test/TestColumnStatistics.cc @@ -257,14 +257,14 @@ namespace orc { tsStats->update(100); EXPECT_EQ(100, tsStats->getMaximum()); EXPECT_EQ(100, tsStats->getMinimum()); - EXPECT_EQ(0, tsStats->getMinimumNano()); - EXPECT_EQ(999999, tsStats->getMaximumNano()); + EXPECT_EQ(0, tsStats->getMinimumNanos()); + EXPECT_EQ(999999, tsStats->getMaximumNanos()); tsStats->update(150); EXPECT_EQ(150, tsStats->getMaximum()); EXPECT_EQ(100, tsStats->getMinimum()); - EXPECT_EQ(0, tsStats->getMinimumNano()); - EXPECT_EQ(999999, tsStats->getMaximumNano()); + EXPECT_EQ(0, tsStats->getMinimumNanos()); + EXPECT_EQ(999999, tsStats->getMaximumNanos()); // test merge std::unique_ptr<TimestampColumnStatisticsImpl> other( @@ -276,8 +276,8 @@ namespace orc { tsStats->merge(*other); EXPECT_EQ(160, tsStats->getMaximum()); EXPECT_EQ(90, tsStats->getMinimum()); - EXPECT_EQ(0, tsStats->getMinimumNano()); - EXPECT_EQ(999999, tsStats->getMaximumNano()); + EXPECT_EQ(0, tsStats->getMinimumNanos()); + EXPECT_EQ(999999, tsStats->getMaximumNanos()); } TEST(ColumnStatistics, dateColumnStatistics) { @@ -400,9 +400,9 @@ namespace orc { tsStats->increase(1); } EXPECT_EQ(102400, tsStats->getMaximum()); - EXPECT_EQ(1024000, tsStats->getMaximumNano()); + EXPECT_EQ(1024000, tsStats->getMaximumNanos()); EXPECT_EQ(100, tsStats->getMinimum()); - EXPECT_EQ(1000, tsStats->getMinimumNano()); + EXPECT_EQ(1000, tsStats->getMinimumNanos()); // update with same milli but different nanos tsStats->update(102400, 1024001); @@ -410,9 +410,9 @@ namespace orc { tsStats->update(100, 1001); tsStats->update(100, 999); EXPECT_EQ(102400, tsStats->getMaximum()); - EXPECT_EQ(1024001, tsStats->getMaximumNano()); + EXPECT_EQ(1024001, tsStats->getMaximumNanos()); EXPECT_EQ(100, tsStats->getMinimum()); - EXPECT_EQ(999, tsStats->getMinimumNano()); + EXPECT_EQ(999, tsStats->getMinimumNanos()); // test merge with no change std::unique_ptr<TimestampColumnStatisticsImpl> other1( @@ -423,9 +423,9 @@ namespace orc { } tsStats->merge(*other1); EXPECT_EQ(102400, tsStats->getMaximum()); - EXPECT_EQ(1024001, tsStats->getMaximumNano()); + EXPECT_EQ(1024001, tsStats->getMaximumNanos()); EXPECT_EQ(100, tsStats->getMinimum()); - EXPECT_EQ(999, tsStats->getMinimumNano()); + EXPECT_EQ(999, tsStats->getMinimumNanos()); // test merge with min/max change only in nano std::unique_ptr<TimestampColumnStatisticsImpl> other2( @@ -434,9 +434,9 @@ namespace orc { other2->update(100, 998); tsStats->merge(*other2); EXPECT_EQ(102400, tsStats->getMaximum()); - EXPECT_EQ(1024002, tsStats->getMaximumNano()); + EXPECT_EQ(1024002, tsStats->getMaximumNanos()); EXPECT_EQ(100, tsStats->getMinimum()); - EXPECT_EQ(998, tsStats->getMinimumNano()); + EXPECT_EQ(998, tsStats->getMinimumNanos()); // test merge with min/max change in milli std::unique_ptr<TimestampColumnStatisticsImpl> other3( @@ -445,9 +445,9 @@ namespace orc { other3->update(99, 1); tsStats->merge(*other3); EXPECT_EQ(102401, tsStats->getMaximum()); - EXPECT_EQ(1, tsStats->getMaximumNano()); + EXPECT_EQ(1, tsStats->getMaximumNanos()); EXPECT_EQ(99, tsStats->getMinimum()); - EXPECT_EQ(1, tsStats->getMinimumNano()); + EXPECT_EQ(1, tsStats->getMinimumNanos()); } TEST(ColumnStatistics, timestampColumnStatisticsProbubuf) { @@ -469,8 +469,8 @@ namespace orc { new TimestampColumnStatisticsImpl(pbStats, ctx)); EXPECT_EQ(100, tsStatsFromPb->getMinimum()); EXPECT_EQ(200, tsStatsFromPb->getMaximum()); - EXPECT_EQ(0, tsStatsFromPb->getMinimumNano()); - EXPECT_EQ(999999, tsStatsFromPb->getMaximumNano()); + EXPECT_EQ(0, tsStatsFromPb->getMinimumNanos()); + EXPECT_EQ(999999, tsStatsFromPb->getMaximumNanos()); tsStats->update(50, 5555); tsStats->update(500, 9999); @@ -486,8 +486,8 @@ namespace orc { tsStatsFromPb.reset(new TimestampColumnStatisticsImpl(pbStats, ctx)); EXPECT_EQ(50, tsStatsFromPb->getMinimum()); EXPECT_EQ(500, tsStatsFromPb->getMaximum()); - EXPECT_EQ(5555, tsStatsFromPb->getMinimumNano()); - EXPECT_EQ(9999, tsStatsFromPb->getMaximumNano()); + EXPECT_EQ(5555, tsStatsFromPb->getMinimumNanos()); + EXPECT_EQ(9999, tsStatsFromPb->getMaximumNanos()); } }
