This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/master by this push:
new 3f0ee8d ORC-443: [C++] Code improvements in ColumnWriter.
3f0ee8d is described below
commit 3f0ee8d1084e413de0727208651e47dc3f11821c
Author: Fang Zheng <[email protected]>
AuthorDate: Mon Dec 3 10:40:35 2018 -0800
ORC-443: [C++] Code improvements in ColumnWriter.
Fixes #344
Signed-off-by: Gang Wu <[email protected]>
---
c++/src/ByteRLE.cc | 8 +-
c++/src/ColumnWriter.cc | 244 ++++++++++++++++++++++++++++--------------------
2 files changed, 148 insertions(+), 104 deletions(-)
diff --git a/c++/src/ByteRLE.cc b/c++/src/ByteRLE.cc
index 2664019..4bf8b7a 100644
--- a/c++/src/ByteRLE.cc
+++ b/c++/src/ByteRLE.cc
@@ -40,7 +40,7 @@ namespace orc {
virtual ~ByteRleEncoderImpl() override;
/**
- * Encode the next batch of values
+ * Encode the next batch of values.
* @param data to be encoded
* @param numValues the number of values to be encoded
* @param notNull If the pointer is null, all values are read. If the
@@ -55,8 +55,8 @@ namespace orc {
virtual uint64_t getBufferSize() const override;
/**
- * Flushing underlying BufferedOutputStream
- */
+ * Flush underlying BufferedOutputStream.
+ */
virtual uint64_t flush() override;
virtual void recordPosition(PositionRecorder* recorder) const override;
@@ -122,7 +122,7 @@ namespace orc {
writeByte(
static_cast<char>(numLiterals - static_cast<int>(MINIMUM_REPEAT)));
writeByte(literals[0]);
- } else {
+ } else {
writeByte(static_cast<char>(-numLiterals));
for (int i = 0; i < numLiterals; ++i) {
writeByte(literals[i]);
diff --git a/c++/src/ColumnWriter.cc b/c++/src/ColumnWriter.cc
index c6c30f3..ef31c45 100644
--- a/c++/src/ColumnWriter.cc
+++ b/c++/src/ColumnWriter.cc
@@ -274,13 +274,13 @@ namespace orc {
ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
const StructVectorBatch* structBatch =
dynamic_cast<const StructVectorBatch *>(&rowBatch);
if (structBatch == nullptr) {
throw InvalidArgument("Failed to cast to StructVectorBatch");
}
+ ColumnWriter::add(rowBatch, offset, numValues);
for (uint32_t i = 0; i < children.size(); ++i) {
children[i]->add(*structBatch->fields[i], offset, numValues);
}
@@ -289,15 +289,17 @@ namespace orc {
if (!structBatch->hasNulls) {
colIndexStatistics->increase(numValues);
} else {
+ uint64_t count = 0;
bool hasNull = false;
const char* notNull = structBatch->notNull.data() + offset;
for (uint64_t i = 0; i < numValues; ++i) {
if (notNull[i]) {
- colIndexStatistics->increase(1);
+ ++count;
} else if (!hasNull) {
hasNull = true;
}
}
+ colIndexStatistics->increase(count);
if (hasNull) {
colIndexStatistics->setHasNull(true);
}
@@ -445,13 +447,18 @@ namespace orc {
ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
-
const LongVectorBatch* longBatch =
dynamic_cast<const LongVectorBatch*>(&rowBatch);
if (longBatch == nullptr) {
throw InvalidArgument("Failed to cast to LongVectorBatch");
}
+ IntegerColumnStatisticsImpl* intStats =
+ dynamic_cast<IntegerColumnStatisticsImpl*>(colIndexStatistics.get());
+ if (intStats == nullptr) {
+ throw InvalidArgument("Failed to cast to IntegerColumnStatisticsImpl");
+ }
+
+ ColumnWriter::add(rowBatch, offset, numValues);
const int64_t* data = longBatch->data.data() + offset;
const char* notNull = longBatch->hasNulls ?
@@ -460,21 +467,17 @@ namespace orc {
rleEncoder->add(data, numValues, notNull);
// update stats
- IntegerColumnStatisticsImpl* intStats =
- dynamic_cast<IntegerColumnStatisticsImpl*>(colIndexStatistics.get());
- if (intStats == nullptr) {
- throw InvalidArgument("Failed to cast to IntegerColumnStatisticsImpl");
- }
-
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (notNull == nullptr || notNull[i]) {
- intStats->increase(1);
+ ++count;
intStats->update(data[i], 1);
} else if (!hasNull) {
hasNull = true;
}
}
+ intStats->increase(count);
if (hasNull) {
intStats->setHasNull(true);
}
@@ -549,12 +552,17 @@ namespace orc {
void ByteColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
-
LongVectorBatch* byteBatch = dynamic_cast<LongVectorBatch*>(&rowBatch);
if (byteBatch == nullptr) {
throw InvalidArgument("Failed to cast to LongVectorBatch");
}
+ IntegerColumnStatisticsImpl* intStats =
+ dynamic_cast<IntegerColumnStatisticsImpl*>(colIndexStatistics.get());
+ if (intStats == nullptr) {
+ throw InvalidArgument("Failed to cast to IntegerColumnStatisticsImpl");
+ }
+
+ ColumnWriter::add(rowBatch, offset, numValues);
int64_t* data = byteBatch->data.data() + offset;
const char* notNull = byteBatch->hasNulls ?
@@ -566,20 +574,17 @@ namespace orc {
}
byteRleEncoder->add(byteData, numValues, notNull);
- IntegerColumnStatisticsImpl* intStats =
- dynamic_cast<IntegerColumnStatisticsImpl*>(colIndexStatistics.get());
- if (intStats == nullptr) {
- throw InvalidArgument("Failed to cast to IntegerColumnStatisticsImpl");
- }
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (notNull == nullptr || notNull[i]) {
- intStats->increase(1);
+ ++count;
intStats->update(static_cast<int64_t>(byteData[i]), 1);
} else if (!hasNull) {
hasNull = true;
}
}
+ intStats->increase(count);
if (hasNull) {
intStats->setHasNull(true);
}
@@ -654,12 +659,18 @@ namespace orc {
void BooleanColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
-
LongVectorBatch* byteBatch = dynamic_cast<LongVectorBatch*>(&rowBatch);
if (byteBatch == nullptr) {
throw InvalidArgument("Failed to cast to LongVectorBatch");
}
+ BooleanColumnStatisticsImpl* boolStats =
+ dynamic_cast<BooleanColumnStatisticsImpl*>(colIndexStatistics.get());
+ if (boolStats == nullptr) {
+ throw InvalidArgument("Failed to cast to BooleanColumnStatisticsImpl");
+ }
+
+ ColumnWriter::add(rowBatch, offset, numValues);
+
int64_t* data = byteBatch->data.data() + offset;
const char* notNull = byteBatch->hasNulls ?
byteBatch->notNull.data() + offset : nullptr;
@@ -670,20 +681,17 @@ namespace orc {
}
rleEncoder->add(byteData, numValues, notNull);
- BooleanColumnStatisticsImpl* boolStats =
- dynamic_cast<BooleanColumnStatisticsImpl*>(colIndexStatistics.get());
- if (boolStats == nullptr) {
- throw InvalidArgument("Failed to cast to BooleanColumnStatisticsImpl");
- }
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (notNull == nullptr || notNull[i]) {
- boolStats->increase(1);
+ ++count;
boolStats->update(byteData[i] != 0, 1);
} else if (!hasNull) {
hasNull = true;
}
}
+ boolStats->increase(count);
if (hasNull) {
boolStats->setHasNull(true);
}
@@ -774,25 +782,26 @@ namespace orc {
void DoubleColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
const DoubleVectorBatch* dblBatch =
dynamic_cast<const DoubleVectorBatch*>(&rowBatch);
if (dblBatch == nullptr) {
throw InvalidArgument("Failed to cast to DoubleVectorBatch");
}
-
- const double* doubleData = dblBatch->data.data() + offset;
- const char* notNull = dblBatch->hasNulls ?
- dblBatch->notNull.data() + offset : nullptr;
-
DoubleColumnStatisticsImpl* doubleStats =
dynamic_cast<DoubleColumnStatisticsImpl*>(colIndexStatistics.get());
if (doubleStats == nullptr) {
throw InvalidArgument("Failed to cast to DoubleColumnStatisticsImpl");
}
+ ColumnWriter::add(rowBatch, offset, numValues);
+
+ const double* doubleData = dblBatch->data.data() + offset;
+ const char* notNull = dblBatch->hasNulls ?
+ dblBatch->notNull.data() + offset : nullptr;
+
size_t bytes = isFloat ? 4 : 8;
char* data = buffer.data();
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (!notNull || notNull[i]) {
@@ -802,13 +811,13 @@ namespace orc {
encodeFloatNum<double, int64_t>(doubleData[i], data);
}
dataStream->write(data, bytes);
-
- doubleStats->increase(1);
+ ++count;
doubleStats->update(doubleData[i]);
} else if (!hasNull) {
hasNull = true;
}
}
+ doubleStats->increase(count);
if (hasNull) {
doubleStats->setHasNull(true);
}
@@ -1071,13 +1080,20 @@ namespace orc {
void StringColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
const StringVectorBatch* stringBatch =
dynamic_cast<const StringVectorBatch*>(&rowBatch);
if (stringBatch == nullptr) {
throw InvalidArgument("Failed to cast to StringVectorBatch");
}
+ StringColumnStatisticsImpl* strStats =
+ dynamic_cast<StringColumnStatisticsImpl*>(colIndexStatistics.get());
+ if (strStats == nullptr) {
+ throw InvalidArgument("Failed to cast to StringColumnStatisticsImpl");
+ }
+
+ ColumnWriter::add(rowBatch, offset, numValues);
+
char *const * data = stringBatch->data.data() + offset;
const int64_t* length = stringBatch->length.data() + offset;
const char* notNull = stringBatch->hasNulls ?
@@ -1087,26 +1103,24 @@ namespace orc {
directLengthEncoder->add(length, numValues, notNull);
}
- StringColumnStatisticsImpl* strStats =
- dynamic_cast<StringColumnStatisticsImpl*>(colIndexStatistics.get());
- if (strStats == nullptr) {
- throw InvalidArgument("Failed to cast to StringColumnStatisticsImpl");
- }
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (!notNull || notNull[i]) {
+ const size_t len = static_cast<size_t>(length[i]);
if (useDictionary) {
- size_t index = dictionary.insert(data[i], static_cast<size_t>(length[i]));
+ size_t index = dictionary.insert(data[i], len);
dictionary.idxInDictBuffer.push_back(static_cast<int64_t>(index));
} else {
- directDataStream->write(data[i], static_cast<size_t>(length[i]));
+ directDataStream->write(data[i], len);
}
- strStats->update(data[i], static_cast<size_t>(length[i]));
- strStats->increase(1);
+ strStats->update(data[i], len);
+ ++count;
} else if (!hasNull) {
hasNull = true;
}
}
+ strStats->increase(count);
if (hasNull) {
strStats->setHasNull(true);
}
@@ -1432,23 +1446,25 @@ namespace orc {
void CharColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
StringVectorBatch* charsBatch =
dynamic_cast<StringVectorBatch*>(&rowBatch);
if (charsBatch == nullptr) {
throw InvalidArgument("Failed to cast to StringVectorBatch");
}
- char** data = charsBatch->data.data() + offset;
- int64_t* length = charsBatch->length.data() + offset;
- const char* notNull = charsBatch->hasNulls ?
- charsBatch->notNull.data() + offset : nullptr;
-
StringColumnStatisticsImpl* strStats =
dynamic_cast<StringColumnStatisticsImpl*>(colIndexStatistics.get());
if (strStats == nullptr) {
throw InvalidArgument("Failed to cast to StringColumnStatisticsImpl");
}
+ ColumnWriter::add(rowBatch, offset, numValues);
+
+ char** data = charsBatch->data.data() + offset;
+ int64_t* length = charsBatch->length.data() + offset;
+ const char* notNull = charsBatch->hasNulls ?
+ charsBatch->notNull.data() + offset : nullptr;
+
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (!notNull || notNull[i]) {
@@ -1477,7 +1493,7 @@ namespace orc {
}
strStats->update(charData, static_cast<size_t>(length[i]));
- strStats->increase(1);
+ ++count;
} else if (!hasNull) {
hasNull = true;
}
@@ -1487,6 +1503,7 @@ namespace orc {
directLengthEncoder->add(length, numValues, notNull);
}
+ strStats->increase(count);
if (hasNull) {
strStats->setHasNull(true);
}
@@ -1513,23 +1530,25 @@ namespace orc {
void VarCharColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
StringVectorBatch* charsBatch =
dynamic_cast<StringVectorBatch*>(&rowBatch);
if (charsBatch == nullptr) {
throw InvalidArgument("Failed to cast to StringVectorBatch");
}
- char* const* data = charsBatch->data.data() + offset;
- int64_t* length = charsBatch->length.data() + offset;
- const char* notNull = charsBatch->hasNulls ?
- charsBatch->notNull.data() + offset : nullptr;
-
StringColumnStatisticsImpl* strStats =
dynamic_cast<StringColumnStatisticsImpl*>(colIndexStatistics.get());
if (strStats == nullptr) {
throw InvalidArgument("Failed to cast to StringColumnStatisticsImpl");
}
+ ColumnWriter::add(rowBatch, offset, numValues);
+
+ char* const* data = charsBatch->data.data() + offset;
+ int64_t* length = charsBatch->length.data() + offset;
+ const char* notNull = charsBatch->hasNulls ?
+ charsBatch->notNull.data() + offset : nullptr;
+
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (!notNull || notNull[i]) {
@@ -1545,7 +1564,7 @@ namespace orc {
}
strStats->update(data[i], static_cast<size_t>(length[i]));
- strStats->increase(1);
+ ++count;
} else if (!hasNull) {
hasNull = true;
}
@@ -1555,6 +1574,7 @@ namespace orc {
directLengthEncoder->add(length, numValues, notNull);
}
+ strStats->increase(count);
if (hasNull) {
strStats->setHasNull(true);
}
@@ -1577,16 +1597,10 @@ namespace orc {
void BinaryColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
-
StringVectorBatch* binBatch = dynamic_cast<StringVectorBatch*>(&rowBatch);
if (binBatch == nullptr) {
throw InvalidArgument("Failed to cast to StringVectorBatch");
}
- char** data = binBatch->data.data() + offset;
- int64_t* length = binBatch->length.data() + offset;
- const char* notNull = binBatch->hasNulls ?
- binBatch->notNull.data() + offset : nullptr;
BinaryColumnStatisticsImpl* binStats =
dynamic_cast<BinaryColumnStatisticsImpl*>(colIndexStatistics.get());
@@ -1594,6 +1608,14 @@ namespace orc {
throw InvalidArgument("Failed to cast to BinaryColumnStatisticsImpl");
}
+ ColumnWriter::add(rowBatch, offset, numValues);
+
+ char** data = binBatch->data.data() + offset;
+ int64_t* length = binBatch->length.data() + offset;
+ const char* notNull = binBatch->hasNulls ?
+ binBatch->notNull.data() + offset : nullptr;
+
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
uint64_t unsignedLength = static_cast<uint64_t>(length[i]);
@@ -1601,12 +1623,13 @@ namespace orc {
directDataStream->write(data[i], unsignedLength);
binStats->update(unsignedLength);
- binStats->increase(1);
+ ++count;
} else if (!hasNull) {
hasNull = true;
}
}
directLengthEncoder->add(length, numValues, notNull);
+ binStats->increase(count);
if (hasNull) {
binStats->setHasNull(true);
}
@@ -1689,29 +1712,32 @@ namespace orc {
void TimestampColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
TimestampVectorBatch* tsBatch =
dynamic_cast<TimestampVectorBatch*>(&rowBatch);
if (tsBatch == nullptr) {
throw InvalidArgument("Failed to cast to TimestampVectorBatch");
}
- const char* notNull = tsBatch->hasNulls ?
- tsBatch->notNull.data() + offset : nullptr;
- int64_t *secs = tsBatch->data.data() + offset;
- int64_t *nanos = tsBatch->nanoseconds.data() + offset;
-
TimestampColumnStatisticsImpl* tsStats =
dynamic_cast<TimestampColumnStatisticsImpl*>(colIndexStatistics.get());
if (tsStats == nullptr) {
throw InvalidArgument("Failed to cast to TimestampColumnStatisticsImpl");
}
+
+ ColumnWriter::add(rowBatch, offset, numValues);
+
+ const char* notNull = tsBatch->hasNulls ?
+ tsBatch->notNull.data() + offset : nullptr;
+ int64_t *secs = tsBatch->data.data() + offset;
+ int64_t *nanos = tsBatch->nanoseconds.data() + offset;
+
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (notNull == nullptr || notNull[i]) {
// TimestampVectorBatch already stores data in UTC
int64_t millsUTC = secs[i] * 1000 + nanos[i] / 1000000;
- tsStats->increase(1);
+ ++count;
tsStats->update(millsUTC);
if (secs[i] < 0 && nanos[i] != 0) {
@@ -1724,6 +1750,7 @@ namespace orc {
hasNull = true;
}
}
+ tsStats->increase(count);
if (hasNull) {
tsStats->setHasNull(true);
}
@@ -1791,33 +1818,37 @@ namespace orc {
void DateColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
const LongVectorBatch* longBatch =
dynamic_cast<const LongVectorBatch*>(&rowBatch);
if (longBatch == nullptr) {
throw InvalidArgument("Failed to cast to LongVectorBatch");
}
+ DateColumnStatisticsImpl* dateStats =
+ dynamic_cast<DateColumnStatisticsImpl*>(colIndexStatistics.get());
+ if (dateStats == nullptr) {
+ throw InvalidArgument("Failed to cast to DateColumnStatisticsImpl");
+ }
+
+ ColumnWriter::add(rowBatch, offset, numValues);
+
const int64_t* data = longBatch->data.data() + offset;
const char* notNull = longBatch->hasNulls ?
longBatch->notNull.data() + offset : nullptr;
rleEncoder->add(data, numValues, notNull);
- DateColumnStatisticsImpl* dateStats =
- dynamic_cast<DateColumnStatisticsImpl*>(colIndexStatistics.get());
- if (dateStats == nullptr) {
- throw InvalidArgument("Failed to cast to DateColumnStatisticsImpl");
- }
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (!notNull || notNull[i]) {
- dateStats->increase(1);
+ ++count;
dateStats->update(static_cast<int32_t>(data[i]));
} else if (!hasNull) {
hasNull = true;
}
}
+ dateStats->increase(count);
if (hasNull) {
dateStats->setHasNull(true);
}
@@ -1882,22 +1913,25 @@ namespace orc {
void Decimal64ColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
const Decimal64VectorBatch* decBatch =
dynamic_cast<const Decimal64VectorBatch*>(&rowBatch);
if (decBatch == nullptr) {
throw InvalidArgument("Failed to cast to Decimal64VectorBatch");
}
- const char* notNull = decBatch->hasNulls ?
- decBatch->notNull.data() + offset : nullptr;
- const int64_t* values = decBatch->values.data() + offset;
DecimalColumnStatisticsImpl* decStats =
dynamic_cast<DecimalColumnStatisticsImpl*>(colIndexStatistics.get());
if (decStats == nullptr) {
throw InvalidArgument("Failed to cast to DecimalColumnStatisticsImpl");
}
+ ColumnWriter::add(rowBatch, offset, numValues);
+
+ const char* notNull = decBatch->hasNulls ?
+ decBatch->notNull.data() + offset : nullptr;
+ const int64_t* values = decBatch->values.data() + offset;
+
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (!notNull || notNull[i]) {
@@ -1914,13 +1948,13 @@ namespace orc {
}
}
valueStream->write(buffer, static_cast<size_t>(data - buffer));
-
- decStats->increase(1);
+ ++count;
decStats->update(Decimal(values[i], static_cast<int32_t>(scale)));
} else if (!hasNull) {
hasNull = true;
}
}
+ decStats->increase(count);
if (hasNull) {
decStats->setHasNull(true);
}
@@ -2003,24 +2037,27 @@ namespace orc {
void Decimal128ColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
const Decimal128VectorBatch* decBatch =
dynamic_cast<const Decimal128VectorBatch*>(&rowBatch);
if (decBatch == nullptr) {
throw InvalidArgument("Failed to cast to Decimal128VectorBatch");
}
- const char* notNull = decBatch->hasNulls ?
- decBatch->notNull.data() + offset : nullptr;
- const Int128* values = decBatch->values.data() + offset;
DecimalColumnStatisticsImpl* decStats =
dynamic_cast<DecimalColumnStatisticsImpl*>(colIndexStatistics.get());
if (decStats == nullptr) {
throw InvalidArgument("Failed to cast to DecimalColumnStatisticsImpl");
}
+ ColumnWriter::add(rowBatch, offset, numValues);
+
+ const char* notNull = decBatch->hasNulls ?
+ decBatch->notNull.data() + offset : nullptr;
+ const Int128* values = decBatch->values.data() + offset;
+
// The current encoding of decimal columns stores the integer representation
// of the value as an unbounded length zigzag encoded base 128 varint.
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (!notNull || notNull[i]) {
@@ -2037,12 +2074,13 @@ namespace orc {
}
valueStream->write(buffer, static_cast<size_t>(data - buffer));
- decStats->increase(1);
+ ++count;
decStats->update(Decimal(values[i], static_cast<int32_t>(scale)));
} else if (!hasNull) {
hasNull = true;
}
}
+ decStats->increase(count);
if (hasNull) {
decStats->setHasNull(true);
}
@@ -2125,13 +2163,13 @@ namespace orc {
void ListColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
-
ListVectorBatch* listBatch = dynamic_cast<ListVectorBatch*>(&rowBatch);
if (listBatch == nullptr) {
throw InvalidArgument("Failed to cast to ListVectorBatch");
}
+ ColumnWriter::add(rowBatch, offset, numValues);
+
int64_t* offsets = listBatch->offsets.data() + offset;
const char* notNull = listBatch->hasNulls ?
listBatch->notNull.data() + offset : nullptr;
@@ -2154,14 +2192,16 @@ namespace orc {
if (!notNull) {
colIndexStatistics->increase(numValues);
} else {
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (notNull[i]) {
- colIndexStatistics->increase(1);
+ ++count;
} else if (!hasNull) {
hasNull = true;
}
}
+ colIndexStatistics->increase(count);
if (hasNull) {
colIndexStatistics->setHasNull(true);
}
@@ -2344,13 +2384,13 @@ namespace orc {
void MapColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
-
MapVectorBatch* mapBatch = dynamic_cast<MapVectorBatch*>(&rowBatch);
if (mapBatch == nullptr) {
throw InvalidArgument("Failed to cast to MapVectorBatch");
}
+ ColumnWriter::add(rowBatch, offset, numValues);
+
int64_t* offsets = mapBatch->offsets.data() + offset;
const char* notNull = mapBatch->hasNulls ?
mapBatch->notNull.data() + offset : nullptr;
@@ -2377,14 +2417,16 @@ namespace orc {
if (!notNull) {
colIndexStatistics->increase(numValues);
} else {
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (notNull[i]) {
- colIndexStatistics->increase(1);
+ ++count;
} else if (!hasNull) {
hasNull = true;
}
}
+ colIndexStatistics->increase(count);
if (hasNull) {
colIndexStatistics->setHasNull(true);
}
@@ -2595,13 +2637,13 @@ namespace orc {
void UnionColumnWriter::add(ColumnVectorBatch& rowBatch,
uint64_t offset,
uint64_t numValues) {
- ColumnWriter::add(rowBatch, offset, numValues);
-
UnionVectorBatch* unionBatch = dynamic_cast<UnionVectorBatch*>(&rowBatch);
if (unionBatch == nullptr) {
throw InvalidArgument("Failed to cast to UnionVectorBatch");
}
+ ColumnWriter::add(rowBatch, offset, numValues);
+
const char* notNull = unionBatch->hasNulls ?
unionBatch->notNull.data() + offset : nullptr;
unsigned char * tags = unionBatch->tags.data() + offset;
@@ -2632,14 +2674,16 @@ namespace orc {
if (!notNull) {
colIndexStatistics->increase(numValues);
} else {
+ uint64_t count = 0;
bool hasNull = false;
for (uint64_t i = 0; i < numValues; ++i) {
if (notNull[i]) {
- colIndexStatistics->increase(1);
+ ++count;
} else if (!hasNull) {
hasNull = true;
}
}
+ colIndexStatistics->increase(count);
if (hasNull) {
colIndexStatistics->setHasNull(true);
}