This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new 5ce80aeba ORC-1387: [C++] Support schema evolution from decimal to
numeric/decimal
5ce80aeba is described below
commit 5ce80aeba2be6bf48190a211cd18aed86e777c45
Author: ffacs <[email protected]>
AuthorDate: Thu Oct 12 08:59:05 2023 +0800
ORC-1387: [C++] Support schema evolution from decimal to numeric/decimal
### What changes were proposed in this pull request?
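Support schema evolution in the C++ reader from decimal to any of
{boolean, byte, short, int, long, float, double, decimal}, including
decimal-to-decimal conversion when the precision or scale differs.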
### Why are the changes needed?
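Previously the C++ reader accepted a decimal file column only when the read
type was a decimal with the same precision and scale. Reading it as a numeric
type, or as a decimal with a different precision or scale, was rejected.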
### How was this patch tested?
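Unit tests pass. New cases were added to TestConvertColumnReader.cc,
TestInt128.cc and TestSchemaEvolution.cc.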
Closes #1629 from ffacs/ORC-1387.
Authored-by: ffacs <[email protected]>
Signed-off-by: Gang Wu <[email protected]>
---
c++/include/orc/Int128.hh | 7 +-
c++/src/ConvertColumnReader.cc | 228 ++++++++++++++++++++++++++++++++++--
c++/src/Int128.cc | 7 ++
c++/src/SchemaEvolution.cc | 9 +-
c++/test/TestConvertColumnReader.cc | 150 ++++++++++++++++++++++++
c++/test/TestInt128.cc | 35 ++++++
c++/test/TestSchemaEvolution.cc | 19 +++
7 files changed, 440 insertions(+), 15 deletions(-)
diff --git a/c++/include/orc/Int128.hh b/c++/include/orc/Int128.hh
index 7c88e49c8..bcb4a58e2 100644
--- a/c++/include/orc/Int128.hh
+++ b/c++/include/orc/Int128.hh
@@ -291,7 +291,7 @@ namespace orc {
}
/**
- * Convert the value to a long and
+ * Convert the value to a long and throw std::range_error on overflow.
*/
int64_t toLong() const {
if (fitsInLong()) {
@@ -300,6 +300,11 @@ namespace orc {
throw std::range_error("Int128 too large to convert to long");
}
+ /**
+ * Convert the value to a double, the return value may not be precise.
+ */
+ double toDouble() const;
+
/**
* Return the base 10 string representation of the integer.
*/
diff --git a/c++/src/ConvertColumnReader.cc b/c++/src/ConvertColumnReader.cc
index 6718fa1cd..a1e29ba58 100644
--- a/c++/src/ConvertColumnReader.cc
+++ b/c++/src/ConvertColumnReader.cc
@@ -466,6 +466,133 @@ namespace orc {
}
}
+ template <typename FileTypeBatch, typename ReadTypeBatch, typename ReadType>
+ class DecimalToNumericColumnReader : public ConvertColumnReader {
+ public:
+ DecimalToNumericColumnReader(const Type& _readType, const Type& fileType,
StripeStreams& stripe,
+ bool _throwOnOverflow)
+ : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {
+ precision = fileType.getPrecision();
+ scale = fileType.getScale();
+ factor = 1;
+ for (int i = 0; i < scale; i++) {
+ factor *= 10;
+ }
+ }
+
+ void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull)
override {
+ ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+ const auto& srcBatch = *SafeCastBatchTo<const
FileTypeBatch*>(data.get());
+ auto& dstBatch = *SafeCastBatchTo<ReadTypeBatch*>(&rowBatch);
+ for (uint64_t i = 0; i < numValues; ++i) {
+ if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+ if constexpr (std::is_floating_point_v<ReadType>) {
+ convertDecimalToDouble(dstBatch, i, srcBatch);
+ } else {
+ convertDecimalToInteger(dstBatch, i, srcBatch);
+ }
+ }
+ }
+ }
+
+ private:
+ void convertDecimalToInteger(ReadTypeBatch& dstBatch, uint64_t idx,
+ const FileTypeBatch& srcBatch) {
+ using FileType = decltype(srcBatch.values[idx]);
+ Int128 result = scaleDownInt128ByPowerOfTen(srcBatch.values[idx], scale);
+ if (!result.fitsInLong()) {
+ handleOverflow<FileType, ReadType>(dstBatch, idx, throwOnOverflow);
+ return;
+ }
+ convertNumericElement<ReadType, int64_t>(result.toLong(),
dstBatch.data[idx], dstBatch, idx,
+ throwOnOverflow);
+ }
+
+ void convertDecimalToDouble(ReadTypeBatch& dstBatch, uint64_t idx,
+ const FileTypeBatch& srcBatch) {
+ double doubleValue = Int128(srcBatch.values[idx]).toDouble();
+ dstBatch.data[idx] = static_cast<ReadType>(doubleValue) /
static_cast<ReadType>(factor);
+ }
+
+ int32_t precision;
+ int32_t scale;
+ int64_t factor;
+ };
+
+ template <typename FileTypeBatch>
+ class DecimalToNumericColumnReader<FileTypeBatch, BooleanVectorBatch, bool>
+ : public ConvertColumnReader {
+ public:
+ DecimalToNumericColumnReader(const Type& _readType, const Type& fileType,
StripeStreams& stripe,
+ bool _throwOnOverflow)
+ : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+ void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull)
override {
+ ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+ const auto& srcBatch = *SafeCastBatchTo<const
FileTypeBatch*>(data.get());
+ auto& dstBatch = *SafeCastBatchTo<BooleanVectorBatch*>(&rowBatch);
+ for (uint64_t i = 0; i < numValues; ++i) {
+ if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+ dstBatch.data[i] = srcBatch.values[i] == 0 ? 0 : 1;
+ }
+ }
+ }
+ };
+
+ template <typename FileTypeBatch, typename ReadTypeBatch>
+ class DecimalConvertColumnReader : public ConvertColumnReader {
+ public:
+ DecimalConvertColumnReader(const Type& _readType, const Type& fileType,
StripeStreams& stripe,
+ bool _throwOnOverflow)
+ : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {
+ fromPrecision = fileType.getPrecision();
+ fromScale = fileType.getScale();
+ toPrecision = _readType.getPrecision();
+ toScale = _readType.getScale();
+ }
+
+ void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull)
override {
+ ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+ const auto& srcBatch = *SafeCastBatchTo<const
FileTypeBatch*>(data.get());
+ auto& dstBatch = *SafeCastBatchTo<ReadTypeBatch*>(&rowBatch);
+ for (uint64_t i = 0; i < numValues; ++i) {
+ if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+ convertDecimalToDecimal(dstBatch, i, srcBatch);
+ }
+ }
+ }
+
+ private:
+ void convertDecimalToDecimal(ReadTypeBatch& dstBatch, uint64_t idx,
+ const FileTypeBatch& srcBatch) {
+ using FileType = decltype(srcBatch.values[idx]);
+ using ReadType = decltype(dstBatch.values[idx]);
+
+ auto [overflows, resultI128] =
+ convertDecimal(srcBatch.values[idx], fromScale, toPrecision,
toScale);
+ if (overflows) {
+ handleOverflow<FileType, ReadType>(dstBatch, idx, throwOnOverflow);
+ }
+ if constexpr (std::is_same_v<ReadTypeBatch, Decimal64VectorBatch>) {
+ if (!resultI128.fitsInLong()) {
+ handleOverflow<FileType, ReadType>(dstBatch, idx, throwOnOverflow);
+ } else {
+ dstBatch.values[idx] = resultI128.toLong();
+ }
+ } else {
+ dstBatch.values[idx] = resultI128;
+ }
+ }
+
+ int32_t fromPrecision;
+ int32_t fromScale;
+ int32_t toPrecision;
+ int32_t toScale;
+ };
+
#define DEFINE_NUMERIC_CONVERT_READER(FROM, TO, TYPE) \
using FROM##To##TO##ColumnReader = \
NumericConvertColumnReader<FROM##VectorBatch, TO##VectorBatch, TYPE>;
@@ -482,6 +609,18 @@ namespace orc {
#define DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER(FROM) \
using FROM##ToTimestampColumnReader =
NumericToTimestampColumnReader<FROM##VectorBatch>;
+#define DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(TO, TYPE)
\
+ using Decimal64##To##TO##ColumnReader =
\
+ DecimalToNumericColumnReader<Decimal64VectorBatch, TO##VectorBatch,
TYPE>; \
+ using Decimal128##To##TO##ColumnReader =
\
+ DecimalToNumericColumnReader<Decimal128VectorBatch, TO##VectorBatch,
TYPE>;
+
+#define DEFINE_DECIMAL_CONVERT_TO_DECIMAL_READER(TO) \
+ using Decimal64##To##TO##ColumnReader = \
+ DecimalConvertColumnReader<Decimal64VectorBatch, TO##VectorBatch>; \
+ using Decimal128##To##TO##ColumnReader = \
+ DecimalConvertColumnReader<Decimal128VectorBatch, TO##VectorBatch>;
+
DEFINE_NUMERIC_CONVERT_READER(Boolean, Byte, int8_t)
DEFINE_NUMERIC_CONVERT_READER(Boolean, Short, int16_t)
DEFINE_NUMERIC_CONVERT_READER(Boolean, Int, int32_t)
@@ -568,21 +707,48 @@ namespace orc {
DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER(Float)
DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER(Double)
+ // Decimal to Numeric
+ DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Boolean, bool)
+ DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Byte, int8_t)
+ DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Short, int16_t)
+ DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Int, int32_t)
+ DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Long, int64_t)
+ DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Float, float)
+ DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER(Double, double)
+
+ // Decimal to Decimal
+ DEFINE_DECIMAL_CONVERT_TO_DECIMAL_READER(Decimal64)
+ DEFINE_DECIMAL_CONVERT_TO_DECIMAL_READER(Decimal128)
+
+#define CREATE_READER(NAME) \
+ return std::make_unique<NAME>(_readType, fileType, stripe, throwOnOverflow);
+
#define CASE_CREATE_READER(TYPE, CONVERT) \
case TYPE: \
- return std::make_unique<CONVERT##ColumnReader>(_readType, fileType,
stripe, throwOnOverflow);
+ CREATE_READER(CONVERT##ColumnReader)
const static int32_t MAX_PRECISION_64 = 18;
-#define CASE_CREATE_DECIMAL_READER(FROM)
\
- case DECIMAL: {
\
- if (_readType.getPrecision() > 0 && _readType.getPrecision() <=
MAX_PRECISION_64) { \
- return std::make_unique<FROM##ToDecimal64ColumnReader>(_readType,
fileType, stripe, \
- throwOnOverflow);
\
- } else {
\
- return std::make_unique<FROM##ToDecimal128ColumnReader>(_readType,
fileType, stripe, \
-
throwOnOverflow); \
- }
\
+ static inline bool isDecimal64(const Type& type) {
+ return type.getPrecision() > 0 && type.getPrecision() <= MAX_PRECISION_64;
+ }
+
+#define CASE_CREATE_FROM_DECIMAL_READER(TYPE, TO) \
+ case TYPE: { \
+ if (isDecimal64(fileType)) { \
+ CREATE_READER(Decimal64To##TO##ColumnReader) \
+ } else { \
+ CREATE_READER(Decimal128To##TO##ColumnReader) \
+ } \
+ }
+
+#define CASE_CREATE_DECIMAL_READER(FROM) \
+ case DECIMAL: { \
+ if (isDecimal64(_readType)) { \
+ CREATE_READER(FROM##ToDecimal64ColumnReader) \
+ } else { \
+ CREATE_READER(FROM##ToDecimal128ColumnReader) \
+ } \
}
#define CASE_EXCEPTION
\
@@ -776,7 +942,44 @@ namespace orc {
case MAP:
case STRUCT:
case UNION:
- case DECIMAL:
+ case DECIMAL: {
+ switch (_readType.getKind()) {
+ CASE_CREATE_FROM_DECIMAL_READER(BOOLEAN, Boolean)
+ CASE_CREATE_FROM_DECIMAL_READER(BYTE, Byte)
+ CASE_CREATE_FROM_DECIMAL_READER(SHORT, Short)
+ CASE_CREATE_FROM_DECIMAL_READER(INT, Int)
+ CASE_CREATE_FROM_DECIMAL_READER(LONG, Long)
+ CASE_CREATE_FROM_DECIMAL_READER(FLOAT, Float)
+ CASE_CREATE_FROM_DECIMAL_READER(DOUBLE, Double)
+ case DECIMAL: {
+ if (isDecimal64(fileType)) {
+ if (isDecimal64(_readType)) {
+ CREATE_READER(Decimal64ToDecimal64ColumnReader)
+ } else {
+ CREATE_READER(Decimal64ToDecimal128ColumnReader)
+ }
+ } else {
+ if (isDecimal64(_readType)) {
+ CREATE_READER(Decimal128ToDecimal64ColumnReader)
+ } else {
+ CREATE_READER(Decimal128ToDecimal128ColumnReader)
+ }
+ }
+ }
+ case STRING:
+ case CHAR:
+ case VARCHAR:
+ case TIMESTAMP:
+ case TIMESTAMP_INSTANT:
+ case BINARY:
+ case LIST:
+ case MAP:
+ case STRUCT:
+ case UNION:
+ case DATE:
+ CASE_EXCEPTION
+ }
+ }
case DATE:
case VARCHAR:
case CHAR:
@@ -789,6 +992,9 @@ namespace orc {
#undef DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER
#undef DEFINE_NUMERIC_CONVERT_TO_DECIMAL_READER
#undef DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER
+#undef DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER
+#undef DEFINE_DECIMAL_CONVERT_TO_DECIMAL_READER
+#undef CASE_CREATE_FROM_DECIMAL_READER
#undef CASE_CREATE_READER
#undef CASE_EXCEPTION
diff --git a/c++/src/Int128.cc b/c++/src/Int128.cc
index 327930975..3c159f377 100644
--- a/c++/src/Int128.cc
+++ b/c++/src/Int128.cc
@@ -435,6 +435,13 @@ namespace orc {
return buf.str();
}
+ double Int128::toDouble() const {
+ if (fitsInLong()) {
+ return static_cast<double>(toLong());
+ }
+ return static_cast<double>(lowbits) +
std::ldexp(static_cast<double>(highbits), 64);
+ }
+
const static int32_t MAX_PRECISION_64 = 18;
const static int32_t MAX_PRECISION_128 = 38;
const static int64_t POWERS_OF_TEN[MAX_PRECISION_64 + 1] = {1,
diff --git a/c++/src/SchemaEvolution.cc b/c++/src/SchemaEvolution.cc
index cb6f0d002..b8c4fd404 100644
--- a/c++/src/SchemaEvolution.cc
+++ b/c++/src/SchemaEvolution.cc
@@ -82,8 +82,8 @@ namespace orc {
if (fileType.getKind() == CHAR || fileType.getKind() == VARCHAR) {
ret.isValid = readType.getMaximumLength() ==
fileType.getMaximumLength();
} else if (fileType.getKind() == DECIMAL) {
- ret.isValid = readType.getPrecision() == fileType.getPrecision() &&
- readType.getScale() == fileType.getScale();
+ ret.needConvert = readType.getPrecision() != fileType.getPrecision() ||
+ readType.getScale() != fileType.getScale();
}
} else {
switch (fileType.getKind()) {
@@ -98,7 +98,10 @@ namespace orc {
isDecimal(readType) ||
isTimestamp(readType);
break;
}
- case DECIMAL:
+ case DECIMAL: {
+ ret.isValid = ret.needConvert = isNumeric(readType);
+ break;
+ }
case STRING:
case CHAR:
case VARCHAR:
diff --git a/c++/test/TestConvertColumnReader.cc
b/c++/test/TestConvertColumnReader.cc
index d0d690cf1..4aabce047 100644
--- a/c++/test/TestConvertColumnReader.cc
+++ b/c++/test/TestConvertColumnReader.cc
@@ -522,4 +522,154 @@ namespace orc {
}
}
+ TEST(ConvertColumnReader, TestConvertDecimalToNumeric) {
+ constexpr int DEFAULT_MEM_STREAM_SIZE = 10 * 1024 * 1024;
+ constexpr int TEST_CASES = 1024;
+ MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
+ std::unique_ptr<Type> fileType(Type::buildTypeFromString(
+
"struct<c1:decimal(10,2),c2:decimal(10,4),c3:decimal(20,4),c4:decimal(20,4)>"));
+ std::shared_ptr<Type> readType(
+
Type::buildTypeFromString("struct<c1:boolean,c2:smallint,c3:int,c4:double>"));
+ WriterOptions options;
+ options.setUseTightNumericVector(true);
+ auto writer = createWriter(*fileType, &memStream, options);
+ auto batch = writer->createRowBatch(TEST_CASES);
+ auto structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
+ auto& c1 = dynamic_cast<Decimal64VectorBatch&>(*structBatch->fields[0]);
+ auto& c2 = dynamic_cast<Decimal64VectorBatch&>(*structBatch->fields[1]);
+ auto& c3 = dynamic_cast<Decimal128VectorBatch&>(*structBatch->fields[2]);
+ auto& c4 = dynamic_cast<Decimal128VectorBatch&>(*structBatch->fields[3]);
+
+ for (uint32_t i = 0; i < TEST_CASES / 2; i++) {
+ int64_t flag = i % 2 ? 1 : -1;
+ c1.values[i] = flag * (!(i % 2) ? static_cast<int64_t>(0) :
static_cast<int64_t>(i * 123));
+ c2.values[i] = flag * (static_cast<int64_t>(i * 10000) + i);
+ c3.values[i] = flag * (static_cast<int64_t>(i * 10000) + i);
+ c4.values[i] = flag * (static_cast<int64_t>(i * 10000) + i);
+ }
+ for (uint32_t i = TEST_CASES / 2; i < TEST_CASES; i++) {
+ c1.values[i] = 0;
+ c2.values[i] =
(static_cast<int64_t>(std::numeric_limits<int16_t>::max()) + i) * 10000 + i;
+ c3.values[i] =
(static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + i) * 10000 + i;
+ c4.values[i] = 0;
+ }
+
+ structBatch->numElements = c1.numElements = c2.numElements =
c3.numElements = c4.numElements =
+ TEST_CASES;
+ writer->add(*batch);
+ writer->close();
+
+ auto inStream = std::make_unique<MemoryInputStream>(memStream.getData(),
memStream.getLength());
+ auto pool = getDefaultPool();
+ auto reader = createReader(*pool, std::move(inStream));
+ RowReaderOptions rowReaderOptions;
+ rowReaderOptions.setUseTightNumericVector(true);
+ rowReaderOptions.setReadType(readType);
+ auto rowReader = reader->createRowReader(rowReaderOptions);
+ auto readBatch = rowReader->createRowBatch(TEST_CASES);
+ EXPECT_EQ(true, rowReader->next(*readBatch));
+
+ auto& readStructBatch = dynamic_cast<StructVectorBatch&>(*readBatch);
+ auto& readC1 =
dynamic_cast<BooleanVectorBatch&>(*readStructBatch.fields[0]);
+ auto& readC2 = dynamic_cast<ShortVectorBatch&>(*readStructBatch.fields[1]);
+ auto& readC3 = dynamic_cast<IntVectorBatch&>(*readStructBatch.fields[2]);
+ auto& readC4 =
dynamic_cast<DoubleVectorBatch&>(*readStructBatch.fields[3]);
+ EXPECT_EQ(TEST_CASES, readBatch->numElements);
+ for (int i = 0; i < TEST_CASES / 2; i++) {
+ size_t idx = static_cast<size_t>(i);
+ EXPECT_TRUE(readC1.notNull[idx]) << i;
+ EXPECT_TRUE(readC2.notNull[idx]) << i;
+ EXPECT_TRUE(readC3.notNull[idx]) << i;
+ EXPECT_TRUE(readC4.notNull[idx]) << i;
+
+ int64_t flag = i % 2 ? 1 : -1;
+ EXPECT_EQ(!(i % 2) ? 0 : 1, readC1.data[idx]) << i;
+ EXPECT_EQ(flag * i, readC2.data[idx]) << i;
+ EXPECT_EQ(flag * i, readC3.data[idx]) << i;
+ EXPECT_DOUBLE_EQ(1.0001 * flag * i, readC4.data[idx]) << i;
+ }
+ for (int i = TEST_CASES / 2; i < TEST_CASES; i++) {
+ size_t idx = static_cast<size_t>(i);
+ EXPECT_TRUE(readC1.notNull[idx]) << i;
+ EXPECT_FALSE(readC2.notNull[idx]) << i;
+ EXPECT_FALSE(readC3.notNull[idx]) << i;
+ EXPECT_TRUE(readC4.notNull[idx]) << i;
+ }
+ }
+
+ TEST(ConvertColumnReader, TestConvertDecimalToDecimal) {
+ constexpr int DEFAULT_MEM_STREAM_SIZE = 10 * 1024 * 1024;
+ constexpr int TEST_CASES = 1024;
+ MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
+ std::unique_ptr<Type> fileType(Type::buildTypeFromString(
+
"struct<c1:decimal(10,4),c2:decimal(10,4),c3:decimal(20,4),c4:decimal(20,4)>"));
+ std::shared_ptr<Type> readType(Type::buildTypeFromString(
+
"struct<c1:decimal(9,5),c2:decimal(20,5),c3:decimal(10,3),c4:decimal(19,3)>"));
+ WriterOptions options;
+ options.setUseTightNumericVector(true);
+ auto writer = createWriter(*fileType, &memStream, options);
+ auto batch = writer->createRowBatch(TEST_CASES);
+ auto structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
+ auto& c1 = dynamic_cast<Decimal64VectorBatch&>(*structBatch->fields[0]);
+ auto& c2 = dynamic_cast<Decimal64VectorBatch&>(*structBatch->fields[1]);
+ auto& c3 = dynamic_cast<Decimal128VectorBatch&>(*structBatch->fields[2]);
+ auto& c4 = dynamic_cast<Decimal128VectorBatch&>(*structBatch->fields[3]);
+
+ for (uint32_t i = 0; i < TEST_CASES / 2; i++) {
+ int64_t flag = i % 2 ? 1 : -1;
+ c1.values[i] = flag * (static_cast<int64_t>(i * 10000) + i);
+ c2.values[i] = flag * (static_cast<int64_t>(i * 10000) + i);
+ c3.values[i] = flag * (static_cast<int64_t>(i * 10000) + i);
+ c4.values[i] = flag * (static_cast<int64_t>(i * 10000) + i);
+ }
+ for (uint32_t i = TEST_CASES / 2; i < TEST_CASES; i++) {
+ c1.values[i] = 100000000ll + i;
+ c2.values[i] = 100000000ll + i;
+ c3.values[i] = (Int128("100000000000") += i);
+ c4.values[i] = (Int128("100000000000000000000") += i);
+ }
+
+ structBatch->numElements = c1.numElements = c2.numElements =
c3.numElements = c4.numElements =
+ TEST_CASES;
+ writer->add(*batch);
+ writer->close();
+
+ auto inStream = std::make_unique<MemoryInputStream>(memStream.getData(),
memStream.getLength());
+ auto pool = getDefaultPool();
+ auto reader = createReader(*pool, std::move(inStream));
+ RowReaderOptions rowReaderOptions;
+ rowReaderOptions.setUseTightNumericVector(true);
+ rowReaderOptions.setReadType(readType);
+ auto rowReader = reader->createRowReader(rowReaderOptions);
+ auto readBatch = rowReader->createRowBatch(TEST_CASES);
+ EXPECT_EQ(true, rowReader->next(*readBatch));
+
+ auto& readStructBatch = dynamic_cast<StructVectorBatch&>(*readBatch);
+ auto& readC1 =
dynamic_cast<Decimal64VectorBatch&>(*readStructBatch.fields[0]);
+ auto& readC2 =
dynamic_cast<Decimal128VectorBatch&>(*readStructBatch.fields[1]);
+ auto& readC3 =
dynamic_cast<Decimal64VectorBatch&>(*readStructBatch.fields[2]);
+ auto& readC4 =
dynamic_cast<Decimal128VectorBatch&>(*readStructBatch.fields[3]);
+ EXPECT_EQ(TEST_CASES, readBatch->numElements);
+ for (int i = 0; i < TEST_CASES / 2; i++) {
+ size_t idx = static_cast<size_t>(i);
+ EXPECT_TRUE(readC1.notNull[idx]) << i;
+ EXPECT_TRUE(readC2.notNull[idx]) << i;
+ EXPECT_TRUE(readC3.notNull[idx]) << i;
+ EXPECT_TRUE(readC4.notNull[idx]) << i;
+
+ int64_t flag = i % 2 ? 1 : -1;
+ EXPECT_EQ(readC1.values[idx], flag * (i * 100000 + i * 10));
+ EXPECT_EQ(readC2.values[idx].toLong(), flag * (i * 100000 + i * 10));
+ EXPECT_EQ(readC3.values[idx], flag * (i * 1000 + (i + 5) / 10));
+ EXPECT_EQ(readC4.values[idx].toLong(), flag * (i * 1000 + (i + 5) / 10));
+ }
+ for (int i = TEST_CASES / 2; i < TEST_CASES; i++) {
+ size_t idx = static_cast<size_t>(i);
+ EXPECT_FALSE(readC1.notNull[idx]) << i;
+ EXPECT_TRUE(readC2.notNull[idx]) << i;
+ EXPECT_FALSE(readC3.notNull[idx]) << i;
+ EXPECT_FALSE(readC4.notNull[idx]) << i;
+ }
+ }
+
} // namespace orc
diff --git a/c++/test/TestInt128.cc b/c++/test/TestInt128.cc
index ca67522d8..54dcff456 100644
--- a/c++/test/TestInt128.cc
+++ b/c++/test/TestInt128.cc
@@ -1201,4 +1201,39 @@ namespace orc {
<< pair.second.toString();
}
+ TEST(Int128, testConvertToDouble) {
+ // fit in long
+ EXPECT_DOUBLE_EQ(Int128("0").toDouble(), 0);
+ EXPECT_DOUBLE_EQ(Int128("1").toDouble(), 1);
+ EXPECT_DOUBLE_EQ(Int128("-1").toDouble(), -1);
+ EXPECT_DOUBLE_EQ(Int128("-123").toDouble(), -123);
+ EXPECT_DOUBLE_EQ(Int128("123").toDouble(), 123);
+ EXPECT_DOUBLE_EQ(Int128("-123456").toDouble(), -123456);
+ EXPECT_DOUBLE_EQ(Int128("123456").toDouble(), 123456);
+ EXPECT_DOUBLE_EQ(Int128("-123456789").toDouble(), -123456789);
+ EXPECT_DOUBLE_EQ(Int128("123456789").toDouble(), 123456789);
+ EXPECT_DOUBLE_EQ(Int128("-123456789012").toDouble(), -123456789012.0);
+ EXPECT_DOUBLE_EQ(Int128("123456789012").toDouble(), 123456789012.0);
+ EXPECT_DOUBLE_EQ(Int128("-123456789012345").toDouble(),
-123456789012345.0);
+ EXPECT_DOUBLE_EQ(Int128("123456789012345").toDouble(), 123456789012345.0);
+ EXPECT_DOUBLE_EQ(Int128("-123456789012345678").toDouble(),
-123456789012345678.0);
+ EXPECT_DOUBLE_EQ(Int128("123456789012345678").toDouble(),
123456789012345678.0);
+ EXPECT_DOUBLE_EQ(Int128("-9223372036854775808").toDouble(),
-9223372036854775808.0);
+ EXPECT_DOUBLE_EQ(Int128("9223372036854775807").toDouble(),
9223372036854775807.0);
+ // Not fit in long
+ EXPECT_DOUBLE_EQ(Int128("100000000000000000000").toDouble(), 1e20);
+ EXPECT_DOUBLE_EQ(Int128("-100000000000000000000").toDouble(), -1e20);
+ EXPECT_DOUBLE_EQ(Int128("-12345678901234567890").toDouble(),
-12345678901234567890.0);
+ EXPECT_DOUBLE_EQ(Int128("-123456789012345678901").toDouble(),
-123456789012345678901.0);
+ EXPECT_DOUBLE_EQ(Int128("123456789012345678901").toDouble(),
123456789012345678901.0);
+ EXPECT_DOUBLE_EQ(Int128("-1234567890123456789012").toDouble(),
-1234567890123456789012.0);
+ EXPECT_DOUBLE_EQ(Int128("1234567890123456789012").toDouble(),
1234567890123456789012.0);
+ EXPECT_DOUBLE_EQ(Int128("-12345678901234567890123").toDouble(),
-12345678901234567890123.0);
+ EXPECT_DOUBLE_EQ(Int128("12345678901234567890123").toDouble(),
12345678901234567890123.0);
+ EXPECT_DOUBLE_EQ(Int128("12345678901234567890123456789").toDouble(),
+ 12345678901234567890123456789.0);
+ EXPECT_DOUBLE_EQ(Int128("-12345678901234567890123456789").toDouble(),
+ -12345678901234567890123456789.0);
+ }
+
} // namespace orc
diff --git a/c++/test/TestSchemaEvolution.cc b/c++/test/TestSchemaEvolution.cc
index 61169b91d..f57e40086 100644
--- a/c++/test/TestSchemaEvolution.cc
+++ b/c++/test/TestSchemaEvolution.cc
@@ -113,6 +113,25 @@ namespace orc {
}
}
+ // conversion from decimal to numeric
+ for (size_t i = 12; i <= 13; i++) {
+ for (size_t j = 0; j <= 6; j++) {
+ canConvert[i][j] = true;
+ needConvert[i][j] = true;
+ }
+ }
+
+ // conversion from decimal to decimal
+ for (size_t i = 12; i <= 13; i++) {
+ for (size_t j = 12; j <= 13; j++) {
+ canConvert[i][j] = true;
+ needConvert[i][j] = false;
+ if (i != j) {
+ needConvert[i][j] = true;
+ }
+ }
+ }
+
for (size_t i = 0; i < typesSize; i++) {
for (size_t j = 0; j < typesSize; j++) {
testConvertReader(types[i], types[j], canConvert[i][j],
needConvert[i][j]);