wgtmac commented on code in PR #1500:
URL: https://github.com/apache/orc/pull/1500#discussion_r1218823977


##########
c++/src/ConvertColumnReader.cc:
##########
@@ -186,10 +186,327 @@ namespace orc {
     }
   };
 
+  class ConvertToStringVariantColumnReader : public ConvertColumnReader {
+   public:
+    ConvertToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) 
override;
+
+    virtual size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t 
numValues) = 0;
+
+   protected:
+    std::vector<std::string> strBuffer;
+  };
+
+  void ConvertToStringVariantColumnReader::next(ColumnVectorBatch& rowBatch, 
uint64_t numValues,
+                                                char* notNull) {
+    ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+    // cache converted string in the buffer
+    auto totalLength = convertToStrBuffer(rowBatch, numValues);
+
+    // contact string values to blob buffer of vector batch
+    auto& dstBatch = *SafeCastBatchTo<StringVectorBatch*>(&rowBatch);
+    dstBatch.blob.resize(totalLength);
+    char* blob = dstBatch.blob.data();
+    for (uint64_t i = 0; i < numValues; ++i) {
+      if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+        const auto size = strBuffer[i].size();
+        ::memcpy(blob, strBuffer[i].c_str(), size);
+        dstBatch.data[i] = blob;
+        dstBatch.length[i] = static_cast<int32_t>(size);
+        blob += size;
+      }
+    }
+    strBuffer.clear();
+  }
+
+  class BooleanToStringVariantColumnReader : public 
ConvertToStringVariantColumnReader {
+   public:
+    BooleanToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertToStringVariantColumnReader(_readType, fileType, stripe, 
_throwOnOverflow) {
+      trueValue = "TRUE";
+      falseValue = "FALSE";
+      if (readType.getKind() != STRING) {
+        if (readType.getMaximumLength() < 5) {
+          throw SchemaEvolutionError("Invalid maximum length for boolean type: 
" +
+                                     
std::to_string(readType.getMaximumLength()));
+        }
+        if (readType.getKind() == CHAR) {
+          trueValue.resize(readType.getMaximumLength(), ' ');
+          falseValue.resize(readType.getMaximumLength(), ' ');
+        }
+      }
+    }
+
+    size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) 
override;
+
+    private:
+      std::string trueValue;

Review Comment:
   trueValue and falseValue can actually be const static variables.



##########
c++/src/ConvertColumnReader.cc:
##########
@@ -186,10 +186,327 @@ namespace orc {
     }
   };
 
+  class ConvertToStringVariantColumnReader : public ConvertColumnReader {
+   public:
+    ConvertToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) 
override;
+
+    virtual size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t 
numValues) = 0;
+
+   protected:
+    std::vector<std::string> strBuffer;
+  };
+
+  void ConvertToStringVariantColumnReader::next(ColumnVectorBatch& rowBatch, 
uint64_t numValues,
+                                                char* notNull) {
+    ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+    // cache converted string in the buffer
+    auto totalLength = convertToStrBuffer(rowBatch, numValues);
+
+    // contact string values to blob buffer of vector batch
+    auto& dstBatch = *SafeCastBatchTo<StringVectorBatch*>(&rowBatch);
+    dstBatch.blob.resize(totalLength);
+    char* blob = dstBatch.blob.data();
+    for (uint64_t i = 0; i < numValues; ++i) {
+      if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+        const auto size = strBuffer[i].size();
+        ::memcpy(blob, strBuffer[i].c_str(), size);
+        dstBatch.data[i] = blob;
+        dstBatch.length[i] = static_cast<int32_t>(size);
+        blob += size;
+      }
+    }
+    strBuffer.clear();
+  }
+
+  class BooleanToStringVariantColumnReader : public 
ConvertToStringVariantColumnReader {
+   public:
+    BooleanToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertToStringVariantColumnReader(_readType, fileType, stripe, 
_throwOnOverflow) {
+      trueValue = "TRUE";
+      falseValue = "FALSE";
+      if (readType.getKind() != STRING) {

Review Comment:
   ```suggestion
         if (readType.getKind() == CHAR || readType.getKind() == VARCHAR) {
   ```



##########
c++/src/ConvertColumnReader.cc:
##########
@@ -186,10 +186,327 @@ namespace orc {
     }
   };
 
+  class ConvertToStringVariantColumnReader : public ConvertColumnReader {
+   public:
+    ConvertToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) 
override;
+
+    virtual size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t 
numValues) = 0;
+
+   protected:
+    std::vector<std::string> strBuffer;
+  };
+
+  void ConvertToStringVariantColumnReader::next(ColumnVectorBatch& rowBatch, 
uint64_t numValues,
+                                                char* notNull) {
+    ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+    // cache converted string in the buffer
+    auto totalLength = convertToStrBuffer(rowBatch, numValues);
+
+    // contact string values to blob buffer of vector batch
+    auto& dstBatch = *SafeCastBatchTo<StringVectorBatch*>(&rowBatch);
+    dstBatch.blob.resize(totalLength);
+    char* blob = dstBatch.blob.data();
+    for (uint64_t i = 0; i < numValues; ++i) {
+      if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+        const auto size = strBuffer[i].size();
+        ::memcpy(blob, strBuffer[i].c_str(), size);
+        dstBatch.data[i] = blob;
+        dstBatch.length[i] = static_cast<int32_t>(size);
+        blob += size;
+      }
+    }
+    strBuffer.clear();
+  }
+
+  class BooleanToStringVariantColumnReader : public 
ConvertToStringVariantColumnReader {
+   public:
+    BooleanToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertToStringVariantColumnReader(_readType, fileType, stripe, 
_throwOnOverflow) {
+      trueValue = "TRUE";
+      falseValue = "FALSE";
+      if (readType.getKind() != STRING) {
+        if (readType.getMaximumLength() < 5) {
+          throw SchemaEvolutionError("Invalid maximum length for boolean type: 
" +
+                                     
std::to_string(readType.getMaximumLength()));
+        }
+        if (readType.getKind() == CHAR) {
+          trueValue.resize(readType.getMaximumLength(), ' ');
+          falseValue.resize(readType.getMaximumLength(), ' ');
+        }
+      }
+    }
+
+    size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) 
override;
+
+    private:
+      std::string trueValue;
+      std::string falseValue;
+  };
+
+  size_t 
BooleanToStringVariantColumnReader::convertToStrBuffer(ColumnVectorBatch& 
rowBatch,
+                                                                uint64_t 
numValues) {
+    size_t size = 0;
+    strBuffer.resize(numValues);
+    const auto& srcBatch = *SafeCastBatchTo<const 
BooleanVectorBatch*>(data.get());
+    // cast the bool value to string and truncate to the max length
+    for (uint64_t i = 0; i < numValues; ++i) {
+      if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+        strBuffer[i] = (srcBatch.data[i] ? trueValue : falseValue);
+        size += strBuffer[i].size();
+      }
+    }
+    return size;
+  }
+
+  template <typename FileTypeBatch>
+  class NumericToStringVariantColumnReader : public 
ConvertToStringVariantColumnReader {
+   public:
+    NumericToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertToStringVariantColumnReader(_readType, fileType, stripe, 
_throwOnOverflow) {}
+    size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) 
override;
+  };
+
+  template <typename FileTypeBatch>
+  size_t NumericToStringVariantColumnReader<FileTypeBatch>::convertToStrBuffer(
+      ColumnVectorBatch& rowBatch, uint64_t numValues) {
+    size_t size = 0;
+    strBuffer.resize(numValues);
+    const auto& srcBatch = *SafeCastBatchTo<const FileTypeBatch*>(data.get());
+    if (readType.getKind() == STRING) {
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          strBuffer[i] = std::to_string(srcBatch.data[i]);
+          size += strBuffer[i].size();
+        }
+      }
+    } else if (readType.getKind() == VARCHAR) {
+      const auto maxLength = readType.getMaximumLength();
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          strBuffer[i] = std::to_string(srcBatch.data[i]);
+          if (strBuffer[i].size() > maxLength) {
+            handleOverflow<decltype(srcBatch.data[i]), std::string>(rowBatch, 
i, throwOnOverflow);
+          } else {
+            size += strBuffer[i].size();
+          }
+        }
+      }
+    } else {

Review Comment:
   ```suggestion
       } else if (readType.getKind() == CHAR) {
   ```
   Then add a final else branch that explicitly throws.



##########
c++/src/Int128.cc:
##########
@@ -488,4 +488,42 @@ namespace orc {
     return value;
   }
 
+  std::pair<bool, Int128> convertDecimal(Int128 value, int32_t fromScale,

Review Comment:
   Please add some tests for this new function.



##########
c++/src/ConvertColumnReader.cc:
##########
@@ -186,10 +186,327 @@ namespace orc {
     }
   };
 
+  class ConvertToStringVariantColumnReader : public ConvertColumnReader {
+   public:
+    ConvertToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) 
override;
+
+    virtual size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t 
numValues) = 0;
+
+   protected:
+    std::vector<std::string> strBuffer;
+  };
+
+  void ConvertToStringVariantColumnReader::next(ColumnVectorBatch& rowBatch, 
uint64_t numValues,
+                                                char* notNull) {
+    ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+    // cache converted string in the buffer
+    auto totalLength = convertToStrBuffer(rowBatch, numValues);
+
+    // contact string values to blob buffer of vector batch
+    auto& dstBatch = *SafeCastBatchTo<StringVectorBatch*>(&rowBatch);
+    dstBatch.blob.resize(totalLength);
+    char* blob = dstBatch.blob.data();
+    for (uint64_t i = 0; i < numValues; ++i) {
+      if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+        const auto size = strBuffer[i].size();
+        ::memcpy(blob, strBuffer[i].c_str(), size);
+        dstBatch.data[i] = blob;
+        dstBatch.length[i] = static_cast<int32_t>(size);
+        blob += size;
+      }
+    }
+    strBuffer.clear();
+  }
+
+  class BooleanToStringVariantColumnReader : public 
ConvertToStringVariantColumnReader {
+   public:
+    BooleanToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertToStringVariantColumnReader(_readType, fileType, stripe, 
_throwOnOverflow) {
+      trueValue = "TRUE";
+      falseValue = "FALSE";
+      if (readType.getKind() != STRING) {
+        if (readType.getMaximumLength() < 5) {
+          throw SchemaEvolutionError("Invalid maximum length for boolean type: 
" +
+                                     
std::to_string(readType.getMaximumLength()));
+        }
+        if (readType.getKind() == CHAR) {
+          trueValue.resize(readType.getMaximumLength(), ' ');
+          falseValue.resize(readType.getMaximumLength(), ' ');
+        }
+      }
+    }
+
+    size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) 
override;
+
+    private:
+      std::string trueValue;
+      std::string falseValue;
+  };
+
+  size_t 
BooleanToStringVariantColumnReader::convertToStrBuffer(ColumnVectorBatch& 
rowBatch,
+                                                                uint64_t 
numValues) {
+    size_t size = 0;
+    strBuffer.resize(numValues);
+    const auto& srcBatch = *SafeCastBatchTo<const 
BooleanVectorBatch*>(data.get());
+    // cast the bool value to string and truncate to the max length
+    for (uint64_t i = 0; i < numValues; ++i) {
+      if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+        strBuffer[i] = (srcBatch.data[i] ? trueValue : falseValue);
+        size += strBuffer[i].size();

Review Comment:
   Though overflow is unlikely, it would be good to add an overflow check here
   and to all the additions below.



##########
c++/src/ConvertColumnReader.cc:
##########
@@ -186,10 +186,327 @@ namespace orc {
     }
   };
 
+  class ConvertToStringVariantColumnReader : public ConvertColumnReader {
+   public:
+    ConvertToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) 
override;
+
+    virtual size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t 
numValues) = 0;
+
+   protected:
+    std::vector<std::string> strBuffer;
+  };
+
+  void ConvertToStringVariantColumnReader::next(ColumnVectorBatch& rowBatch, 
uint64_t numValues,
+                                                char* notNull) {
+    ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+    // cache converted string in the buffer
+    auto totalLength = convertToStrBuffer(rowBatch, numValues);
+
+    // contact string values to blob buffer of vector batch
+    auto& dstBatch = *SafeCastBatchTo<StringVectorBatch*>(&rowBatch);
+    dstBatch.blob.resize(totalLength);
+    char* blob = dstBatch.blob.data();
+    for (uint64_t i = 0; i < numValues; ++i) {
+      if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+        const auto size = strBuffer[i].size();
+        ::memcpy(blob, strBuffer[i].c_str(), size);
+        dstBatch.data[i] = blob;
+        dstBatch.length[i] = static_cast<int32_t>(size);
+        blob += size;
+      }
+    }
+    strBuffer.clear();
+  }
+
+  class BooleanToStringVariantColumnReader : public 
ConvertToStringVariantColumnReader {
+   public:
+    BooleanToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertToStringVariantColumnReader(_readType, fileType, stripe, 
_throwOnOverflow) {
+      trueValue = "TRUE";
+      falseValue = "FALSE";
+      if (readType.getKind() != STRING) {
+        if (readType.getMaximumLength() < 5) {
+          throw SchemaEvolutionError("Invalid maximum length for boolean type: 
" +
+                                     
std::to_string(readType.getMaximumLength()));
+        }
+        if (readType.getKind() == CHAR) {
+          trueValue.resize(readType.getMaximumLength(), ' ');
+          falseValue.resize(readType.getMaximumLength(), ' ');
+        }
+      }
+    }
+
+    size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) 
override;
+
+    private:
+      std::string trueValue;
+      std::string falseValue;
+  };
+
+  size_t 
BooleanToStringVariantColumnReader::convertToStrBuffer(ColumnVectorBatch& 
rowBatch,
+                                                                uint64_t 
numValues) {
+    size_t size = 0;
+    strBuffer.resize(numValues);
+    const auto& srcBatch = *SafeCastBatchTo<const 
BooleanVectorBatch*>(data.get());
+    // cast the bool value to string and truncate to the max length
+    for (uint64_t i = 0; i < numValues; ++i) {
+      if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+        strBuffer[i] = (srcBatch.data[i] ? trueValue : falseValue);
+        size += strBuffer[i].size();
+      }
+    }
+    return size;
+  }
+
+  template <typename FileTypeBatch>
+  class NumericToStringVariantColumnReader : public 
ConvertToStringVariantColumnReader {
+   public:
+    NumericToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertToStringVariantColumnReader(_readType, fileType, stripe, 
_throwOnOverflow) {}
+    size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) 
override;
+  };
+
+  template <typename FileTypeBatch>
+  size_t NumericToStringVariantColumnReader<FileTypeBatch>::convertToStrBuffer(
+      ColumnVectorBatch& rowBatch, uint64_t numValues) {
+    size_t size = 0;
+    strBuffer.resize(numValues);
+    const auto& srcBatch = *SafeCastBatchTo<const FileTypeBatch*>(data.get());
+    if (readType.getKind() == STRING) {
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          strBuffer[i] = std::to_string(srcBatch.data[i]);
+          size += strBuffer[i].size();
+        }
+      }
+    } else if (readType.getKind() == VARCHAR) {
+      const auto maxLength = readType.getMaximumLength();
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          strBuffer[i] = std::to_string(srcBatch.data[i]);
+          if (strBuffer[i].size() > maxLength) {
+            handleOverflow<decltype(srcBatch.data[i]), std::string>(rowBatch, 
i, throwOnOverflow);
+          } else {
+            size += strBuffer[i].size();
+          }
+        }
+      }
+    } else {
+      const auto maxLength = readType.getMaximumLength();
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          strBuffer[i] = std::to_string(srcBatch.data[i]);
+          if (strBuffer[i].size() > maxLength) {
+            handleOverflow<decltype(srcBatch.data[i]), std::string>(rowBatch, 
i, throwOnOverflow);
+          } else {
+            strBuffer[i].resize(maxLength, ' ');
+            size += strBuffer[i].size();
+          }
+        }
+      }
+    }
+    return size;
+  }
+
+  template <typename FileTypeBatch, typename ReadTypeBatch, bool 
isFLoatingFileType>
+  class NumericToDecimalColumnReader : public ConvertColumnReader {
+   public:
+    NumericToDecimalColumnReader(const Type& _readType, const Type& fileType, 
StripeStreams& stripe,
+                                 bool _throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {
+      precision = static_cast<int32_t>(readType.getPrecision());
+      scale = static_cast<int32_t>(readType.getScale());
+      scaleMultiplier = 1;
+      bool overflow = false;
+      upperBound = scaleUpInt128ByPowerOfTen(1, precision, overflow);
+      for (int i = 0; i < scale; i++) {
+        scaleMultiplier *= 10;
+      }
+    }
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) 
override {
+      ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+      const auto& srcBatch = *SafeCastBatchTo<const 
FileTypeBatch*>(data.get());
+      auto& dstBatch = *SafeCastBatchTo<ReadTypeBatch*>(&rowBatch);
+      dstBatch.precision = precision;
+      dstBatch.scale = scale;
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          if constexpr (isFLoatingFileType) {
+            convertDoubleToDecimal(dstBatch, i, srcBatch.data[i]);
+          } else {
+            convertIntegerToDecimal(dstBatch, i, srcBatch.data[i]);
+          }
+        }
+      }
+    }
+
+   private:
+    template <typename SrcType>
+    void convertDoubleToDecimal(ReadTypeBatch& dstBatch, uint64_t idx, SrcType 
value) {

Review Comment:
   Can we extract its core logic into Int128.hh and make sure it is covered by 
some tests?



##########
c++/src/ConvertColumnReader.cc:
##########
@@ -186,10 +186,327 @@ namespace orc {
     }
   };
 
+  class ConvertToStringVariantColumnReader : public ConvertColumnReader {
+   public:
+    ConvertToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) 
override;
+
+    virtual size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t 
numValues) = 0;
+
+   protected:
+    std::vector<std::string> strBuffer;
+  };
+
+  void ConvertToStringVariantColumnReader::next(ColumnVectorBatch& rowBatch, 
uint64_t numValues,
+                                                char* notNull) {
+    ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+    // cache converted string in the buffer
+    auto totalLength = convertToStrBuffer(rowBatch, numValues);
+
+    // contact string values to blob buffer of vector batch
+    auto& dstBatch = *SafeCastBatchTo<StringVectorBatch*>(&rowBatch);
+    dstBatch.blob.resize(totalLength);
+    char* blob = dstBatch.blob.data();
+    for (uint64_t i = 0; i < numValues; ++i) {
+      if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+        const auto size = strBuffer[i].size();
+        ::memcpy(blob, strBuffer[i].c_str(), size);
+        dstBatch.data[i] = blob;
+        dstBatch.length[i] = static_cast<int32_t>(size);
+        blob += size;
+      }
+    }
+    strBuffer.clear();
+  }
+
+  class BooleanToStringVariantColumnReader : public 
ConvertToStringVariantColumnReader {
+   public:
+    BooleanToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertToStringVariantColumnReader(_readType, fileType, stripe, 
_throwOnOverflow) {
+      trueValue = "TRUE";
+      falseValue = "FALSE";
+      if (readType.getKind() != STRING) {
+        if (readType.getMaximumLength() < 5) {
+          throw SchemaEvolutionError("Invalid maximum length for boolean type: 
" +
+                                     
std::to_string(readType.getMaximumLength()));
+        }
+        if (readType.getKind() == CHAR) {
+          trueValue.resize(readType.getMaximumLength(), ' ');
+          falseValue.resize(readType.getMaximumLength(), ' ');
+        }
+      }
+    }
+
+    size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) 
override;
+
+    private:
+      std::string trueValue;
+      std::string falseValue;
+  };
+
+  size_t 
BooleanToStringVariantColumnReader::convertToStrBuffer(ColumnVectorBatch& 
rowBatch,
+                                                                uint64_t 
numValues) {
+    size_t size = 0;
+    strBuffer.resize(numValues);
+    const auto& srcBatch = *SafeCastBatchTo<const 
BooleanVectorBatch*>(data.get());
+    // cast the bool value to string and truncate to the max length
+    for (uint64_t i = 0; i < numValues; ++i) {
+      if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+        strBuffer[i] = (srcBatch.data[i] ? trueValue : falseValue);
+        size += strBuffer[i].size();
+      }
+    }
+    return size;
+  }
+
+  template <typename FileTypeBatch>
+  class NumericToStringVariantColumnReader : public 
ConvertToStringVariantColumnReader {
+   public:
+    NumericToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertToStringVariantColumnReader(_readType, fileType, stripe, 
_throwOnOverflow) {}
+    size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) 
override;
+  };
+
+  template <typename FileTypeBatch>
+  size_t NumericToStringVariantColumnReader<FileTypeBatch>::convertToStrBuffer(
+      ColumnVectorBatch& rowBatch, uint64_t numValues) {
+    size_t size = 0;
+    strBuffer.resize(numValues);
+    const auto& srcBatch = *SafeCastBatchTo<const FileTypeBatch*>(data.get());
+    if (readType.getKind() == STRING) {
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          strBuffer[i] = std::to_string(srcBatch.data[i]);
+          size += strBuffer[i].size();
+        }
+      }
+    } else if (readType.getKind() == VARCHAR) {
+      const auto maxLength = readType.getMaximumLength();
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          strBuffer[i] = std::to_string(srcBatch.data[i]);
+          if (strBuffer[i].size() > maxLength) {
+            handleOverflow<decltype(srcBatch.data[i]), std::string>(rowBatch, 
i, throwOnOverflow);
+          } else {
+            size += strBuffer[i].size();
+          }
+        }
+      }
+    } else {
+      const auto maxLength = readType.getMaximumLength();
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          strBuffer[i] = std::to_string(srcBatch.data[i]);
+          if (strBuffer[i].size() > maxLength) {
+            handleOverflow<decltype(srcBatch.data[i]), std::string>(rowBatch, 
i, throwOnOverflow);
+          } else {
+            strBuffer[i].resize(maxLength, ' ');
+            size += strBuffer[i].size();
+          }
+        }
+      }
+    }
+    return size;
+  }
+
+  template <typename FileTypeBatch, typename ReadTypeBatch, bool 
isFLoatingFileType>
+  class NumericToDecimalColumnReader : public ConvertColumnReader {
+   public:
+    NumericToDecimalColumnReader(const Type& _readType, const Type& fileType, 
StripeStreams& stripe,
+                                 bool _throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {
+      precision = static_cast<int32_t>(readType.getPrecision());
+      scale = static_cast<int32_t>(readType.getScale());
+      scaleMultiplier = 1;
+      bool overflow = false;
+      upperBound = scaleUpInt128ByPowerOfTen(1, precision, overflow);
+      for (int i = 0; i < scale; i++) {
+        scaleMultiplier *= 10;
+      }
+    }
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) 
override {
+      ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+      const auto& srcBatch = *SafeCastBatchTo<const 
FileTypeBatch*>(data.get());
+      auto& dstBatch = *SafeCastBatchTo<ReadTypeBatch*>(&rowBatch);
+      dstBatch.precision = precision;
+      dstBatch.scale = scale;
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          if constexpr (isFLoatingFileType) {

Review Comment:
   ```suggestion
             if constexpr (isFloatingFileType) {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to