wgtmac commented on code in PR #1931:
URL: https://github.com/apache/orc/pull/1931#discussion_r1600981212
##########
c++/src/ConvertColumnReader.cc:
##########
@@ -694,6 +695,112 @@ namespace orc {
const int32_t scale_;
};
+ template <typename ReadTypeBatch, typename ReadType>
+ class StringVariantToNumericColumnReader : public ConvertColumnReader {
+ public:
+ StringVariantToNumericColumnReader(const Type& readType, const Type&
fileType,
+ StripeStreams& stripe, bool
throwOnOverflow)
+ : ConvertColumnReader(readType, fileType, stripe, throwOnOverflow) {}
+
+ void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull)
override {
+ ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+ const auto& srcBatch = *SafeCastBatchTo<const
StringVectorBatch*>(data.get());
+ auto& dstBatch = *SafeCastBatchTo<ReadTypeBatch*>(&rowBatch);
+ for (uint64_t i = 0; i < numValues; ++i) {
+ if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+ if constexpr (std::is_floating_point_v<ReadType>) {
+ convertToDouble(dstBatch, srcBatch, i);
+ } else {
+ convertToInteger(dstBatch, srcBatch, i);
+ }
+ }
+ }
+ }
+
+ private:
+ void convertToInteger(ReadTypeBatch& dstBatch, const StringVectorBatch&
srcBatch,
+ uint64_t idx) {
+ int64_t longValue = 0;
+ try {
+ longValue = std::stoll(std::string(srcBatch.data[idx],
srcBatch.length[idx]));
+ } catch (...) {
Review Comment:
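Should we explicitly catch `std::invalid_argument` (the string is not a
number) and `std::out_of_range` (the value does not fit in `long long`)
instead of using `catch (...)`, so that we can report a clearer error message
for each case?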
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]