ffacs commented on code in PR #1949:
URL: https://github.com/apache/orc/pull/1949#discussion_r1664221402
##########
c++/src/ConvertColumnReader.cc:
##########
@@ -801,6 +821,196 @@ namespace orc {
}
};
+ class StringVariantToTimestampColumnReader : public
ConvertToTimestampColumnReader {
+ public:
+ StringVariantToTimestampColumnReader(const Type& readType, const Type&
fileType,
+ StripeStreams& stripe, bool
throwOnOverflow)
+ : ConvertToTimestampColumnReader(readType, fileType, stripe,
throwOnOverflow) {}
+
+ void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull)
override {
+ ConvertToTimestampColumnReader::next(rowBatch, numValues, notNull);
+
+ const auto& srcBatch = *SafeCastBatchTo<const
StringVectorBatch*>(data.get());
+ auto& dstBatch = *SafeCastBatchTo<TimestampVectorBatch*>(&rowBatch);
+
+ for (uint64_t i = 0; i < numValues; ++i) {
+ if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+ convertToTimestamp(dstBatch, i, std::string(srcBatch.data[i],
srcBatch.length[i]));
+ }
+ }
+ }
+
+ private:
+ // Algorithm: http://howardhinnant.github.io/date_algorithms.html
+ int64_t days_from_epoch(int32_t y, int32_t m, int32_t d) {
+ y -= m <= 2;
+ int32_t era = y / 400;
+ int32_t yoe = y - era * 400; // [0,
399]
+ int32_t doy = (153 * (m + (m > 2 ? -3 : 9)) + 2) / 5 + d - 1; // [0,
365]
+ int32_t doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // [0,
146096]
+ return 1ll * era * 146097 + doe - 719468;
+ }
+
+ std::optional<std::pair<int64_t, int64_t>> tryBestToParseFromString(
+ const std::string& timeStr) {
+ // timestamp_instant: yyyy-mm-dd hh:mm:ss[.xxx] timezone
+ // timestamp : yyyy-mm-dd hh:mm:ss[.xxx]
+ int32_t year, month, day, hour, min, sec, nanos = 0;
+ int32_t matched = std::sscanf(timeStr.c_str(), "%4d-%2d-%2d
%2d:%2d:%2d.%d", &year, &month,
Review Comment:
https://github.com/apache/orc/blob/517776f3e7f8745c365cc0fc69d7cabbdc34d714/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java#L1248-L1275
If I haven't misunderstood, the conversion format is the same as in the Java code.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]