ffacs commented on code in PR #1500:
URL: https://github.com/apache/orc/pull/1500#discussion_r1209453953


##########
c++/src/ConvertColumnReader.cc:
##########
@@ -186,10 +186,289 @@ namespace orc {
     }
   };
 
+  class ConvertToStringVariantColumnReader : public ConvertColumnReader {
+   public:
+    ConvertToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) 
override {
+      ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+      // cache converted string in the buffer
+      auto totalLength = convertToStrBuffer(rowBatch, numValues);
+
+      // contact string values to blob buffer of vector batch
+      auto& dstBatch = *SafeCastBatchTo<StringVectorBatch*>(&rowBatch);
+      dstBatch.blob.resize(totalLength);
+      char* blob = dstBatch.blob.data();
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (!rowBatch.hasNulls || rowBatch.notNull[i]) {
+          const auto size = strBuffer[i].size();
+          ::memcpy(blob, strBuffer[i].c_str(), size);
+          dstBatch.data[i] = blob;
+          dstBatch.length[i] = static_cast<int32_t>(size);
+          blob += size;
+        }
+      }
+      strBuffer.clear();
+    }
+
+    virtual size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t 
numValues) = 0;
+
+   protected:
+    std::vector<std::string> strBuffer;
+  };
+
+  class BooleanToStringVariantColumnReader : public 
ConvertToStringVariantColumnReader {
+   public:
+    BooleanToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertToStringVariantColumnReader(_readType, fileType, stripe, 
_throwOnOverflow) {}
+
+    size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) 
override;
+  };
+
+  size_t 
BooleanToStringVariantColumnReader::convertToStrBuffer(ColumnVectorBatch& 
rowBatch,
+                                                                uint64_t 
numValues) {
+    size_t size = 0;
+    strBuffer.resize(numValues);
+    const auto& srcBatch = *SafeCastBatchTo<const 
BooleanVectorBatch*>(data.get());
+    std::string trueValue = "TRUE";

Review Comment:
   > Does it follow the Java implementation?
   
   
https://github.com/apache/orc/blob/ec2ea9c6aff8b8515452df651f08695639c18cbb/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java#L1118-L1133
   
https://github.com/apache/orc/blob/ec2ea9c6aff8b8515452df651f08695639c18cbb/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java#L123-L146
   I think so, but I am not good at Java, so please take a look.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to