ffacs commented on code in PR #1500:
URL: https://github.com/apache/orc/pull/1500#discussion_r1221864975


##########
c++/src/ConvertColumnReader.cc:
##########
@@ -186,10 +186,327 @@ namespace orc {
     }
   };
 
+  class ConvertToStringVariantColumnReader : public ConvertColumnReader {  // base reader that emits string values; subclasses supply convertToStrBuffer()
+   public:
+    ConvertToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {}
+
+    void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) 
override;  // copies the strings cached in strBuffer into the destination StringVectorBatch
+
+    virtual size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t 
numValues) = 0;  // expected to fill strBuffer[i] for non-null rows; next() uses the returned value as the blob size
+
+   protected:
+    std::vector<std::string> strBuffer;  // per-row converted strings, indexed by row; read and then cleared by next()
+  };
+
+  void ConvertToStringVariantColumnReader::next(ColumnVectorBatch& rowBatch, 
uint64_t numValues,
+                                                char* notNull) {
+    ConvertColumnReader::next(rowBatch, numValues, notNull);
+
+    // Cache the converted strings in strBuffer; the returned total length is used below to size the blob.
+    auto totalLength = convertToStrBuffer(rowBatch, numValues);
+
+    // Concatenate the string values into the blob buffer of the vector batch; data[i]/length[i] point into that blob.
+    auto& dstBatch = *SafeCastBatchTo<StringVectorBatch*>(&rowBatch);
+    dstBatch.blob.resize(totalLength);
+    char* blob = dstBatch.blob.data();
+    for (uint64_t i = 0; i < numValues; ++i) {
+      if (!rowBatch.hasNulls || rowBatch.notNull[i]) {  // null rows are skipped: nothing is copied and data/length stay untouched
+        const auto size = strBuffer[i].size();
+        ::memcpy(blob, strBuffer[i].c_str(), size);
+        dstBatch.data[i] = blob;
+        dstBatch.length[i] = static_cast<int32_t>(size);
+        blob += size;
+      }
+    }
+    strBuffer.clear();  // the cached strings have been copied into the blob and are no longer needed
+  }
+
+  class BooleanToStringVariantColumnReader : public 
ConvertToStringVariantColumnReader {
+   public:
+    BooleanToStringVariantColumnReader(const Type& _readType, const Type& 
fileType,
+                                       StripeStreams& stripe, bool 
_throwOnOverflow)
+        : ConvertToStringVariantColumnReader(_readType, fileType, stripe, 
_throwOnOverflow) {
+      trueValue = "TRUE";
+      falseValue = "FALSE";
+      if (readType.getKind() != STRING) {
+        if (readType.getMaximumLength() < 5) {
+          throw SchemaEvolutionError("Invalid maximum length for boolean type: 
" +
+                                     
std::to_string(readType.getMaximumLength()));
+        }
+        if (readType.getKind() == CHAR) {
+          trueValue.resize(readType.getMaximumLength(), ' ');
+          falseValue.resize(readType.getMaximumLength(), ' ');
+        }
+      }
+    }
+
+    size_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) 
override;
+
+    private:
+      std::string trueValue;

Review Comment:
   These members can't be `static const`, because for the `CHAR` type the constructor pads them with spaces up to the maximum length.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to