Github user sureshsubbiah commented on a diff in the pull request:
https://github.com/apache/trafodion/pull/1417#discussion_r167412178
--- Diff: core/sql/executor/ExHdfsScan.cpp ---
@@ -118,15 +119,39 @@ ExHdfsScanTcb::ExHdfsScanTcb(
, dataModCheckDone_(FALSE)
, loggingErrorDiags_(NULL)
, loggingFileName_(NULL)
+ , hdfsClient_(NULL)
+ , hdfsScan_(NULL)
+ , hdfsStats_(NULL)
, hdfsFileInfoListAsArray_(glob->getDefaultHeap(),
hdfsScanTdb.getHdfsFileInfoList()->numEntries())
{
Space * space = (glob ? glob->getSpace() : 0);
CollHeap * heap = (glob ? glob->getDefaultHeap() : 0);
+ useLibhdfsScan_ = hdfsScanTdb.getUseLibhdfsScan();
+ if (isSequenceFile())
+ useLibhdfsScan_ = TRUE;
lobGlob_ = NULL;
- const int readBufSize = (Int32)hdfsScanTdb.hdfsBufSize_;
- hdfsScanBuffer_ = new(space) char[ readBufSize + 1 ];
- hdfsScanBuffer_[readBufSize] = '\0';
-
+ hdfsScanBufMaxSize_ = hdfsScanTdb.hdfsBufSize_;
+ headRoom_ = (Int32)hdfsScanTdb.rangeTailIOSize_;
+
+ if (useLibhdfsScan_) {
+ hdfsScanBuffer_ = new(heap) char[ hdfsScanBufMaxSize_ + 1 ];
+ hdfsScanBuffer_[hdfsScanBufMaxSize_] = '\0';
+ } else {
+ hdfsScanBufBacking_[0] = new (heap) BYTE[hdfsScanBufMaxSize_ + 2 *
(headRoom_)];
--- End diff --
Could we please test this logic with extremely wide rows? Currently we have
a limitation that the maximum row size cannot be larger than
hdfsScanBufMaxSize_. It would be a good test to have 10 rows of this size, say
in 2 or more files, and check whether we can process them. The logic seems good;
this is a test suggestion. As you know, hdfsScanBufMaxSize_ can be reduced with
a CQD, to avoid having to deal with rows that are several MBs wide.
---