[TRAFODION-1912] Do not replace \r with a space; instead, move the returned pointer back so that the last column is handled as the NULL case. This does not change the raw data, which I feel is a better fix.
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/ffbe0913 Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/ffbe0913 Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/ffbe0913 Branch: refs/heads/master Commit: ffbe091344426b19e451dee687d6347a40bbf1f9 Parents: 64ef7db Author: Liu Ming <[email protected]> Authored: Thu Apr 7 05:10:21 2016 +0000 Committer: Liu Ming <[email protected]> Committed: Thu Apr 7 05:10:21 2016 +0000 ---------------------------------------------------------------------- core/sql/executor/ExHdfsScan.cpp | 21 ++++++++++++--------- core/sql/executor/ExHdfsScan.h | 18 ++++++++++++------ 2 files changed, 24 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/ffbe0913/core/sql/executor/ExHdfsScan.cpp ---------------------------------------------------------------------- diff --git a/core/sql/executor/ExHdfsScan.cpp b/core/sql/executor/ExHdfsScan.cpp index 6bed78e..1593899 100644 --- a/core/sql/executor/ExHdfsScan.cpp +++ b/core/sql/executor/ExHdfsScan.cpp @@ -367,6 +367,7 @@ ExWorkProcRetcode ExHdfsScanTcb::work() char cursorId[8]; HdfsFileInfo *hdfo = NULL; Lng32 openType = 0; + int changedLen = 0; while (!qparent_.down->isEmpty()) { @@ -720,7 +721,7 @@ ExWorkProcRetcode ExHdfsScanTcb::work() // Position in the hdfsScanBuffer_ to the // first record delimiter. hdfsBufNextRow_ = hdfs_strchr(hdfsScanBuffer_, - hdfsScanTdb().recordDelimiter_, hdfsScanBuffer_+trailingPrevRead_+ bytesRead_, checkRangeDelimiter_, hdfsScanTdb().getHiveScanMode()); + hdfsScanTdb().recordDelimiter_, hdfsScanBuffer_+trailingPrevRead_+ bytesRead_, checkRangeDelimiter_, hdfsScanTdb().getHiveScanMode(), &changedLen); // May be that the record is too long? Or data isn't ascii? // Or delimiter is incorrect. if (! 
hdfsBufNextRow_) @@ -739,7 +740,8 @@ ExWorkProcRetcode ExHdfsScanTcb::work() break; } - hdfsBufNextRow_ += 1; // point past record delimiter. + hdfsBufNextRow_ += 1 + changedLen; // point past record delimiter. + //add changedLen since hdfs_strchr will remove the pointer ahead to remove the \r } else hdfsBufNextRow_ = hdfsScanBuffer_; @@ -1384,6 +1386,7 @@ char * ExHdfsScanTcb::extractAndTransformAsciiSourceToSqlRow(int &err, char *sourceData = hdfsBufNextRow_; char *sourceRowEnd = NULL; char *sourceColEnd = NULL; + int changedLen = 0; NABoolean isTrailingMissingColumn = FALSE; ExpTupleDesc * asciiSourceTD = hdfsScanTdb().workCriDesc_->getTupleDescriptor(hdfsScanTdb().asciiTuppIndex_); @@ -1395,8 +1398,8 @@ char * ExHdfsScanTcb::extractAndTransformAsciiSourceToSqlRow(int &err, hdfsLoggingRow_ = hdfsBufNextRow_; if (asciiSourceTD->numAttrs() == 0) { - sourceRowEnd = hdfs_strchr(sourceData, rd, sourceDataEnd, checkRangeDelimiter_, mode); - hdfsLoggingRowEnd_ = sourceRowEnd; + sourceRowEnd = hdfs_strchr(sourceData, rd, sourceDataEnd, checkRangeDelimiter_, mode, &changedLen); + hdfsLoggingRowEnd_ = sourceRowEnd + changedLen; if (!sourceRowEnd) return NULL; @@ -1430,7 +1433,7 @@ char * ExHdfsScanTcb::extractAndTransformAsciiSourceToSqlRow(int &err, attr = NULL; if (!isTrailingMissingColumn) { - sourceColEnd = hdfs_strchr(sourceData, rd, cd, sourceDataEnd, checkRangeDelimiter_, &rdSeen,mode); + sourceColEnd = hdfs_strchr(sourceData, rd, cd, sourceDataEnd, checkRangeDelimiter_, &rdSeen,mode, &changedLen); if (sourceColEnd == NULL) { if (rdSeen || (sourceRowEnd == NULL)) return NULL; @@ -1438,9 +1441,9 @@ char * ExHdfsScanTcb::extractAndTransformAsciiSourceToSqlRow(int &err, return sourceRowEnd+1; } short len = 0; - len = sourceColEnd - sourceData; + len = sourceColEnd - sourceData ; if (rdSeen) { - sourceRowEnd = sourceColEnd; + sourceRowEnd = sourceColEnd + changedLen; hdfsLoggingRowEnd_ = sourceRowEnd; if ((endOfRequestedRange_) && (sourceRowEnd >= 
endOfRequestedRange_)) { @@ -1493,9 +1496,9 @@ char * ExHdfsScanTcb::extractAndTransformAsciiSourceToSqlRow(int &err, // rowDelimiter is encountered // So try to find the record delimiter if (sourceRowEnd == NULL) { - sourceRowEnd = hdfs_strchr(sourceData, rd, sourceDataEnd, checkRangeDelimiter_,mode); + sourceRowEnd = hdfs_strchr(sourceData, rd, sourceDataEnd, checkRangeDelimiter_,mode, &changedLen); if (sourceRowEnd) { - hdfsLoggingRowEnd_ = sourceRowEnd; + hdfsLoggingRowEnd_ = sourceRowEnd + changedLen; //changedLen is when hdfs_strchr move the return pointer to remove the extra \r if ((endOfRequestedRange_) && (sourceRowEnd >= endOfRequestedRange_ )) { checkRangeDelimiter_ = TRUE; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/ffbe0913/core/sql/executor/ExHdfsScan.h ---------------------------------------------------------------------- diff --git a/core/sql/executor/ExHdfsScan.h b/core/sql/executor/ExHdfsScan.h index 7bc7d5e..3924473 100644 --- a/core/sql/executor/ExHdfsScan.h +++ b/core/sql/executor/ExHdfsScan.h @@ -432,10 +432,12 @@ protected: #define RANGE_DELIMITER '\002' -inline char *hdfs_strchr(char *s, int c, const char *end, NABoolean checkRangeDelimiter, int mode = 0) +inline char *hdfs_strchr(char *s, int c, const char *end, NABoolean checkRangeDelimiter, int mode , int *changedLen) { char *curr = (char *)s; int count=0; + //changedLen is lenght of \r which removed by this function + *changedLen = 0; if( (mode & HIVE_MODE_DOSFORMAT ) == 0) { while (curr < end) { @@ -455,9 +457,10 @@ inline char *hdfs_strchr(char *s, int c, const char *end, NABoolean checkRangeDe { if(count>0 && c == '\n') { - if(s[count-1] == '\r') s[count-1] = ' '; + if(s[count-1] == '\r') + *changedLen = 1; } - return curr; + return curr - *changedLen; } if (checkRangeDelimiter &&*curr == RANGE_DELIMITER) return NULL; @@ -469,20 +472,23 @@ inline char *hdfs_strchr(char *s, int c, const char *end, NABoolean checkRangeDe } -inline char *hdfs_strchr(char *s, int rd, 
int cd, const char *end, NABoolean checkRangeDelimiter, NABoolean *rdSeen, int mode = 0) +inline char *hdfs_strchr(char *s, int rd, int cd, const char *end, NABoolean checkRangeDelimiter, NABoolean *rdSeen, int mode, int* changedLen) { char *curr = (char *)s; int count = 0; + //changedLen is lenght of \r which removed by this function + *changedLen = 0; if( (mode & HIVE_MODE_DOSFORMAT)>0 ) //check outside the while loop to make it faster { while (curr < end) { if (*curr == rd) { if(count>0 && rd == '\n') { - if(s[count-1] == '\r') s[count-1] = ' '; + if(s[count-1] == '\r') + *changedLen = 1; } *rdSeen = TRUE; - return curr; + return curr - *changedLen; } else if (*curr == cd) {
