[TRAFODION-1912] Do not replace \r with a space; instead, move the returned
pointer back so that the case where the last column is NULL is handled.
This does not change the raw data, which I feel is a better fix.


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/ffbe0913
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/ffbe0913
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/ffbe0913

Branch: refs/heads/master
Commit: ffbe091344426b19e451dee687d6347a40bbf1f9
Parents: 64ef7db
Author: Liu Ming <[email protected]>
Authored: Thu Apr 7 05:10:21 2016 +0000
Committer: Liu Ming <[email protected]>
Committed: Thu Apr 7 05:10:21 2016 +0000

----------------------------------------------------------------------
 core/sql/executor/ExHdfsScan.cpp | 21 ++++++++++++---------
 core/sql/executor/ExHdfsScan.h   | 18 ++++++++++++------
 2 files changed, 24 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/ffbe0913/core/sql/executor/ExHdfsScan.cpp
----------------------------------------------------------------------
diff --git a/core/sql/executor/ExHdfsScan.cpp b/core/sql/executor/ExHdfsScan.cpp
index 6bed78e..1593899 100644
--- a/core/sql/executor/ExHdfsScan.cpp
+++ b/core/sql/executor/ExHdfsScan.cpp
@@ -367,6 +367,7 @@ ExWorkProcRetcode ExHdfsScanTcb::work()
   char cursorId[8];
   HdfsFileInfo *hdfo = NULL;
   Lng32 openType = 0;
+  int changedLen = 0;
 
   while (!qparent_.down->isEmpty())
     {
@@ -720,7 +721,7 @@ ExWorkProcRetcode ExHdfsScanTcb::work()
                // Position in the hdfsScanBuffer_ to the
                // first record delimiter.  
                hdfsBufNextRow_ = hdfs_strchr(hdfsScanBuffer_,
-                                         hdfsScanTdb().recordDelimiter_, 
hdfsScanBuffer_+trailingPrevRead_+ bytesRead_, checkRangeDelimiter_, 
hdfsScanTdb().getHiveScanMode());
+                                         hdfsScanTdb().recordDelimiter_, 
hdfsScanBuffer_+trailingPrevRead_+ bytesRead_, checkRangeDelimiter_, 
hdfsScanTdb().getHiveScanMode(), &changedLen);
                // May be that the record is too long? Or data isn't ascii?
                // Or delimiter is incorrect.
                if (! hdfsBufNextRow_)
@@ -739,7 +740,8 @@ ExWorkProcRetcode ExHdfsScanTcb::work()
                    break;
                  }
                
-               hdfsBufNextRow_ += 1;   // point past record delimiter.
+               hdfsBufNextRow_ += 1 + changedLen;   // point past record 
delimiter.
+               // Add changedLen because hdfs_strchr moves the returned pointer
+               // back by changedLen to exclude the \r.
              }
            else
              hdfsBufNextRow_ = hdfsScanBuffer_;
@@ -1384,6 +1386,7 @@ char * 
ExHdfsScanTcb::extractAndTransformAsciiSourceToSqlRow(int &err,
   char *sourceData = hdfsBufNextRow_;
   char *sourceRowEnd = NULL; 
   char *sourceColEnd = NULL;
+  int changedLen = 0;
   NABoolean isTrailingMissingColumn = FALSE;
   ExpTupleDesc * asciiSourceTD =
      
hdfsScanTdb().workCriDesc_->getTupleDescriptor(hdfsScanTdb().asciiTuppIndex_);
@@ -1395,8 +1398,8 @@ char * 
ExHdfsScanTcb::extractAndTransformAsciiSourceToSqlRow(int &err,
   hdfsLoggingRow_ = hdfsBufNextRow_;
   if (asciiSourceTD->numAttrs() == 0)
   {
-     sourceRowEnd = hdfs_strchr(sourceData, rd, sourceDataEnd, 
checkRangeDelimiter_, mode);
-     hdfsLoggingRowEnd_  = sourceRowEnd;
+     sourceRowEnd = hdfs_strchr(sourceData, rd, sourceDataEnd, 
checkRangeDelimiter_, mode, &changedLen);
+     hdfsLoggingRowEnd_  = sourceRowEnd + changedLen;
 
      if (!sourceRowEnd)
        return NULL; 
@@ -1430,7 +1433,7 @@ char * 
ExHdfsScanTcb::extractAndTransformAsciiSourceToSqlRow(int &err,
         attr = NULL;
  
       if (!isTrailingMissingColumn) {
-         sourceColEnd = hdfs_strchr(sourceData, rd, cd, sourceDataEnd, 
checkRangeDelimiter_, &rdSeen,mode);
+         sourceColEnd = hdfs_strchr(sourceData, rd, cd, sourceDataEnd, 
checkRangeDelimiter_, &rdSeen,mode, &changedLen);
          if (sourceColEnd == NULL) {
             if (rdSeen || (sourceRowEnd == NULL))
                return NULL;
@@ -1438,9 +1441,9 @@ char * 
ExHdfsScanTcb::extractAndTransformAsciiSourceToSqlRow(int &err,
                return sourceRowEnd+1;
          }
          short len = 0;
-        len = sourceColEnd - sourceData;
+        len = sourceColEnd - sourceData ;
          if (rdSeen) {
-            sourceRowEnd = sourceColEnd; 
+            sourceRowEnd = sourceColEnd + changedLen; 
             hdfsLoggingRowEnd_  = sourceRowEnd;
             if ((endOfRequestedRange_) && 
                    (sourceRowEnd >= endOfRequestedRange_)) {
@@ -1493,9 +1496,9 @@ char * 
ExHdfsScanTcb::extractAndTransformAsciiSourceToSqlRow(int &err,
   // rowDelimiter is encountered
   // So try to find the record delimiter
   if (sourceRowEnd == NULL) {
-     sourceRowEnd = hdfs_strchr(sourceData, rd, sourceDataEnd, 
checkRangeDelimiter_,mode);
+     sourceRowEnd = hdfs_strchr(sourceData, rd, sourceDataEnd, 
checkRangeDelimiter_,mode, &changedLen);
      if (sourceRowEnd) {
-        hdfsLoggingRowEnd_  = sourceRowEnd;
+        hdfsLoggingRowEnd_  = sourceRowEnd + changedLen; //changedLen is when 
hdfs_strchr move the return pointer to remove the extra \r
         if ((endOfRequestedRange_) &&
               (sourceRowEnd >= endOfRequestedRange_ )) {
            checkRangeDelimiter_ = TRUE;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/ffbe0913/core/sql/executor/ExHdfsScan.h
----------------------------------------------------------------------
diff --git a/core/sql/executor/ExHdfsScan.h b/core/sql/executor/ExHdfsScan.h
index 7bc7d5e..3924473 100644
--- a/core/sql/executor/ExHdfsScan.h
+++ b/core/sql/executor/ExHdfsScan.h
@@ -432,10 +432,12 @@ protected:
 
 #define RANGE_DELIMITER '\002'
 
-inline char *hdfs_strchr(char *s, int c, const char *end, NABoolean 
checkRangeDelimiter, int mode = 0)
+inline char *hdfs_strchr(char *s, int c, const char *end, NABoolean 
checkRangeDelimiter, int mode , int *changedLen)
 {
   char *curr = (char *)s;
   int count=0;
+  // changedLen is the length of the \r that this function excludes by moving the returned pointer back
+  *changedLen = 0;
   if( (mode & HIVE_MODE_DOSFORMAT ) == 0)
   {
    while (curr < end) {
@@ -455,9 +457,10 @@ inline char *hdfs_strchr(char *s, int c, const char *end, 
NABoolean checkRangeDe
      {
          if(count>0 && c == '\n')
          {
-           if(s[count-1] == '\r') s[count-1] = ' '; 
+           if(s[count-1] == '\r') 
+             *changedLen = 1;
          }
-         return curr;
+         return curr - *changedLen;
       }
       if (checkRangeDelimiter &&*curr == RANGE_DELIMITER)
          return NULL;
@@ -469,20 +472,23 @@ inline char *hdfs_strchr(char *s, int c, const char *end, 
NABoolean checkRangeDe
 }
 
 
-inline char *hdfs_strchr(char *s, int rd, int cd, const char *end, NABoolean 
checkRangeDelimiter, NABoolean *rdSeen, int mode = 0)
+inline char *hdfs_strchr(char *s, int rd, int cd, const char *end, NABoolean 
checkRangeDelimiter, NABoolean *rdSeen, int mode, int* changedLen)
 {
   char *curr = (char *)s;
   int count = 0;
+  // changedLen is the length of the \r that this function excludes by moving the returned pointer back
+  *changedLen = 0;
   if( (mode & HIVE_MODE_DOSFORMAT)>0 )  //check outside the while loop to make 
it faster
   {
     while (curr < end) {
       if (*curr == rd) {
          if(count>0 && rd == '\n')
          {
-             if(s[count-1] == '\r') s[count-1] = ' ';
+             if(s[count-1] == '\r') 
+               *changedLen = 1;
          }
          *rdSeen = TRUE;
-         return curr;
+         return curr - *changedLen;
       }
       else
       if (*curr == cd) {

Reply via email to