This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 03346cf  HIVE-22784 : Boundary condition to check if there is nothing to truncate in StringExpr functions (Ramesh Kumar via Gopal V)
03346cf is described below

commit 03346cf6ce4cf69b12f82c831a1675321591afde
Author: Ramesh Kumar Thangarajan <[email protected]>
AuthorDate: Wed Jan 29 11:10:27 2020 -0800

    HIVE-22784 : Boundary condition to check if there is nothing to truncate in StringExpr functions (Ramesh Kumar via Gopal V)
    
    Signed-off-by: Ashutosh Chauhan <[email protected]>
---
 .../expressions/TestVectorStringExpressions.java   |   6 +-
 .../ql/exec/vector/expressions/StringExpr.java     | 138 ++++-----------------
 2 files changed, 26 insertions(+), 118 deletions(-)

diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
index 5ab5610..6eed84e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
@@ -1818,6 +1818,11 @@ public class TestVectorStringExpressions {
       resultLen = StringExpr.rightTrimAndTruncate(blanksLeft, 0, 
blanksLeft.length, 5);
       Assert.assertTrue(resultLen == blanksLeft.length);
 
+      // Truncate everything and nothing to trim
+      Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, 
blanksLeft.length) == 5);
+      resultLen = StringExpr.rightTrimAndTruncate(blanksLeft, 0, 
blanksLeft.length, 0);
+      Assert.assertTrue(resultLen == 0);
+
       // Simple trims.
       Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, 
blanksRight.length) == 5);
       resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 0, 
blanksRight.length, 5);
@@ -2211,7 +2216,6 @@ public class TestVectorStringExpressions {
       resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 7, 17, 
11);
       Assert.assertTrue(resultLen == 12);
       Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 
resultLen) == 8);
-
   }
 
   @Test
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
index cc485ff..b6d3184 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
@@ -200,8 +200,12 @@ public class StringExpr {
    * return the new byte length.
    */
   public static int truncate(byte[] bytes, int start, int length, int 
maxLength) {
-    int end = start + length;
+    if (length <= maxLength) {
+      // no change in length
+      return length;
+    }
 
+    int end = start + length;
     // count characters forward
     int j = start;
     int charCount = 0;
@@ -223,24 +227,10 @@ public class StringExpr {
    * place the result into element i of a vector.
    */
   public static void truncate(BytesColumnVector outV, int i, byte[] bytes, int 
start, int length, int maxLength) {
-    int end = start + length;
-
-    // count characters forward
-    int j = start;
-    int charCount = 0;
-    while(j < end) {
-      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
-      if ((bytes[j] & 0xc0) != 0x80) {
-        if (charCount == maxLength) {
-          break;
-        }
-        ++charCount;
-      }
-      j++;
-    }
+    final int newLength = truncate(bytes, start, length, maxLength);
 
     // set output vector
-    outV.setVal(i, bytes, start, (j - start));
+    outV.setVal(i, bytes, start, newLength);
   }
 
   /*
@@ -248,25 +238,12 @@ public class StringExpr {
    * return a byte array with only truncated bytes.
    */
   public static byte[] truncateScalar(byte[] bytes, int maxLength) {
-    int end = bytes.length;
+    int newLength = truncate(bytes, 0, bytes.length, maxLength);
 
-    // count characters forward
-    int j = 0;
-    int charCount = 0;
-    while(j < end) {
-      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
-      if ((bytes[j] & 0xc0) != 0x80) {
-        if (charCount == maxLength) {
-          break;
-        }
-        ++charCount;
-      }
-      j++;
-    }
-    if (j == end) {
+    if (newLength == bytes.length) {
       return bytes;
     } else {
-      return Arrays.copyOf(bytes, j);
+      return Arrays.copyOf(bytes, newLength);
     }
   }
 
@@ -275,36 +252,15 @@ public class StringExpr {
    * return the new byte length.
    */
   public static int rightTrimAndTruncate(byte[] bytes, int start, int length, 
int maxLength) {
-    int end = start + length;
+    int newLength = truncate(bytes, start, length, maxLength);
 
-    // count characters forward and watch for final run of pads
-    int j = start;
-    int charCount = 0;
-    int padRunStart = -1;
-    while(j < end) {
-      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
-      if ((bytes[j] & 0xc0) != 0x80) {
-        if (charCount == maxLength) {
-          break;
-        }
-        if (bytes[j] == 0x20) {
-          if (padRunStart == -1) {
-            padRunStart = j;
-          }
-        } else {
-          padRunStart = -1;
-        }
-        ++charCount;
-      } else {
-        padRunStart = -1;
+    for (int i = start + newLength - 1; i >= start; i--) {
+      if (bytes[i] != 0x20) {
+        return i - start + 1;
       }
-      j++;
-    }
-    if (padRunStart != -1) {
-      return (padRunStart - start);
-    } else {
-      return (j - start);
     }
+
+    return 0;
   }
 
   /*
@@ -312,37 +268,10 @@ public class StringExpr {
    * place the result into element i of a vector.
    */
   public static void rightTrimAndTruncate(BytesColumnVector outV, int i, 
byte[] bytes, int start, int length, int maxLength) {
-    int end = start + length;
+    final int newLength = rightTrimAndTruncate(bytes, start, length, 
maxLength);
 
-    // count characters forward and watch for final run of pads
-    int j = start;
-    int charCount = 0;
-    int padRunStart = -1;
-    while(j < end) {
-      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
-      if ((bytes[j] & 0xc0) != 0x80) {
-        if (charCount == maxLength) {
-          break;
-        }
-        if (bytes[j] == 0x20) {
-          if (padRunStart == -1) {
-            padRunStart = j;
-          }
-        } else {
-          padRunStart = -1;
-        }
-        ++charCount;
-      } else {
-        padRunStart = -1;
-      }
-      j++;
-    }
     // set output vector
-    if (padRunStart != -1) {
-      outV.setVal(i, bytes, start, (padRunStart - start));
-    } else {
-      outV.setVal(i, bytes, start, (j - start) );
-    }
+    outV.setVal(i, bytes, start, newLength);
   }
 
   /*
@@ -350,37 +279,12 @@ public class StringExpr {
    * return a byte array with only the trimmed and truncated bytes.
    */
   public static byte[] rightTrimAndTruncateScalar(byte[] bytes, int maxLength) 
{
-    int end = bytes.length;
+    int newLength = rightTrimAndTruncate(bytes, 0, bytes.length, maxLength);
 
-    // count characters forward and watch for final run of pads
-    int j = 0;
-    int charCount = 0;
-    int padRunStart = -1;
-    while(j < end) {
-      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
-      if ((bytes[j] & 0xc0) != 0x80) {
-        if (charCount == maxLength) {
-          break;
-        }
-        if (bytes[j] == 0x20) {
-          if (padRunStart == -1) {
-            padRunStart = j;
-          }
-        } else {
-          padRunStart = -1;
-        }
-        ++charCount;
-      } else {
-        padRunStart = -1;
-      }
-      j++;
-    }
-    if (padRunStart != -1) {
-      return Arrays.copyOf(bytes, padRunStart);
-    } else if (j == end) {
+    if (newLength == bytes.length) {
       return bytes;
     } else {
-      return Arrays.copyOf(bytes, j);
+      return Arrays.copyOf(bytes, newLength);
     }
   }
 

Reply via email to