This is an automated email from the ASF dual-hosted git repository.
hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 03346cf HIVE-22784 : Boundary condition to check if there is nothing
to truncate in StringExpr functions (Ramesh Kumar via Gopal V)
03346cf is described below
commit 03346cf6ce4cf69b12f82c831a1675321591afde
Author: Ramesh Kumar Thangarajan <[email protected]>
AuthorDate: Wed Jan 29 11:10:27 2020 -0800
HIVE-22784 : Boundary condition to check if there is nothing to truncate in
StringExpr functions (Ramesh Kumar via Gopal V)
Signed-off-by: Ashutosh Chauhan <[email protected]>
---
.../expressions/TestVectorStringExpressions.java | 6 +-
.../ql/exec/vector/expressions/StringExpr.java | 138 ++++-----------------
2 files changed, 26 insertions(+), 118 deletions(-)
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
index 5ab5610..6eed84e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
@@ -1818,6 +1818,11 @@ public class TestVectorStringExpressions {
resultLen = StringExpr.rightTrimAndTruncate(blanksLeft, 0,
blanksLeft.length, 5);
Assert.assertTrue(resultLen == blanksLeft.length);
+ // Truncate everything and nothing to trim
+ Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0,
blanksLeft.length) == 5);
+ resultLen = StringExpr.rightTrimAndTruncate(blanksLeft, 0,
blanksLeft.length, 0);
+ Assert.assertTrue(resultLen == 0);
+
// Simple trims.
Assert.assertTrue(StringExpr.characterCount(blanksRight, 0,
blanksRight.length) == 5);
resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 0,
blanksRight.length, 5);
@@ -2211,7 +2216,6 @@ public class TestVectorStringExpressions {
resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 7, 17,
11);
Assert.assertTrue(resultLen == 12);
Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7,
resultLen) == 8);
-
}
@Test
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
index cc485ff..b6d3184 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
@@ -200,8 +200,12 @@ public class StringExpr {
* return the new byte length.
*/
public static int truncate(byte[] bytes, int start, int length, int
maxLength) {
- int end = start + length;
+ if (length <= maxLength) {
+ // no change in length
+ return length;
+ }
+ int end = start + length;
// count characters forward
int j = start;
int charCount = 0;
@@ -223,24 +227,10 @@ public class StringExpr {
* place the result into element i of a vector.
*/
public static void truncate(BytesColumnVector outV, int i, byte[] bytes, int
start, int length, int maxLength) {
- int end = start + length;
-
- // count characters forward
- int j = start;
- int charCount = 0;
- while(j < end) {
- // UTF-8 continuation bytes have 2 high bits equal to 0x80.
- if ((bytes[j] & 0xc0) != 0x80) {
- if (charCount == maxLength) {
- break;
- }
- ++charCount;
- }
- j++;
- }
+ final int newLength = truncate(bytes, start, length, maxLength);
// set output vector
- outV.setVal(i, bytes, start, (j - start));
+ outV.setVal(i, bytes, start, newLength);
}
/*
@@ -248,25 +238,12 @@ public class StringExpr {
* return a byte array with only truncated bytes.
*/
public static byte[] truncateScalar(byte[] bytes, int maxLength) {
- int end = bytes.length;
+ int newLength = truncate(bytes, 0, bytes.length, maxLength);
- // count characters forward
- int j = 0;
- int charCount = 0;
- while(j < end) {
- // UTF-8 continuation bytes have 2 high bits equal to 0x80.
- if ((bytes[j] & 0xc0) != 0x80) {
- if (charCount == maxLength) {
- break;
- }
- ++charCount;
- }
- j++;
- }
- if (j == end) {
+ if (newLength == bytes.length) {
return bytes;
} else {
- return Arrays.copyOf(bytes, j);
+ return Arrays.copyOf(bytes, newLength);
}
}
@@ -275,36 +252,15 @@ public class StringExpr {
* return the new byte length.
*/
public static int rightTrimAndTruncate(byte[] bytes, int start, int length,
int maxLength) {
- int end = start + length;
+ int newLength = truncate(bytes, start, length, maxLength);
- // count characters forward and watch for final run of pads
- int j = start;
- int charCount = 0;
- int padRunStart = -1;
- while(j < end) {
- // UTF-8 continuation bytes have 2 high bits equal to 0x80.
- if ((bytes[j] & 0xc0) != 0x80) {
- if (charCount == maxLength) {
- break;
- }
- if (bytes[j] == 0x20) {
- if (padRunStart == -1) {
- padRunStart = j;
- }
- } else {
- padRunStart = -1;
- }
- ++charCount;
- } else {
- padRunStart = -1;
+ for (int i = start + newLength - 1; i >= start; i--) {
+ if (bytes[i] != 0x20) {
+ return i - start + 1;
}
- j++;
- }
- if (padRunStart != -1) {
- return (padRunStart - start);
- } else {
- return (j - start);
}
+
+ return 0;
}
/*
@@ -312,37 +268,10 @@ public class StringExpr {
* place the result into element i of a vector.
*/
public static void rightTrimAndTruncate(BytesColumnVector outV, int i,
byte[] bytes, int start, int length, int maxLength) {
- int end = start + length;
+ final int newLength = rightTrimAndTruncate(bytes, start, length,
maxLength);
- // count characters forward and watch for final run of pads
- int j = start;
- int charCount = 0;
- int padRunStart = -1;
- while(j < end) {
- // UTF-8 continuation bytes have 2 high bits equal to 0x80.
- if ((bytes[j] & 0xc0) != 0x80) {
- if (charCount == maxLength) {
- break;
- }
- if (bytes[j] == 0x20) {
- if (padRunStart == -1) {
- padRunStart = j;
- }
- } else {
- padRunStart = -1;
- }
- ++charCount;
- } else {
- padRunStart = -1;
- }
- j++;
- }
// set output vector
- if (padRunStart != -1) {
- outV.setVal(i, bytes, start, (padRunStart - start));
- } else {
- outV.setVal(i, bytes, start, (j - start) );
- }
+ outV.setVal(i, bytes, start, newLength);
}
/*
@@ -350,37 +279,12 @@ public class StringExpr {
* return a byte array with only the trimmed and truncated bytes.
*/
public static byte[] rightTrimAndTruncateScalar(byte[] bytes, int maxLength)
{
- int end = bytes.length;
+ int newLength = rightTrimAndTruncate(bytes, 0, bytes.length, maxLength);
- // count characters forward and watch for final run of pads
- int j = 0;
- int charCount = 0;
- int padRunStart = -1;
- while(j < end) {
- // UTF-8 continuation bytes have 2 high bits equal to 0x80.
- if ((bytes[j] & 0xc0) != 0x80) {
- if (charCount == maxLength) {
- break;
- }
- if (bytes[j] == 0x20) {
- if (padRunStart == -1) {
- padRunStart = j;
- }
- } else {
- padRunStart = -1;
- }
- ++charCount;
- } else {
- padRunStart = -1;
- }
- j++;
- }
- if (padRunStart != -1) {
- return Arrays.copyOf(bytes, padRunStart);
- } else if (j == end) {
+ if (newLength == bytes.length) {
return bytes;
} else {
- return Arrays.copyOf(bytes, j);
+ return Arrays.copyOf(bytes, newLength);
}
}