Repository: spark
Updated Branches:
  refs/heads/master d2b8b63b9 -> 12e740bba


[SPARK-22130][CORE] UTF8String.trim() scans " " twice

## What changes were proposed in this pull request?

This PR makes `UTF8String.trim()` scan a string that consists only of spaces
(e.g. `"     "`) once, whereas the current implementation scans it twice
(left to right, and then right to left).

## How was this patch tested?

Existing test suites, plus two assertions added to `UTF8StringSuite.trims()`.

Author: Kazuaki Ishizaki <ishiz...@jp.ibm.com>

Closes #19355 from kiszk/SPARK-22130.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/12e740bb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/12e740bb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/12e740bb

Branch: refs/heads/master
Commit: 12e740bba110c6ab017c73c5ef940cce39dd45b7
Parents: d2b8b63
Author: Kazuaki Ishizaki <ishiz...@jp.ibm.com>
Authored: Wed Sep 27 23:19:10 2017 +0900
Committer: hyukjinkwon <gurwls...@gmail.com>
Committed: Wed Sep 27 23:19:10 2017 +0900

----------------------------------------------------------------------
 .../java/org/apache/spark/unsafe/types/UTF8String.java   | 11 +++++------
 .../org/apache/spark/unsafe/types/UTF8StringSuite.java   |  3 +++
 2 files changed, 8 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/12e740bb/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index ce4a06b..b0d0c44 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -498,17 +498,16 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
 
   public UTF8String trim() {
     int s = 0;
-    int e = this.numBytes - 1;
     // skip all of the space (0x20) in the left side
     while (s < this.numBytes && getByte(s) == 0x20) s++;
-    // skip all of the space (0x20) in the right side
-    while (e >= 0 && getByte(e) == 0x20) e--;
-    if (s > e) {
+    if (s == this.numBytes) {
       // empty string
       return EMPTY_UTF8;
-    } else {
-      return copyUTF8String(s, e);
     }
+    // skip all of the space (0x20) in the right side
+    int e = this.numBytes - 1;
+    while (e > s && getByte(e) == 0x20) e--;
+    return copyUTF8String(s, e);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/12e740bb/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index 7b03d2c..9b303fa 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -222,10 +222,13 @@ public class UTF8StringSuite {
 
   @Test
   public void trims() {
+    assertEquals(fromString("1"), fromString("1").trim());
+
     assertEquals(fromString("hello"), fromString("  hello ").trim());
     assertEquals(fromString("hello "), fromString("  hello ").trimLeft());
     assertEquals(fromString("  hello"), fromString("  hello ").trimRight());
 
+    assertEquals(EMPTY_UTF8, EMPTY_UTF8.trim());
     assertEquals(EMPTY_UTF8, fromString("  ").trim());
     assertEquals(EMPTY_UTF8, fromString("  ").trimLeft());
     assertEquals(EMPTY_UTF8, fromString("  ").trimRight());


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to