Repository: hadoop
Updated Branches:
  refs/heads/branch-2.6 bddc6cd3f -> 534cdc842
HADOOP-13192. org.apache.hadoop.util.LineReader cannot handle multibyte delimiters correctly. Contributed by binde.

(cherry picked from commit fc6b50cc574e144fd314dea6c11987c6a384bfa6)
(cherry picked from commit 39ea0891d2b1369ec8c1ea4312489309e1a85227)
(cherry picked from commit e19cd05a878aafc94cc0de36ac6638d7977c6e6e)

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/534cdc84
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/534cdc84
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/534cdc84

Branch: refs/heads/branch-2.6
Commit: 534cdc8420ce0d2d63ac365505e963511be481dc
Parents: bddc6cd
Author: Akira Ajisaka <[email protected]>
Authored: Mon Jun 20 17:07:26 2016 +0900
Committer: Sangjin Lee <[email protected]>
Committed: Tue Sep 13 20:35:37 2016 -0700

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |  3 +
 .../java/org/apache/hadoop/util/LineReader.java |  5 +-
 .../org/apache/hadoop/util/TestLineReader.java  | 59 ++++++++++++--------
 3 files changed, 44 insertions(+), 23 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/534cdc84/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 3e4a74e..4ddcd4d 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -51,6 +51,9 @@ Release 2.6.5 - UNRELEASED
     HADOOP-13052. ChecksumFileSystem mishandles crc file permissions.
     (Daryn Sharp via kihwal)
 
+    HADOOP-13192. org.apache.hadoop.util.LineReader cannot handle multibyte
+    delimiters correctly. 
(binde via aajisaka) + Release 2.6.4 - 2016-02-11 INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/534cdc84/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java index 153953d..e20a7c1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java @@ -318,7 +318,10 @@ public class LineReader implements Closeable { break; } } else if (delPosn != 0) { - bufferPosn--; + bufferPosn -= delPosn; + if(bufferPosn < -1) { + bufferPosn = -1; + } delPosn = 0; } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/534cdc84/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java index 9d909bc..52f8b9f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java @@ -58,7 +58,7 @@ public class TestLineReader { * Check Condition * In the second key value pair, the value should contain * "</" from currentToken and - * "id>" from next token + * "id>" from next token */ Delimiter="</entity>"; @@ -80,20 +80,21 @@ public class TestLineReader { String TestPartOfInput = CurrentBufferTailToken+NextBufferHeadToken; int BufferSize=64 * 1024; - int 
numberOfCharToFillTheBuffer=BufferSize-CurrentBufferTailToken.length(); + int numberOfCharToFillTheBuffer = + BufferSize - CurrentBufferTailToken.length(); StringBuilder fillerString=new StringBuilder(); - for (int i=0;i<numberOfCharToFillTheBuffer;i++) { + for (int i=0; i<numberOfCharToFillTheBuffer; i++) { fillerString.append('a'); // char 'a' as a filler for the test string } TestData = fillerString + TestPartOfInput; lineReader = new LineReader( - new ByteArrayInputStream(TestData.getBytes()),Delimiter.getBytes()); + new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes()); line = new Text(); - lineReader.readLine(line); - Assert.assertEquals(fillerString.toString(),line.toString()); + lineReader.readLine(line); + Assert.assertEquals(fillerString.toString(), line.toString()); lineReader.readLine(line); Assert.assertEquals(Expected, line.toString()); @@ -107,35 +108,49 @@ public class TestLineReader { Delimiter = "record"; StringBuilder TestStringBuilder = new StringBuilder(); - TestStringBuilder.append(Delimiter+"Kerala "); - TestStringBuilder.append(Delimiter+"Bangalore"); - TestStringBuilder.append(Delimiter+" North Korea"); - TestStringBuilder.append(Delimiter+Delimiter+ + TestStringBuilder.append(Delimiter + "Kerala "); + TestStringBuilder.append(Delimiter + "Bangalore"); + TestStringBuilder.append(Delimiter + " North Korea"); + TestStringBuilder.append(Delimiter + Delimiter+ "Guantanamo"); - TestStringBuilder.append(Delimiter+"ecord"+"recor"+"core"); //~EOF with 're' + TestStringBuilder.append(Delimiter + "ecord" + + "recor" + "core"); //~EOF with 're' TestData=TestStringBuilder.toString(); lineReader = new LineReader( - new ByteArrayInputStream(TestData.getBytes()),Delimiter.getBytes()); - - lineReader.readLine(line); - Assert.assertEquals("",line.toString()); - lineReader.readLine(line); - Assert.assertEquals("Kerala ",line.toString()); + new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes()); + + lineReader.readLine(line); + 
Assert.assertEquals("", line.toString()); + lineReader.readLine(line); + Assert.assertEquals("Kerala ", line.toString()); lineReader.readLine(line); - Assert.assertEquals("Bangalore",line.toString()); + Assert.assertEquals("Bangalore", line.toString()); lineReader.readLine(line); - Assert.assertEquals(" North Korea",line.toString()); + Assert.assertEquals(" North Korea", line.toString()); lineReader.readLine(line); - Assert.assertEquals("",line.toString()); + Assert.assertEquals("", line.toString()); lineReader.readLine(line); - Assert.assertEquals("Guantanamo",line.toString()); + Assert.assertEquals("Guantanamo", line.toString()); lineReader.readLine(line); - Assert.assertEquals(("ecord"+"recor"+"core"),line.toString()); + Assert.assertEquals(("ecord"+"recor"+"core"), line.toString()); + + // Test 3 + // The test scenario is such that, + // aaaabccc split by aaab + TestData = "aaaabccc"; + Delimiter = "aaab"; + lineReader = new LineReader( + new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes()); + + lineReader.readLine(line); + Assert.assertEquals("a", line.toString()); + lineReader.readLine(line); + Assert.assertEquals("ccc", line.toString()); } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
