Repository: nifi Updated Branches: refs/heads/master 095c04eda -> ec868362f
NIFI-3495 fixed the index issue with TextLineDemarcator This closes #1518. Project: http://git-wip-us.apache.org/repos/asf/nifi/repo Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/ec868362 Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/ec868362 Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/ec868362 Branch: refs/heads/master Commit: ec868362f3317a79b6518c780af1b9debb843f32 Parents: 095c04e Author: Oleg Zhurakousky <[email protected]> Authored: Thu Feb 16 21:05:59 2017 -0500 Committer: Mark Payne <[email protected]> Committed: Fri Feb 17 10:27:40 2017 -0500 ---------------------------------------------------------------------- .../nifi/stream/io/util/TextLineDemarcator.java | 32 +++++++++++++++----- .../stream/io/util/TextLineDemarcatorTest.java | 32 ++++++++++++++++++++ 2 files changed, 57 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nifi/blob/ec868362/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/TextLineDemarcator.java ---------------------------------------------------------------------- diff --git a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/TextLineDemarcator.java b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/TextLineDemarcator.java index 7c918b4..2b5c7fe 100644 --- a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/TextLineDemarcator.java +++ b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/TextLineDemarcator.java @@ -110,7 +110,7 @@ public class TextLineDemarcator { for (i = this.index; i < this.bufferLength; i++) { byteVal = this.buffer[i]; lineLength++; - int crlfLength = isEol(byteVal, i); + int crlfLength = computeEol(byteVal, i + 1); if (crlfLength > 0) { i += crlfLength; if (crlfLength == 2) { @@ -120,7 +120,6 @@ public class TextLineDemarcator { if (startsWith != null) { token = this.extractDataToken(lineLength); } - this.index = i; this.mark = this.index; break lineLoop; } @@ -150,21 +149,40 @@ public class TextLineDemarcator { return offsetInfo; } - private int isEol(byte currentByte, int currentIndex) { + /** + * Determines if the line terminates. Returns int specifying the length of + * the CRLF (i.e., only CR or LF or CR and LF) and therefore can only have + * values of: + * 0 - not the end of the line + * 1 - the end of the line either via CR or LF + * 2 - the end of the line with both CR and LF + * + * It performs the read ahead on the buffer if need to. + */ + private int computeEol(byte currentByte, int providedIndex) { + int actualIndex = providedIndex - 1; + boolean readAhead = false; int crlfLength = 0; if (currentByte == '\n') { crlfLength = 1; } else if (currentByte == '\r') { - if ((currentIndex + 1) >= this.bufferLength) { - this.index = currentIndex + 1; + if (providedIndex >= this.bufferLength) { + this.index = this.bufferLength; this.fill(); + providedIndex = this.index; + readAhead = true; } crlfLength = 1; - if (currentIndex < this.buffer.length - 1) { - currentByte = this.buffer[currentIndex + 1]; + if (providedIndex < this.buffer.length - 1) { + currentByte = this.buffer[providedIndex]; crlfLength = currentByte == '\n' ? 2 : 1; } } + + if (crlfLength > 0) { + this.index = readAhead ? this.index + (crlfLength - 1) : (actualIndex + crlfLength); + } + return crlfLength; } http://git-wip-us.apache.org/repos/asf/nifi/blob/ec868362/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/TextLineDemarcatorTest.java ---------------------------------------------------------------------- diff --git a/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/TextLineDemarcatorTest.java b/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/TextLineDemarcatorTest.java index cd8b7c5..321c51b 100644 --- a/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/TextLineDemarcatorTest.java +++ b/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/TextLineDemarcatorTest.java @@ -121,6 +121,38 @@ public class TextLineDemarcatorTest { } @Test + public void validateNiFi_3495() { + String str = "he\ra-to-a\rb-to-b\rc-to-c\r\nd-to-d"; + InputStream is = stringToIs(str); + TextLineDemarcator demarcator = new TextLineDemarcator(is, 10); + OffsetInfo info = demarcator.nextOffsetInfo(); + assertEquals(0, info.getStartOffset()); + assertEquals(3, info.getLength()); + assertEquals(1, info.getCrlfLength()); + + info = demarcator.nextOffsetInfo(); + assertEquals(3, info.getStartOffset()); + assertEquals(7, info.getLength()); + assertEquals(1, info.getCrlfLength()); + + info = demarcator.nextOffsetInfo(); + assertEquals(10, info.getStartOffset()); + assertEquals(7, info.getLength()); + assertEquals(1, info.getCrlfLength()); + + info = demarcator.nextOffsetInfo(); + assertEquals(17, info.getStartOffset()); + assertEquals(8, info.getLength()); + assertEquals(2, info.getCrlfLength()); + + info = demarcator.nextOffsetInfo(); + assertEquals(25, info.getStartOffset()); + assertEquals(6, info.getLength()); + assertEquals(0, info.getCrlfLength()); + + } + + @Test public void mixedCRLF() throws Exception { InputStream is = stringToIs("oleg\rjoe\njack\r\nstacymike\r\n"); TextLineDemarcator demarcator = new TextLineDemarcator(is, 4);
