Repository: nifi
Updated Branches:
  refs/heads/master 095c04eda -> ec868362f


NIFI-3495 fixed the index issue with TextLineDemarcator

This closes #1518.


Project: http://git-wip-us.apache.org/repos/asf/nifi/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/ec868362
Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/ec868362
Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/ec868362

Branch: refs/heads/master
Commit: ec868362f3317a79b6518c780af1b9debb843f32
Parents: 095c04e
Author: Oleg Zhurakousky <[email protected]>
Authored: Thu Feb 16 21:05:59 2017 -0500
Committer: Mark Payne <[email protected]>
Committed: Fri Feb 17 10:27:40 2017 -0500

----------------------------------------------------------------------
 .../nifi/stream/io/util/TextLineDemarcator.java | 32 +++++++++++++++-----
 .../stream/io/util/TextLineDemarcatorTest.java  | 32 ++++++++++++++++++++
 2 files changed, 57 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nifi/blob/ec868362/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/TextLineDemarcator.java
----------------------------------------------------------------------
diff --git a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/TextLineDemarcator.java b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/TextLineDemarcator.java
index 7c918b4..2b5c7fe 100644
--- a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/TextLineDemarcator.java
+++ b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/TextLineDemarcator.java
@@ -110,7 +110,7 @@ public class TextLineDemarcator {
                 for (i = this.index; i < this.bufferLength; i++) {
                     byteVal = this.buffer[i];
                     lineLength++;
-                    int crlfLength = isEol(byteVal, i);
+                    int crlfLength = computeEol(byteVal, i + 1);
                     if (crlfLength > 0) {
                         i += crlfLength;
                         if (crlfLength == 2) {
@@ -120,7 +120,6 @@ public class TextLineDemarcator {
                         if (startsWith != null) {
                             token = this.extractDataToken(lineLength);
                         }
-                        this.index = i;
                         this.mark = this.index;
                         break lineLoop;
                     }
@@ -150,21 +149,40 @@ public class TextLineDemarcator {
         return offsetInfo;
     }
 
-    private int isEol(byte currentByte, int currentIndex) {
+    /**
+     * Determines if the line terminates. Returns int specifying the length of
+     * the CRLF (i.e., only CR or LF or CR and LF) and therefore can only have
+     * values of:
+     *   0 - not the end of the line
+     *   1 - the end of the line either via CR or LF
+     *   2 - the end of the line with both CR and LF
+     *
+     * It performs the read ahead on the buffer if need to.
+     */
+    private int computeEol(byte currentByte, int providedIndex) {
+        int actualIndex = providedIndex - 1;
+        boolean readAhead = false;
         int crlfLength = 0;
         if (currentByte == '\n') {
             crlfLength = 1;
         } else if (currentByte == '\r') {
-            if ((currentIndex + 1) >= this.bufferLength) {
-                this.index = currentIndex + 1;
+            if (providedIndex >= this.bufferLength) {
+                this.index = this.bufferLength;
                 this.fill();
+                providedIndex = this.index;
+                readAhead = true;
             }
             crlfLength = 1;
-            if (currentIndex < this.buffer.length - 1) {
-                currentByte = this.buffer[currentIndex + 1];
+            if (providedIndex < this.buffer.length - 1) {
+                currentByte = this.buffer[providedIndex];
                 crlfLength = currentByte == '\n' ? 2 : 1;
             }
         }
+
+        if (crlfLength > 0) {
+            this.index = readAhead ? this.index + (crlfLength - 1) : (actualIndex + crlfLength);
+        }
+
         return crlfLength;
     }
 

http://git-wip-us.apache.org/repos/asf/nifi/blob/ec868362/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/TextLineDemarcatorTest.java
----------------------------------------------------------------------
diff --git a/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/TextLineDemarcatorTest.java b/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/TextLineDemarcatorTest.java
index cd8b7c5..321c51b 100644
--- a/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/TextLineDemarcatorTest.java
+++ b/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/TextLineDemarcatorTest.java
@@ -121,6 +121,38 @@ public class TextLineDemarcatorTest {
     }
 
     @Test
+    public void validateNiFi_3495() {
+        String str = "he\ra-to-a\rb-to-b\rc-to-c\r\nd-to-d";
+        InputStream is = stringToIs(str);
+        TextLineDemarcator demarcator = new TextLineDemarcator(is, 10);
+        OffsetInfo info = demarcator.nextOffsetInfo();
+        assertEquals(0, info.getStartOffset());
+        assertEquals(3, info.getLength());
+        assertEquals(1, info.getCrlfLength());
+
+        info = demarcator.nextOffsetInfo();
+        assertEquals(3, info.getStartOffset());
+        assertEquals(7, info.getLength());
+        assertEquals(1, info.getCrlfLength());
+
+        info = demarcator.nextOffsetInfo();
+        assertEquals(10, info.getStartOffset());
+        assertEquals(7, info.getLength());
+        assertEquals(1, info.getCrlfLength());
+
+        info = demarcator.nextOffsetInfo();
+        assertEquals(17, info.getStartOffset());
+        assertEquals(8, info.getLength());
+        assertEquals(2, info.getCrlfLength());
+
+        info = demarcator.nextOffsetInfo();
+        assertEquals(25, info.getStartOffset());
+        assertEquals(6, info.getLength());
+        assertEquals(0, info.getCrlfLength());
+
+    }
+
+    @Test
     public void mixedCRLF() throws Exception {
         InputStream is = stringToIs("oleg\rjoe\njack\r\nstacymike\r\n");
         TextLineDemarcator demarcator = new TextLineDemarcator(is, 4);

Reply via email to