garydgregory commented on code in PR #502:
URL: https://github.com/apache/commons-csv/pull/502#discussion_r1872040489


##########
src/main/java/org/apache/commons/csv/CSVParser.java:
##########
@@ -200,6 +201,11 @@ public Builder setRecordNumber(final long recordNumber) {
             return asThis();
         }
 
+        public Builder setEnableByteTracking(final boolean enableByteTracking) 
{

Review Comment:
   Javadoc missing.
   



##########
src/main/java/org/apache/commons/csv/CSVParser.java:
##########
@@ -153,6 +153,7 @@ public static class Builder extends 
AbstractStreamBuilder<CSVParser, Builder> {
         private CSVFormat format;
         private long characterOffset;
         private long recordNumber = 1;
+        private boolean enableByteTracking = false;

Review Comment:
   Don't initialize to the default value.
   



##########
src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java:
##########
@@ -49,13 +53,27 @@ final class ExtendedBufferedReader extends 
UnsynchronizedBufferedReader {
     private long position;
     private long positionMark;
 
+    /** The number of bytes read so far. */
+    private long bytesRead;
+    private long bytesReadMark;
+
+    /** Encoder for calculating the number of bytes for each character read. */
+    private CharsetEncoder encoder;
+
     /**
      * Constructs a new instance using the default buffer size.
      */
     ExtendedBufferedReader(final Reader reader) {
         super(reader);
     }
 
+    ExtendedBufferedReader(final Reader reader, Charset charset, boolean 
enableByteTracking) {

Review Comment:
   Javadoc missing
   



##########
src/main/java/org/apache/commons/csv/Lexer.java:
##########
@@ -103,6 +103,15 @@ long getCharacterPosition() {
         return reader.getPosition();
     }
 
+    /**
+     * Returns the number of bytes read

Review Comment:
   "Returns" -> "Gets".
   



##########
src/main/java/org/apache/commons/csv/CSVRecord.java:
##########
@@ -144,6 +159,15 @@ public long getCharacterPosition() {
         return characterPosition;
     }
 
+    /**
+     * Returns the start byte of this record as a character byte in the source 
stream.

Review Comment:
   ping



##########
src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java:
##########
@@ -118,11 +137,53 @@ public int read() throws IOException {
             current == EOF && lastChar != CR && lastChar != LF && lastChar != 
EOF) {
             lineNumber++;
         }
+        if (encoder != null) {
+            this.bytesRead += getCharBytes(current);
+        }
         lastChar = current;
         position++;
         return lastChar;
     }
 
+    /**
+     * In Java, the {@code char} data type is based on the original Unicode

Review Comment:
   Getter get, IOW, the Javadoc should start with "Gets ...".



##########
src/test/java/org/apache/commons/csv/CSVParserTest.java:
##########
@@ -701,6 +701,77 @@ public void testGetHeaderComment_NoComment3() throws 
IOException {
         }
     }
 
+    @Test
+    public void testGetRecordThreeBytesRead() throws Exception {

Review Comment:
   Use `final`.



##########
src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java:
##########
@@ -118,11 +137,53 @@ public int read() throws IOException {
             current == EOF && lastChar != CR && lastChar != LF && lastChar != 
EOF) {
             lineNumber++;
         }
+        if (encoder != null) {

Review Comment:
   Test is missing `enableByteTracking`.



##########
src/test/java/org/apache/commons/csv/CSVParserTest.java:
##########
@@ -701,6 +701,77 @@ public void testGetHeaderComment_NoComment3() throws 
IOException {
         }
     }
 
+    @Test
+    public void testGetRecordThreeBytesRead() throws Exception {
+        String code = "id,date,val5,val4\n" +
+            "11111111111111,'4017-09-01',きちんと節分近くには咲いてる~,v4\n" +
+            "22222222222222,'4017-01-01',おはよう私の友人~,v4\n" +
+            "33333333333333,'4017-01-01',きる自然の力ってすごいな~,v4\n";
+        final CSVFormat format = CSVFormat.Builder.create()
+            .setDelimiter(',')
+            .setQuote('\'')
+            .get();
+        try (CSVParser parser = CSVParser.builder().setReader(new 
StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get()
 ) {
+            CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
+
+            assertEquals(0, parser.getRecordNumber());
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(1, record.getRecordNumber());
+            assertEquals(code.indexOf('i'), record.getCharacterPosition());
+            assertEquals(record.getCharacterByte(), 
record.getCharacterPosition());
+
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(2, record.getRecordNumber());
+            assertEquals(code.indexOf('1'), record.getCharacterPosition());
+            assertEquals(record.getCharacterByte(), 
record.getCharacterPosition());
+
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(3, record.getRecordNumber());
+            assertEquals(code.indexOf('2'), record.getCharacterPosition());
+            assertEquals(record.getCharacterByte(), 95);
+
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(4, record.getRecordNumber());
+            assertEquals(code.indexOf('3'), record.getCharacterPosition());
+            assertEquals(record.getCharacterByte(), 154);
+        };
+
+    }
+
+    @Test
+    public void testGetRecordFourBytesRead() throws Exception {
+        String code = "id,a,b,c\n" +

Review Comment:
   Use `final`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to