Github user kaspersorensen commented on a diff in the pull request:

    https://github.com/apache/metamodel/pull/103#discussion_r64159697
  
    --- Diff: 
fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java 
---
    @@ -31,156 +31,261 @@
      * Reader capable of separating values based on a fixed width setting.
      */
     final class FixedWidthReader implements Closeable {
    +    private static final int END_OF_STREAM = -1;
    +    private static final int LINE_FEED = '\n';
    +    private static final int CARRIAGE_RETURN = '\r';
    +    private final BufferedInputStream _stream;
    +    private final String _charsetName;
    +    private final int _fixedValueWidth;
    +    private final int[] _valueWidths;
    +    private int _valueIndex = 0;
    +    private final boolean _headerPresent;
    +    private final boolean _eolPresent;
    +    private final boolean _failOnInconsistentLineWidth;
    +    private final int _expectedLineLength;
    +    private final boolean _constantWidth;
    +    private volatile int _rowNumber;
    +    private boolean _headerSkipped;
     
    -   private final BufferedReader _reader;
    -   private final int _fixedValueWidth;
    -   private final int[] _valueWidths;
    -   private final boolean _failOnInconsistentLineWidth;
    -   private final int expectedLineLength;
    -   private final boolean constantWidth;
    -   private volatile int _rowNumber;
    -
    -   public FixedWidthReader(Reader reader, int fixedValueWidth,
    -                   boolean failOnInconsistentLineWidth) {
    -           this(new BufferedReader(reader), fixedValueWidth,
    -                           failOnInconsistentLineWidth);
    -   }
    -
    -   public FixedWidthReader(BufferedReader reader, int fixedValueWidth,
    -                   boolean failOnInconsistentLineWidth) {
    -           _reader = reader;
    -           _fixedValueWidth = fixedValueWidth;
    -           _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
    -           _rowNumber = 0;
    -           _valueWidths = null;
    -
    -           constantWidth = true;
    -           expectedLineLength = -1;
    -   }
    -
    -   public FixedWidthReader(Reader reader, int[] valueWidths,
    -                   boolean failOnInconsistentLineWidth) {
    -           this(new BufferedReader(reader), valueWidths,
    -                           failOnInconsistentLineWidth);
    -   }
    -
    -   public FixedWidthReader(BufferedReader reader, int[] valueWidths,
    -                   boolean failOnInconsistentLineWidth) {
    -           _reader = reader;
    -           _fixedValueWidth = -1;
    -           _valueWidths = valueWidths;
    -           _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
    -           _rowNumber = 0;
    -
    -           constantWidth = false;
    -           int expectedLineLength = 0;
    -           if (_fixedValueWidth == -1) {
    -                   for (int i = 0; i < _valueWidths.length; i++) {
    -                           expectedLineLength += _valueWidths[i];
    -                   }
    -           }
    -           this.expectedLineLength = expectedLineLength;
    -   }
    -
    -   /***
    -    * Reads the next line in the file.
    -    * 
    -    * @return an array of values in the next line, or null if the end of 
the
    -    *         file has been reached.
    -    * 
    -    * @throws IllegalStateException
    -    *             if an exception occurs while reading the file.
    -    */
    -   public String[] readLine() throws IllegalStateException {
    -
    -           try {
    -                   final List<String> values = new ArrayList<String>();
    -                   final String line = _reader.readLine();
    -                   if (line == null) {
    -                           return null;
    -                   }
    -
    -                   StringBuilder nextValue = new StringBuilder();
    -
    -                   int valueIndex = 0;
    -
    -                   final CharacterIterator it = new 
StringCharacterIterator(line);
    -                   for (char c = it.first(); c != CharacterIterator.DONE; 
c = it
    -                                   .next()) {
    -                           nextValue.append(c);
    -
    -                           final int valueWidth;
    -                           if (constantWidth) {
    -                                   valueWidth = _fixedValueWidth;
    -                           } else {
    -                                   if (valueIndex >= _valueWidths.length) {
    -                                           if 
(_failOnInconsistentLineWidth) {
    -                                                   String[] result = 
values.toArray(new String[values
    -                                                                   
.size()]);
    -                                                   throw new 
InconsistentValueWidthException(result,
    -                                                                   line, 
_rowNumber + 1);
    -                                           } else {
    -                                                   // silently ignore the 
inconsistency
    -                                                   break;
    -                                           }
    -                                   }
    -                                   valueWidth = _valueWidths[valueIndex];
    -                           }
    -
    -                           if (nextValue.length() == valueWidth) {
    -                                   // write the value
    -                                   values.add(nextValue.toString().trim());
    -                                   nextValue = new StringBuilder();
    -                                   valueIndex++;
    -                           }
    -                   }
    -
    -                   if (nextValue.length() > 0) {
    -                           values.add(nextValue.toString().trim());
    -                   }
    -
    -                   String[] result = values.toArray(new 
String[values.size()]);
    -
    -                   if (!_failOnInconsistentLineWidth && !constantWidth) {
    -                           if (result.length != _valueWidths.length) {
    -                                   String[] correctedResult = new 
String[_valueWidths.length];
    -                                   for (int i = 0; i < result.length
    -                                                   && i < 
_valueWidths.length; i++) {
    -                                           correctedResult[i] = result[i];
    -                                   }
    -                                   result = correctedResult;
    -                           }
    -                   }
    -
    -                   if (_failOnInconsistentLineWidth) {
    -                           _rowNumber++;
    -                           if (constantWidth) {
    -                                   if (line.length() % _fixedValueWidth != 
0) {
    -                                           throw new 
InconsistentValueWidthException(result, line,
    -                                                           _rowNumber);
    -                                   }
    -                           } else {
    -                                   if (result.length != values.size()) {
    -                                           throw new 
InconsistentValueWidthException(result, line,
    -                                                           _rowNumber);
    -                                   }
    -
    -                                   if (line.length() != 
expectedLineLength) {
    -                                           throw new 
InconsistentValueWidthException(result, line,
    -                                                           _rowNumber);
    -                                   }
    -                           }
    -                   }
    -
    -                   return result;
    -           } catch (IOException e) {
    -                   throw new IllegalStateException(e);
    -           }
    -   }
    -
    -   @Override
    -   public void close() throws IOException {
    -           _reader.close();
    -   }
    +    public FixedWidthReader(InputStream stream, String charsetName, int 
fixedValueWidth,
    +            boolean failOnInconsistentLineWidth, boolean headerPresent, 
boolean eolPresent) {
    +        this(new BufferedInputStream(stream), charsetName, 
fixedValueWidth, failOnInconsistentLineWidth, headerPresent,
    +                eolPresent);
    +    }
     
    +    public FixedWidthReader(BufferedInputStream stream, String 
charsetName, int fixedValueWidth,
    +            boolean failOnInconsistentLineWidth, boolean headerPresent, 
boolean eolPresent) {
    +        _stream = stream;
    +        _charsetName = charsetName;
    +        _fixedValueWidth = fixedValueWidth;
    +        _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
    +        _headerPresent = headerPresent;
    +        _eolPresent = eolPresent;
    +        _rowNumber = 0;
    +        _valueWidths = null;
    +        _constantWidth = true;
    +        _expectedLineLength = -1;
    +    }
    +
    +    public FixedWidthReader(InputStream stream, String charsetName, int[] 
valueWidths,
    +            boolean failOnInconsistentLineWidth, boolean headerPresent, 
boolean eolPresent) {
    +        this(new BufferedInputStream(stream), charsetName, valueWidths, 
failOnInconsistentLineWidth, headerPresent,
    +                eolPresent);
    +    }
    +
    +    public FixedWidthReader(BufferedInputStream stream, String 
charsetName, int[] valueWidths,
    +            boolean failOnInconsistentLineWidth, boolean headerPresent, 
boolean eolPresent) {
    +        _stream = stream;
    +        _charsetName = charsetName;
    +        _fixedValueWidth = -1;
    +        _valueWidths = valueWidths;
    +        _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
    +        _headerPresent = headerPresent;
    +        _eolPresent = eolPresent;
    +        _rowNumber = 0;
    +        _constantWidth = false;
    +        int expectedLineLength = 0;
    +
    +        for (int i = 0; i < _valueWidths.length; i++) {
    +            expectedLineLength += _valueWidths[i];
    +        }
    +
    +        _expectedLineLength = expectedLineLength;
    +    }
    +
    +    /**
    +     * This reads and returns the next record from the file. Usually, it 
is a line but in case the new line characters
    +     * are not present, the length of the content depends on the 
column-widths setting.
    +     *
    +     * @return an array of values in the next line, or null if the end of 
the file has been reached.
    +     * @throws IllegalStateException if an exception occurs while reading 
the file.
    +     */
    +    public String[] readLine() throws IllegalStateException {
    +        try {
    +            if (shouldSkipHeader()) {
    +                skipHeader();
    +            }
    +
    +            return getValues();
    +        } catch (IOException e) {
    +            throw new IllegalStateException(e);
    +        }
    +    }
    +
    +    private boolean shouldSkipHeader() {
    +        return (_headerPresent && !_headerSkipped);
    +    }
    +
    +    private String[] getValues() throws IOException {
    +        final List<String> values = new ArrayList<>();
    +        final String singleRecordData = readSingleRecordData();
    +
    +        if (singleRecordData == null) {
    +            return null;
    +        }
    +
    +        processSingleRecordData(singleRecordData, values);
    +        String[] result = values.toArray(new String[values.size()]);
    +
    +        if (!_failOnInconsistentLineWidth && !_constantWidth) {
    +            result = correctResult(result);
    +        }
    +
    +        if (_failOnInconsistentLineWidth) {
    +            throwInconsistentValueException(singleRecordData, result, 
values.size());
    +        }
    +
    +        return result;
    +    }
    +
    +    private void throwInconsistentValueException(String singleRecordData, 
String[] result, int valuesSize) {
    +        InconsistentValueWidthException inconsistentValueException =
    +                buildInconsistentValueWidthException(singleRecordData, 
result, valuesSize);
    +
    +        if (inconsistentValueException != null) {
    +            throw inconsistentValueException;
    +        }
    +    }
    +
    +    private void processSingleRecordData(final String singleRecordData, 
final List<String> values) {
    +        StringBuilder nextValue = new StringBuilder();
    +        final CharacterIterator it = new 
StringCharacterIterator(singleRecordData);
    +        _valueIndex = 0;
    +
    +        for (char c = it.first(); c != CharacterIterator.DONE; c = 
it.next()) {
    +            processCharacter(c, nextValue, values, singleRecordData);
    +        }
    +
    +        if (nextValue.length() > 0) {
    +            addNewValueIfAppropriate(values, nextValue);
    +        }
    +    }
    +
    +    private String readSingleRecordData() throws IOException {
    +        if (isEOLAvailable()) {
    +            StringBuilder line = new StringBuilder();
    +            int ch;
    +
    +            for (ch = _stream.read(); !isEndingCharacter(ch); ch = 
_stream.read()) {
    +                line.append((char) ch);
    +            }
    +
    +            if (ch == CARRIAGE_RETURN) {
    +                readLineFeedIfFollows();
    +            }
    +
    +            return (line.length()) > 0 ? line.toString() : null;
    +        } else {
    +            byte[] buffer = new byte[_expectedLineLength];
    +            int bytesRead = _stream.read(buffer, 0, _expectedLineLength);
    +
    +            if (bytesRead < 0) {
    +                return null;
    +            }
    +
    +            return new String(buffer, _charsetName);
    +        }
    +    }
    +
    +    private void readLineFeedIfFollows() throws IOException {
    +        _stream.mark(1);
    +
    +        if (_stream.read() != LINE_FEED) {
    +            _stream.reset();
    +        }
    +    }
    +
    +    private boolean isEndingCharacter(int ch) {
    +        return (ch == CARRIAGE_RETURN || ch == LINE_FEED || ch == 
END_OF_STREAM);
    +    }
    +
    +    private boolean isEOLAvailable() {
    +        return _eolPresent;
    +    }
    +
    +    private void processCharacter(char c, StringBuilder nextValue, 
List<String> values, String recordData) {
    +        nextValue.append(c);
    +        final int valueWidth = getValueWidth(values, recordData);
    +
    +        if (nextValue.length() == valueWidth) {
    +            addNewValueIfAppropriate(values, nextValue);
    +            nextValue.setLength(0); // clear the buffer
    +
    +            if (_valueWidths != null) {
    +                _valueIndex = (_valueIndex + 1) % _valueWidths.length;
    +            }
    +        }
    +    }
    +
    +    private int getValueWidth(List<String> values, String recordData) {
    +        if (_constantWidth) {
    +            return _fixedValueWidth;
    +        } else {
    +            if (_valueIndex >= _valueWidths.length) {
    +                if (_failOnInconsistentLineWidth) {
    +                    String[] result = values.toArray(new 
String[values.size()]);
    +                    throw new InconsistentValueWidthException(result, 
recordData, _rowNumber + 1);
    +                } else {
    +                    return -1; // silently ignore the inconsistency
    +                }
    +            }
    +
    +            return _valueWidths[_valueIndex];
    +        }
    +    }
    +
    +    private void addNewValueIfAppropriate(List<String> values, 
StringBuilder nextValue) {
    +        if (_valueWidths != null) {
    +            if (values.size() < _valueWidths.length) {
    +                values.add(nextValue.toString().trim());
    +            }
    +        } else {
    +            values.add(nextValue.toString().trim());
    +        }
    +    }
    +
    +    private String[] correctResult(String[] result) {
    +        if (result.length != _valueWidths.length) {
    +            String[] correctedResult = new String[_valueWidths.length];
    +
    +            for (int i = 0; i < result.length && i < _valueWidths.length; 
i++) {
    +                correctedResult[i] = result[i];
    +            }
    +
    +            result = correctedResult;
    +        }
    +
    +        return result;
    +    }
    +
    +    private InconsistentValueWidthException 
buildInconsistentValueWidthException(String recordData, String[] result,
    --- End diff --
    
    This method seems backwards to me. It is called "build[something]" but will 
usually (in the good cases) return null. And then it is evaluated later on to 
be thrown if it is not null. Why this pattern is needed I don't know. Seems a 
much more normal way to do it was to have something like a 
"validate[something]" which will throw the exception if it is desirable.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

Reply via email to