Github user jhorcicka commented on a diff in the pull request:
https://github.com/apache/metamodel/pull/103#discussion_r73650071
--- Diff:
fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
---
@@ -18,78 +18,235 @@
*/
package org.apache.metamodel.fixedwidth;
-import java.io.BufferedReader;
+import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.IOException;
-import java.io.Reader;
+import java.io.InputStream;
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.ArrayList;
+import java.util.List;
/**
* Reader capable of separating values based on a fixed width setting.
*/
-final public class FixedWidthReader implements Closeable {
-
- private final BufferedReader _reader;
- private final FixedWidthLineParser _parser;
-
- public FixedWidthReader(Reader reader, int fixedValueWidth,
- boolean failOnInconsistentLineWidth) {
- this(new BufferedReader(reader), fixedValueWidth,
- failOnInconsistentLineWidth);
- }
-
- public FixedWidthReader(BufferedReader reader, int fixedValueWidth,
- boolean failOnInconsistentLineWidth) {
- _reader = reader;
- final FixedWidthConfiguration fixedWidthConfiguration = new
FixedWidthConfiguration(
- FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null,
fixedValueWidth, failOnInconsistentLineWidth);
- _parser = new FixedWidthLineParser(fixedWidthConfiguration, -1, 0);
- }
-
- public FixedWidthReader(Reader reader, int[] valueWidths,
- boolean failOnInconsistentLineWidth) {
- this(new BufferedReader(reader), valueWidths,
- failOnInconsistentLineWidth);
- }
-
- public FixedWidthReader(BufferedReader reader, int[] valueWidths,
- boolean failOnInconsistentLineWidth) {
- _reader = reader;
- int fixedValueWidth = -1;
- int expectedLineLength = 0;
- if (fixedValueWidth == -1) {
- for (int i = 0; i < valueWidths.length; i++) {
- expectedLineLength += valueWidths[i];
- }
- }
- final FixedWidthConfiguration fixedWidthConfiguration = new
FixedWidthConfiguration(
- FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null,
valueWidths, failOnInconsistentLineWidth);
- _parser = new FixedWidthLineParser(fixedWidthConfiguration,
expectedLineLength, 0);
- }
-
-
- /***
- * Reads the next line in the file.
- *
- * @return an array of values in the next line, or null if the end of
the
- * file has been reached.
- *
- * @throws IllegalStateException
- * if an exception occurs while reading the file.
- */
- public String[] readLine() throws IllegalStateException {
- String line;
+class FixedWidthReader implements Closeable {
+ private static final int END_OF_STREAM = -1;
+ private static final int LINE_FEED = '\n';
+ private static final int CARRIAGE_RETURN = '\r';
+
+ protected final String _charsetName;
+ private final int _fixedValueWidth;
+ private final int[] _valueWidths;
+ private int _valueIndex = 0;
+ private final boolean _failOnInconsistentLineWidth;
+ private final boolean _constantWidth;
+ private volatile int _rowNumber;
+ protected final BufferedInputStream _stream;
+ protected final int _expectedLineLength;
+
+ public FixedWidthReader(InputStream stream, String charsetName, int
fixedValueWidth,
+ boolean failOnInconsistentLineWidth) {
+ this(new BufferedInputStream(stream), charsetName,
fixedValueWidth, failOnInconsistentLineWidth);
+ }
+
+ private FixedWidthReader(BufferedInputStream stream, String
charsetName, int fixedValueWidth,
+ boolean failOnInconsistentLineWidth) {
+ _stream = stream;
+ _charsetName = charsetName;
+ _fixedValueWidth = fixedValueWidth;
+ _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
+ _rowNumber = 0;
+ _valueWidths = null;
+ _constantWidth = true;
+ _expectedLineLength = -1;
+ }
+
+ public FixedWidthReader(InputStream stream, String charsetName, int[]
valueWidths,
+ boolean failOnInconsistentLineWidth) {
+ this(new BufferedInputStream(stream), charsetName, valueWidths,
failOnInconsistentLineWidth);
+ }
+
+ FixedWidthReader(BufferedInputStream stream, String charsetName, int[]
valueWidths,
+ boolean failOnInconsistentLineWidth) {
+ _stream = stream;
+ _charsetName = charsetName;
+ _fixedValueWidth = -1;
+ _valueWidths = valueWidths;
+ _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
+ _rowNumber = 0;
+ _constantWidth = false;
+ int expectedLineLength = 0;
+
+ for (final int _valueWidth : _valueWidths) {
+ expectedLineLength += _valueWidth;
+ }
+
+ _expectedLineLength = expectedLineLength;
+ }
+
+ /**
+ * This reads and returns the next record from the file. Usually, it
is a line but in case the new line characters
+ * are not present, the length of the content depends on the
column-widths setting.
+ *
+ * @return an array of values in the next line, or null if the end of
the file has been reached.
+ * @throws IllegalStateException if an exception occurs while reading
the file.
+ */
+ public String[] readLine() throws IllegalStateException {
try {
- line = _reader.readLine();
- return _parser.parseLine(line);
+ beforeReadLine();
+ _rowNumber++;
+ return getValues();
} catch (IOException e) {
throw new IllegalStateException(e);
}
- }
-
+ }
+
+ /**
+ * Empty hook that enables special behavior in sub-classed readers (by
overriding this method).
+ */
+ protected void beforeReadLine() {
+ return;
+ }
+
+ private String[] getValues() throws IOException {
+ final List<String> values = new ArrayList<>();
+ final String singleRecordData = readSingleRecordData();
+
+ if (singleRecordData == null) {
+ return null;
+ }
+
+ processSingleRecordData(singleRecordData, values);
+ String[] result = values.toArray(new String[values.size()]);
+
+ if (!_failOnInconsistentLineWidth && !_constantWidth) {
+ result = correctResult(result);
+ }
+
+ validateConsistentValue(singleRecordData, result, values.size());
+
+ return result;
+ }
+
+ private void validateConsistentValue(String recordData, String[]
result, int valuesSize) {
+ if (!_failOnInconsistentLineWidth) {
+ return;
+ }
+
+ InconsistentValueWidthException inconsistentValueException = null;
+
+ if (_constantWidth) {
+ if (recordData.length() % _fixedValueWidth != 0) {
+ inconsistentValueException = new
InconsistentValueWidthException(result, recordData, _rowNumber);
+ }
+ } else if (result.length != valuesSize || recordData.length() !=
_expectedLineLength) {
+ inconsistentValueException = new
InconsistentValueWidthException(result, recordData, _rowNumber);
+ }
+
+ if (inconsistentValueException != null) {
+ throw inconsistentValueException;
+ }
+ }
+
+ private void processSingleRecordData(final String singleRecordData,
final List<String> values) {
+ StringBuilder nextValue = new StringBuilder();
+ final CharacterIterator it = new
StringCharacterIterator(singleRecordData);
+ _valueIndex = 0;
+
+ for (char c = it.first(); c != CharacterIterator.DONE; c =
it.next()) {
+ processCharacter(c, nextValue, values, singleRecordData);
+ }
+
+ if (nextValue.length() > 0) {
+ addNewValueIfAppropriate(values, nextValue);
+ }
+ }
+
+ String readSingleRecordData() throws IOException {
+ StringBuilder line = new StringBuilder();
+ int ch;
+
+ for (ch = _stream.read(); !isEndingCharacter(ch); ch =
_stream.read()) {
+ line.append((char) ch);
+ }
+
+ if (ch == CARRIAGE_RETURN) {
+ readLineFeedIfFollows();
+ }
+
+ return (line.length()) > 0 ? line.toString() : null;
--- End diff --
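Here, the returned String is built by casting each byte read from the
stream directly to a char, so the configured charset is never applied.
This is a bug for input containing diacritics. The PR with the fix is here: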
https://github.com/apache/metamodel/pull/121
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---