Repository: metamodel Updated Branches: refs/heads/5.x 02397db0a -> c4788a272
METAMODEL-1102 Separated FixedWidthLineParser Closes apache/metamodel#114 Project: http://git-wip-us.apache.org/repos/asf/metamodel/repo Commit: http://git-wip-us.apache.org/repos/asf/metamodel/commit/250b12db Tree: http://git-wip-us.apache.org/repos/asf/metamodel/tree/250b12db Diff: http://git-wip-us.apache.org/repos/asf/metamodel/diff/250b12db Branch: refs/heads/5.x Commit: 250b12dbf5ed5f3620cdd693c67ffedc1e82ac1b Parents: 52c3daf Author: Claudia Pesu <claudia.p...@humaninference.com> Authored: Wed Jul 13 10:20:36 2016 +0200 Committer: Dennis Du Krøger <d...@hp23c.dk> Committed: Wed Jul 13 10:20:36 2016 +0200 ---------------------------------------------------------------------- CHANGES.md | 3 +- .../fixedwidth/FixedWidthLineParser.java | 121 ++++++++++++++++++ .../metamodel/fixedwidth/FixedWidthReader.java | 123 ++----------------- .../fixedwidth/FixedWidthLineParserTest.java | 66 ++++++++++ .../fixedwidth/FixedWidthReaderTest.java | 64 ++++++++-- .../src/test/resources/example_simple3.txt | 4 + 6 files changed, 260 insertions(+), 121 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/metamodel/blob/250b12db/CHANGES.md ---------------------------------------------------------------------- diff --git a/CHANGES.md b/CHANGES.md index 3ff4ca0..65223ac 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,7 +5,8 @@ * [METAMODEL-1086] - Fixed encoding issue when CsvDataContext is instantiated with InputStream. * [METAMODEL-1094] - Added support for Apache Cassandra version 3.x. * [METAMODEL-1093] - Close compiled ResultSets. - + * [METAMODEL-1102] - Separated FixedWidthLineParser. + ### Apache MetaModel 4.5.3 * [METAMODEL-235] - Fixed a bug related to handling of null or missing values in ElasticSearch using REST client. http://git-wip-us.apache.org/repos/asf/metamodel/blob/250b12db/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthLineParser.java ---------------------------------------------------------------------- diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthLineParser.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthLineParser.java new file mode 100644 index 0000000..3746333 --- /dev/null +++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthLineParser.java @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.metamodel.fixedwidth; + +import java.io.IOException; +import java.text.CharacterIterator; +import java.text.StringCharacterIterator; +import java.util.ArrayList; +import java.util.List; + +public class FixedWidthLineParser { + + private final int _expectedLineLength; + private volatile int _rowNumber; + private final FixedWidthConfiguration _configuration; + + public FixedWidthLineParser(FixedWidthConfiguration configuration, int expectedLineLength, int rowNumber) { + _configuration = configuration; + _expectedLineLength = expectedLineLength; _rowNumber = rowNumber; + } + + + public String[] parseLine(String line) throws IOException { + final List<String> values = new ArrayList<String>(); + int[] valueWidths = _configuration.getValueWidths(); + + if (line == null) { + return null; + } + + StringBuilder nextValue = new StringBuilder(); + + int valueIndex = 0; + + final CharacterIterator it = new StringCharacterIterator(line); + for (char c = it.first(); c != CharacterIterator.DONE; c = it + .next()) { + nextValue.append(c); + + final int valueWidth; + if (_configuration.isConstantValueWidth()) { + valueWidth = _configuration.getFixedValueWidth(); + } else { + if (valueIndex >= valueWidths.length) { + if (_configuration.isFailOnInconsistentLineWidth()) { + String[] result = values.toArray(new String[values + .size()]); + throw new InconsistentValueWidthException(result, + line, _rowNumber + 1); + } else { + // silently ignore the inconsistency + break; + } + } + valueWidth = _configuration.getValueWidth(valueIndex); + } + + if (nextValue.length() == valueWidth) { + // write the value + values.add(nextValue.toString().trim()); + nextValue = new StringBuilder(); + valueIndex++; + } + } + + if (nextValue.length() > 0) { + values.add(nextValue.toString().trim()); + } + + String[] result = values.toArray(new String[values.size()]); + + if (!_configuration.isFailOnInconsistentLineWidth() && ! _configuration.isConstantValueWidth()) { + if (result.length != valueWidths.length) { + String[] correctedResult = new String[valueWidths.length]; + for (int i = 0; i < result.length + && i < valueWidths.length; i++) { + correctedResult[i] = result[i]; + } + result = correctedResult; + } + } + + if (_configuration.isFailOnInconsistentLineWidth()) { + _rowNumber++; + if (_configuration.isConstantValueWidth()) { + if (line.length() % _configuration.getFixedValueWidth() != 0) { + throw new InconsistentValueWidthException(result, line, + _rowNumber); + } + } else { + if (result.length != values.size()) { + throw new InconsistentValueWidthException(result, line, + _rowNumber); + } + + if (line.length() != _expectedLineLength) { + throw new InconsistentValueWidthException(result, line, + _rowNumber); + } + } + } + + return result; +} +} http://git-wip-us.apache.org/repos/asf/metamodel/blob/250b12db/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java ---------------------------------------------------------------------- diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java index 40dc145..d7a18cf 100644 --- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java +++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java @@ -22,10 +22,6 @@ import java.io.BufferedReader; import java.io.Closeable; import java.io.IOException; import java.io.Reader; -import java.text.CharacterIterator; -import java.text.StringCharacterIterator; -import java.util.ArrayList; -import java.util.List; /** * Reader capable of separating values based on a fixed width setting. @@ -33,12 +29,7 @@ import java.util.List; final public class FixedWidthReader implements Closeable { private final BufferedReader _reader; - private final int _fixedValueWidth; - private final int[] _valueWidths; - private final boolean _failOnInconsistentLineWidth; - private final int expectedLineLength; - private final boolean constantWidth; - private volatile int _rowNumber; + private final FixedWidthLineParser _parser; public FixedWidthReader(Reader reader, int fixedValueWidth, boolean failOnInconsistentLineWidth) { @@ -49,13 +40,9 @@ final public class FixedWidthReader implements Closeable { public FixedWidthReader(BufferedReader reader, int fixedValueWidth, boolean failOnInconsistentLineWidth) { _reader = reader; - _fixedValueWidth = fixedValueWidth; - _failOnInconsistentLineWidth = failOnInconsistentLineWidth; - _rowNumber = 0; - _valueWidths = null; - - constantWidth = true; - expectedLineLength = -1; + final FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration( + FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, fixedValueWidth, failOnInconsistentLineWidth); + _parser = new FixedWidthLineParser(fixedWidthConfiguration, -1, 0); } public FixedWidthReader(Reader reader, int[] valueWidths, @@ -67,19 +54,16 @@ final public class FixedWidthReader implements Closeable { public FixedWidthReader(BufferedReader reader, int[] valueWidths, boolean failOnInconsistentLineWidth) { _reader = reader; - _fixedValueWidth = -1; - _valueWidths = valueWidths; - _failOnInconsistentLineWidth = failOnInconsistentLineWidth; - _rowNumber = 0; - - constantWidth = false; + int fixedValueWidth = -1; int expectedLineLength = 0; - if (_fixedValueWidth == -1) { - for (int i = 0; i < _valueWidths.length; i++) { - expectedLineLength += _valueWidths[i]; + if (fixedValueWidth == -1) { + for (int i = 0; i < valueWidths.length; i++) { + expectedLineLength += valueWidths[i]; } } - this.expectedLineLength = expectedLineLength; + final FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration( + FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, valueWidths, failOnInconsistentLineWidth); + _parser = new FixedWidthLineParser(fixedWidthConfiguration, expectedLineLength, 0); } @@ -96,95 +80,12 @@ final public class FixedWidthReader implements Closeable { String line; try { line = _reader.readLine(); - return readLine(line); + return _parser.parseLine(line); } catch (IOException e) { throw new IllegalStateException(e); } } - public String[] readLine(String line) throws IOException { - - - final List<String> values = new ArrayList<String>(); - - if (line == null) { - return null; - } - - StringBuilder nextValue = new StringBuilder(); - - int valueIndex = 0; - - final CharacterIterator it = new StringCharacterIterator(line); - for (char c = it.first(); c != CharacterIterator.DONE; c = it - .next()) { - nextValue.append(c); - - final int valueWidth; - if (constantWidth) { - valueWidth = _fixedValueWidth; - } else { - if (valueIndex >= _valueWidths.length) { - if (_failOnInconsistentLineWidth) { - String[] result = values.toArray(new String[values - .size()]); - throw new InconsistentValueWidthException(result, - line, _rowNumber + 1); - } else { - // silently ignore the inconsistency - break; - } - } - valueWidth = _valueWidths[valueIndex]; - } - - if (nextValue.length() == valueWidth) { - // write the value - values.add(nextValue.toString().trim()); - nextValue = new StringBuilder(); - valueIndex++; - } - } - - if (nextValue.length() > 0) { - values.add(nextValue.toString().trim()); - } - - String[] result = values.toArray(new String[values.size()]); - - if (!_failOnInconsistentLineWidth && !constantWidth) { - if (result.length != _valueWidths.length) { - String[] correctedResult = new String[_valueWidths.length]; - for (int i = 0; i < result.length - && i < _valueWidths.length; i++) { - correctedResult[i] = result[i]; - } - result = correctedResult; - } - } - - if (_failOnInconsistentLineWidth) { - _rowNumber++; - if (constantWidth) { - if (line.length() % _fixedValueWidth != 0) { - throw new InconsistentValueWidthException(result, line, - _rowNumber); - } - } else { - if (result.length != values.size()) { - throw new InconsistentValueWidthException(result, line, - _rowNumber); - } - - if (line.length() != expectedLineLength) { - throw new InconsistentValueWidthException(result, line, - _rowNumber); - } - } - } - - return result; - } @Override public void close() throws IOException { http://git-wip-us.apache.org/repos/asf/metamodel/blob/250b12db/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthLineParserTest.java ---------------------------------------------------------------------- diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthLineParserTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthLineParserTest.java new file mode 100644 index 0000000..50d5097 --- /dev/null +++ b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthLineParserTest.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.metamodel.fixedwidth; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.Arrays; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class FixedWidthLineParserTest { + + @Rule + public ExpectedException exception = ExpectedException.none(); + + @Test + public void testParser() throws IOException { + int[] widths = new int[] { 8, 9 }; + FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration(FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, widths, false); + final FixedWidthLineParser parser = new FixedWidthLineParser(fixedWidthConfiguration, 17, 0); + + final String lineToParse1 = "greeting greeter "; + final String[] line = parser.parseLine(lineToParse1); + assertEquals("[greeting, greeter]", Arrays.asList(line).toString()); + + final String lineToParse2="howdy partner"; + String[] line2 = parser.parseLine(lineToParse2); + assertEquals("[howdy, partner]", Arrays.asList(line2).toString()); + + final String lineToParse3 ="hi there "; + String[] line3 = parser.parseLine(lineToParse3); + assertEquals("[hi, there]", Arrays.asList(line3).toString()); + + } + + @Test + public void testParserFailInconsistentRowException() throws IOException { + int[] widths = new int[] { 8, 5 }; + FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration(FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, widths, true); + final FixedWidthLineParser parser = new FixedWidthLineParser(fixedWidthConfiguration, 17, 0); + + final String lineToParse1 = "greeting greeter "; + exception.expect(InconsistentValueWidthException.class); + @SuppressWarnings("unused") + final String[] line = parser.parseLine(lineToParse1); + } +} http://git-wip-us.apache.org/repos/asf/metamodel/blob/250b12db/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java ---------------------------------------------------------------------- diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java index dd45900..4d11f0e 100644 --- a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java +++ b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java @@ -26,12 +26,17 @@ import java.io.FileReader; import java.io.IOException; import java.util.Arrays; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; public class FixedWidthReaderTest { + @Rule + public final ExpectedException exception = ExpectedException.none(); + @Test - public void testBufferedReader() throws IOException { + public void testBufferedReader1() throws IOException { final File file = new File("src/test/resources/example_simple1.txt"); final BufferedReader reader = new BufferedReader(new FileReader(file)); int[] widths = new int[] { 8, 9 }; @@ -44,14 +49,55 @@ public class FixedWidthReaderTest { assertEquals("[hi, there]", Arrays.asList(line3).toString()); } } - + @Test - public void testNoBufferReader() throws IOException { - int[] widths = new int[] { 8, 9 }; - final String lineToBeRead = "greeting greeter "; - @SuppressWarnings("resource") - final FixedWidthReader fixedWidthReader = new FixedWidthReader(null, widths, false); - final String[] line = fixedWidthReader.readLine(lineToBeRead); - assertEquals("[greeting, greeter]", Arrays.asList(line).toString()); + public void testBufferedReader2() throws IOException { + final File file = new File("src/test/resources/example_simple2.txt"); + final BufferedReader reader = new BufferedReader(new FileReader(file)); + int[] widths = new int[] {1, 8, 9 }; + try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, widths, false)) { + final String[] line1 = fixedWidthReader.readLine(); + assertEquals("[i, greeting, greeter]", Arrays.asList(line1).toString()); + final String[] line2 = fixedWidthReader.readLine(); + assertEquals("[1, hello, world]", Arrays.asList(line2).toString()); + final String[] line3 = fixedWidthReader.readLine(); + assertEquals("[2, hi, there]", Arrays.asList(line3).toString()); + } } + + @Test + public void testBufferedReader3() throws IOException { + final File file = new File("src/test/resources/example_simple3.txt"); + final BufferedReader reader = new BufferedReader(new FileReader(file)); + try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, 5, false)) { + final String[] line1 = fixedWidthReader.readLine(); + assertEquals("[hello]", Arrays.asList(line1).toString()); + final String[] line2 = fixedWidthReader.readLine(); + assertEquals("[world]", Arrays.asList(line2).toString()); + final String[] line3 = fixedWidthReader.readLine(); + assertEquals("[howdy]", Arrays.asList(line3).toString()); + final String[] line4 = fixedWidthReader.readLine(); + assertEquals("[ther]", Arrays.asList(line4).toString()); + } + } + + @Test + public void testBufferedReaderFailOnInconsistentRows() throws IOException { + final File file = new File("src/test/resources/example_simple3.txt"); + final BufferedReader reader = new BufferedReader(new FileReader(file)); + try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, 5, true)) { + final String[] line1 = fixedWidthReader.readLine(); + assertEquals("[hello]", Arrays.asList(line1).toString()); + final String[] line2 = fixedWidthReader.readLine(); + assertEquals("[world]", Arrays.asList(line2).toString()); + final String[] line3 = fixedWidthReader.readLine(); + assertEquals("[howdy]", Arrays.asList(line3).toString()); + + exception.expect(InconsistentValueWidthException.class); + @SuppressWarnings("unused") + final String[] line4 = fixedWidthReader.readLine(); + } + } + + } http://git-wip-us.apache.org/repos/asf/metamodel/blob/250b12db/fixedwidth/src/test/resources/example_simple3.txt ---------------------------------------------------------------------- diff --git a/fixedwidth/src/test/resources/example_simple3.txt b/fixedwidth/src/test/resources/example_simple3.txt new file mode 100644 index 0000000..a9e1cf3 --- /dev/null +++ b/fixedwidth/src/test/resources/example_simple3.txt @@ -0,0 +1,4 @@ +hello +world +howdy +ther \ No newline at end of file