Author: henning Date: Sun Sep 11 10:26:30 2005 New Revision: 280154 URL: http://svn.apache.org/viewcvs?rev=280154&view=rev Log: - rework the readColumnNames() method. - cover remaining methods with unit tests.
Modified: jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/java/org/apache/turbine/util/parser/CSVParser.java jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/java/org/apache/turbine/util/parser/DataStreamParser.java jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/test/org/apache/turbine/util/parser/CSVParserTest.java Modified: jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/java/org/apache/turbine/util/parser/CSVParser.java URL: http://svn.apache.org/viewcvs/jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/java/org/apache/turbine/util/parser/CSVParser.java?rev=280154&r1=280153&r2=280154&view=diff ============================================================================== --- jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/java/org/apache/turbine/util/parser/CSVParser.java (original) +++ jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/java/org/apache/turbine/util/parser/CSVParser.java Sun Sep 11 10:26:30 2005 @@ -42,6 +42,7 @@ * * @author <a href="mailto:[EMAIL PROTECTED]">Sean Legassick</a> * @author <a href="mailto:[EMAIL PROTECTED]">Martin van den Bemt</a> + * @author <a href="mailto:[EMAIL PROTECTED]">Henning P. 
Schmiedehausen</a> * @version $Id$ */ public class CSVParser extends DataStreamParser Modified: jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/java/org/apache/turbine/util/parser/DataStreamParser.java URL: http://svn.apache.org/viewcvs/jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/java/org/apache/turbine/util/parser/DataStreamParser.java?rev=280154&r1=280153&r2=280154&view=diff ============================================================================== --- jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/java/org/apache/turbine/util/parser/DataStreamParser.java (original) +++ jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/java/org/apache/turbine/util/parser/DataStreamParser.java Sun Sep 11 10:26:30 2005 @@ -23,13 +23,11 @@ import java.io.StreamTokenizer; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - /** * DataStreamParser is used to parse a stream with a fixed format and * generate ValueParser objects which can be used to extract the values @@ -51,18 +49,11 @@ * * @author <a href="mailto:[EMAIL PROTECTED]">Sean Legassick</a> * @author <a href="mailto:[EMAIL PROTECTED]">Martin van den Bemt</a> + * @author <a href="mailto:[EMAIL PROTECTED]">Henning P. Schmiedehausen</a> * @version $Id$ */ public abstract class DataStreamParser implements Iterator { - /** Logging */ - private static Log log = LogFactory.getLog(DataStreamParser.class); - - /** - * Conditional compilation flag. - */ - private static final boolean DEBUG = false; - /** * The constant for empty fields */ @@ -71,7 +62,7 @@ /** * The list of column names. */ - private List columnNames; + private List columnNames = Collections.EMPTY_LIST; /** * The stream tokenizer for reading values from the input reader. 
@@ -110,19 +101,21 @@ public DataStreamParser(Reader in, List columnNames, String characterEncoding) { - this.columnNames = columnNames; + setColumnNames(columnNames); + this.characterEncoding = characterEncoding; if (this.characterEncoding == null) { - // try and get the characterEncoding from the reader - this.characterEncoding = "US-ASCII"; - try + if (in instanceof InputStreamReader) { this.characterEncoding = ((InputStreamReader) in).getEncoding(); } - catch (ClassCastException e) + + if (this.characterEncoding == null) { + // try and get the characterEncoding from the reader + this.characterEncoding = "US-ASCII"; } } @@ -169,7 +162,19 @@ */ public void setColumnNames(List columnNames) { - this.columnNames = columnNames; + if (columnNames != null) + { + this.columnNames = columnNames; + } + } + + /** + * get the list of column names. + * + */ + public List getColumnNames() + { + return columnNames; } /** @@ -182,37 +187,52 @@ public void readColumnNames() throws IOException { - columnNames = new ArrayList(); - int lastTtype = 0; - int fieldCounter = 1; - - neverRead = false; - tokenizer.nextToken(); - while (tokenizer.ttype == StreamTokenizer.TT_WORD || tokenizer.ttype == StreamTokenizer.TT_EOL - || tokenizer.ttype == '"' || tokenizer.ttype == fieldSeparator) + List columnNames = new ArrayList(); + int fieldCounter = 0; + + if (hasNextRow()) { - if (tokenizer.ttype != fieldSeparator && tokenizer.ttype != StreamTokenizer.TT_EOL) - { - columnNames.add(tokenizer.sval); - fieldCounter++; - } - else if (tokenizer.ttype == fieldSeparator && lastTtype == fieldSeparator) - { - // we have an empty field name - columnNames.add(EMPTYFIELDNAME + fieldCounter); - fieldCounter++; - } - else if (lastTtype == fieldSeparator && tokenizer.ttype == StreamTokenizer.TT_EOL) - { - columnNames.add(EMPTYFIELDNAME + fieldCounter); - break; - } - else if (tokenizer.ttype == StreamTokenizer.TT_EOL) + String colName = null; + boolean foundEol = false; + + while(!foundEol) { - break; + 
tokenizer.nextToken(); + + if (tokenizer.ttype == '"' + || tokenizer.ttype == StreamTokenizer.TT_WORD) + { + // tokenizer.ttype is either '"' or TT_WORD + colName = tokenizer.sval; + } + else + { + // fieldSeparator, EOL or EOF + fieldCounter++; + + if (colName == null) + { + colName = EMPTYFIELDNAME + fieldCounter; + } + + columnNames.add(colName); + colName = null; + } + + // EOL and EOF are checked independently from existing fields. + if (tokenizer.ttype == StreamTokenizer.TT_EOL) + { + foundEol = true; + } + else if (tokenizer.ttype == StreamTokenizer.TT_EOF) + { + // Keep this token in the tokenizer for hasNext() + tokenizer.pushBack(); + foundEol = true; + } } - lastTtype = tokenizer.ttype; - tokenizer.nextToken(); + + setColumnNames(columnNames); } } @@ -323,7 +343,7 @@ } catch (IOException e) { - log.error("IOException in CSVParser.hasNext", e); + throw new RuntimeException(e); } return hasNext; @@ -347,8 +367,7 @@ } catch (IOException e) { - log.error("IOException in CSVParser.next", e); - throw new NoSuchElementException(); + throw new RuntimeException(e); } return nextRow; Modified: jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/test/org/apache/turbine/util/parser/CSVParserTest.java URL: http://svn.apache.org/viewcvs/jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/test/org/apache/turbine/util/parser/CSVParserTest.java?rev=280154&r1=280153&r2=280154&view=diff ============================================================================== --- jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/test/org/apache/turbine/util/parser/CSVParserTest.java (original) +++ jakarta/turbine/core/branches/TURBINE_2_3_BRANCH/src/test/org/apache/turbine/util/parser/CSVParserTest.java Sun Sep 11 10:26:30 2005 @@ -16,10 +16,17 @@ * limitations under the License. 
*/ +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; import java.io.Reader; import java.io.StringReader; import java.util.Arrays; +import java.util.Iterator; import java.util.List; +import java.util.NoSuchElementException; import junit.framework.TestSuite; @@ -65,6 +72,8 @@ ValueParser vp = (ValueParser) p.next(); + assertEquals("Wrong character encoding set", "UTF-8", vp.getCharacterEncoding()); + assertFalse(p.hasNext()); for (int i = 0; i < fields.length; i ++) @@ -479,6 +488,432 @@ assertFalse(p.hasNext()); assertEquals("Value does not match", "\"", vp.getString(fields[0])); + } + + public void testExceptions() + { + String readLine = "0,1,2,3,4,5,6,7,8,9\n"; + + String [] fields = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "null", }; + + Reader myReader = new StringReader(readLine); + + List fieldNames = Arrays.asList(fields); + + DataStreamParser p = new CSVParser(myReader, fieldNames, "UTF-8"); + + assertTrue(p.hasNext()); + + p.next(); + + try + { + p.remove(); + fail("remove() succeeded!"); + } + catch (Exception e) + { + assertEquals("Wrong Exception thrown", UnsupportedOperationException.class, e.getClass()); + } + + assertFalse(p.hasNext()); + + try + { + p.next(); + fail("next() succeeded!"); + } + catch (Exception e) + { + assertEquals("Wrong Exception thrown", NoSuchElementException.class, e.getClass()); + } + } + + public void testEncodingFromReader() + { + String readLine = "0,1,2,3,4,5,6,7,8,9\n"; + + String [] fields = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "null", }; + int [] values = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0 }; + + Reader myReader = new StringReader(readLine); + + List fieldNames = Arrays.asList(fields); + + DataStreamParser p = new CSVParser(myReader, fieldNames, null); + + assertTrue(p.hasNext()); + + ValueParser vp = (ValueParser) p.next(); + + 
assertEquals("Wrong character encoding set", "US-ASCII", vp.getCharacterEncoding()); + + assertFalse(p.hasNext()); + + for (int i = 0; i < fields.length; i ++) + { + assertEquals("Value does not match", Integer.toString(values[i]), vp.getString(fields[i])); + assertEquals("Value does not match", values[i], vp.getInt(fields[i])); + } + } + + public void testEncodingFromStream() + throws Exception + { + String readLine = "0,1,2,3,4,5,6,7,8,9\n"; + + String [] fields = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "null", }; + int [] values = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0 }; + + File myFile = File.createTempFile("turbine", null, null); + + assertNotNull(myFile); + + myFile.deleteOnExit(); + + FileOutputStream fos = new FileOutputStream(myFile); + + assertNotNull(fos); + + OutputStreamWriter osw = new OutputStreamWriter(fos, "ISO-8859-1"); + assertNotNull(osw); + + osw.write(readLine, 0, readLine.length()); + osw.flush(); + fos.flush(); + osw.close(); + fos.close(); + + FileInputStream fis = new FileInputStream(myFile); + assertNotNull(fis); + + InputStreamReader myReader = new InputStreamReader(fis, "ISO-8859-1"); + assertNotNull(myReader); + + List fieldNames = Arrays.asList(fields); + + DataStreamParser p = new CSVParser(myReader, fieldNames, null); + + assertTrue(p.hasNext()); + + ValueParser vp = (ValueParser) p.next(); + + assertEquals("Wrong character encoding set", myReader.getEncoding(), vp.getCharacterEncoding()); + + assertFalse(p.hasNext()); + + for (int i = 0; i < fields.length; i ++) + { + assertEquals("Value does not match", Integer.toString(values[i]), vp.getString(fields[i])); + assertEquals("Value does not match", values[i], vp.getInt(fields[i])); + } + } + + public void testSetColumnNames() + { + String readLine = "0,1,2,3,4,5,6,7,8,9\n"; + + String [] fields = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "null", }; + int [] values = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0 }; + + Reader 
myReader = new StringReader(readLine); + + DataStreamParser p = new CSVParser(myReader, null, null); + + assertTrue(p.hasNext()); + + p.setColumnNames(Arrays.asList(fields)); + + ValueParser vp = (ValueParser) p.next(); + + assertEquals("Wrong character encoding set", "US-ASCII", vp.getCharacterEncoding()); + + assertFalse(p.hasNext()); + + for (int i = 0; i < fields.length; i ++) + { + assertEquals("Value does not match", Integer.toString(values[i]), vp.getString(fields[i])); + assertEquals("Value does not match", values[i], vp.getInt(fields[i])); + } + } + + public void testEmptyColumnNames() + throws Exception + { + String readLine = + "eins,zwei,drei,vier,fuenf,sechs,sieben,acht,neun,null\n" + + "0,1,2,3,4,5,6,7,8,9\n"; + + String [] fields = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "null", }; + + Reader myReader = new StringReader(readLine); + + List fieldNames = Arrays.asList(fields); + + DataStreamParser p = new CSVParser(myReader, null, "UTF-8"); + + assertNotNull(p.getColumnNames()); + assertEquals("Number of columns is not 0", 0, p.getColumnNames().size()); + + p.setColumnNames(fieldNames); + assertNotNull(p.getColumnNames()); + assertEquals("Number of columns is not the number of set fields", fieldNames.size(), p.getColumnNames().size()); + + int cnt = 0; + for (Iterator it = p.getColumnNames().iterator(); it.hasNext(); ) + { + assertEquals("Column name does not match", ((String) it.next()), fields[cnt++]); + } + } + + public void testReadColumnNames() + throws Exception + { + String readLine = + "eins,zwei,drei,vier,fuenf,sechs,sieben,acht,neun,null\n" + + "0,1,2,3,4,5,6,7,8,9\n"; + + String [] fields = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "null", }; + int [] values = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0 }; + + Reader myReader = new StringReader(readLine); + + List fieldNames = Arrays.asList(fields); + + DataStreamParser p = new CSVParser(myReader, null, "UTF-8"); + + 
assertTrue(p.hasNext()); + + p.readColumnNames(); + + assertEquals("Number of columns is not the number of set fields", fieldNames.size(), p.getColumnNames().size()); + int cnt = 0; + for (Iterator it = p.getColumnNames().iterator(); it.hasNext(); ) + { + assertEquals("Column name does not match", ((String) it.next()), fields[cnt++]); + } + + assertTrue(p.hasNext()); + + ValueParser vp = (ValueParser) p.next(); + + assertEquals("Wrong character encoding set", "UTF-8", vp.getCharacterEncoding()); + + assertFalse(p.hasNext()); + + for (int i = 0; i < fields.length; i ++) + { + assertEquals("Value does not match", Integer.toString(values[i]), vp.getString(fields[i])); + assertEquals("Value does not match", values[i], vp.getInt(fields[i])); + } + } + + public void testJustReadColumnNames() + throws Exception + { + String readLine = + "eins,zwei,drei,vier,fuenf,sechs,sieben,acht,neun,null\n"; + + String [] fields = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "null", }; + + Reader myReader = new StringReader(readLine); + + List fieldNames = Arrays.asList(fields); + + DataStreamParser p = new CSVParser(myReader, null, "UTF-8"); + + assertTrue(p.hasNext()); + + p.readColumnNames(); + + assertEquals("Number of columns is not the number of set fields", fieldNames.size(), p.getColumnNames().size()); + int cnt = 0; + for (Iterator it = p.getColumnNames().iterator(); it.hasNext(); ) + { + assertEquals("Column name does not match", ((String) it.next()), fields[cnt++]); + } + + assertFalse(p.hasNext()); + } + + public void testJustReadColumnNamesEOF() + throws Exception + { + String readLine = + "eins,zwei,drei,vier,fuenf,sechs,sieben,acht,neun,null"; + + String [] fields = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "null", }; + + Reader myReader = new StringReader(readLine); + + List fieldNames = Arrays.asList(fields); + + DataStreamParser p = new CSVParser(myReader, null, "UTF-8"); + + assertTrue(p.hasNext()); 
+ + p.readColumnNames(); + + assertEquals("Number of columns is not the number of set fields", fieldNames.size(), p.getColumnNames().size()); + int cnt = 0; + for (Iterator it = p.getColumnNames().iterator(); it.hasNext(); ) + { + assertEquals("Column name does not match", ((String) it.next()), fields[cnt++]); + } + + assertFalse(p.hasNext()); + } + + public void testJustReadColumnNamesQuoted() + throws Exception + { + String readLine = + "\"eins\",\"zwei\",\"drei\",\"vier\",\"fuenf\",\"sechs\",\"sieben\",\"acht\",\"neun\",\"null\"\n"; + + String [] fields = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "null", }; + + Reader myReader = new StringReader(readLine); + + List fieldNames = Arrays.asList(fields); + + DataStreamParser p = new CSVParser(myReader, null, "UTF-8"); + + assertTrue(p.hasNext()); + + p.readColumnNames(); + + assertEquals("Number of columns is not the number of set fields", fieldNames.size(), p.getColumnNames().size()); + int cnt = 0; + for (Iterator it = p.getColumnNames().iterator(); it.hasNext(); ) + { + assertEquals("Column name does not match", ((String) it.next()), fields[cnt++]); + } + + assertFalse(p.hasNext()); + } + + public void testJustReadColumnNamesQuotedEOF() + throws Exception + { + String readLine = + "\"eins\",\"zwei\",\"drei\",\"vier\",\"fuenf\",\"sechs\",\"sieben\",\"acht\",\"neun\",\"null\""; + + String [] fields = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "null", }; + + Reader myReader = new StringReader(readLine); + + List fieldNames = Arrays.asList(fields); + + DataStreamParser p = new CSVParser(myReader, null, "UTF-8"); + + assertTrue(p.hasNext()); + + p.readColumnNames(); + + assertEquals("Number of columns is not the number of set fields", fieldNames.size(), p.getColumnNames().size()); + int cnt = 0; + for (Iterator it = p.getColumnNames().iterator(); it.hasNext(); ) + { + assertEquals("Column name does not match", ((String) it.next()), fields[cnt++]); 
+ } + + assertFalse(p.hasNext()); + } + public void testJustReadColumnNamesQuotedMissing() + throws Exception + { + String readLine = + "\"eins\",\"zwei\",\"drei\",\"vier\",\"fuenf\",\"sechs\",\"sieben\",\"acht\",\"neun\",\"null\n"; + + String [] fields = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "null", }; + + Reader myReader = new StringReader(readLine); + + List fieldNames = Arrays.asList(fields); + + DataStreamParser p = new CSVParser(myReader, null, "UTF-8"); + + assertTrue(p.hasNext()); + + p.readColumnNames(); + + assertEquals("Number of columns is not the number of set fields", fieldNames.size(), p.getColumnNames().size()); + int cnt = 0; + for (Iterator it = p.getColumnNames().iterator(); it.hasNext(); ) + { + assertEquals("Column name does not match", ((String) it.next()), fields[cnt++]); + } + + assertFalse(p.hasNext()); + } + + public void testColumnNamesMissing() + throws Exception + { + String readLine = + "eins,zwei,drei,vier,,sechs,,acht,neun,\n"; + + String [] fields = { "eins", "zwei", "drei", "vier", "UNKNOWNFIELD5", "sechs", "UNKNOWNFIELD7", "acht", "neun", "UNKNOWNFIELD10", }; + + Reader myReader = new StringReader(readLine); + + List fieldNames = Arrays.asList(fields); + + DataStreamParser p = new CSVParser(myReader, null, "UTF-8"); + + assertTrue(p.hasNext()); + + p.readColumnNames(); + + assertEquals("Number of columns is not the number of set fields", fieldNames.size(), p.getColumnNames().size()); + int cnt = 0; + for (Iterator it = p.getColumnNames().iterator(); it.hasNext(); ) + { + assertEquals("Column name does not match", ((String) it.next()), fields[cnt++]); + } + + assertFalse(p.hasNext()); + } + + public void testAllColumnNamesMissing() + throws Exception + { + String readLine = + ",,,\n0,1,2,3,4,5,6,7,8,9\n"; + + String [] fields = { "UNKNOWNFIELD1", "UNKNOWNFIELD2", "UNKNOWNFIELD3", "UNKNOWNFIELD4", }; + int [] values = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0 }; + + Reader myReader = new 
StringReader(readLine); + + List fieldNames = Arrays.asList(fields); + + DataStreamParser p = new CSVParser(myReader, null, "UTF-8"); + + assertTrue(p.hasNext()); + + p.readColumnNames(); + + assertEquals("Number of columns is not the number of set fields", fieldNames.size(), p.getColumnNames().size()); + int cnt = 0; + for (Iterator it = p.getColumnNames().iterator(); it.hasNext(); ) + { + assertEquals("Column name does not match", ((String) it.next()), fields[cnt++]); + } + + assertTrue(p.hasNext()); + + ValueParser vp = (ValueParser) p.next(); + + assertEquals("Wrong character encoding set", "UTF-8", vp.getCharacterEncoding()); + + assertFalse(p.hasNext()); + + for (int i = 0; i < fields.length; i ++) + { + assertEquals("Value does not match", Integer.toString(values[i]), vp.getString(fields[i])); + assertEquals("Value does not match", values[i], vp.getInt(fields[i])); + } } } --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]