Rework CSVParser wrapper and make setup RFC 4180 compliant. Project: http://git-wip-us.apache.org/repos/asf/jena/repo Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/3db6a8a5 Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/3db6a8a5 Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/3db6a8a5
Branch: refs/heads/master Commit: 3db6a8a5f7877537662bb6ef5ca45e819fd987d6 Parents: da76ae3 Author: Andy Seaborne <[email protected]> Authored: Fri Oct 3 21:47:44 2014 +0100 Committer: Andy Seaborne <[email protected]> Committed: Fri Oct 3 21:47:44 2014 +0100 ---------------------------------------------------------------------- .../hp/hpl/jena/sparql/resultset/CSVInput.java | 1 - .../org/apache/jena/atlas/csv/CSVParser.java | 67 +++++++++----------- .../apache/jena/atlas/csv/TestCSVParser.java | 4 +- 3 files changed, 33 insertions(+), 39 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/jena/blob/3db6a8a5/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/CSVInput.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/CSVInput.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/CSVInput.java index db5f3f3..abfeeb5 100644 --- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/CSVInput.java +++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/CSVInput.java @@ -117,7 +117,6 @@ public class CSVInput FmtLog.warn(log, "Boolean result variable is '%s', not '_askResult'", vars.get(0).getName()) ; } - List<String> line = parser.parse1() ; if ( line.size() != 1 ) { throw new ARQException("CSV Boolean Results malformed: data line='"+line+"'") ; http://git-wip-us.apache.org/repos/asf/jena/blob/3db6a8a5/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java b/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java index 88b56c5..83613ae 100644 --- a/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java +++ b/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java @@ -18,25 +18,25 @@ package 
org.apache.jena.atlas.csv ; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; +import java.io.IOException ; +import java.io.InputStream ; +import java.io.Reader ; +import java.util.Iterator ; +import java.util.List ; -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVRecord; -import org.apache.jena.atlas.io.IO; +import org.apache.commons.csv.CSVFormat ; +import org.apache.commons.csv.CSVRecord ; +import org.apache.jena.atlas.io.IO ; +import org.apache.jena.atlas.iterator.Iter ; +import org.apache.jena.atlas.iterator.Transform ; -/** Written specifically to handle SPARQL results CSV files. - * Acts as a wrapper for Commons CSV parser. +/** + * Wrapper for Commons CSV parser. */ public class CSVParser implements Iterable<List<String>> { - private final org.apache.commons.csv.CSVParser parser; + private final Iterator<CSVRecord> iterator ; public static CSVParser create(String filename) { InputStream input = IO.openFile(filename) ; @@ -44,49 +44,44 @@ public class CSVParser implements Iterable<List<String>> } public static CSVParser create(InputStream input) { - CSVParser parser = new CSVParser(new InputStreamReader(input)) ; + CSVParser parser = new CSVParser(IO.asBufferedUTF8(input)) ; return parser ; } - + + /** Be careful about charsets */ public static CSVParser create(Reader input) { CSVParser parser = new CSVParser(input) ; return parser ; } - public CSVParser(Reader input) { + private CSVParser(Reader input) { try { - this.parser = CSVFormat.EXCEL.withQuote('\'').parse(input); + this.parser = CSVFormat.RFC4180.parse(input); + this.iterator = parser.iterator() ; } catch (IOException e) { throw new CSVParseException("Failed to create the CSV parser: " + e.getMessage(), e); } } + private static Transform<CSVRecord, List<String>> transform = new Transform<CSVRecord, List<String>>() { + 
@Override + public List<String> convert(CSVRecord record) { + return recordToList(record) ; + } + } ; + @Override public Iterator<List<String>> iterator() { - List<List<String>> list = new ArrayList<>(); - for (CSVRecord record : parser) { - List<String> row = new ArrayList<>(); - for (String columnValue : record) { - row.add(columnValue); - } - list.add(row); - } - return list.iterator(); + return Iter.map(iterator, transform) ; } public List<String> parse1() { - Iterator<List<String>> iterator = iterator(); - if (iterator.hasNext()) - { - final List<String> firstRow = iterator.next(); - return firstRow; - } + if (iterator.hasNext()) + return recordToList(iterator.next()) ; return null; } - static void exception(String msg, long line, long col) { - if ( line >= 0 && col > 0 ) - msg = String.format("[%s, %s] %s", line, col, msg) ; - throw new CSVParseException(msg) ; + private static List<String> recordToList(CSVRecord record) { + return Iter.toList(record.iterator()) ; } } http://git-wip-us.apache.org/repos/asf/jena/blob/3db6a8a5/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java b/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java index de66fdb..7574ea6 100644 --- a/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java +++ b/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java @@ -41,7 +41,7 @@ public class TestCSVParser extends BaseTest @Test public void csv_parse_07() { csv(",,\n", new String[][] {{"", "", ""}}) ; } @Test public void csv_parse_10() { csv("\n\n", new String[][] { {""}, {""} }) ; } - @Test public void csv_parse_11() { csv("'aa'\naa\n", new String[][] { {"aa"}, {"aa"} }) ; } + @Test public void csv_parse_11() { csv("'aa'\naa\n", new String[][] { {"'aa'"}, {"aa"} }) ; } @Test public void csv_parse_12() { csv("\naa", new String[][] { 
{""}, {"aa"} }) ; } @Test public void csv_parse_13() { csv("a,b\nc,d", new String[][] { {"a", "b"}, {"c", "d"} }) ; } @Test public void csv_parse_14() { csv("a,b\rc,d", new String[][] { {"a", "b"}, {"c", "d"} }) ; } @@ -63,7 +63,7 @@ public class TestCSVParser extends BaseTest private static void csv(String input, List<List<String>> answers) { List<List<String>> x = new ArrayList<>() ; - CSVParser parser = new CSVParser(new StringReader(input)) ; + CSVParser parser = CSVParser.create(new StringReader(input)) ; for (List<String> row : parser) { x.add(row) ; }
