Adjust jena-csv to latest Jena. Project: http://git-wip-us.apache.org/repos/asf/jena/repo Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/b6af5326 Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/b6af5326 Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/b6af5326
Branch: refs/heads/master Commit: b6af532690a2baa869491011cb7c98f8156dedef Parents: 3db6a8a Author: Andy Seaborne <[email protected]> Authored: Fri Oct 3 21:48:15 2014 +0100 Committer: Andy Seaborne <[email protected]> Committed: Fri Oct 3 21:48:15 2014 +0100 ---------------------------------------------------------------------- jena-csv/pom.xml | 1 - .../impl/PropertyTableBuilder.java | 106 +++++----- .../apache/jena/propertytable/lang/CSV2RDF.java | 33 +++ .../apache/jena/propertytable/lang/LangCSV.java | 114 ++--------- .../lang/ReaderRIOTFactoryCSV.java | 31 +++ .../propertytable/lang/ReaderRIOTLangCSV.java | 77 +++++++ .../main/java/riotcmd/LocatorOupputFile.java | 148 ------------- jena-csv/src/main/java/riotcmd/ModDest.java | 51 ----- jena-csv/src/main/java/riotcmd/csv2rdf.java | 205 ------------------- jena-csv/src/main/java/riotcmdx/csv2rdf.java | 53 +++++ .../jena/propertytable/graph/GraphCSVTest.java | 41 ++-- .../impl/AbstractPropertyTableBuilderTest.java | 36 ++-- .../jena/propertytable/lang/TestLangCSV.java | 2 +- .../HEFCE_organogram_senior_data_31032011.csv | 2 +- 14 files changed, 304 insertions(+), 596 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/pom.xml ---------------------------------------------------------------------- diff --git a/jena-csv/pom.xml b/jena-csv/pom.xml index b8400ee..6842895 100644 --- a/jena-csv/pom.xml +++ b/jena-csv/pom.xml @@ -18,7 +18,6 @@ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> <modelVersion>4.0.0</modelVersion> - <groupId>org.apache.jena</groupId> <artifactId>jena-csv</artifactId> <packaging>jar</packaging> <name>Apache Jena - Data Tables for RDF and SPARQL</name> http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/src/main/java/org/apache/jena/propertytable/impl/PropertyTableBuilder.java ---------------------------------------------------------------------- diff --git a/jena-csv/src/main/java/org/apache/jena/propertytable/impl/PropertyTableBuilder.java b/jena-csv/src/main/java/org/apache/jena/propertytable/impl/PropertyTableBuilder.java index 665c282..da2e91f 100644 --- a/jena-csv/src/main/java/org/apache/jena/propertytable/impl/PropertyTableBuilder.java +++ b/jena-csv/src/main/java/org/apache/jena/propertytable/impl/PropertyTableBuilder.java @@ -18,21 +18,21 @@ package org.apache.jena.propertytable.impl; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; +import java.io.InputStream ; +import java.util.ArrayList ; +import java.util.Iterator ; +import java.util.List ; -import org.apache.jena.atlas.csv.CSVParser; -import org.apache.jena.atlas.csv.CSVTokenIterator; -import org.apache.jena.atlas.io.IO; -import org.apache.jena.propertytable.PropertyTable; -import org.apache.jena.propertytable.Row; -import org.apache.jena.propertytable.lang.LangCSV; -import org.apache.jena.riot.system.IRIResolver; +import org.apache.jena.atlas.csv.CSVParser ; +import org.apache.jena.atlas.io.IO ; +import org.apache.jena.propertytable.PropertyTable ; +import org.apache.jena.propertytable.Row ; +import org.apache.jena.propertytable.lang.LangCSV ; +import org.apache.jena.riot.system.IRIResolver ; -import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; -import com.hp.hpl.jena.graph.Node; -import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype ; +import com.hp.hpl.jena.graph.Node ; +import com.hp.hpl.jena.graph.NodeFactory ; /** @@ -75,56 +75,58 @@ public class PropertyTableBuilder { protected static PropertyTable fillPropertyTable(PropertyTable table, String csvFilePath ){ InputStream input = IO.openFile(csvFilePath) ; - CSVTokenIterator iterator = new CSVTokenIterator(input) ; + CSVParser iterator = CSVParser.create(input) ; return fillPropertyTable(table, iterator, csvFilePath); } - protected static PropertyTable fillPropertyTable(PropertyTable table, CSVTokenIterator iterator, String csvFilePath){ + protected static PropertyTable fillPropertyTable(PropertyTable table, CSVParser parser, String csvFilePath){ if (table == null){ return null; } - CSVParser parser = new CSVParser (iterator); - List<String> rowLine = null; ArrayList<Node> predicates = new ArrayList<Node>(); int rowNum = 0; + + Iterator<List<String>> iter = parser.iterator() ; + if ( ! iter.hasNext() ) + return table ; + List<String> row1 = iter.next() ; + table.createColumn(CSV_ROW_NODE); + for (String column : row1) { + String uri = createColumnKeyURI(csvFilePath, column); + Node p = NodeFactory.createURI(uri); + predicates.add(p); + table.createColumn(p); + } + + rowNum++ ; + while(iter.hasNext()) { + List<String> rowLine = iter.next(); + Node subject = LangCSV.caculateSubject(rowNum, csvFilePath); + Row row = table.createRow(subject); - while ((rowLine = parser.parse1()) != null) { - if (rowNum == 0) { - table.createColumn(CSV_ROW_NODE); - for (String column : rowLine) { - String uri = createColumnKeyURI(csvFilePath, column); - Node p = NodeFactory.createURI(uri); - predicates.add(p); - table.createColumn(p); - } - } else { - Node subject = LangCSV.caculateSubject(rowNum, csvFilePath); - Row row = table.createRow(subject); - - row.setValue(table.getColumn(CSV_ROW_NODE), NodeFactory.createLiteral( - (rowNum + ""), XSDDatatype.XSDinteger)); + row.setValue(table.getColumn(CSV_ROW_NODE), + NodeFactory.createLiteral((rowNum + ""), XSDDatatype.XSDinteger)); - for (int col = 0; col < rowLine.size() && col<predicates.size(); col++) { + for (int col = 0; col < rowLine.size() && col<predicates.size(); col++) { - String columnValue = rowLine.get(col).trim(); - if("".equals(columnValue)){ - continue; - } - Node o; - try { - // Try for a double. - double d = Double.parseDouble(columnValue); - o = NodeFactory.createLiteral(columnValue, - XSDDatatype.XSDdouble); - } catch (Exception e) { - o = NodeFactory.createLiteral(columnValue); - } - row.setValue(table.getColumn(predicates.get(col)), o); - } - } - rowNum++; - } - return table; + String columnValue = rowLine.get(col).trim(); + if("".equals(columnValue)){ + continue; + } + Node o; + try { + // Try for a double. + double d = Double.parseDouble(columnValue); + o = NodeFactory.createLiteral(columnValue, + XSDDatatype.XSDdouble); + } catch (Exception e) { + o = NodeFactory.createLiteral(columnValue); + } + row.setValue(table.getColumn(predicates.get(col)), o); + } + rowNum++ ; + } + return table; } protected static String createColumnKeyURI(String csvFilePath, String column){ http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/src/main/java/org/apache/jena/propertytable/lang/CSV2RDF.java ---------------------------------------------------------------------- diff --git a/jena-csv/src/main/java/org/apache/jena/propertytable/lang/CSV2RDF.java b/jena-csv/src/main/java/org/apache/jena/propertytable/lang/CSV2RDF.java new file mode 100644 index 0000000..1d3f415 --- /dev/null +++ b/jena-csv/src/main/java/org/apache/jena/propertytable/lang/CSV2RDF.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.propertytable.lang; + +import org.apache.jena.riot.Lang ; +import org.apache.jena.riot.RDFParserRegistry ; + +public class CSV2RDF { + + public static void init() { } + + static { + RDFParserRegistry.removeRegistration(Lang.CSV); + RDFParserRegistry.registerLangTriples(Lang.CSV, new ReaderRIOTFactoryCSV()); + } +} + http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/src/main/java/org/apache/jena/propertytable/lang/LangCSV.java ---------------------------------------------------------------------- diff --git a/jena-csv/src/main/java/org/apache/jena/propertytable/lang/LangCSV.java b/jena-csv/src/main/java/org/apache/jena/propertytable/lang/LangCSV.java index ccda47e..b4a3ab0 100644 --- a/jena-csv/src/main/java/org/apache/jena/propertytable/lang/LangCSV.java +++ b/jena-csv/src/main/java/org/apache/jena/propertytable/lang/LangCSV.java @@ -18,33 +18,21 @@ package org.apache.jena.propertytable.lang; -import static org.apache.jena.riot.RDFLanguages.CSV; - -import java.io.InputStream; -import java.io.Reader; -import java.util.ArrayList; -import java.util.List; - -import org.apache.jena.atlas.csv.CSVParser; -import org.apache.jena.atlas.web.ContentType; -import org.apache.jena.propertytable.util.IRILib; -import org.apache.jena.riot.Lang; -import org.apache.jena.riot.RDFLanguages; -import org.apache.jena.riot.RDFParserRegistry; -import org.apache.jena.riot.ReaderRIOT; -import org.apache.jena.riot.ReaderRIOTFactory; -import org.apache.jena.riot.lang.LangRIOT; -import org.apache.jena.riot.system.ErrorHandler; -import org.apache.jena.riot.system.ErrorHandlerFactory; -import org.apache.jena.riot.system.IRIResolver; -import org.apache.jena.riot.system.ParserProfile; -import org.apache.jena.riot.system.RiotLib; -import org.apache.jena.riot.system.StreamRDF; - -import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; -import com.hp.hpl.jena.graph.Node; -import com.hp.hpl.jena.graph.NodeFactory; -import com.hp.hpl.jena.sparql.util.Context; +import java.io.InputStream ; +import java.io.Reader ; +import java.util.ArrayList ; +import java.util.List ; + +import org.apache.jena.atlas.csv.CSVParser ; +import org.apache.jena.propertytable.util.IRILib ; +import org.apache.jena.riot.Lang ; +import org.apache.jena.riot.RDFLanguages ; +import org.apache.jena.riot.lang.LangRIOT ; +import org.apache.jena.riot.system.* ; + +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype ; +import com.hp.hpl.jena.graph.Node ; +import com.hp.hpl.jena.graph.NodeFactory ; /** * The LangRIOT implementation for CSV @@ -61,11 +49,6 @@ public class LangCSV implements LangRIOT { private String filename; private StreamRDF sink; private ParserProfile profile; // Warning - we don't use all of this. - - public static void register(){ - RDFParserRegistry.removeRegistration(Lang.CSV); - RDFParserRegistry.registerLangTriples(Lang.CSV, new ReaderRIOTFactoryCSV()); - } @Override public Lang getLang() { @@ -82,8 +65,7 @@ public class LangCSV implements LangRIOT { this.profile = profile; } - public LangCSV(Reader reader, String base, String filename, - ErrorHandler errorHandler, StreamRDF sink) { + public LangCSV(Reader reader, String base, String filename, ErrorHandler errorHandler, StreamRDF sink) { this.reader = reader; this.base = base; this.filename = filename; @@ -91,8 +73,7 @@ public class LangCSV implements LangRIOT { this.profile = RiotLib.profile(getLang(), base, errorHandler); } - public LangCSV(InputStream in, String base, String filename, - ErrorHandler errorHandler, StreamRDF sink) { + public LangCSV(InputStream in, String base, String filename, ErrorHandler errorHandler, StreamRDF sink) { this.input = in; this.base = base; this.filename = filename; @@ -103,8 +84,7 @@ public class LangCSV implements LangRIOT { @Override public void parse() { sink.start(); - CSVParser parser = (input != null) ? CSVParser.create(input) - : CSVParser.create(reader); + CSVParser parser = (input != null) ? CSVParser.create(input) : CSVParser.create(reader); List<String> row = null; ArrayList<Node> predicates = new ArrayList<Node>(); int rowNum = 0; @@ -168,62 +148,4 @@ public class LangCSV implements LangRIOT { // Node subject = NodeFactory.createURI(uri); return subject; } - - - - - private static class ReaderRIOTFactoryCSV implements ReaderRIOTFactory - { - @Override - public ReaderRIOT create(Lang lang) { - return new ReaderRIOTLangCSV(lang) ; - } - } - - private static class ReaderRIOTLangCSV implements ReaderRIOT - { - private final Lang lang ; - private ErrorHandler errorHandler ; - private ParserProfile parserProfile = null ; - - ReaderRIOTLangCSV(Lang lang) { - this.lang = lang ; - errorHandler = ErrorHandlerFactory.getDefaultErrorHandler() ; - } - - @Override - public void read(InputStream in, String baseURI, ContentType ct, StreamRDF output, Context context) { - if ( lang == CSV){ - LangRIOT parser = new LangCSV (in, baseURI, baseURI, ErrorHandlerFactory.getDefaultErrorHandler(), output); - if ( parserProfile != null ) - parser.setProfile(parserProfile); - if ( errorHandler != null ) - parser.getProfile().setHandler(errorHandler) ; - parser.parse() ; - } else { - throw new IllegalArgumentException("The Lang must be 'CSV'!"); - } - - } - - @Override - public void read(Reader in, String baseURI, ContentType ct, StreamRDF output, Context context) { - if ( lang == CSV){ - LangRIOT parser = new LangCSV (in, baseURI, baseURI, ErrorHandlerFactory.getDefaultErrorHandler(), output); - if ( parserProfile != null ) - parser.setProfile(parserProfile); - if ( errorHandler != null ) - parser.getProfile().setHandler(errorHandler) ; - parser.parse() ; - } else { - throw new IllegalArgumentException("The Lang must be 'CSV'!"); - } - } - - @Override public ErrorHandler getErrorHandler() { return errorHandler ; } - @Override public void setErrorHandler(ErrorHandler errorHandler) { this.errorHandler = errorHandler ; } - - @Override public ParserProfile getParserProfile() { return parserProfile ; } - @Override public void setParserProfile(ParserProfile parserProfile) { this.parserProfile = parserProfile ; } - } } http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/src/main/java/org/apache/jena/propertytable/lang/ReaderRIOTFactoryCSV.java ---------------------------------------------------------------------- diff --git a/jena-csv/src/main/java/org/apache/jena/propertytable/lang/ReaderRIOTFactoryCSV.java b/jena-csv/src/main/java/org/apache/jena/propertytable/lang/ReaderRIOTFactoryCSV.java new file mode 100644 index 0000000..ec0a68f --- /dev/null +++ b/jena-csv/src/main/java/org/apache/jena/propertytable/lang/ReaderRIOTFactoryCSV.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.propertytable.lang; + +import org.apache.jena.riot.Lang ; +import org.apache.jena.riot.ReaderRIOT ; +import org.apache.jena.riot.ReaderRIOTFactory ; + +class ReaderRIOTFactoryCSV implements ReaderRIOTFactory +{ + @Override + public ReaderRIOT create(Lang lang) { + return new ReaderRIOTLangCSV(lang) ; + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/src/main/java/org/apache/jena/propertytable/lang/ReaderRIOTLangCSV.java ---------------------------------------------------------------------- diff --git a/jena-csv/src/main/java/org/apache/jena/propertytable/lang/ReaderRIOTLangCSV.java b/jena-csv/src/main/java/org/apache/jena/propertytable/lang/ReaderRIOTLangCSV.java new file mode 100644 index 0000000..413b9f9 --- /dev/null +++ b/jena-csv/src/main/java/org/apache/jena/propertytable/lang/ReaderRIOTLangCSV.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.propertytable.lang; + +import static org.apache.jena.riot.RDFLanguages.CSV ; + +import java.io.InputStream ; +import java.io.Reader ; + +import org.apache.jena.atlas.web.ContentType ; +import org.apache.jena.riot.Lang ; +import org.apache.jena.riot.ReaderRIOT ; +import org.apache.jena.riot.lang.LangRIOT ; +import org.apache.jena.riot.system.ErrorHandler ; +import org.apache.jena.riot.system.ErrorHandlerFactory ; +import org.apache.jena.riot.system.ParserProfile ; +import org.apache.jena.riot.system.StreamRDF ; + +import com.hp.hpl.jena.sparql.util.Context ; + +class ReaderRIOTLangCSV implements ReaderRIOT +{ + private final Lang lang ; + private ErrorHandler errorHandler ; + private ParserProfile parserProfile = null ; + + ReaderRIOTLangCSV(Lang lang) { + this.lang = lang ; + errorHandler = ErrorHandlerFactory.getDefaultErrorHandler() ; + } + + @Override + public void read(InputStream in, String baseURI, ContentType ct, StreamRDF output, Context context) { + if ( lang != CSV) + throw new IllegalArgumentException("The Lang must be 'CSV'!"); + LangRIOT parser = new LangCSV (in, baseURI, baseURI, ErrorHandlerFactory.getDefaultErrorHandler(), output); + if ( parserProfile != null ) + parser.setProfile(parserProfile); + if ( errorHandler != null ) + parser.getProfile().setHandler(errorHandler) ; + parser.parse() ; + } + + @Override + public void read(Reader in, String baseURI, ContentType ct, StreamRDF output, Context context) { + if ( lang != CSV) + throw new IllegalArgumentException("The Lang must be 'CSV'!"); + LangRIOT parser = new LangCSV (in, baseURI, baseURI, ErrorHandlerFactory.getDefaultErrorHandler(), output); + if ( parserProfile != null ) + parser.setProfile(parserProfile); + if ( errorHandler != null ) + parser.getProfile().setHandler(errorHandler) ; + parser.parse() ; + } + + @Override public ErrorHandler getErrorHandler() { return errorHandler ; } + @Override public void setErrorHandler(ErrorHandler errorHandler) { this.errorHandler = errorHandler ; } + + @Override public ParserProfile getParserProfile() { return parserProfile ; } + @Override public void setParserProfile(ParserProfile parserProfile) { this.parserProfile = parserProfile ; } +} http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/src/main/java/riotcmd/LocatorOupputFile.java ---------------------------------------------------------------------- diff --git a/jena-csv/src/main/java/riotcmd/LocatorOupputFile.java b/jena-csv/src/main/java/riotcmd/LocatorOupputFile.java deleted file mode 100644 index 14caeeb..0000000 --- a/jena-csv/src/main/java/riotcmd/LocatorOupputFile.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package riotcmd; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.security.AccessControlException; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.hp.hpl.jena.util.FileManager; -import com.hp.hpl.jena.util.FileUtils; -import com.hp.hpl.jena.util.LocatorFile; - -class LocatorOupputFile { - static Logger log = LoggerFactory.getLogger(LocatorOupputFile.class) ; - private String thisDir = null ; - private String thisDirLogStr = "" ; - - public LocatorOupputFile(String dir) - { - if ( dir != null ) - { - if ( dir.endsWith("/") || dir.endsWith(java.io.File.separator) ) - dir = dir.substring(0,dir.length()-1) ; - thisDirLogStr = " ["+dir+"]" ; - } - thisDir = dir ; - } - - LocatorOupputFile() - { - this(null) ; - } - - @Override - public boolean equals( Object other ) - { - return - other instanceof LocatorFile - && equals( thisDir, ((LocatorOupputFile) other).thisDir ); - } - - private boolean equals( String a, String b ) - { - return a == null ? b == null : a.equals( b ); - } - - @Override - public int hashCode() - { - if ( thisDir == null ) - return 157 ; - return thisDir.hashCode(); - } - - private File toFile(String filenameOrURI) - { - String fn = FileUtils.toFilename(filenameOrURI) ; - if ( fn == null ) - return null ; - - if ( thisDir != null && ! fn.startsWith("/") && ! fn.startsWith(FileManager.filePathSeparator) ) - fn = thisDir+java.io.File.separator+fn ; - - return new File(fn) ; - } - - - public boolean exists(String filenameOrURI) - { - File f = toFile(filenameOrURI) ; - - if ( f == null ) - return false ; - - return f.exists() ; - } - - - public OutputStream open(String filenameOrURI) - { - // Worry about %20. - // toFile calls FileUtils.toFilename(filenameOrURI) ; - File f = toFile(filenameOrURI) ; - - try { - if ( f == null ) - { - if ( log.isTraceEnabled()) - log.trace("Not found: "+filenameOrURI+thisDirLogStr) ; - return null ; - } - } catch (AccessControlException e) { - log.warn("Security problem testing for file", e); - return null; - } - - try { - OutputStream out = new FileOutputStream(f) ; - - if ( log.isTraceEnabled() ) - log.trace("Found: "+filenameOrURI+thisDirLogStr) ; - - - // Create base -- Java 1.4-isms - //base = f.toURI().toURL().toExternalForm() ; - //base = base.replaceFirst("^file:/([^/])", "file:///$1") ; - return out ; - } catch (IOException ioEx) - { - // Includes FileNotFoundException - // We already tested whether the file exists or not. - // log.warn("File unreadable (but exists): "+f.getPath()+" Exception: "+ioEx.getMessage()) ; - return null ; - } - } - - public String getDir() { return thisDir ; } - - - public String getName() - { - String tmp = "LocatorFile" ; - if ( thisDir != null ) - tmp = tmp+"("+thisDir+")" ; - return tmp ; - } -} http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/src/main/java/riotcmd/ModDest.java ---------------------------------------------------------------------- diff --git a/jena-csv/src/main/java/riotcmd/ModDest.java b/jena-csv/src/main/java/riotcmd/ModDest.java deleted file mode 100644 index e5560f0..0000000 --- a/jena-csv/src/main/java/riotcmd/ModDest.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package riotcmd; - -import arq.cmd.CmdException; -import arq.cmdline.ArgDecl; -import arq.cmdline.ArgModuleGeneral; -import arq.cmdline.CmdArgModule; -import arq.cmdline.CmdGeneral; - -class ModDest implements ArgModuleGeneral{ - - private ArgDecl argDest = new ArgDecl(ArgDecl.HasValue, "dest") ; - private String dest = null ; - - @Override - public void processArgs(CmdArgModule cmdLine) { - if ( cmdLine.contains(argDest) ) { - dest = cmdLine.getValue(argDest) ; - } else { - throw new CmdException("No destination output file! Please add '--dest=file' in the program arguements") ; - } - } - - @Override - public void registerWith(CmdGeneral cmdLine) { - cmdLine.getUsage().startCategory("Destination Output") ; - cmdLine.add(argDest, "--dest=file", "The destination output file") ; - } - - public String getDest() { - return dest ; - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/src/main/java/riotcmd/csv2rdf.java ---------------------------------------------------------------------- diff --git a/jena-csv/src/main/java/riotcmd/csv2rdf.java b/jena-csv/src/main/java/riotcmd/csv2rdf.java deleted file mode 100644 index 882a29a..0000000 --- a/jena-csv/src/main/java/riotcmd/csv2rdf.java +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package riotcmd; - -import java.io.OutputStream; - -import org.apache.jena.atlas.io.IO; -import org.apache.jena.atlas.web.ContentType; -import org.apache.jena.atlas.web.TypedInputStream; -import org.apache.jena.riot.Lang; -import org.apache.jena.riot.RDFDataMgr; -import org.apache.jena.riot.RDFLanguages; -import org.apache.jena.riot.ReaderRIOT; -import org.apache.jena.riot.RiotException; -import org.apache.jena.riot.SysRIOT; -import org.apache.jena.riot.lang.LabelToNode; -import org.apache.jena.riot.lang.StreamRDFCounting; -import org.apache.jena.riot.out.NodeToLabel; -import org.apache.jena.riot.process.inf.InfFactory; -import org.apache.jena.riot.system.ErrorHandler; -import org.apache.jena.riot.system.ErrorHandlerFactory; -import org.apache.jena.riot.system.RiotLib; -import org.apache.jena.riot.system.StreamRDF; -import org.apache.jena.riot.system.StreamRDF2; -import org.apache.jena.riot.system.StreamRDFLib; -import org.apache.jena.riot.system.SyntaxLabels; - -import arq.cmd.CmdException; - -import com.hp.hpl.jena.sparql.util.Utils; - -/** - * It's a command line tool for direct and scalable transforming from CSV to the formatted RDF syntax (i.e. N-Triples), - * with no intermediary Graph or PropertyTable. - * - * It reuses the parsing functions from CmdLangParse and sinks the triples into the destination output file. - * - */ -public class csv2rdf extends CmdLangParse{ - - protected ModDest modDest = new ModDest() ; - protected OutputStream destOut; - - public static void main(String... argv) - { - new csv2rdf(argv).mainRun() ; - } - - protected csv2rdf(String[] argv) - { - super(argv) ; - super.addModule(modDest) ; - - } - - @Override - protected Lang selectLang(String filename, ContentType contentType, - Lang dftLang) { - return RDFLanguages.CSV; - } - - @Override - protected String getCommandName() { - return Utils.classShortName(csv2rdf.class) ; - } - - @Override - protected String getSummary() - { - return getCommandName()+" --dest=outputFile inputFile ..." ; - } - - // override the original CmdLangParse.parseRIOT() - protected void parseRIOT(String baseURI, String filename, TypedInputStream in) - { - - String dest = modDest.getDest(); - LocatorOupputFile l = new LocatorOupputFile(); - destOut = l.open(dest); - - if (destOut == null){ - System.err.println("Can't write to destination output file: '"+dest+"' ") ; - return ; - } - - // I ti s shame we effectively duplicate deciding thelnaguage but we want to control the - // pasrer at a deep level (in validation, we want line numbers get into error message) - // This code predates RDFDataMgr. - - ContentType ct = in.getMediaType() ; - - baseURI = SysRIOT.chooseBaseIRI(baseURI, filename) ; - - boolean checking = true ; - if ( modLangParse.explicitChecking() ) checking = true ; - if ( modLangParse.explicitNoChecking() ) checking = false ; - - ErrorHandler errHandler = null ; - if ( checking ) - { - if ( modLangParse.stopOnBadTerm() ) - errHandler = ErrorHandlerFactory.errorHandlerStd ; - else - // Try to go on if possible. This is the default behaviour. - errHandler = ErrorHandlerFactory.errorHandlerWarn ; - } - - if ( modLangParse.skipOnBadTerm() ) - { - // TODO skipOnBadterm - } - - Lang lang = selectLang(filename, ct, RDFLanguages.NQUADS) ; - LangHandler handler = dispatch.get(lang) ; - if ( handler == null ) - throw new CmdException("Undefined language: "+lang) ; - - // If multiple files, choose the overall labels. - if ( langHandlerOverall == null ) - langHandlerOverall = handler ; - else - { - if ( langHandlerOverall != langHandlerAny ) - { - if ( langHandlerOverall != handler ) - langHandlerOverall = langHandlerAny ; - } - } - - // Make a flag. - // Input and output subflags. - // If input is "label, then output using NodeToLabel.createBNodeByLabelRaw() ; - // else use NodeToLabel.createBNodeByLabel() ; - // Also, as URI. - final boolean labelsAsGiven = false ; - - NodeToLabel labels = SyntaxLabels.createNodeToLabel() ; - if ( labelsAsGiven ) - labels = NodeToLabel.createBNodeByLabelEncoded() ; - - StreamRDF s = StreamRDFLib.sinkNull() ; - if ( ! modLangParse.toBitBucket() ) - s = StreamRDFLib.writer(output) ; - - // add dest output - if ( destOut != null) - s = new StreamRDF2(s, StreamRDFLib.writer(destOut)); - - if ( setup != null ) - s = InfFactory.inf(s, setup) ; - - StreamRDFCounting sink = StreamRDFLib.count(s) ; - s = null ; - - ReaderRIOT reader = RDFDataMgr.createReader(lang) ; - try { - if ( checking ) { - if ( lang == RDFLanguages.NTRIPLES || lang == RDFLanguages.NQUADS ) - reader.setParserProfile(RiotLib.profile(baseURI, false, true, errHandler)) ; - else - reader.setParserProfile(RiotLib.profile(baseURI, true, true, errHandler)) ; - } else - reader.setParserProfile(RiotLib.profile(baseURI, false, false, errHandler)) ; - - if ( labelsAsGiven ) - reader.getParserProfile().setLabelToNode(LabelToNode.createUseLabelAsGiven()) ; - modTime.startTimer() ; - reader.read(in, baseURI, ct, sink, null) ; - } catch (RiotException ex) { - // Should have handled the exception and logged a message by now. - // System.err.println("++++"+ex.getMessage()); - - if ( modLangParse.stopOnBadTerm() ) - return ; - } finally { - // Not close - we may write again to the underlying output stream in another call to parse a file. - sink.finish() ; - IO.close(in) ; - } - long x = modTime.endTimer() ; - long n = sink.countTriples()+sink.countQuads() ; - - if ( modTime.timingEnabled() ) - output(filename, n, x, handler) ; - - totalMillis += x ; - totalTuples += n ; - } -} http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/src/main/java/riotcmdx/csv2rdf.java ---------------------------------------------------------------------- diff --git a/jena-csv/src/main/java/riotcmdx/csv2rdf.java b/jena-csv/src/main/java/riotcmdx/csv2rdf.java new file mode 100644 index 0000000..7bd1a87 --- /dev/null +++ b/jena-csv/src/main/java/riotcmdx/csv2rdf.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package riotcmdx; + +import org.apache.jena.atlas.web.ContentType ; +import org.apache.jena.riot.Lang ; +import org.apache.jena.riot.RDFLanguages ; +import riotcmd.CmdLangParse ; + +import com.hp.hpl.jena.sparql.util.Utils ; + +/** + * A command line tool for direct and scalable transforming from CSV to the formatted RDF syntax (i.e. N-Triples), + * with no intermediary Graph or PropertyTable. + */ +public class csv2rdf extends CmdLangParse{ + + public static void main(String... argv) + { + new csv2rdf(argv).mainRun() ; + } + + protected csv2rdf(String[] argv) + { + super(argv) ; + } + + @Override + protected Lang selectLang(String filename, ContentType contentType, Lang dftLang) { + return RDFLanguages.CSV; + } + + @Override + protected String getCommandName() { + return Utils.classShortName(csv2rdf.class) ; + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/src/test/java/org/apache/jena/propertytable/graph/GraphCSVTest.java ---------------------------------------------------------------------- diff --git a/jena-csv/src/test/java/org/apache/jena/propertytable/graph/GraphCSVTest.java b/jena-csv/src/test/java/org/apache/jena/propertytable/graph/GraphCSVTest.java index 3d23f86..2915ac9 100644 --- a/jena-csv/src/test/java/org/apache/jena/propertytable/graph/GraphCSVTest.java +++ b/jena-csv/src/test/java/org/apache/jena/propertytable/graph/GraphCSVTest.java @@ -18,23 +18,17 @@ package org.apache.jena.propertytable.graph; -import org.apache.jena.propertytable.graph.GraphCSV; -import org.apache.jena.propertytable.lang.LangCSV; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; +import org.apache.jena.atlas.lib.StrUtils ; +import org.apache.jena.propertytable.lang.CSV2RDF ; +import org.junit.Assert ; +import org.junit.BeforeClass ; +import org.junit.Test ; -import com.hp.hpl.jena.query.ARQ; -import com.hp.hpl.jena.query.Query; -import com.hp.hpl.jena.query.QueryExecution; -import com.hp.hpl.jena.query.QueryExecutionFactory; -import com.hp.hpl.jena.query.QueryFactory; -import com.hp.hpl.jena.query.QuerySolution; -import com.hp.hpl.jena.query.ResultSet; -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.sparql.engine.main.StageBuilder; -import com.hp.hpl.jena.sparql.engine.main.StageGenerator; +import com.hp.hpl.jena.query.* ; +import com.hp.hpl.jena.rdf.model.Model ; +import com.hp.hpl.jena.rdf.model.ModelFactory ; +import com.hp.hpl.jena.sparql.engine.main.StageBuilder ; +import com.hp.hpl.jena.sparql.engine.main.StageGenerator ; /** * Tests related to GraphCSV with some real world data. @@ -44,7 +38,7 @@ public class GraphCSVTest extends Assert { @BeforeClass public static void init(){ - LangCSV.register(); + CSV2RDF.init() ; } @Test @@ -90,8 +84,17 @@ public class GraphCSVTest extends Assert { Model csv = ModelFactory.createModelForGraph(new GraphCSV(file)); assertEquals(72, csv.size()); - Query query = QueryFactory - .create("PREFIX : <src/test/resources/HEFCE_organogram_senior_data_31032011.csv#> SELECT ?name ?unit {?x :Name ?name ; :Unit ?unit ; :Actual%20Pay%20Floor%20%28%A3%29 ?floor ; :Actual%20Pay%20Ceiling%20%28%A3%29 ?ceiling . FILTER(?floor > 100000 && ?ceiling <120000 )}"); + String x = StrUtils.strjoinNL + ("PREFIX : <src/test/resources/HEFCE_organogram_senior_data_31032011.csv#>" + ,"SELECT ?name ?unit" + ,"{ ?x :Name ?name ;" + ," :Unit ?unit ;" + ," :Actual%20Pay%20Floor%20%28%A3%29 ?floor ;" + ," :Actual%20Pay%20Ceiling%20%28%A3%29 ?ceiling ." + ,"FILTER(?floor > 100000 && ?ceiling <120000 )" + ,"}"); + + Query query = QueryFactory.create(x) ; QueryExecution qexec = QueryExecutionFactory.create(query, csv); ResultSet results = qexec.execSelect(); http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/src/test/java/org/apache/jena/propertytable/impl/AbstractPropertyTableBuilderTest.java ---------------------------------------------------------------------- diff --git a/jena-csv/src/test/java/org/apache/jena/propertytable/impl/AbstractPropertyTableBuilderTest.java b/jena-csv/src/test/java/org/apache/jena/propertytable/impl/AbstractPropertyTableBuilderTest.java index f7a1e02..a9f5ba0 100644 --- a/jena-csv/src/test/java/org/apache/jena/propertytable/impl/AbstractPropertyTableBuilderTest.java +++ b/jena-csv/src/test/java/org/apache/jena/propertytable/impl/AbstractPropertyTableBuilderTest.java @@ -18,18 +18,16 @@ package org.apache.jena.propertytable.impl; -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.io.UnsupportedEncodingException; +import java.io.StringReader ; -import org.apache.jena.atlas.csv.CSVTokenIterator; -import org.apache.jena.propertytable.BaseTest; -import org.apache.jena.propertytable.Row; -import org.junit.Assert; -import org.junit.Test; +import org.apache.jena.atlas.csv.CSVParser ; +import org.apache.jena.propertytable.BaseTest ; +import org.apache.jena.propertytable.Row ; +import org.junit.Assert ; +import org.junit.Test ; -import com.hp.hpl.jena.graph.Node; -import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.graph.Node ; +import com.hp.hpl.jena.graph.NodeFactory ; /** @@ -40,7 +38,7 @@ public abstract class AbstractPropertyTableBuilderTest extends BaseTest { @Test public void testFillPropertyTable() { - CSVTokenIterator iterator = csv("a,b\nc,d\ne,f"); + CSVParser iterator = csv("a,b\nc,d\ne,f"); PropertyTableBuilder.fillPropertyTable(table, iterator, csvFilePath); Assert.assertEquals(3, table.getColumns().size()); @@ -59,7 +57,7 @@ public abstract class AbstractPropertyTableBuilderTest extends BaseTest { @Test public void testIrregularTable1() { - CSVTokenIterator iterator = csv("a,b\nc\ne,f"); + CSVParser iterator = csv("a,b\nc\ne,f"); PropertyTableBuilder.fillPropertyTable(table, iterator, csvFilePath); Assert.assertEquals(3, table.getColumns().size()); @@ -77,7 +75,7 @@ public abstract class AbstractPropertyTableBuilderTest extends BaseTest { @Test public void testIrregularTable2() { - CSVTokenIterator iterator = csv("a,b\nc,d1,d2\ne,f"); + CSVParser iterator = csv("a,b\nc,d1,d2\ne,f"); PropertyTableBuilder.fillPropertyTable(table, iterator, csvFilePath); Assert.assertEquals(3, table.getColumns().size()); @@ -95,7 +93,7 @@ public abstract class AbstractPropertyTableBuilderTest extends BaseTest { @Test public void testIrregularTable3() { - CSVTokenIterator iterator = csv("a,b\n,d\ne,f"); + CSVParser iterator = csv("a,b\n,d\ne,f"); PropertyTableBuilder.fillPropertyTable(table, iterator, csvFilePath); Assert.assertEquals(3, table.getColumns().size()); @@ -135,13 +133,7 @@ public abstract class AbstractPropertyTableBuilderTest extends BaseTest { Assert.assertTrue(collectionContains(table.getColumns(), columnKey)); } - private CSVTokenIterator csv(String input) { - try { - InputStream in = new ByteArrayInputStream(input.getBytes("UTF-8")); - CSVTokenIterator iterator = new CSVTokenIterator(in); - return iterator; - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } + private CSVParser csv(String input) { + return CSVParser.create(new StringReader(input)); } } http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/src/test/java/org/apache/jena/propertytable/lang/TestLangCSV.java ---------------------------------------------------------------------- diff --git a/jena-csv/src/test/java/org/apache/jena/propertytable/lang/TestLangCSV.java b/jena-csv/src/test/java/org/apache/jena/propertytable/lang/TestLangCSV.java index 697ba27..581160b 100644 --- a/jena-csv/src/test/java/org/apache/jena/propertytable/lang/TestLangCSV.java +++ b/jena-csv/src/test/java/org/apache/jena/propertytable/lang/TestLangCSV.java @@ -44,7 +44,7 @@ public class TestLangCSV extends BaseTest { @BeforeClass public static void init(){ - LangCSV.register(); + CSV2RDF.init() ; } @Test http://git-wip-us.apache.org/repos/asf/jena/blob/b6af5326/jena-csv/src/test/resources/HEFCE_organogram_senior_data_31032011.csv ---------------------------------------------------------------------- diff --git a/jena-csv/src/test/resources/HEFCE_organogram_senior_data_31032011.csv b/jena-csv/src/test/resources/HEFCE_organogram_senior_data_31032011.csv index 77df38f..f881ac1e 100644 --- a/jena-csv/src/test/resources/HEFCE_organogram_senior_data_31032011.csv +++ b/jena-csv/src/test/resources/HEFCE_organogram_senior_data_31032011.csv @@ -1,5 +1,5 @@ Post Unique Reference,Name,Grade,Job Title,Job/Team Function,Parent Department,Organisation,Unit,Contact Phone,Contact E-mail,Reports to Senior Post,Salary Cost of Reports (£),FTE,Actual Pay Floor (£),Actual Pay Ceiling (£),,Profession,Notes,Valid? 90115,Steve Egan,SCS1A,Deputy Chief Executive,Finance and Corporate Resources,Department for Business Innovation and Skills,Higher Education Funding Council for England,Finance and Corporate Resources,0117 931 7408,[email protected],90334,5883433,1,120000,124999,,Finance,,1 -90250,David Sweeney,SCS1A,Director,"Research, Innovation and Skills",Department for Business Innovation and Skills,Higher Education Funding Council for England,"Research, Innovation and Skills",0117 931 7304,[email protected],90334,1207171,1,110000,114999,,Policy,,1 +90250,David Sweeney,SCS1A,Director,"ResearchX Innovation and Skills",Department for Business Innovation and Skills,Higher Education Funding Council for England,"Research, Innovation and Skills",0117 931 7304,[email protected],90334,1207171,1,110000,114999,,Policy,,1 90284,Heather Fry,SCS1A,Director,Education and Participation,Department for Business Innovation and Skills,Higher Education Funding Council for England,Education and Participation,0117 931 7280,[email protected],90334,1645195,1,100000,104999,,Policy,,1 90334,Sir Alan Langlands,SCS4,Chief Executive,Chief Executive,Department for Business Innovation and Skills,Higher Education Funding Council for England,HEFCE,0117 931 7300/7341,[email protected],xx,0,1,230000,234999,,Policy,,1
