http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsParserTest.java ---------------------------------------------------------------------- diff --git a/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsParserTest.java b/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsParserTest.java deleted file mode 100644 index ba54b01..0000000 --- a/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsParserTest.java +++ /dev/null @@ -1,654 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.any23.io.nquads; - -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.openrdf.model.BNode; -import org.openrdf.model.Literal; -import org.openrdf.model.Resource; -import org.openrdf.model.Statement; -import org.openrdf.model.URI; -import org.openrdf.model.Value; -import org.openrdf.model.impl.URIImpl; -import org.openrdf.rio.ParseLocationListener; -import org.openrdf.rio.RDFHandler; -import org.openrdf.rio.RDFHandlerException; -import org.openrdf.rio.RDFParseException; -import org.openrdf.rio.RDFParser; -import org.openrdf.rio.RioSetting; -import org.openrdf.rio.helpers.BasicParserSettings; -import org.openrdf.rio.helpers.NTriplesParserSettings; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.BufferedReader; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import static org.hamcrest.core.Is.is; - -/** - * Test case for {@link NQuadsParser}. 
- * - * @author Michele Mostarda ([email protected]) - */ -public class NQuadsParserTest { - - private static final Logger logger = LoggerFactory.getLogger(NQuadsParser.class); - - private NQuadsParser parser; - - private TestRDFHandler rdfHandler; - - @Before - public void setUp() { - parser = new NQuadsParser(); - rdfHandler = new TestRDFHandler(); - parser.setRDFHandler(rdfHandler); - Set<RioSetting<?>> nonFatalErrors = new HashSet<RioSetting<?>>(); - parser.getParserConfig().setNonFatalErrors(nonFatalErrors); - parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, false); - parser.getParserConfig().addNonFatalError(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES); - parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, false); - parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES); - parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false); - parser.getParserConfig().addNonFatalError(BasicParserSettings.NORMALIZE_DATATYPE_VALUES); - - } - - @After - public void tearDown() { - parser = null; - } - - /** - * Tests the correct behavior with incomplete input. - * - * @throws RDFHandlerException - * @throws IOException - * @throws RDFParseException - */ - @Test(expected = RDFParseException.class) - public void testIncompleteParsing() throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - "<http://s> <http://p> <http://o> <http://g>".getBytes() - ); - parser.parse(bais, "http://base-uri"); - } - - /** - * Tests parsing of empty lines and comments. - * - * @throws java.io.IOException - */ - @Test - public void testParseEmptyLinesAndComments() throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - " \n\n\n# This is a comment\n\n#this is another comment." 
- .getBytes() - ); - parser.parse(bais, "http://test.base.uri"); - Assert.assertEquals(rdfHandler.getStatements().size(), 0); - } - - /** - * Tests basic N-Quads parsing. - * - * @throws RDFHandlerException - * @throws IOException - * @throws RDFParseException - */ - @Test - public void testParseBasic() throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - "<http://www.v/dat/4b> <http://www.w3.org/20/ica#dtend> <http://sin/value/2> <http://sin.siteserv.org/def/>." - .getBytes() - ); - parser.parse(bais, "http://test.base.uri"); - Assert.assertThat(rdfHandler.getStatements().size(), is(1)); - final Statement statement = rdfHandler.getStatements().get(0); - Assert.assertEquals("http://www.v/dat/4b", statement.getSubject().stringValue()); - Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue()); - Assert.assertTrue(statement.getObject() instanceof URI); - Assert.assertEquals("http://sin/value/2", statement.getObject().stringValue()); - Assert.assertEquals("http://sin.siteserv.org/def/", statement.getContext().stringValue()); - } - - /** - * Tests basic N-Quads parsing with blank node. - * - * @throws RDFHandlerException - * @throws IOException - * @throws RDFParseException - */ - @Test - public void testParseBasicBNode() throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - "_:a123456768 <http://www.w3.org/20/ica#dtend> <http://sin/value/2> <http://sin.siteserv.org/def/>." 
- .getBytes() - ); - parser.parse(bais, "http://test.base.uri"); - Assert.assertThat(rdfHandler.getStatements().size(), is(1)); - final Statement statement = rdfHandler.getStatements().get(0); - Assert.assertTrue(statement.getSubject() instanceof BNode); - Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue()); - Assert.assertTrue(statement.getObject() instanceof URI); - Assert.assertEquals("http://sin/value/2", statement.getObject().stringValue()); - Assert.assertEquals("http://sin.siteserv.org/def/", statement.getContext().stringValue()); - } - - /** - * Tests basic N-Quads parsing with literal. - * - * @throws RDFHandlerException - * @throws IOException - * @throws RDFParseException - */ - @Test - public void testParseBasicLiteral() throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - "_:a123456768 <http://www.w3.org/20/ica#dtend> \"2010-05-02\" <http://sin.siteserv.org/def/>." - .getBytes() - ); - parser.parse(bais, "http://test.base.uri"); - Assert.assertThat(rdfHandler.getStatements().size(), is(1)); - final Statement statement = rdfHandler.getStatements().get(0); - Assert.assertTrue(statement.getSubject() instanceof BNode); - Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue()); - Assert.assertTrue(statement.getObject() instanceof Literal); - Assert.assertEquals("2010-05-02", statement.getObject().stringValue()); - Assert.assertEquals("http://sin.siteserv.org/def/", statement.getContext().stringValue()); - } - - /** - * Tests N-Quads parsing with literal and language. 
- * - * @throws RDFHandlerException - * @throws IOException - * @throws RDFParseException - */ - @Test - public void testParseBasicLiteralLang() throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - "<http://www.v/dat/4b2-21> <http://www.w3.org/20/ica#dtend> \"2010-05-02\"@en <http://sin.siteserv.org/def/>." - .getBytes() - ); - parser.parse(bais, "http://test.base.uri"); - final Statement statement = rdfHandler.getStatements().get(0); - Assert.assertEquals("http://www.v/dat/4b2-21", statement.getSubject().stringValue()); - Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue()); - Assert.assertTrue(statement.getObject() instanceof Literal); - Literal object = (Literal) statement.getObject(); - Assert.assertEquals("2010-05-02", object.stringValue()); - Assert.assertEquals("en", object.getLanguage()); - Assert.assertNull("en", object.getDatatype()); - Assert.assertEquals("http://sin.siteserv.org/def/", statement.getContext().stringValue()); - } - - /** - * Tests N-Quads parsing with literal and datatype. - * - * @throws RDFHandlerException - * @throws IOException - * @throws RDFParseException - */ - @Test - public void testParseBasicLiteraDatatype() throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - ("<http://www.v/dat/4b2-21> " + - "<http://www.w3.org/20/ica#dtend> " + - "\"2010\"^^<http://www.w3.org/2001/XMLSchema#integer> " + - "<http://sin.siteserv.org/def/>." 
- ).getBytes() - ); - parser.parse(bais, "http://test.base.uri"); - final Statement statement = rdfHandler.getStatements().get(0); - Assert.assertEquals("http://www.v/dat/4b2-21", statement.getSubject().stringValue()); - Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue()); - Assert.assertTrue(statement.getObject() instanceof Literal); - Literal object = (Literal) statement.getObject(); - Assert.assertEquals("2010", object.stringValue()); - Assert.assertNull(object.getLanguage()); - Assert.assertEquals("http://www.w3.org/2001/XMLSchema#integer", object.getDatatype().toString()); - Assert.assertEquals("http://sin.siteserv.org/def/", statement.getContext().stringValue()); - } - - /** - * Tests the correct support for literal escaping. - * - * @throws RDFHandlerException - * @throws IOException - * @throws RDFParseException - */ - @Test - public void testLiteralEscapeManagement1() - throws RDFHandlerException, IOException, RDFParseException { - TestParseLocationListener parseLocationListener = new TestParseLocationListener(); - parser.setParseLocationListener(parseLocationListener); - - final ByteArrayInputStream bais = new ByteArrayInputStream( - "<http://a> <http://b> \"\\\\\" <http://c> .".getBytes() - ); - parser.parse(bais, "http://base-uri"); - - rdfHandler.assertHandler(1); - //parseLocationListener.assertListener(1, 40); - parseLocationListener.assertListener(1, 1); - } - - /** - * Tests the correct support for literal escaping. 
- * - * @throws RDFHandlerException - * @throws IOException - * @throws RDFParseException - */ - @Test - public void testLiteralEscapeManagement2() - throws RDFHandlerException, IOException, RDFParseException { - TestParseLocationListener parseLocationListener = new TestParseLocationListener(); - parser.setParseLocationListener(parseLocationListener); - - final ByteArrayInputStream bais = new ByteArrayInputStream( - "<http://a> <http://b> \"Line text 1\\nLine text 2\" <http://c> .".getBytes() - ); - parser.parse(bais, "http://base-uri"); - - rdfHandler.assertHandler(1); - final Value object = rdfHandler.getStatements().get(0).getObject(); - Assert.assertTrue( object instanceof Literal); - final String literalContent = ((Literal) object).getLabel(); - Assert.assertEquals("Line text 1\nLine text 2", literalContent); - } - - /** - * Tests the correct decoding of UTF-8 encoded chars in URIs. - * - * @throws RDFHandlerException - * @throws IOException - * @throws RDFParseException - */ - @Test - public void testURIDecodingManagement() throws RDFHandlerException, IOException, RDFParseException { - TestParseLocationListener parseLocationListener = new TestParseLocationListener(); - parser.setParseLocationListener(parseLocationListener); - - final ByteArrayInputStream bais = new ByteArrayInputStream( - "<http://s/\\u306F\\u3080> <http://p/\\u306F\\u3080> <http://o/\\u306F\\u3080> <http://g/\\u306F\\u3080> ." 
- .getBytes() - ); - parser.parse(bais, "http://base-uri"); - - rdfHandler.assertHandler(1); - final Statement statement = rdfHandler.getStatements().get(0); - - final Resource subject = statement.getSubject(); - Assert.assertTrue( subject instanceof URI); - final String subjectURI = subject.toString(); - Assert.assertEquals("http://s/はむ", subjectURI); - - final Resource predicate = statement.getPredicate(); - Assert.assertTrue( predicate instanceof URI); - final String predicateURI = predicate.toString(); - Assert.assertEquals("http://p/はむ", predicateURI); - - final Value object = statement.getObject(); - Assert.assertTrue( object instanceof URI); - final String objectURI = object.toString(); - Assert.assertEquals("http://o/はむ", objectURI); - - final Resource graph = statement.getContext(); - Assert.assertTrue( graph instanceof URI); - final String graphURI = graph.toString(); - Assert.assertEquals("http://g/はむ", graphURI); - } - - @Test - public void testUnicodeLiteralManagement() throws RDFHandlerException, IOException, RDFParseException { - final String INPUT_LITERAL = "[は、イギリスおよびイングランドの首都である] [是大不列顛及北愛爾蘭聯合王國和英格蘭的首都]"; - final String INPUT_STRING = String.format( - "<http://a> <http://b> \"%s\" <http://c> .", - INPUT_LITERAL - ); - final ByteArrayInputStream bais = new ByteArrayInputStream( - INPUT_STRING.getBytes() - ); - parser.parse(bais, "http://base-uri"); - - rdfHandler.assertHandler(1); - final Literal obj = (Literal) rdfHandler.getStatements().get(0).getObject(); - Assert.assertEquals(INPUT_LITERAL, obj.getLabel()); - } - - @Test - public void testUnicodeLiteralDecoding() throws RDFHandlerException, IOException, RDFParseException { - final String INPUT_LITERAL_PLAIN = "[は]"; - final String INPUT_LITERAL_ENCODED = "[\\u306F]"; - final String INPUT_STRING = String.format( - "<http://a> <http://b> \"%s\" <http://c> .", - INPUT_LITERAL_ENCODED - ); - final ByteArrayInputStream bais = new ByteArrayInputStream( - 
INPUT_STRING.getBytes() - ); - parser.parse(bais, "http://base-uri"); - - rdfHandler.assertHandler(1); - final Literal obj = (Literal) rdfHandler.getStatements().get(0).getObject(); - Assert.assertEquals(INPUT_LITERAL_PLAIN, obj.getLabel()); - } - - @Test(expected = RDFParseException.class) - public void testWrongUnicodeEncodedCharFail() throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - "<http://s> <http://p> \"\\u123X\" <http://g> .".getBytes() - ); - parser.parse(bais, "http://base-uri"); - } - - /** - * Tests the correct support for EOS exception. - * - * @throws RDFHandlerException - * @throws IOException - * @throws RDFParseException - */ - @Test(expected = RDFParseException.class) - public void testEndOfStreamReached() - throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - "<http://a> <http://b> \"\\\" <http://c> .".getBytes() - ); - parser.parse(bais, "http://base-uri"); - } - - /** - * Tests the parser with all cases defined by the NQuads grammar. - * - * @throws IOException - * @throws RDFParseException - * @throws RDFHandlerException - */ - @Test - public void testFullParseScenario() - throws IOException, RDFParseException, RDFHandlerException { - TestParseLocationListener parseLocationListerner = new TestParseLocationListener(); - FullParseScenarioRDFHandler rdfHandler = new FullParseScenarioRDFHandler(); - parser.setParseLocationListener(parseLocationListerner); - parser.setRDFHandler(rdfHandler); - - BufferedReader br = new BufferedReader( - new InputStreamReader( - this.getClass().getClassLoader().getResourceAsStream("application/nquads/test1.nq") - ) - ); - parser.parse( - br, - "http://test.base.uri" - ); - - rdfHandler.assertHandler(6); - parseLocationListerner.assertListener(8, 71); - } - - /** - * Tests parser with real data. 
- * - * @throws IOException - * @throws RDFParseException - * @throws RDFHandlerException - */ - @Test - public void testParseRealData() - throws IOException, RDFParseException, RDFHandlerException { - TestParseLocationListener parseLocationListener = new TestParseLocationListener(); - parser.setParseLocationListener(parseLocationListener); - - parser.parse( - this.getClass().getClassLoader().getResourceAsStream("application/nquads/test2.nq"), - "http://test.base.uri" - ); - - rdfHandler.assertHandler(400); - parseLocationListener.assertListener(400, 349); - } - - @Test - public void testStatementWithInvalidLiteralContentAndIgnoreValidation() - throws RDFHandlerException, IOException, RDFParseException { - verifyStatementWithInvalidLiteralContent(RDFParser.DatatypeHandling.IGNORE); - } - - @Test(expected = RDFParseException.class) - public void testStatementWithInvalidLiteralContentAndStrictValidation() - throws RDFHandlerException, IOException, RDFParseException { - verifyStatementWithInvalidLiteralContent(RDFParser.DatatypeHandling.VERIFY); - } - - @Test - public void testStatementWithInvalidDatatypeAndIgnoreValidation() - throws RDFHandlerException, IOException, RDFParseException { - verifyStatementWithInvalidDatatype(RDFParser.DatatypeHandling.IGNORE); - } - - @Test(expected = RDFParseException.class) - public void testStatementWithInvalidDatatypeAndVerifyValidation() - throws RDFHandlerException, IOException, RDFParseException { - verifyStatementWithInvalidDatatype(RDFParser.DatatypeHandling.VERIFY); - } - - @Test (expected = RDFParseException.class) - public void testStopAtFirstErrorStrictParsing() throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - ( - "<http://s0> <http://p0> <http://o0> <http://g0> .\n" + - "<http://sX> .\n" + // Line with error. 
- "<http://s1> <http://p1> <http://o1> <http://g1> .\n" - ).getBytes() - ); - parser.setStopAtFirstError(true); - parser.parse(bais, "http://base-uri"); - } - - @Test - public void testStopAtFirstErrorTolerantParsing() throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - ( - "<http://s0> <http://p0> <http://o0> <http://g0> .\n" + - "<http://sX> .\n" + // Line with error. - "<http://s1> <http://p1> <http://o1> <http://g1> .\n" - ).getBytes() - ); - //parser.setStopAtFirstError(false); - parser.getParserConfig().addNonFatalError(NTriplesParserSettings.FAIL_ON_NTRIPLES_INVALID_LINES); - parser.parse(bais, "http://base-uri"); - rdfHandler.assertHandler(2); - final List<Statement> statements = rdfHandler.getStatements(); - final int size = statements.size(); - for(int i = 0; i < size; i++) { - Assert.assertEquals("http://s" + i, statements.get(i).getSubject().stringValue() ); - Assert.assertEquals("http://p" + i, statements.get(i).getPredicate().stringValue()); - Assert.assertEquals("http://o" + i, statements.get(i).getObject().stringValue() ); - Assert.assertEquals("http://g" + i, statements.get(i).getContext().stringValue() ); - } - } - - @Test - public void testReportInvalidLiteralAttribute() throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - "<http://a> <http://b> \"literal\"^^xsd:datetime <http://c> .".getBytes() - ); - try { - parser.parse(bais, "http://base-uri"); - Assert.fail("Expected failure here."); - } catch (RDFParseException e) { - Assert.assertTrue(e.getMessage().contains("Expected '<'")); - Assert.assertEquals(1 , e.getLineNumber()); - //Assert.assertEquals(35, e.getColumnNumber()); - } - } - - @Test - public void testParseWithNoContext() throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - ("<http://www.v/dat/4b2-21>" + - 
"<http://www.w3.org/20/ica#dtend>" + - "\"2010\"^^<http://www.w3.org/2001/XMLSchema#integer> ." - ).getBytes() - ); - parser.parse(bais, "http://test.base.uri"); - final Statement statement = rdfHandler.getStatements().get(0); - Assert.assertEquals("http://www.v/dat/4b2-21", statement.getSubject().stringValue()); - Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue()); - Assert.assertTrue(statement.getObject() instanceof Literal); - Literal object = (Literal) statement.getObject(); - Assert.assertEquals("2010", object.stringValue()); - Assert.assertNull(object.getLanguage()); - Assert.assertEquals("http://www.w3.org/2001/XMLSchema#integer", object.getDatatype().toString()); - Assert.assertNull(statement.getContext()); - } - - private void verifyStatementWithInvalidLiteralContent(RDFParser.DatatypeHandling datatypeHandling) - throws RDFHandlerException, IOException, RDFParseException { - final ByteArrayInputStream bais = new ByteArrayInputStream( - ( - "<http://dbpedia.org/resource/Camillo_Benso,_conte_di_Cavour> " + - "<http://dbpedia.org/property/mandatofine> " + - "\"1380.0\"^^<http://www.w3.org/2001/XMLSchema#int> " + // Float declared as int. - "<http://it.wikipedia.org/wiki/Camillo_Benso,_conte_di_Cavour#absolute-line=20> ." 
- ).getBytes() - ); - if(datatypeHandling == RDFParser.DatatypeHandling.VERIFY) { - parser.getParserConfig().setNonFatalErrors(new HashSet<RioSetting<?>>()); - parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true); - parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true); - } - //parser.setDatatypeHandling(datatypeHandling); - parser.parse(bais, "http://base-uri"); - } - - private void verifyStatementWithInvalidDatatype(RDFParser.DatatypeHandling datatypeHandling) - throws RDFHandlerException, IOException, RDFParseException { - parser.setDatatypeHandling(datatypeHandling); - final ByteArrayInputStream bais = new ByteArrayInputStream( - ( - "<http://dbpedia.org/resource/Camillo_Benso,_conte_di_Cavour> " + - "<http://dbpedia.org/property/mandatofine> " + - "\"1380.0\"^^<http://dbpedia.org/invalid/datatype/second> " + - "<http://it.wikipedia.org/wiki/Camillo_Benso,_conte_di_Cavour#absolute-line=20> ." - ).getBytes() - ); - if(datatypeHandling == RDFParser.DatatypeHandling.VERIFY) { - parser.getParserConfig().setNonFatalErrors(new HashSet<RioSetting<?>>()); - parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true); - parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true); - } - parser.parse(bais, "http://base-uri"); - rdfHandler.assertHandler(1); - } - - private class TestParseLocationListener implements ParseLocationListener { - - private int lastRow, lastCol; - - public void parseLocationUpdate(int r, int c) { - lastRow = r; - lastCol = c; - } - - private void assertListener(int row, int col) { - Assert.assertEquals("Unexpected last row", row , lastRow); - // Column numbers are not supported by the Rio NQuadsParser currently - //Assert.assertEquals("Unexpected last col", col , lastCol); - } - - } - - private class TestRDFHandler implements RDFHandler { - - private boolean started = false; - private boolean ended = false; - - private final List<Statement> statements = 
new ArrayList<Statement>(); - - protected List<Statement> getStatements() { - return statements; - } - - public void startRDF() throws RDFHandlerException { - started = true; - } - - public void endRDF() throws RDFHandlerException { - ended = true; - } - - public void handleNamespace(String s, String s1) throws RDFHandlerException { - } - - public void handleStatement(Statement statement) throws RDFHandlerException { - logger.debug(statement.toString()); - statements.add(statement); - } - - public void handleComment(String s) throws RDFHandlerException { - } - - public void assertHandler(int expected) { - Assert.assertTrue("Never stated.", started); - Assert.assertTrue("Never ended." , ended ); - Assert.assertEquals("Unexpected number of statements.", expected, statements.size()); - } - } - - private class FullParseScenarioRDFHandler extends TestRDFHandler { - - public void handleStatement(Statement statement) throws RDFHandlerException { - int statementIndex = getStatements().size(); - if(statementIndex == 0){ - Assert.assertEquals(new URIImpl("http://example.org/alice/foaf.rdf#me"), statement.getSubject() ); - } else { - Assert.assertTrue(statement.getSubject() instanceof BNode); - } - - if( statementIndex == 4) { - Assert.assertEquals(new URIImpl("http://example.org/#like"), statement.getPredicate() ); - } - - if(statementIndex == 5) { - Assert.assertNull(statement.getContext()); - } else { - Assert.assertEquals( - new URIImpl(String.format("http://example.org/alice/foaf%s.rdf", statementIndex + 1)), - statement.getContext() - ); - } - - super.handleStatement(statement); - } - } - -} \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsWriterTest.java ---------------------------------------------------------------------- diff --git a/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsWriterTest.java b/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsWriterTest.java deleted file mode 100644 index 1fe92c6..0000000 --- a/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsWriterTest.java +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.any23.io.nquads; - -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.openrdf.model.BNode; -import org.openrdf.model.Literal; -import org.openrdf.model.Resource; -import org.openrdf.model.Statement; -import org.openrdf.model.URI; -import org.openrdf.model.Value; -import org.openrdf.model.ValueFactory; -import org.openrdf.model.impl.ValueFactoryImpl; -import org.openrdf.rio.RDFHandlerException; -import org.openrdf.rio.RDFParseException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -/** - * Test case for {@link NQuadsWriter}. - * - * @author Michele Mostarda ([email protected]) - */ -public class NQuadsWriterTest { - - private static final Logger logger = LoggerFactory.getLogger(NQuadsWriterTest.class); - - private final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - - private NQuadsWriter writer; - - private ValueFactory vf; - - @Before - public void setUp() { - vf = ValueFactoryImpl.getInstance(); - writer = new NQuadsWriter(baos); - } - - @After - public void tearDown() { - logger.debug( "\n" + baos.toString() ); - baos.reset(); - writer = null; - } - - @Test - public void testWrite() throws RDFHandlerException { - Statement s1 = quad( - uri("http://sub"), - uri("http://pre"), - uri("http://obj"), - uri("http://gra1") - ); - Statement s2 = quad( - bnode("1"), - uri("http://pre"), - bnode("2"), - uri("http://gra2") - ); - Statement s3 = quad( - bnode("3"), - uri("http://pre"), - literal("Sample text 1"), - uri("http://gra2") - ); - Statement s4 = quad( - bnode("4"), - uri("http://pre"), - literal("Sample text 2", "en"), - uri("http://gra2") - ); - Statement s5 = quad( - bnode("5"), - uri("http://pre"), - literal("12345", uri("http://www.w3.org/2001/XMLSchema#integer")), - uri("http://gra2") - ); -// Statement s6 = quad( -// uri("p1:sub"), -// uri("p1:pre"), -// uri("p1:obj"), -// 
uri("p1:gra2") -// ); - Statement s7 = quad( - uri("http://sub"), - uri("http://pre"), - literal("This is line 1.\nThis is line 2.\n"), - uri("http://gra3") - ); - - // Sending events. - writer.startRDF(); - //writer.handleNamespace("p1", "http://test.com/"); - writer.handleStatement(s1); - writer.handleStatement(s2); - writer.handleStatement(s3); - writer.handleStatement(s4); - writer.handleStatement(s5); - //writer.handleStatement(s6); - writer.handleStatement(s7); - writer.endRDF(); - - // Checking content. - String content = baos.toString(); - logger.info("output={}", content); - String[] lines = content.split("\n"); - Assert.assertEquals("Unexpected number of lines.", 6, lines.length); - Assert.assertTrue( lines[0].matches("<.*> <.*> <.*> <.*> \\.") ); - Assert.assertTrue( lines[1].matches("_:.* <.*> _:.* <.*> \\.") ); - Assert.assertTrue( lines[2].matches("_:.* <.*> \".*\" <.*> \\.") ); - Assert.assertTrue( lines[3].matches("_:.* <.*> \".*\"@en <.*> \\.") ); - Assert.assertTrue( lines[4].matches("_:.* <.*> \".*\"\\^\\^<.*> <.*> \\.") ); - //Assert.assertTrue( lines[5].matches("<http://.*> <http://.*> <http://.*> <http://.*> \\.") ); - Assert.assertEquals( - "<http://sub> <http://pre> \"This is line 1.\\nThis is line 2.\\n\" <http://gra3> .", - lines[5] - ); - } - - @Test - public void testReadWrite() throws RDFHandlerException, IOException, RDFParseException { - NQuadsParser parser = new NQuadsParser(); - parser.setRDFHandler(writer); - parser.parse( - this.getClass().getClassLoader().getResourceAsStream("application/nquads/test2.nq"), - "http://test.base.uri" - ); - - Assert.assertEquals("Unexpected number of lines.", 400, baos.toString().split("\n").length); - } - - private Statement quad(Resource subject, URI predicate, Value object, Resource context) { - return this.vf.createStatement(subject, predicate, object, context); - } - - private URI uri(String uri) { - return this.vf.createURI(uri); - } - - private BNode bnode(String testID) { - return 
this.vf.createBNode(testID); - } - - private Literal literal(String literalValue) { - return this.vf.createLiteral(literalValue); - } - - private Literal literal(String literalValue, URI datatype) { - return this.vf.createLiteral(literalValue, datatype); - } - - private Literal literal(String literalValue, String language) { - return this.vf.createLiteral(literalValue, language); - } -} http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/nquads/src/test/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/nquads/src/test/resources/log4j.properties b/nquads/src/test/resources/log4j.properties deleted file mode 100644 index 84062ba..0000000 --- a/nquads/src/test/resources/log4j.properties +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -log4j.rootCategory=INFO, O - -# Stdout -log4j.appender.O=org.apache.log4j.ConsoleAppender - -# File -#log4j.appender.R=org.apache.log4j.RollingFileAppender -#log4j.appender.R.File=log4j.log - -# Control the maximum log file size -#log4j.appender.R.MaxFileSize=100KB - -# Archive log files (one backup file here) -log4j.appender.R.MaxBackupIndex=1 - -log4j.appender.R.layout=org.apache.log4j.PatternLayout -log4j.appender.O.layout=org.apache.log4j.PatternLayout - -log4j.appender.R.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n -log4j.appender.O.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/basic-crawler/pom.xml ---------------------------------------------------------------------- diff --git a/plugins/basic-crawler/pom.xml b/plugins/basic-crawler/pom.xml index 6d63627..bffd7e2 100644 --- a/plugins/basic-crawler/pom.xml +++ b/plugins/basic-crawler/pom.xml @@ -21,7 +21,7 @@ <parent> <groupId>org.apache.any23</groupId> <artifactId>apache-any23</artifactId> - <version>1.2-SNAPSHOT</version> + <version>2.0-SNAPSHOT</version> <relativePath>../../pom.xml</relativePath> </parent> @@ -35,8 +35,8 @@ <dependencies> <!-- Sesame. 
--> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-model</artifactId> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-model</artifactId> <scope>provided</scope> </dependency> @@ -44,14 +44,14 @@ <dependency> <groupId>org.apache.any23</groupId> <artifactId>apache-any23-core</artifactId> - <version>1.2-SNAPSHOT</version> + <version>2.0-SNAPSHOT</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.any23</groupId> <artifactId>apache-any23-core</artifactId> - <version>1.2-SNAPSHOT</version> + <version>2.0-SNAPSHOT</version> <type>test-jar</type> <scope>test</scope> </dependency> http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java ---------------------------------------------------------------------- diff --git a/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java b/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java index 1f84069..66b167b 100644 --- a/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java +++ b/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java @@ -78,10 +78,10 @@ public class Crawler extends Rover { public void run() throws Exception { super.configure(); - if (inputURIs.size() != 1) { + if (inputIRIs.size() != 1) { throw new IllegalArgumentException("Expected just one seed."); } - final URL seed = new URL(inputURIs.get( 0 )); + final URL seed = new URL(inputIRIs.get( 0 )); if ( storageFolder.isFile() ) { throw new IllegalStateException( format( "Storage folder %s can not be a file, must be a directory", http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java ---------------------------------------------------------------------- diff --git a/plugins/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java 
b/plugins/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java index 2439336..eac75f3 100644 --- a/plugins/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java +++ b/plugins/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java @@ -22,10 +22,10 @@ import org.apache.any23.rdf.RDFUtils; import org.apache.any23.util.FileUtils; import org.junit.Ignore; import org.junit.Test; -import org.openrdf.model.Statement; -import org.openrdf.rio.RDFFormat; -import org.openrdf.rio.RDFHandlerException; -import org.openrdf.rio.RDFParseException; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.rio.RDFFormat; +import org.eclipse.rdf4j.rio.RDFHandlerException; +import org.eclipse.rdf4j.rio.RDFParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/html-scraper/pom.xml ---------------------------------------------------------------------- diff --git a/plugins/html-scraper/pom.xml b/plugins/html-scraper/pom.xml index 04340cf..e04651a 100644 --- a/plugins/html-scraper/pom.xml +++ b/plugins/html-scraper/pom.xml @@ -21,7 +21,7 @@ <parent> <groupId>org.apache.any23</groupId> <artifactId>apache-any23</artifactId> - <version>1.2-SNAPSHOT</version> + <version>2.0-SNAPSHOT</version> <relativePath>../../pom.xml</relativePath> </parent> @@ -35,8 +35,8 @@ <dependencies> <!-- Sesame. 
--> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-model</artifactId> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-model</artifactId> <scope>provided</scope> </dependency> @@ -44,7 +44,7 @@ <dependency> <groupId>org.apache.any23</groupId> <artifactId>apache-any23-core</artifactId> - <version>1.2-SNAPSHOT</version> + <version>2.0-SNAPSHOT</version> <scope>provided</scope> </dependency> http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java ---------------------------------------------------------------------- diff --git a/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java b/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java index 0605f62..ab7d34a 100644 --- a/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java +++ b/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java @@ -30,8 +30,8 @@ import org.apache.any23.extractor.ExtractionResult; import org.apache.any23.extractor.Extractor; import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.vocab.SINDICE; -import org.openrdf.model.URI; -import org.openrdf.model.impl.ValueFactoryImpl; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import java.io.IOException; import java.io.InputStream; @@ -46,14 +46,14 @@ import java.util.List; */ public class HTMLScraperExtractor implements Extractor.ContentExtractor { - public final static URI PAGE_CONTENT_DE_PROPERTY = - ValueFactoryImpl.getInstance().createURI(SINDICE.NS + "pagecontent/de"); - public final static URI PAGE_CONTENT_AE_PROPERTY = - ValueFactoryImpl.getInstance().createURI(SINDICE.NS + "pagecontent/ae"); - public final static URI PAGE_CONTENT_LCE_PROPERTY = - 
ValueFactoryImpl.getInstance().createURI(SINDICE.NS + "pagecontent/lce"); - public final static URI PAGE_CONTENT_CE_PROPERTY = - ValueFactoryImpl.getInstance().createURI(SINDICE.NS + "pagecontent/ce"); + public final static IRI PAGE_CONTENT_DE_PROPERTY = + SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/de"); + public final static IRI PAGE_CONTENT_AE_PROPERTY = + SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/ae"); + public final static IRI PAGE_CONTENT_LCE_PROPERTY = + SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/lce"); + public final static IRI PAGE_CONTENT_CE_PROPERTY = + SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/ce"); private final List<ExtractionRule> extractionRules = new ArrayList<ExtractionRule>(); @@ -61,7 +61,7 @@ public class HTMLScraperExtractor implements Extractor.ContentExtractor { loadDefaultRules(); } - public void addTextExtractor(String name, URI property, BoilerpipeExtractor extractor) { + public void addTextExtractor(String name, IRI property, BoilerpipeExtractor extractor) { extractionRules.add( new ExtractionRule(name, property, extractor) ); } @@ -81,13 +81,13 @@ public class HTMLScraperExtractor implements Extractor.ContentExtractor { ExtractionResult extractionResult ) throws IOException, ExtractionException { try { - final URI documentURI = extractionContext.getDocumentURI(); + final IRI documentIRI = extractionContext.getDocumentIRI(); for (ExtractionRule extractionRule : extractionRules) { final String content = extractionRule.boilerpipeExtractor.getText(new InputStreamReader(inputStream)); extractionResult.writeTriple( - documentURI, + documentIRI, extractionRule.property, - ValueFactoryImpl.getInstance().createLiteral(content) + SimpleValueFactory.getInstance().createLiteral(content) ); } } catch (BoilerpipeProcessingException bpe) { @@ -118,10 +118,10 @@ public class HTMLScraperExtractor implements Extractor.ContentExtractor { class 
ExtractionRule { public final String name; - public final URI property; + public final IRI property; public final BoilerpipeExtractor boilerpipeExtractor; - ExtractionRule(String name, URI property, BoilerpipeExtractor boilerpipeExtractor) { + ExtractionRule(String name, IRI property, BoilerpipeExtractor boilerpipeExtractor) { if(name == null) { throw new NullPointerException("name cannot be null."); } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java ---------------------------------------------------------------------- diff --git a/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java b/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java index 1420362..7465ade 100644 --- a/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java +++ b/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java @@ -26,9 +26,9 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.Matchers; -import org.openrdf.model.URI; -import org.openrdf.model.Value; -import org.openrdf.model.impl.ValueFactoryImpl; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import java.io.IOException; import java.io.InputStream; @@ -68,24 +68,24 @@ public class HTMLScraperExtractorTest { public void testRun() throws IOException, ExtractionException { final InputStream is = this.getClass().getResourceAsStream("html-scraper-extractor-test.html"); final ExtractionResult extractionResult = mock(ExtractionResult.class); - final URI pageURI = ValueFactoryImpl.getInstance().createURI("http://fake/test/page/testrun"); + final IRI pageIRI = 
SimpleValueFactory.getInstance().createIRI("http://fake/test/page/testrun"); final ExtractionContext extractionContext = new ExtractionContext( extractor.getDescription().getExtractorName(), - pageURI + pageIRI ); extractor.run(ExtractionParameters.newDefault(), extractionContext, is, extractionResult); verify(extractionResult).writeTriple( - eq(pageURI), eq(HTMLScraperExtractor.PAGE_CONTENT_DE_PROPERTY) , (Value) Matchers.anyObject()) + eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_DE_PROPERTY) , (Value) Matchers.anyObject()) ; verify(extractionResult).writeTriple( - eq(pageURI), eq(HTMLScraperExtractor.PAGE_CONTENT_AE_PROPERTY) , (Value) Matchers.anyObject()) + eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_AE_PROPERTY) , (Value) Matchers.anyObject()) ; verify(extractionResult).writeTriple( - eq(pageURI), eq(HTMLScraperExtractor.PAGE_CONTENT_LCE_PROPERTY) , (Value) Matchers.anyObject()) + eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_LCE_PROPERTY) , (Value) Matchers.anyObject()) ; verify(extractionResult).writeTriple( - eq(pageURI), eq(HTMLScraperExtractor.PAGE_CONTENT_CE_PROPERTY) , (Value) Matchers.anyObject()) + eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_CE_PROPERTY) , (Value) Matchers.anyObject()) ; } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/html-scraper/src/test/resources/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html ---------------------------------------------------------------------- diff --git a/plugins/html-scraper/src/test/resources/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html b/plugins/html-scraper/src/test/resources/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html index 1e416ac..6ec92fb 100644 --- a/plugins/html-scraper/src/test/resources/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html +++ b/plugins/html-scraper/src/test/resources/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html @@ -138,15 +138,15 @@ 
</div> <div class="search-types" id="search-type-term"> <form action="/search" method="get"> - <input onfocus="if (this.value=='Type one or more keywords or URI') {this.value=''} else {this.select()}; return true;" - type="text" name="q" size="45" value="Type one or more keywords or URI"/> + <input onfocus="if (this.value=='Type one or more keywords or IRI') {this.value=''} else {this.select()}; return true;" + type="text" name="q" size="45" value="Type one or more keywords or IRI"/> <button type="submit" class="inspectButton"> SEARCH </button> </form> <span class="tip">Examples: <a href="/search?q=tim%20berners%20lee">tim berners lee</a> - (by <a href="/search?q=http%3A%2F%2Fwww.w3.org%2FPeople%2FBerners-Lee%2Fcard">URI</a>), + (by <a href="/search?q=http%3A%2F%2Fwww.w3.org%2FPeople%2FBerners-Lee%2Fcard">IRI</a>), <a href="/search?q=michele">michele</a>, <a href="/search?q=deri">deri</a></span> <div style="height:1em;"> http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/integration-test/pom.xml ---------------------------------------------------------------------- diff --git a/plugins/integration-test/pom.xml b/plugins/integration-test/pom.xml index d80f5fc..c1d0723 100644 --- a/plugins/integration-test/pom.xml +++ b/plugins/integration-test/pom.xml @@ -21,7 +21,7 @@ <parent> <groupId>org.apache.any23</groupId> <artifactId>apache-any23</artifactId> - <version>1.2-SNAPSHOT</version> + <version>2.0-SNAPSHOT</version> <relativePath>../../pom.xml</relativePath> </parent> http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/office-scraper/pom.xml ---------------------------------------------------------------------- diff --git a/plugins/office-scraper/pom.xml b/plugins/office-scraper/pom.xml index 80cb238..db5e3ef 100644 --- a/plugins/office-scraper/pom.xml +++ b/plugins/office-scraper/pom.xml @@ -21,7 +21,7 @@ <parent> <groupId>org.apache.any23</groupId> <artifactId>apache-any23</artifactId> - <version>1.2-SNAPSHOT</version> + 
<version>2.0-SNAPSHOT</version> <relativePath>../../pom.xml</relativePath> </parent> @@ -35,8 +35,8 @@ <dependencies> <!-- Sesame. --> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-model</artifactId> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-model</artifactId> <scope>provided</scope> </dependency> @@ -44,7 +44,7 @@ <dependency> <groupId>org.apache.any23</groupId> <artifactId>apache-any23-core</artifactId> - <version>1.2-SNAPSHOT</version> + <version>2.0-SNAPSHOT</version> <scope>provided</scope> </dependency> http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java ---------------------------------------------------------------------- diff --git a/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java b/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java index c4e5284..4c8826c 100644 --- a/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java +++ b/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java @@ -31,8 +31,8 @@ import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; -import org.openrdf.model.URI; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.vocabulary.RDF; import java.io.IOException; import java.io.InputStream; @@ -76,90 +76,88 @@ public class ExcelExtractor implements Extractor.ContentExtractor { ExtractionResult er ) throws IOException, ExtractionException { try { - final URI documentURI = context.getDocumentURI(); - final Workbook workbook = createWorkbook(documentURI, in); - processWorkbook(documentURI, workbook, er); + final IRI documentIRI = 
context.getDocumentIRI(); + final Workbook workbook = createWorkbook(documentIRI, in); + processWorkbook(documentIRI, workbook, er); } catch (Exception e) { throw new ExtractionException("An error occurred while extracting MS Excel content.", e); } } // TODO: this should be done by Tika, the extractors should be split. - private Workbook createWorkbook(URI document, InputStream is) throws IOException { - final String documentURI = document.toString(); - if(documentURI.endsWith(".xlsx")) { + private Workbook createWorkbook(IRI document, InputStream is) throws IOException { + final String documentIRI = document.toString(); + if(documentIRI.endsWith(".xlsx")) { return new XSSFWorkbook(is); - } else if(documentURI.endsWith("xls")) { + } else if(documentIRI.endsWith("xls")) { return new HSSFWorkbook(is); } else { - throw new IllegalArgumentException("Unsupported extension for resource [" + documentURI + "]"); + throw new IllegalArgumentException("Unsupported extension for resource [" + documentIRI + "]"); } } - private void processWorkbook(URI documentURI, Workbook wb, ExtractionResult er) { + private void processWorkbook(IRI documentIRI, Workbook wb, ExtractionResult er) { for (int sheetIndex = 0; sheetIndex < wb.getNumberOfSheets(); sheetIndex++) { final Sheet sheet = wb.getSheetAt(sheetIndex); - final URI sheetURI = getSheetURI(documentURI, sheet); - er.writeTriple(documentURI, excel.containsSheet, sheetURI); - er.writeTriple(sheetURI, RDF.TYPE, excel.sheet); - writeSheetMetadata(sheetURI, sheet, er); + final IRI sheetIRI = getSheetIRI(documentIRI, sheet); + er.writeTriple(documentIRI, excel.containsSheet, sheetIRI); + er.writeTriple(sheetIRI, RDF.TYPE, excel.sheet); + writeSheetMetadata(sheetIRI, sheet, er); for (Row row : sheet) { - final URI rowURI = getRowURI(sheetURI, row); - er.writeTriple(sheetURI, excel.containsRow, rowURI); - er.writeTriple(rowURI, RDF.TYPE, excel.row); - writeRowMetadata(rowURI, row, er); + final IRI rowIRI = getRowIRI(sheetIRI, row); + 
er.writeTriple(sheetIRI, excel.containsRow, rowIRI); + er.writeTriple(rowIRI, RDF.TYPE, excel.row); + writeRowMetadata(rowIRI, row, er); for (Cell cell : row) { - writeCell(rowURI, cell, er); + writeCell(rowIRI, cell, er); } } } } - private void writeSheetMetadata(URI sheetURI, Sheet sheet, ExtractionResult er) { + private void writeSheetMetadata(IRI sheetIRI, Sheet sheet, ExtractionResult er) { final String sheetName = sheet.getSheetName(); final int firstRowNum = sheet.getFirstRowNum(); final int lastRowNum = sheet.getLastRowNum(); - er.writeTriple(sheetURI, excel.sheetName, RDFUtils.literal(sheetName)); - er.writeTriple(sheetURI, excel.firstRow, RDFUtils.literal(firstRowNum)); - er.writeTriple(sheetURI, excel.lastRow , RDFUtils.literal(lastRowNum )); + er.writeTriple(sheetIRI, excel.sheetName, RDFUtils.literal(sheetName)); + er.writeTriple(sheetIRI, excel.firstRow, RDFUtils.literal(firstRowNum)); + er.writeTriple(sheetIRI, excel.lastRow , RDFUtils.literal(lastRowNum )); } - private void writeRowMetadata(URI rowURI, Row row, ExtractionResult er) { + private void writeRowMetadata(IRI rowIRI, Row row, ExtractionResult er) { final int firstCellNum = row.getFirstCellNum(); final int lastCellNum = row.getLastCellNum(); - er.writeTriple(rowURI, excel.firstCell , RDFUtils.literal(firstCellNum)); - er.writeTriple(rowURI, excel.lastCell , RDFUtils.literal(lastCellNum )); + er.writeTriple(rowIRI, excel.firstCell , RDFUtils.literal(firstCellNum)); + er.writeTriple(rowIRI, excel.lastCell , RDFUtils.literal(lastCellNum )); } - private void writeCell(URI rowURI, Cell cell, ExtractionResult er) { - final URI cellType = cellTypeToType(cell.getCellType()); + private void writeCell(IRI rowIRI, Cell cell, ExtractionResult er) { + final IRI cellType = cellTypeToType(cell.getCellType()); if(cellType == null) return; // Skip unsupported cells. 
- final URI cellURI = getCellURI(rowURI, cell); - er.writeTriple(rowURI, excel.containsCell, cellURI); - er.writeTriple(cellURI, RDF.TYPE, excel.cell); + final IRI cellIRI = getCellIRI(rowIRI, cell); + er.writeTriple(rowIRI, excel.containsCell, cellIRI); + er.writeTriple(cellIRI, RDF.TYPE, excel.cell); er.writeTriple( - cellURI, + cellIRI, excel.cellValue, RDFUtils.literal(cell.getStringCellValue(), cellType) ); } - private URI getSheetURI(URI documentURI, Sheet sheet) { - return RDFUtils.uri( documentURI.toString() + "/sheet/" + sheet.getSheetName() ); + private IRI getSheetIRI(IRI documentIRI, Sheet sheet) { + return RDFUtils.iri(documentIRI.toString() + "/sheet/" + sheet.getSheetName()); } - private URI getRowURI(URI sheetURI, Row row) { - return RDFUtils.uri( sheetURI.toString() + "/" + row.getRowNum() ); + private IRI getRowIRI(IRI sheetIRI, Row row) { + return RDFUtils.iri(sheetIRI.toString() + "/" + row.getRowNum()); } - private URI getCellURI(URI rowURI, Cell cell) { - return RDFUtils.uri( - rowURI + - String.format("/%d/", cell.getColumnIndex()) - ); + private IRI getCellIRI(IRI rowIRI, Cell cell) { + return RDFUtils.iri(rowIRI + + String.format("/%d/", cell.getColumnIndex())); } - private URI cellTypeToType(int cellType) { + private IRI cellTypeToType(int cellType) { final String postfix; switch (cellType) { case Cell.CELL_TYPE_STRING: @@ -174,7 +172,7 @@ public class ExcelExtractor implements Extractor.ContentExtractor { default: postfix = null; } - return postfix == null ? null : RDFUtils.uri(excel.getNamespace().toString() + postfix); + return postfix == null ? 
null : RDFUtils.iri(excel.getNamespace().toString() + postfix); } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/office-scraper/src/main/java/org/apache/any23/vocab/Excel.java ---------------------------------------------------------------------- diff --git a/plugins/office-scraper/src/main/java/org/apache/any23/vocab/Excel.java b/plugins/office-scraper/src/main/java/org/apache/any23/vocab/Excel.java index de21ff8..64ca060 100644 --- a/plugins/office-scraper/src/main/java/org/apache/any23/vocab/Excel.java +++ b/plugins/office-scraper/src/main/java/org/apache/any23/vocab/Excel.java @@ -17,7 +17,7 @@ package org.apache.any23.vocab; -import org.openrdf.model.URI; +import org.eclipse.rdf4j.model.IRI; /** * The <i>MS Excel</i> extractor vocabulary. @@ -46,63 +46,63 @@ public class Excel extends Vocabulary { /** * This property links the identifier of a <i>document</i> to the identifier of a <i>sheet</i>. */ - public final URI containsSheet = createProperty(CONTAINS_SHEET); + public final IRI containsSheet = createProperty(CONTAINS_SHEET); /** * This property links the identifier of a <i>sheet</i> to the identifier of a <i>row</i>. */ - public final URI containsRow = createProperty(CONTAINS_ROW); + public final IRI containsRow = createProperty(CONTAINS_ROW); /** * This property links the identifier of a <i>row</i> to the identifier of a <i>cell</i>. */ - public final URI containsCell = createProperty(CONTAINS_CELL); + public final IRI containsCell = createProperty(CONTAINS_CELL); /** * This property links the identifier of a <i>Sheet</i> to the name of the sheet. */ - public final URI sheetName = createProperty(SHEET_NAME); + public final IRI sheetName = createProperty(SHEET_NAME); /** * This property links the identifier of a <i>Sheet</i> to the index of the first declared row. 
*/ - public final URI firstRow = createProperty(FIRST_ROW); + public final IRI firstRow = createProperty(FIRST_ROW); /** * This property links the identifier of a <i>Sheet</i> to the index of the last declared row. */ - public final URI lastRow = createProperty(LAST_ROW); + public final IRI lastRow = createProperty(LAST_ROW); /** * This property links the identifier of a <i>Row</i> to the index of the first declared cell. */ - public final URI firstCell = createProperty(FIRST_CELL); + public final IRI firstCell = createProperty(FIRST_CELL); /** * This property links the identifier of a <i>Row</i> to the index of the last declared cell. */ - public final URI lastCell = createProperty(LAST_CELL); + public final IRI lastCell = createProperty(LAST_CELL); /** * This property links the identifier of a <i>cell</i> to the content of the cell. */ - public final URI cellValue = createProperty(CELL_VALUE); + public final IRI cellValue = createProperty(CELL_VALUE); /** * This resource identifies a <i>Sheet</i>. */ - public final URI sheet = createResource(SHEET); + public final IRI sheet = createResource(SHEET); /** * This resource identifies a <i>row</i>. */ - public final URI row = createResource(ROW); + public final IRI row = createResource(ROW); /** * This resource identifies a <i>cell</i>. */ - public final URI cell = createResource(CELL); + public final IRI cell = createResource(CELL); /** * The namespace of the vocabulary as a string. @@ -118,16 +118,16 @@ public class Excel extends Vocabulary { return instance; } - public URI createResource(String localName) { + public IRI createResource(String localName) { return createProperty(NS, localName); } /** * * @param localName - * @return the new URI instance. + * @return the new IRI instance. 
*/ - public URI createProperty(String localName) { + public IRI createProperty(String localName) { return createProperty(NS, localName); } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java ---------------------------------------------------------------------- diff --git a/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java b/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java index 3650790..f970889 100644 --- a/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java +++ b/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java @@ -33,10 +33,10 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; -import org.openrdf.model.Resource; -import org.openrdf.model.URI; -import org.openrdf.model.Value; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -81,7 +81,7 @@ public class ExcelExtractorTest { final ExtractionParameters extractionParameters = ExtractionParameters.newDefault(); final ExtractionContext extractionContext = new ExtractionContext( extractor.getDescription().getExtractorName(), - RDFUtils.uri("file://" + resource) + RDFUtils.iri("file://" + resource) ); final InputStream is = this.getClass().getResourceAsStream(resource); final CompositeTripleHandler compositeTripleHandler = new CompositeTripleHandler(); @@ -107,24 +107,24 @@ public class ExcelExtractorTest { verifyTypeOccurrence(verifierTripleHandler, Excel.getInstance().cell , 18); } - private void verifyPredicateOccurrence(TripleHandler mock, URI predicate, 
int occurrence) + private void verifyPredicateOccurrence(TripleHandler mock, IRI predicate, int occurrence) throws TripleHandlerException { Mockito.verify( mock, Mockito.times(occurrence)).receiveTriple( Mockito.<Resource>anyObject(), Mockito.eq(predicate), Mockito.<Value>anyObject(), - Mockito.<URI>any(), + Mockito.<IRI>any(), Mockito.<ExtractionContext>anyObject() ); } - private void verifyTypeOccurrence(TripleHandler mock, URI type, int occurrence) + private void verifyTypeOccurrence(TripleHandler mock, IRI type, int occurrence) throws TripleHandlerException { Mockito.verify( mock, Mockito.times(occurrence)).receiveTriple( Mockito.<Resource>anyObject(), Mockito.eq(RDF.TYPE), Mockito.eq(type), - Mockito.<URI>any(), + Mockito.<IRI>any(), Mockito.<ExtractionContext>anyObject() ); } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index e761507..0a8c69b 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ <groupId>org.apache.any23</groupId> <artifactId>apache-any23</artifactId> - <version>1.2-SNAPSHOT</version> + <version>2.0-SNAPSHOT</version> <packaging>pom</packaging> <name>Apache Any23</name> @@ -199,7 +199,6 @@ <modules> <module>api</module> <module>test-resources</module> - <module>nquads</module> <module>csvutils</module> <module>mime</module> <module>encoding</module> @@ -236,9 +235,9 @@ <maven.build.timestamp.format>yyyy-MM-dd HH:mm:ssZ</maven.build.timestamp.format> <implementation.build>${scmBranch}@r${buildNumber}</implementation.build> <implementation.build.tstamp>${maven.build.timestamp}</implementation.build.tstamp> - <slf4j.logger.version>1.7.5</slf4j.logger.version> - <sesame.version>2.7.14</sesame.version> - <semargl.version>0.6.1</semargl.version> + <slf4j.logger.version>1.7.21</slf4j.logger.version> + <rdf4j.version>2.1.3</rdf4j.version> + <semargl.version>0.7</semargl.version> 
<latest.stable.released>1.1</latest.stable.released> <form.tracker.id>UA-59636188-1</form.tracker.id> @@ -343,77 +342,77 @@ <!-- BEGIN: Sesame --> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-model</artifactId> - <version>${sesame.version}</version> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-model</artifactId> + <version>${rdf4j.version}</version> </dependency> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-rio-api</artifactId> - <version>${sesame.version}</version> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-rio-api</artifactId> + <version>${rdf4j.version}</version> </dependency> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-rio-turtle</artifactId> - <version>${sesame.version}</version> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-rio-turtle</artifactId> + <version>${rdf4j.version}</version> </dependency> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-rio-rdfxml</artifactId> - <version>${sesame.version}</version> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-rio-rdfxml</artifactId> + <version>${rdf4j.version}</version> </dependency> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-rio-ntriples</artifactId> - <version>${sesame.version}</version> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-rio-ntriples</artifactId> + <version>${rdf4j.version}</version> </dependency> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-rio-nquads</artifactId> - <version>${sesame.version}</version> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-rio-nquads</artifactId> + <version>${rdf4j.version}</version> </dependency> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-rio-n3</artifactId> - <version>${sesame.version}</version> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-rio-n3</artifactId> + 
<version>${rdf4j.version}</version> </dependency> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-rio-trix</artifactId> - <version>${sesame.version}</version> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-rio-trix</artifactId> + <version>${rdf4j.version}</version> </dependency> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-rio-rdfjson</artifactId> - <version>${sesame.version}</version> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-rio-rdfjson</artifactId> + <version>${rdf4j.version}</version> </dependency> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-repository-sail</artifactId> - <version>${sesame.version}</version> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-repository-sail</artifactId> + <version>${rdf4j.version}</version> </dependency> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-sail-memory</artifactId> - <version>${sesame.version}</version> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-sail-memory</artifactId> + <version>${rdf4j.version}</version> </dependency> <dependency> - <groupId>org.openrdf.sesame</groupId> - <artifactId>sesame-repository-api</artifactId> - <version>${sesame.version}</version> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-repository-api</artifactId> + <version>${rdf4j.version}</version> </dependency> <dependency> <groupId>org.semarglproject</groupId> - <artifactId>semargl-sesame</artifactId> + <artifactId>semargl-rdf4j</artifactId> <version>${semargl.version}</version> </dependency> <!-- END: Sesame --> <!-- BEGIN: Misc --> <dependency> - <groupId>com.github.jsonld-java</groupId> - <artifactId>jsonld-java-sesame</artifactId> - <version>0.5.0</version> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-rio-jsonld</artifactId> + <version>${rdf4j.version}</version> </dependency> <!-- END: Misc --> @@ -493,7 +492,7 @@ <dependency> <groupId>junit</groupId> 
<artifactId>junit</artifactId> - <version>4.11</version> + <version>4.12</version> <scope>test</scope> </dependency> <dependency> http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/service/pom.xml ---------------------------------------------------------------------- diff --git a/service/pom.xml b/service/pom.xml index 4276412..3264290 100644 --- a/service/pom.xml +++ b/service/pom.xml @@ -21,7 +21,7 @@ <parent> <groupId>org.apache.any23</groupId> <artifactId>apache-any23</artifactId> - <version>1.2-SNAPSHOT</version> + <version>2.0-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> @@ -43,11 +43,6 @@ <artifactId>apache-any23-core</artifactId> <version>${project.version}</version> </dependency> - <dependency> - <groupId>org.apache.any23</groupId> - <artifactId>apache-any23-nquads</artifactId> - <version>${project.version}</version> - </dependency> <!-- Logging --> <dependency> http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/service/src/main/java/org/apache/any23/servlet/Servlet.java ---------------------------------------------------------------------- diff --git a/service/src/main/java/org/apache/any23/servlet/Servlet.java b/service/src/main/java/org/apache/any23/servlet/Servlet.java index 1c13c3c..b60ad5f 100644 --- a/service/src/main/java/org/apache/any23/servlet/Servlet.java +++ b/service/src/main/java/org/apache/any23/servlet/Servlet.java @@ -27,7 +27,7 @@ import org.apache.any23.source.DocumentSource; import org.apache.any23.source.HTTPDocumentSource; import org.apache.any23.source.StringDocumentSource; import org.apache.commons.httpclient.URI; -import org.openrdf.rio.RDFFormat; +import org.eclipse.rdf4j.rio.RDFFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,7 +42,7 @@ import java.util.regex.Pattern; import static org.apache.any23.extractor.ExtractionParameters.ValidationMode; /** - * A <i>Servlet</i> that fetches a client-specified <i>URI</i>, + * A <i>Servlet</i> that fetches a client-specified 
<i>IRI</i>, * RDFizes the content, and returns it in a format chosen by the client. * * @author Gabriele Renzi @@ -52,7 +52,7 @@ public class Servlet extends HttpServlet { private static final Logger LOG = LoggerFactory.getLogger(Servlet.class); - public static final String DEFAULT_BASE_URI = "http://any23.org/tmp/"; + public static final String DEFAULT_BASE_IRI = "http://any23.org/tmp/"; private static final long serialVersionUID = 8207685628715421336L; @@ -70,9 +70,9 @@ public class Servlet extends HttpServlet { responder.sendError(406, "Client accept header does not include a supported output format", report); return; } - final String uri = getInputURIFromRequest(req); + final String uri = getInputIRIFromRequest(req); if (uri == null) { - responder.sendError(404, "Missing URI in GET request. Try /format/http://example.com/myfile", report); + responder.sendError(404, "Missing IRI in GET request. Try /format/http://example.com/myfile", report); return; } final ExtractionParameters eps = getExtractionParameters(req); @@ -88,7 +88,7 @@ public class Servlet extends HttpServlet { responder.sendError(400, "Invalid POST request, no Content-Type for the message body specified", report); return; } - final String uri = getInputURIFromRequest(req); + final String uri = getInputIRIFromRequest(req); final String format = getFormatFromRequestOrNegotiation(req); if (format == null) { responder.sendError(406, "Client accept header does not include a supported output format", report); @@ -97,7 +97,7 @@ public class Servlet extends HttpServlet { final ExtractionParameters eps = getExtractionParameters(req); if ("application/x-www-form-urlencoded".equals(getContentTypeHeader(req))) { if (uri != null) { - log("Attempting conversion to '" + format + "' from URI <" + uri + ">"); + log("Attempting conversion to '" + format + "' from IRI <" + uri + ">"); responder.runExtraction(createHTTPDocumentSource(responder, uri, report), eps, format, report, annotate); return; } @@ -111,7 +111,7 
@@ public class Servlet extends HttpServlet { } log("Attempting conversion to '" + format + "' from body parameter"); responder.runExtraction( - new StringDocumentSource(req.getParameter("body"), Servlet.DEFAULT_BASE_URI, type), + new StringDocumentSource(req.getParameter("body"), Servlet.DEFAULT_BASE_IRI, type), eps, format, report, annotate @@ -122,7 +122,7 @@ public class Servlet extends HttpServlet { responder.runExtraction( new ByteArrayDocumentSource( req.getInputStream(), - Servlet.DEFAULT_BASE_URI, + Servlet.DEFAULT_BASE_IRI, getContentTypeHeader(req) ), eps, @@ -169,7 +169,7 @@ public class Servlet extends HttpServlet { return args[1]; } - private String getInputURIFromRequest(HttpServletRequest request) { + private String getInputIRIFromRequest(HttpServletRequest request) { if (request.getPathInfo() == null) return null; String[] args = request.getPathInfo().split("/", 3); if (args.length < 3) { @@ -222,13 +222,13 @@ public class Servlet extends HttpServlet { private DocumentSource createHTTPDocumentSource(WebResponder responder, String uri, boolean report) throws IOException { try { - if (!isValidURI(uri)) { + if (!isValidIRI(uri)) { throw new URISyntaxException(uri, "@@@"); } return createHTTPDocumentSource(responder.getRunner().getHTTPClient(), uri); } catch (URISyntaxException ex) { - LOG.error("Invalid URI detected", ex); - responder.sendError(400, "Invalid input URI " + uri, report); + LOG.error("Invalid IRI detected", ex); + responder.sendError(400, "Invalid input IRI " + uri, report); return null; } } @@ -238,7 +238,7 @@ public class Servlet extends HttpServlet { return new HTTPDocumentSource(httpClient, uri); } - private boolean isValidURI(String s) { + private boolean isValidIRI(String s) { try { URI uri = new URI(s, false); if (!"http".equals(uri.getScheme()) && !"https".equals(uri.getScheme())) { http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/service/src/main/resources/form.html 
---------------------------------------------------------------------- diff --git a/service/src/main/resources/form.html b/service/src/main/resources/form.html index 8a53d2c..e11a019 100644 --- a/service/src/main/resources/form.html +++ b/service/src/main/resources/form.html @@ -61,9 +61,9 @@ function showModal( id ) <p>Parses Microformats, RDFa, Microdata, RDF/XML, Turtle, N-Triples, JSON-LD and NQuads.</p> <p>Download and install Any23: visit the <a href="http://any23.apache.org/" target="_blank">Developers Site</a> and the <a href="http://any23.apache.org/getting-started.html" target="_blank">Documentation</a>. <hr /> - <h2>Convert document at URI</h2> + <h2>Convert document at IRI</h2> <form class="well form-horizontal" method="get" action="any23/"> - <label>Pick an output format and enter the URI of a web document:</label> + <label>Pick an output format and enter the IRI of a web document:</label> <div class="control-group"> <label class="control-label app-base-uri" for="format">http://.../</label> @@ -204,18 +204,18 @@ function showModal( id ) </ul> <h3>Compact API</h3> <p>HTTP GET requests can be made - to URIs of the shape</p> + to IRIs of the shape</p> <pre><span class="app-base-uri">http://.../</span><em>format</em>/<em>input-uri</em></pre> <p>The response is the input document converted to the desired output format.</p> <h3>Form-style GET API</h3> <p>HTTP GET requests can be made to - the URI + the IRI <code class="app-base-uri">http://.../</code> with the following query parameters: </p> <table class="table"> - <tr><th>uri</th><td>URI of an input document.</td></tr> + <tr><th>uri</th><td>IRI of an input document.</td></tr> <tr><th>format</th><td>Desired output format, defaults to <code>best</code>.</td></tr> <tr><th>validation-mode</th><td>The validation level to be applied on the input. 
Possible values:<br/> <code>none</code> (no validation applied);<br/> @@ -308,7 +308,7 @@ Content-Length: 174 <tbody> <tr><th>200 OK</th><td>Success</td></tr> <tr><th>400 Bad Request</th><td>Missing or malformed input parameter</td></tr> - <tr><th>404 Not Found</th><td>Malformed request URI</td></tr> + <tr><th>404 Not Found</th><td>Malformed request IRI</td></tr> <tr><th>406 Not Acceptable</th><td>None of the media types specified in the <code>Accept</code> header are supported</td></tr> <tr><th>415 Unsupported Media Type</th><td>Document body with unsupported media type was POSTed</td></tr> <tr><th>501 Not Implemented</th><td>Extraction from input was successful, but yielded zero triples</td></tr>
