http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStreamTest.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStreamTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStreamTest.java new file mode 100644 index 0000000..f453aa5 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStreamTest.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.io.input.util; + +import java.io.InputStream; + +import org.apache.jena.hadoop.rdf.io.input.util.TrackableInputStream; +import org.apache.jena.hadoop.rdf.io.input.util.TrackedInputStream; + +/** + * Tests for the {@link TrackedInputStream} + * <p> + * Plugs {@link TrackedInputStream} into the inherited {@link AbstractTrackableInputStreamTests}: + * {@link #getInstance(InputStream)} builds the stream under test from the supplied input + */ +public class TrackedInputStreamTest extends AbstractTrackableInputStreamTests { + + @Override + protected TrackableInputStream getInstance(InputStream input) { + return new TrackedInputStream(input); + } + +}
http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormatTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormatTests.java new file mode 100644 index 0000000..b5ea2d8 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormatTests.java @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.io.output; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Iterator; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.TaskType; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.task.JobContextImpl; +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; +import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RDFDataMgr; +import org.apache.jena.riot.lang.StreamRDFCounting; +import org.apache.jena.riot.system.StreamRDFLib; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Abstract node tuple output format tests + * <p> + * Each test writes generated tuples through the output format under test, then parses the output file back and checks the tuple count + * @param <TValue> + * Tuple type + * @param <T> + * Writable tuple type + * + */ +public abstract class AbstractNodeTupleOutputFormatTests<TValue, T extends AbstractNodeTupleWritable<TValue>> { + + @SuppressWarnings("unused") + private static final Logger LOG = LoggerFactory.getLogger(AbstractNodeTupleOutputFormatTests.class); + + protected static final int EMPTY_SIZE = 0, SMALL_SIZE = 100, LARGE_SIZE = 10000, VERY_LARGE_SIZE = 100000; + + /** + * Temporary folder for the tests; its root is passed to the tests as the job output location + */ + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + 
+ /** + * Prepares a fresh configuration + * + * @return Configuration + */ + protected Configuration prepareConfiguration() { + Configuration config = new Configuration(true); + // Nothing else to do + return config; + } + + /** + * Gets the extra file extension to add to the filenames + * + * @return File extension + */ + protected abstract String getFileExtension(); + + /** + * Generates tuples to be output for testing + * + * @param num + * Number of tuples to generate + * @return Iterator of tuples + */ + protected abstract Iterator<T> generateTuples(int num); + + /** + * Counts tuples in the output file by parsing it in the language given by {@link #getRdfLanguage()} + * + * @param f + * Output file + * @return Tuple count + */ + protected final long countTuples(File f) { + StreamRDFCounting counter = StreamRDFLib.count(); + RDFDataMgr.parse(counter, f.getAbsolutePath(), this.getRdfLanguage(), null); + return counter.count(); + } + + /** + * Checks that the number of tuples counted in the file is as expected + * + * @param f + * File + * @param expected + * Expected number of tuples + */ + protected void checkTuples(File f, long expected) { + Assert.assertEquals(expected, this.countTuples(f)); + } + + /** + * Gets the RDF language of the produced output which is used to parse back + * in the output to validate the correct amount of output was produced + * + * @return RDF language + */ + protected abstract Lang getRdfLanguage(); + + /** + * Gets the output format to test + * + * @return Output format + */ + protected abstract OutputFormat<NullWritable, T> getOutputFormat(); + + /** + * Adds an output path to the job configuration, qualified against the local file system + * + * @param f + * File whose absolute path becomes the output path + * @param config + * Configuration + * @param job + * Job + * @throws IOException + */ + protected void addOutputPath(File f, Configuration config, Job job) throws IOException { + FileSystem fs = FileSystem.getLocal(config); + Path outputPath = fs.makeQualified(new Path(f.getAbsolutePath())); + FileOutputFormat.setOutputPath(job, outputPath); + } + + protected File findOutputFile(File dir, JobContext 
context) throws FileNotFoundException, IOException { + Path outputPath = FileOutputFormat.getOutputPath(context); + RemoteIterator<LocatedFileStatus> files = outputPath.getFileSystem(context.getConfiguration()).listFiles( + outputPath, true); + while (files.hasNext()) { + LocatedFileStatus status = files.next(); + if (status.isFile() && !status.getPath().getName().startsWith("_")) { + return new File(status.getPath().toUri()); + } + } + return null; + } + + /** + * Tests output by writing the generated tuples with the record writer of the output format and checking the tuple count parsed back from the output file + * + * @param f + * File to output to + * @param num + * Number of tuples to output + * @throws IOException + * @throws InterruptedException + */ + protected final void testOutput(File f, int num) throws IOException, InterruptedException { + // Prepare configuration + Configuration config = this.prepareConfiguration(); + + // Set up fake job + OutputFormat<NullWritable, T> outputFormat = this.getOutputFormat(); + Job job = Job.getInstance(config); + job.setOutputFormatClass(outputFormat.getClass()); + this.addOutputPath(f, job.getConfiguration(), job); + JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID()); + Assert.assertNotNull(FileOutputFormat.getOutputPath(context)); + + // Output the data + TaskAttemptID id = new TaskAttemptID("outputTest", 1, TaskType.MAP, 1, 1); + TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), id); + RecordWriter<NullWritable, T> writer = outputFormat.getRecordWriter(taskContext); + Iterator<T> tuples = this.generateTuples(num); + while (tuples.hasNext()) { + writer.write(NullWritable.get(), tuples.next()); + } + writer.close(taskContext); + + // Check output + File outputFile = this.findOutputFile(this.folder.getRoot(), context); + Assert.assertNotNull(outputFile); + this.checkTuples(outputFile, num); + } + + /** + * Basic output test with {@link #EMPTY_SIZE} tuples + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public void output_01() throws IOException, InterruptedException { + 
this.testOutput(this.folder.getRoot(), EMPTY_SIZE); + } + + /** + * Basic output test with {@link #SMALL_SIZE} tuples + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public void output_02() throws IOException, InterruptedException { + this.testOutput(this.folder.getRoot(), SMALL_SIZE); + } + + /** + * Basic output test with {@link #LARGE_SIZE} tuples + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public void output_03() throws IOException, InterruptedException { + this.testOutput(this.folder.getRoot(), LARGE_SIZE); + } + + /** + * Basic output test with {@link #VERY_LARGE_SIZE} tuples + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public void output_04() throws IOException, InterruptedException { + this.testOutput(this.folder.getRoot(), VERY_LARGE_SIZE); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/AbstractQuadOutputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/AbstractQuadOutputFormatTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/AbstractQuadOutputFormatTests.java new file mode 100644 index 0000000..f1822f6 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/AbstractQuadOutputFormatTests.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.io.output; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.jena.hadoop.rdf.types.QuadWritable; + +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; +import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * Abstract tests for quad output formats + * <p> + * Generates one quad per {@code i} from 0 up to the requested count: graph {@code http://example.org/graphs/i}, subject {@code http://example.org/subjects/i}, + * predicate {@code http://example.org/predicate} and the literal {@code i} typed as {@code xsd:integer} + */ +public abstract class AbstractQuadOutputFormatTests extends AbstractNodeTupleOutputFormatTests<Quad, QuadWritable> { + + @Override + protected Iterator<QuadWritable> generateTuples(int num) { + List<QuadWritable> qs = new ArrayList<QuadWritable>(); + for (int i = 0; i < num; i++) { + Quad q = new Quad(NodeFactory.createURI("http://example.org/graphs/" + i), + NodeFactory.createURI("http://example.org/subjects/" + i), + NodeFactory.createURI("http://example.org/predicate"), NodeFactory.createLiteral(Integer.toString(i), + XSDDatatype.XSDinteger)); + qs.add(new QuadWritable(q)); + } + return qs.iterator(); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/AbstractTripleOutputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/AbstractTripleOutputFormatTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/AbstractTripleOutputFormatTests.java new file mode 100644 index 0000000..90eb531 --- 
/dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/AbstractTripleOutputFormatTests.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.io.output; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.jena.hadoop.rdf.types.TripleWritable; + +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; +import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.graph.Triple; + +/** + * Abstract tests for triple output formats + * <p> + * Generates one triple per {@code i} from 0 up to the requested count: subject {@code http://example.org/subjects/i}, predicate {@code http://example.org/predicate} and the literal {@code i} typed as {@code xsd:integer} + */ +public abstract class AbstractTripleOutputFormatTests extends AbstractNodeTupleOutputFormatTests<Triple, TripleWritable> { + + @Override + protected Iterator<TripleWritable> generateTuples(int num) { + List<TripleWritable> ts = new ArrayList<TripleWritable>(); + for (int i = 0; i < num; i++) { + Triple t = new Triple(NodeFactory.createURI("http://example.org/subjects/" + i), NodeFactory.createURI("http://example.org/predicate"), NodeFactory.createLiteral(Integer.toString(i), XSDDatatype.XSDinteger)); + ts.add(new TripleWritable(t)); + } + return ts.iterator(); + } +} 
http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLdQuadOutputTest.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLdQuadOutputTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLdQuadOutputTest.java new file mode 100644 index 0000000..c6784a5 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLdQuadOutputTest.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.io.output.jsonld; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.output.AbstractQuadOutputFormatTests; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.riot.Lang; + +/** + * Tests for JSON-LD quad output: runs the inherited quad tests against {@link JsonLDQuadOutputFormat}, with {@link Lang#JSONLD} as the output language + */ +public class JsonLdQuadOutputTest extends AbstractQuadOutputFormatTests { + + @Override + protected String getFileExtension() { + return ".jsonld"; + } + + @Override + protected Lang getRdfLanguage() { + return Lang.JSONLD; + } + + @Override + protected OutputFormat<NullWritable, QuadWritable> getOutputFormat() { + return new JsonLDQuadOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLdTripleOutputTest.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLdTripleOutputTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLdTripleOutputTest.java new file mode 100644 index 0000000..d157409 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLdTripleOutputTest.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.io.output.jsonld; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.output.AbstractTripleOutputFormatTests; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; + +/** + * Tests for JSON-LD triple output: runs the inherited triple tests against {@link JsonLDTripleOutputFormat}, with {@link Lang#JSONLD} as the output language + */ +public class JsonLdTripleOutputTest extends AbstractTripleOutputFormatTests { + + @Override + protected String getFileExtension() { + return ".jsonld"; + } + + @Override + protected Lang getRdfLanguage() { + return Lang.JSONLD; + } + + @Override + protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() { + return new JsonLDTripleOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/nquads/NQuadsOutputTest.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/nquads/NQuadsOutputTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/nquads/NQuadsOutputTest.java new file mode 100644 index 0000000..1a7ffa4 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/nquads/NQuadsOutputTest.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.io.output.nquads; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.output.AbstractQuadOutputFormatTests; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.riot.Lang; + + +/** + * Tests for NQuads output format + * <p> + * Runs the inherited quad tests against {@link NQuadsOutputFormat}, with {@link Lang#NQUADS} + * as the output language and {@code .nq} as the file extension + */ +public class NQuadsOutputTest extends AbstractQuadOutputFormatTests { + + @Override + protected String getFileExtension() { + return ".nq"; + } + + @Override + protected Lang getRdfLanguage() { + return Lang.NQUADS; + } + + @Override + protected OutputFormat<NullWritable, QuadWritable> getOutputFormat() { + return new NQuadsOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesOutputTest.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesOutputTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesOutputTest.java new file mode 100644 index 0000000..ad9be56 --- /dev/null 
+++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesOutputTest.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.io.output.ntriples; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.output.AbstractTripleOutputFormatTests; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; + + +/** + * Tests for NTriples output format + * <p> + * Runs the inherited triple tests against {@link NTriplesOutputFormat}, with {@link Lang#NTRIPLES} + * as the output language and {@code .nt} as the file extension + */ +public class NTriplesOutputTest extends AbstractTripleOutputFormatTests { + + @Override + protected String getFileExtension() { + return ".nt"; + } + + @Override + protected Lang getRdfLanguage() { + return Lang.NTRIPLES; + } + + @Override + protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() { + return new NTriplesOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/rdfjson/RdfJsonOutputTest.java ---------------------------------------------------------------------- diff --git 
a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/rdfjson/RdfJsonOutputTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/rdfjson/RdfJsonOutputTest.java new file mode 100644 index 0000000..833f89b --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/rdfjson/RdfJsonOutputTest.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.io.output.rdfjson; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.output.AbstractTripleOutputFormatTests; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; + + +/** + * Tests for RDF/JSON output + * <p> + * Runs the inherited triple tests against {@link RdfJsonOutputFormat}, with {@link Lang#RDFJSON} + * as the output language and {@code .rj} as the file extension + */ +public class RdfJsonOutputTest extends AbstractTripleOutputFormatTests { + + @Override + protected String getFileExtension() { + return ".rj"; + } + + @Override + protected Lang getRdfLanguage() { + return Lang.RDFJSON; + } + + @Override + protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() { + return new RdfJsonOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/rdfxml/RdfXmlOutputTest.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/rdfxml/RdfXmlOutputTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/rdfxml/RdfXmlOutputTest.java new file mode 100644 index 0000000..40bc937 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/rdfxml/RdfXmlOutputTest.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.io.output.rdfxml; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.output.AbstractTripleOutputFormatTests; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; + + +/** + * Tests for RDF/XML output + * <p> + * Runs the inherited triple tests against {@link RdfXmlOutputFormat}, with {@link Lang#RDFXML} + * as the output language and {@code .rdf} as the file extension + */ +public class RdfXmlOutputTest extends AbstractTripleOutputFormatTests { + + @Override + protected String getFileExtension() { + return ".rdf"; + } + + @Override + protected Lang getRdfLanguage() { + return Lang.RDFXML; + } + + @Override + protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() { + return new RdfXmlOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftQuadOutputTest.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftQuadOutputTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftQuadOutputTest.java new file mode 100644 index 0000000..91509da --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftQuadOutputTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.io.output.thrift; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.output.AbstractQuadOutputFormatTests; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RDFLanguages; + +/** + * Tests for Thrift quad output: runs the inherited quad tests against {@link ThriftQuadOutputFormat}, with {@link RDFLanguages#THRIFT} as the output language + */ +public class ThriftQuadOutputTest extends AbstractQuadOutputFormatTests { + + @Override + protected String getFileExtension() { + return ".trdf"; + } + + @Override + protected Lang getRdfLanguage() { + return RDFLanguages.THRIFT; + } + + @Override + protected OutputFormat<NullWritable, QuadWritable> getOutputFormat() { + return new ThriftQuadOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftTripleOutputTest.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftTripleOutputTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftTripleOutputTest.java new 
file mode 100644 index 0000000..cf50330 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftTripleOutputTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.io.output.thrift; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.output.AbstractTripleOutputFormatTests; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RDFLanguages; + +/** + * Tests for the Thrift triple output format: supplies the {@code .trdf} extension, {@link RDFLanguages#THRIFT} and a {@link ThriftTripleOutputFormat} to the inherited {@link AbstractTripleOutputFormatTests} + */ +public class ThriftTripleOutputTest extends AbstractTripleOutputFormatTests { + + @Override + protected String getFileExtension() { + return ".trdf"; + } + + @Override + protected Lang getRdfLanguage() { + return RDFLanguages.THRIFT; + } + + @Override + protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() { + return new ThriftTripleOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputTest.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputTest.java new file mode 100644 index 0000000..fd886a3 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputTest.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.io.output.trig; + +import java.util.Arrays; +import java.util.Collection; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.RdfIOConstants; +import org.apache.jena.hadoop.rdf.io.output.AbstractQuadOutputFormatTests; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.riot.Lang; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + + +/** + * Tests for batched TriG output via {@link BatchedTriGOutputFormat} + * + * Parameterized so the inherited quad output tests run once per batch size + * (the default, 1000, 100 and 1), set through {@link RdfIOConstants#OUTPUT_BATCH_SIZE} + */ +@RunWith(Parameterized.class) +public class BatchedTriGOutputTest extends AbstractQuadOutputFormatTests { + + static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE; + static long $bs2 = 1000; + static long $bs3 = 100; + static long $bs4 = 1; + + /** + * @return Test parameters + */ + @Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, { $bs4 } }); + } + + private final long batchSize; + + /** + * Creates new tests + * + * @param batchSize + * Batch size + */ + public BatchedTriGOutputTest(long batchSize) { + this.batchSize = batchSize; + } + + @Override + protected String getFileExtension() { + return ".trig"; + } + + @Override + protected Lang getRdfLanguage() { + return Lang.TRIG; + } + + @Override + protected Configuration prepareConfiguration() { + Configuration config = super.prepareConfiguration(); + 
config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, this.batchSize); + return config; + } + + @Override + protected OutputFormat<NullWritable, QuadWritable> getOutputFormat() { + return new BatchedTriGOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/StreamedTriGOutputTest.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/StreamedTriGOutputTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/StreamedTriGOutputTest.java new file mode 100644 index 0000000..9b2b669 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/StreamedTriGOutputTest.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.io.output.trig; + +import java.util.Arrays; +import java.util.Collection; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.RdfIOConstants; +import org.apache.jena.hadoop.rdf.io.output.AbstractQuadOutputFormatTests; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.riot.Lang; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + + +/** + * Tests for streamed TriG output via {@link TriGOutputFormat} + * + * Parameterized so the inherited quad output tests run once per batch size + * (the default, 1000, 100 and 1), set through {@link RdfIOConstants#OUTPUT_BATCH_SIZE} + */ +@RunWith(Parameterized.class) +public class StreamedTriGOutputTest extends AbstractQuadOutputFormatTests { + + static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE; + static long $bs2 = 1000; + static long $bs3 = 100; + static long $bs4 = 1; + + /** + * @return Test parameters + */ + @Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, { $bs4 } }); + } + + private final long batchSize; + + /** + * Creates new tests + * + * @param batchSize + * Batch size + */ + public StreamedTriGOutputTest(long batchSize) { + this.batchSize = batchSize; + } + + @Override + protected String getFileExtension() { + return ".trig"; + } + + @Override + protected Lang getRdfLanguage() { + return Lang.TRIG; + } + + @Override + protected Configuration prepareConfiguration() { + Configuration config = super.prepareConfiguration(); + config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, this.batchSize); + return config; + } + + @Override + protected OutputFormat<NullWritable, QuadWritable> getOutputFormat() { + return new TriGOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/TriGBlankNodeOutputTests.java 
---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/TriGBlankNodeOutputTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/TriGBlankNodeOutputTests.java new file mode 100644 index 0000000..c9b3a26 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/TriGBlankNodeOutputTests.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.io.output.trig; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.RdfIOConstants; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.riot.RDFDataMgr; +import org.junit.Assert; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ResIterator; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * Tests for TriG output with blank nodes + * + * Every generated quad shares a single blank node subject; after the inherited checks the written file is re-read and must contain only anonymous subjects, exactly one of them (zero for empty data) + * NOTE(review): the file URL is built by prefixing the absolute path with file://, presumably assuming POSIX-style paths - confirm on Windows + */ +@RunWith(Parameterized.class) +public class TriGBlankNodeOutputTests extends StreamedTriGOutputTest { + + static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE; + static long $bs2 = 1000; + static long $bs3 = 100; + static long $bs4 = 1; + + /** + * @return Test parameters + */ + @Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, + { $bs4 } }); + } + + /** + * Creates new tests + * + * @param batchSize + * Batch size + */ + public TriGBlankNodeOutputTests(long batchSize) { + super(batchSize); + } + + @Override + protected Iterator<QuadWritable> generateTuples(int num) { + List<QuadWritable> qs = new ArrayList<QuadWritable>(); + Node subject = NodeFactory.createAnon(); + for (int i = 0; i < num; i++) { + Quad t = new Quad( + NodeFactory.createURI("http://example.org/graphs/" + i), + subject, + NodeFactory.createURI("http://example.org/predicate"), + 
NodeFactory.createLiteral(Integer.toString(i), + XSDDatatype.XSDinteger)); + qs.add(new QuadWritable(t)); + } + return qs.iterator(); + } + + @Override + protected void checkTuples(File f, long expected) { + super.checkTuples(f, expected); + + Model m = RDFDataMgr.loadModel("file://" + f.getAbsolutePath(), + this.getRdfLanguage()); + ResIterator iter = m.listSubjects(); + Set<Node> subjects = new HashSet<Node>(); + while (iter.hasNext()) { + Resource res = iter.next(); + Assert.assertTrue(res.isAnon()); + subjects.add(res.asNode()); + } + // Should only be one subject unless the data was empty in which case + // there will be zero subjects + Assert.assertEquals(expected == 0 ? 0 : 1, subjects.size()); + } + + @Override + protected OutputFormat<NullWritable, QuadWritable> getOutputFormat() { + return new TriGOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java new file mode 100644 index 0000000..9b6e307 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.io.output.trix; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.output.AbstractQuadOutputFormatTests; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.riot.Lang; + +/** + * Tests for the TriX output format: supplies the {@code .trix} extension, {@link Lang#TRIX} and a {@link TriXOutputFormat} to the inherited {@link AbstractQuadOutputFormatTests} + */ +public class TriXOutputTest extends AbstractQuadOutputFormatTests { + + @Override + protected String getFileExtension() { + return ".trix"; + } + + @Override + protected Lang getRdfLanguage() { + return Lang.TRIX; + } + + @Override + protected OutputFormat<NullWritable, QuadWritable> getOutputFormat() { + return new TriXOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java new file mode 100644 index 0000000..a6c4d70 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.io.output.turtle; + +import java.util.Arrays; +import java.util.Collection; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.RdfIOConstants; +import org.apache.jena.hadoop.rdf.io.output.AbstractTripleOutputFormatTests; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + + +/** + * Tests for batched Turtle output via {@link BatchedTurtleOutputFormat} + * + * Parameterized so the inherited triple output tests run once per batch size + * (the default, 1000, 100 and 1), set through {@link RdfIOConstants#OUTPUT_BATCH_SIZE} + */ +@RunWith(Parameterized.class) +public class BatchedTurtleOutputTest extends AbstractTripleOutputFormatTests { + + static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE; + static long $bs2 = 1000; + static long $bs3 = 100; + static long $bs4 = 1; + + /** + * @return Test parameters + */ + @Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, { $bs4 } }); + } + + private final long batchSize; + + /** + * Creates new tests + * + * @param batchSize + * Batch size + */ + public BatchedTurtleOutputTest(long batchSize) { + this.batchSize = batchSize; + } 
+ + @Override + protected String getFileExtension() { + return ".ttl"; + } + + @Override + protected Lang getRdfLanguage() { + return Lang.TURTLE; + } + + @Override + protected Configuration prepareConfiguration() { + Configuration config = super.prepareConfiguration(); + config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, this.batchSize); + return config; + } + + @Override + protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() { + return new BatchedTurtleOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java new file mode 100644 index 0000000..d8843d3 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.io.output.turtle; + +import java.util.Arrays; +import java.util.Collection; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.RdfIOConstants; +import org.apache.jena.hadoop.rdf.io.output.AbstractTripleOutputFormatTests; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + + +/** + * Tests for streamed Turtle output via {@link TurtleOutputFormat} + * + * Parameterized so the inherited triple output tests run once per batch size + * (the default, 1000, 100 and 1), set through {@link RdfIOConstants#OUTPUT_BATCH_SIZE} + */ +@RunWith(Parameterized.class) +public class StreamedTurtleOutputTest extends AbstractTripleOutputFormatTests { + + static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE; + static long $bs2 = 1000; + static long $bs3 = 100; + static long $bs4 = 1; + + /** + * @return Test parameters + */ + @Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, { $bs4 } }); + } + + private final long batchSize; + + /** + * Creates new tests + * + * @param batchSize + * Batch size + */ + public StreamedTurtleOutputTest(long batchSize) { + this.batchSize = batchSize; + } + + @Override + protected String getFileExtension() { + return ".ttl"; + } + + @Override + protected Lang getRdfLanguage() { + return Lang.TURTLE; + } + + @Override + protected Configuration prepareConfiguration() { + Configuration config = super.prepareConfiguration(); + config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, this.batchSize); + return config; + } + + @Override + protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() { + return new TurtleOutputFormat<NullWritable>(); + } + +} 
http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java new file mode 100644 index 0000000..8dcae4e --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.io.output.turtle; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.jena.hadoop.rdf.io.RdfIOConstants; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.RDFDataMgr; +import org.junit.Assert; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ResIterator; +import com.hp.hpl.jena.rdf.model.Resource; + +/** + * Tests for Turtle output with blank nodes + * + * Every generated triple shares a single blank node subject; after the inherited checks the written file is re-read and must contain only anonymous subjects, exactly one of them (zero for empty data) + * NOTE(review): the file URL is built by prefixing the absolute path with file://, presumably assuming POSIX-style paths - confirm on Windows + */ +@RunWith(Parameterized.class) +public class TurtleBlankNodeOutputTests extends StreamedTurtleOutputTest { + + static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE; + static long $bs2 = 1000; + static long $bs3 = 100; + static long $bs4 = 1; + + /** + * @return Test parameters + */ + @Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, + { $bs4 } }); + } + + /** + * Creates new tests + * + * @param batchSize + * Batch size + */ + public TurtleBlankNodeOutputTests(long batchSize) { + super(batchSize); + } + + @Override + protected Iterator<TripleWritable> generateTuples(int num) { + List<TripleWritable> ts = new ArrayList<TripleWritable>(); + Node subject = NodeFactory.createAnon(); + for (int i = 0; i < num; i++) { + Triple t = new Triple(subject, + NodeFactory.createURI("http://example.org/predicate"), + NodeFactory.createLiteral(Integer.toString(i), + 
XSDDatatype.XSDinteger)); + ts.add(new TripleWritable(t)); + } + return ts.iterator(); + } + + @Override + protected void checkTuples(File f, long expected) { + super.checkTuples(f, expected); + + Model m = RDFDataMgr.loadModel("file://" + f.getAbsolutePath(), + this.getRdfLanguage()); + ResIterator iter = m.listSubjects(); + Set<Node> subjects = new HashSet<Node>(); + while (iter.hasNext()) { + Resource res = iter.next(); + Assert.assertTrue(res.isAnon()); + subjects.add(res.asNode()); + } + // Should only be one subject unless the data was empty in which case + // there will be zero subjects + Assert.assertEquals(expected == 0 ? 0 : 1, subjects.size()); + } + + @Override + protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() { + return new TurtleOutputFormat<NullWritable>(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java new file mode 100644 index 0000000..2eae232 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jena.hadoop.rdf.io.registry; + +import java.io.IOException; +import java.io.StringWriter; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RDFLanguages; +import org.junit.Assert; +import org.junit.Test; + +/** + * Tests for the {@link HadoopRdfIORegistry} + */ +public class TestHadoopRdfIORegistry { + + private void testLang(Lang lang, boolean triples, boolean quads, boolean writesSupported) { + Assert.assertEquals(triples, HadoopRdfIORegistry.hasTriplesReader(lang)); + Assert.assertEquals(quads, HadoopRdfIORegistry.hasQuadReader(lang)); + + // Some formats may be asymmetric + if (writesSupported) { + Assert.assertEquals(triples, HadoopRdfIORegistry.hasTriplesWriter(lang)); + Assert.assertEquals(quads, HadoopRdfIORegistry.hasQuadWriter(lang)); + } else { + Assert.assertFalse(HadoopRdfIORegistry.hasTriplesWriter(lang)); + Assert.assertFalse(HadoopRdfIORegistry.hasQuadWriter(lang)); + } + + if (triples) { + // Check that triples are supported + RecordReader<LongWritable, TripleWritable> tripleReader; + try { + tripleReader = HadoopRdfIORegistry.createTripleReader(lang); + Assert.assertNotNull(tripleReader); + } catch (IOException e) { + Assert.fail("Registry indicates that " 
+ lang.getName() + + " can read triples but fails to produce a triple reader when asked: " + e.getMessage()); + } + + if (writesSupported) { + RecordWriter<NullWritable, TripleWritable> tripleWriter; + try { + tripleWriter = HadoopRdfIORegistry.createTripleWriter(lang, new StringWriter(), new Configuration( + false)); + Assert.assertNotNull(tripleWriter); + } catch (IOException e) { + Assert.fail("Registry indicates that " + lang.getName() + + " can write triples but fails to produce a triple writer when asked: " + e.getMessage()); + } + } + } else { + // Check that triples are not supported + try { + HadoopRdfIORegistry.createTripleReader(lang); + Assert.fail("Registry indicates that " + lang.getName() + + " cannot read triples but produced a triple reader when asked (error was expected)"); + } catch (IOException e) { + // This is expected + } + try { + HadoopRdfIORegistry.createTripleWriter(lang, new StringWriter(), new Configuration(false)); + Assert.fail("Registry indicates that " + lang.getName() + + " cannot write triples but produced a triple write when asked (error was expected)"); + } catch (IOException e) { + // This is expected + } + } + + if (quads) { + // Check that quads are supported + RecordReader<LongWritable, QuadWritable> quadReader; + try { + quadReader = HadoopRdfIORegistry.createQuadReader(lang); + Assert.assertNotNull(quadReader); + } catch (IOException e) { + Assert.fail("Registry indicates that " + lang.getName() + + " can read quads but fails to produce a quad reader when asked: " + e.getMessage()); + } + + if (writesSupported) { + RecordWriter<NullWritable, QuadWritable> quadWriter; + try { + quadWriter = HadoopRdfIORegistry.createQuadWriter(lang, new StringWriter(), + new Configuration(false)); + Assert.assertNotNull(quadWriter); + } catch (IOException e) { + Assert.fail("Registry indicates that " + lang.getName() + + " can write quads but fails to produce a triple writer when asked: " + e.getMessage()); + } + } + } else { + try { + 
HadoopRdfIORegistry.createQuadReader(lang); + Assert.fail("Registry indicates that " + lang.getName() + + " cannot read quads but produced a quad reader when asked (error was expected)"); + } catch (IOException e) { + // This is expected + } + try { + HadoopRdfIORegistry.createQuadWriter(lang, new StringWriter(), new Configuration(false)); + Assert.fail("Registry indicates that " + lang.getName() + + " cannot write quads but produced a quad writer when asked (error was expected)"); + } catch (IOException e) { + // This is expected + } + } + } + + @Test + public void json_ld_registered() { + testLang(Lang.JSONLD, true, true, true); + } + + @Test + public void nquads_registered() { + testLang(Lang.NQUADS, false, true, true); + testLang(Lang.NQ, false, true, true); + } + + @Test + public void ntriples_registered() { + testLang(Lang.NTRIPLES, true, false, true); + testLang(Lang.NT, true, false, true); + } + + @Test + public void rdf_json_registered() { + testLang(Lang.RDFJSON, true, false, true); + } + + @Test + public void rdf_xml_registered() { + testLang(Lang.RDFXML, true, false, true); + } + + @Test + public void rdf_thrift_registered() { + testLang(RDFLanguages.THRIFT, true, true, true); + } + + @Test + public void trig_registered() { + testLang(Lang.TRIG, false, true, true); + } + + @Test + public void trix_registered() { + testLang(Lang.TRIX, false, true, true); + } + + @Test + public void turtle_registered() { + testLang(Lang.TURTLE, true, false, true); + testLang(Lang.TTL, true, false, true); + testLang(Lang.N3, true, false, true); + } + + @Test + public void unregistered() { + testLang(Lang.RDFNULL, false, false, true); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-stats/hadoop-job.xml ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-stats/hadoop-job.xml b/jena-hadoop-rdf/jena-elephas-stats/hadoop-job.xml new file mode 100644 index 0000000..de72645 
--- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-stats/hadoop-job.xml @@ -0,0 +1,46 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<!-- Assembly descriptor for the "hadoop-job" JAR: runtime dependencies are packed as JARs under lib/ and the project's own artifact is unpacked into the archive. --> +<assembly> + <id>hadoop-job</id> + <formats> + <format>jar</format> + </formats> + <includeBaseDirectory>false</includeBaseDirectory> + <dependencySets> + <dependencySet> <!-- runtime-scope dependencies, kept as JARs under lib/, excluding this project's own artifact --> + <unpack>false</unpack> + <scope>runtime</scope> + <outputDirectory>lib</outputDirectory> + <excludes> + <exclude>${project.groupId}:${project.artifactId}</exclude> <!-- project.* prefix: the prefix-less groupId/artifactId expressions are deprecated --> + </excludes> + </dependencySet> + <dependencySet> <!-- this project's own artifact, unpacked rather than nested as a JAR --> + <unpack>true</unpack> + <includes> + <include>${project.groupId}:${project.artifactId}</include> + </includes> + </dependencySet> + </dependencySets> + <fileSets> + <fileSet> <!-- NOTE(review): this copies compiled TEST classes into the root of the job JAR; confirm that target/test-classes (not target/classes) is intended --> + <directory>${basedir}/target/test-classes</directory> + <outputDirectory>/</outputDirectory> + </fileSet> + </fileSets> +</assembly> http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-stats/pom.xml ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-stats/pom.xml b/jena-hadoop-rdf/jena-elephas-stats/pom.xml new file mode 100644 index 0000000..899d612 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-stats/pom.xml @@ -0,0 +1,103 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) 
under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.jena</groupId> + <artifactId>jena-elephas</artifactId> + <version>0.9.0-SNAPSHOT</version> + </parent> + <artifactId>jena-elephas-stats</artifactId> + <name>Apache Jena - RDF Tools for Hadoop - Statistics Demo App</name> + <description>A demo application that can be run on Hadoop to produce a statistical analysis on arbitrary RDF inputs</description> + + <dependencies> + <!-- Internal Project Dependencies. NOTE(review): these artifactIds still use the jena-hadoop-rdf-* naming while this module and its parent are named jena-elephas-*; confirm whether the sibling modules are now published as jena-elephas-io / jena-elephas-mapreduce and update if so --> + <dependency> + <groupId>org.apache.jena</groupId> + <artifactId>jena-hadoop-rdf-io</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.jena</groupId> + <artifactId>jena-hadoop-rdf-mapreduce</artifactId> + <version>${project.version}</version> + </dependency> + + <!-- CLI related Dependencies --> + <dependency> + <groupId>io.airlift</groupId> + <artifactId>airline</artifactId> + <version>0.6</version> + </dependency> + + <!-- Hadoop Dependencies (no version given here; presumably managed by the parent POM - confirm) --> + <!-- Note these will be provided on the Hadoop cluster hence the
provided + scope --> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-mapreduce-client-common</artifactId> + <scope>provided</scope> + </dependency> + + <!-- Test Dependencies. NOTE(review): the tests-classifier dependency below raises the same jena-hadoop-rdf-* versus jena-elephas-* artifactId question as the internal dependencies --> + <dependency> + <groupId>org.apache.jena</groupId> + <artifactId>jena-hadoop-rdf-mapreduce</artifactId> + <version>${project.version}</version> + <classifier>tests</classifier> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.mrunit</groupId> + <artifactId>mrunit</artifactId> + <scope>test</scope> + <classifier>hadoop2</classifier> + </dependency> + </dependencies> + + <build> + <plugins> + <!-- Assembly plugin is used to produce the runnable Hadoop JAR with all + dependencies contained therein; it is bound to the package phase and reads the hadoop-job.xml descriptor --> + <plugin> + <artifactId>maven-assembly-plugin</artifactId> + <configuration> + <descriptors> + <descriptor>hadoop-job.xml</descriptor> + </descriptors> + </configuration> + <executions> + <execution> + <id>make-assembly</id> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> +</project> \ No newline at end of file
