http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractBlockedQuadInputFormatTests.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractBlockedQuadInputFormatTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractBlockedQuadInputFormatTests.java
deleted file mode 100644
index 1cda0bd..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractBlockedQuadInputFormatTests.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input;
-
-/**
- * Abstract tests for blocked quad input formats
- *
- *
- *
- */
-public abstract class AbstractBlockedQuadInputFormatTests extends AbstractWholeFileQuadInputFormatTests {
-
-    @Override
-    protected boolean canSplitInputs() {
-        return true;
-    }
-}
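A concrete test against these abstract harnesses only has to bind a syntax, a file extension and an input format; the tuple generator methods are inherited from the whole-file base classes shown further down in this commit. A minimal sketch, where BlockedNQuadsInputTest and BlockedNQuadsInputFormat are hypothetical names not taken from this diff (only the overridden methods are):

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.mapreduce.InputFormat;
    import org.apache.jena.hadoop.rdf.types.QuadWritable;
    import org.apache.jena.riot.Lang;

    public class BlockedNQuadsInputTest extends AbstractBlockedQuadInputFormatTests {

        @Override
        protected Lang getRdfLanguage() {
            // N-Quads is an assumption here; any quad-capable syntax would do
            return Lang.NQUADS;
        }

        @Override
        protected String getFileExtension() {
            return ".nq";
        }

        @Override
        protected InputFormat<LongWritable, QuadWritable> getInputFormat() {
            // Hypothetical concrete format under test
            return new BlockedNQuadsInputFormat();
        }
    }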
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractBlockedTripleInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractBlockedTripleInputFormatTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractBlockedTripleInputFormatTests.java deleted file mode 100644 index 2e1e865..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractBlockedTripleInputFormatTests.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.input; - -/** - * Abstract tests for blocked triple input formats - * - * - * - */ -public abstract class AbstractBlockedTripleInputFormatTests extends AbstractWholeFileTripleInputFormatTests { - - @Override - protected boolean canSplitInputs() { - return true; - } -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractNodeTupleInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractNodeTupleInputFormatTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractNodeTupleInputFormatTests.java deleted file mode 100644 index e22650f..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractNodeTupleInputFormatTests.java +++ /dev/null @@ -1,612 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.jena.hadoop.rdf.io.input; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.List; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.FileSplit; -import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; -import org.apache.hadoop.mapreduce.task.JobContextImpl; -import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; -import org.apache.jena.hadoop.rdf.io.HadoopIOConstants; -import org.apache.jena.hadoop.rdf.io.RdfIOConstants; -import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; -import org.junit.After; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Abstract node tuple input format tests - * - * - * - * @param <TValue> - * @param <T> - */ -public abstract class AbstractNodeTupleInputFormatTests<TValue, T extends AbstractNodeTupleWritable<TValue>> { - - private static final Logger LOG = LoggerFactory.getLogger(AbstractNodeTupleInputFormatTests.class); - - protected static final int EMPTY_SIZE = 0, SMALL_SIZE = 100, LARGE_SIZE = 10000, BAD_SIZE = 100, MIXED_SIZE = 100; - protected static final String EMPTY = "empty"; - protected static final String SMALL = "small"; - protected static final String LARGE = "large"; - protected static final String BAD = "bad"; - protected static final String MIXED = "mixed"; - - /** - * Temporary folder for the tests - */ - @Rule - public TemporaryFolder folder = new TemporaryFolder(); - - protected File empty, small, large, bad, mixed; - - /** - * Prepares the inputs for the tests - * - * @throws IOException - */ - @Before - public void beforeTest() throws IOException { - this.prepareInputs(); - } - - /** - * Cleans up the inputs after each test - */ - @After - public void afterTest() { - // Should be unnecessary since JUnit will clean up the temporary folder - // anyway but best to do this regardless - if (empty != null) - empty.delete(); - if (small != null) - small.delete(); - if (large != null) - large.delete(); - if (bad != null) - bad.delete(); - if (mixed != null) - mixed.delete(); - } - - /** - * Prepares a fresh configuration - * - * @return Configuration - */ - protected Configuration prepareConfiguration() { - Configuration config = new Configuration(true); - // Nothing else to do - return config; - } - - /** - * Prepares the inputs - * - * @throws IOException - */ - protected void prepareInputs() throws IOException { - String ext = this.getFileExtension(); - empty = folder.newFile(EMPTY + ext); - this.generateTuples(empty, EMPTY_SIZE); - small = folder.newFile(SMALL + ext); - this.generateTuples(small, SMALL_SIZE); - large = folder.newFile(LARGE + ext); - this.generateTuples(large, LARGE_SIZE); - bad = folder.newFile(BAD + ext); - this.generateBadTuples(bad, BAD_SIZE); - 
mixed = folder.newFile(MIXED + ext); - this.generateMixedTuples(mixed, MIXED_SIZE); - } - - /** - * Gets the extra file extension to add to the filenames - * - * @return File extension - */ - protected abstract String getFileExtension(); - - /** - * Generates tuples used for tests - * - * @param f - * File - * @param num - * Number of tuples to generate - * @throws IOException - */ - protected final void generateTuples(File f, int num) throws IOException { - this.generateTuples(this.getOutputStream(f), num); - } - - /** - * Gets the output stream to use for generating tuples - * - * @param f - * File - * @return Output Stream - * @throws IOException - */ - protected OutputStream getOutputStream(File f) throws IOException { - return new FileOutputStream(f, false); - } - - /** - * Generates tuples used for tests - * - * @param output - * Output Stream to write to - * @param num - * Number of tuples to generate - * @throws IOException - */ - protected abstract void generateTuples(OutputStream output, int num) throws IOException; - - /** - * Generates bad tuples used for tests - * - * @param f - * File - * @param num - * Number of bad tuples to generate - * @throws IOException - */ - protected final void generateBadTuples(File f, int num) throws IOException { - this.generateBadTuples(this.getOutputStream(f), num); - } - - /** - * Generates bad tuples used for tests - * - * @param output - * Output Stream to write to - * @param num - * Number of bad tuples to generate - * @throws IOException - */ - protected abstract void generateBadTuples(OutputStream output, int num) throws IOException; - - /** - * Generates a mixture of good and bad tuples used for tests - * - * @param f - * File - * @param num - * Number of tuples to generate, they should be a 50/50 mix of - * good and bad tuples - * @throws IOException - */ - protected final void generateMixedTuples(File f, int num) throws IOException { - this.generateMixedTuples(this.getOutputStream(f), num); - } - - /** - * Generates a mixture of good and bad tuples used for tests - * - * @param output - * Output Stream to write to - * @param num - * Number of tuples to generate, they should be a 50/50 mix of - * good and bad tuples - * @throws IOException - */ - protected abstract void generateMixedTuples(OutputStream output, int num) throws IOException; - - /** - * Adds an input path to the job configuration - * - * @param f - * File - * @param config - * Configuration - * @param job - * Job - * @throws IOException - */ - protected void addInputPath(File f, Configuration config, Job job) throws IOException { - FileSystem fs = FileSystem.getLocal(config); - Path inputPath = fs.makeQualified(new Path(f.getAbsolutePath())); - FileInputFormat.addInputPath(job, inputPath); - } - - protected final int countTuples(RecordReader<LongWritable, T> reader) throws IOException, InterruptedException { - int count = 0; - - // Check initial progress - LOG.info(String.format("Initial Reported Progress %f", reader.getProgress())); - float progress = reader.getProgress(); - if (Float.compare(0.0f, progress) == 0) { - Assert.assertEquals(0.0d, reader.getProgress(), 0.0d); - } else if (Float.compare(1.0f, progress) == 0) { - // If reader is reported 1.0 straight away then we expect there to - // be no key values - Assert.assertEquals(1.0d, reader.getProgress(), 0.0d); - Assert.assertFalse(reader.nextKeyValue()); - } else { - Assert.fail(String.format( - "Expected progress of 0.0 or 1.0 before reader has been accessed for first time but got %f", - progress)); - } - - // 
Count tuples
-        boolean debug = LOG.isDebugEnabled();
-        while (reader.nextKeyValue()) {
-            count++;
-            progress = reader.getProgress();
-            if (debug)
-                LOG.debug(String.format("Current Reported Progress %f", progress));
-            Assert.assertTrue(String.format("Progress should be in the range 0.0 < p <= 1.0 but got %f", progress),
-                    progress > 0.0f && progress <= 1.0f);
-        }
-        reader.close();
-        LOG.info(String.format("Got %d tuples from this record reader", count));
-
-        // Check final progress
-        LOG.info(String.format("Final Reported Progress %f", reader.getProgress()));
-        Assert.assertEquals(1.0d, reader.getProgress(), 0.0d);
-
-        return count;
-    }
-
-    protected final void checkTuples(RecordReader<LongWritable, T> reader, int expected) throws IOException,
-            InterruptedException {
-        Assert.assertEquals(expected, this.countTuples(reader));
-    }
-
-    /**
-     * Runs a test with a single input
-     *
-     * @param input
-     *            Input
-     * @param expectedSplits
-     *            Expected number of splits
-     * @param expectedTuples
-     *            Expected number of tuples
-     * @throws IOException
-     * @throws InterruptedException
-     */
-    protected final void testSingleInput(File input, int expectedSplits, int expectedTuples) throws IOException,
-            InterruptedException {
-        // Prepare configuration
-        Configuration config = this.prepareConfiguration();
-        this.testSingleInput(config, input, expectedSplits, expectedTuples);
-    }
-
-    /**
-     * Runs a test with a single input
-     *
-     * @param config
-     *            Configuration
-     * @param input
-     *            Input
-     * @param expectedSplits
-     *            Expected number of splits
-     * @param expectedTuples
-     *            Expected number of tuples
-     * @throws IOException
-     * @throws InterruptedException
-     */
-    protected final void testSingleInput(Configuration config, File input, int expectedSplits, int expectedTuples)
-            throws IOException, InterruptedException {
-        // Set up fake job
-        InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
-        Job job = Job.getInstance(config);
-        job.setInputFormatClass(inputFormat.getClass());
-        this.addInputPath(input, job.getConfiguration(), job);
-        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
-        Assert.assertEquals(1, FileInputFormat.getInputPaths(context).length);
-        NLineInputFormat.setNumLinesPerSplit(job, LARGE_SIZE);
-
-        // Check splits
-        List<InputSplit> splits = inputFormat.getSplits(context);
-        Assert.assertEquals(expectedSplits, splits.size());
-
-        // Check tuples
-        for (InputSplit split : splits) {
-            TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
-            RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
-            reader.initialize(split, taskContext);
-            this.checkTuples(reader, expectedTuples);
-        }
-    }
-
-    protected abstract InputFormat<LongWritable, T> getInputFormat();
-
-    /**
-     * Basic tuples input test
-     *
-     * @throws IOException
-     * @throws ClassNotFoundException
-     * @throws InterruptedException
-     */
-    @Test
-    public final void single_input_01() throws IOException, InterruptedException, ClassNotFoundException {
-        testSingleInput(empty, this.canSplitInputs() ?
0 : 1, EMPTY_SIZE); - } - - /** - * Basic tuples input test - * - * @throws IOException - * @throws ClassNotFoundException - * @throws InterruptedException - */ - @Test - public final void single_input_02() throws IOException, InterruptedException, ClassNotFoundException { - testSingleInput(small, 1, SMALL_SIZE); - } - - /** - * Basic tuples input test - * - * @throws IOException - * @throws ClassNotFoundException - * @throws InterruptedException - */ - @Test - public final void single_input_03() throws IOException, InterruptedException, ClassNotFoundException { - testSingleInput(large, 1, LARGE_SIZE); - } - - /** - * Basic tuples input test - * - * @throws IOException - * @throws ClassNotFoundException - * @throws InterruptedException - */ - @Test - public final void single_input_04() throws IOException, InterruptedException, ClassNotFoundException { - testSingleInput(bad, 1, 0); - } - - /** - * Basic tuples input test - * - * @throws IOException - * @throws ClassNotFoundException - * @throws InterruptedException - */ - @Test - public final void single_input_05() throws IOException, InterruptedException, ClassNotFoundException { - testSingleInput(mixed, 1, MIXED_SIZE / 2); - } - - /** - * Tests behaviour when ignoring bad tuples is disabled - * - * @throws InterruptedException - * @throws IOException - */ - @Test(expected = IOException.class) - public final void fail_on_bad_input_01() throws IOException, InterruptedException { - Configuration config = this.prepareConfiguration(); - config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); - Assert.assertFalse(config.getBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, true)); - testSingleInput(config, bad, 1, 0); - } - - /** - * Tests behaviour when ignoring bad tuples is disabled - * - * @throws InterruptedException - * @throws IOException - */ - @Test(expected = IOException.class) - public final void fail_on_bad_input_02() throws IOException, InterruptedException { - Configuration config = this.prepareConfiguration(); - config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); - Assert.assertFalse(config.getBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, true)); - testSingleInput(config, mixed, 1, MIXED_SIZE / 2); - } - - /** - * Runs a multiple input test - * - * @param inputs - * Inputs - * @param expectedSplits - * Number of splits expected - * @param expectedTuples - * Number of tuples expected - * @throws IOException - * @throws InterruptedException - */ - protected final void testMultipleInputs(File[] inputs, int expectedSplits, int expectedTuples) throws IOException, - InterruptedException { - // Prepare configuration and inputs - Configuration config = this.prepareConfiguration(); - - // Set up fake job - InputFormat<LongWritable, T> inputFormat = this.getInputFormat(); - Job job = Job.getInstance(config); - job.setInputFormatClass(inputFormat.getClass()); - for (File input : inputs) { - this.addInputPath(input, job.getConfiguration(), job); - } - JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID()); - Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length); - NLineInputFormat.setNumLinesPerSplit(job, expectedTuples); - - // Check splits - List<InputSplit> splits = inputFormat.getSplits(context); - Assert.assertEquals(expectedSplits, splits.size()); - - // Check tuples - int count = 0; - for (InputSplit split : splits) { - TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID()); - RecordReader<LongWritable, T> 
reader = inputFormat.createRecordReader(split, taskContext); - reader.initialize(split, taskContext); - count += this.countTuples(reader); - } - Assert.assertEquals(expectedTuples, count); - } - - /** - * tuples test with multiple inputs - * - * @throws IOException - * @throws ClassNotFoundException - * @throws InterruptedException - */ - @Test - public final void multiple_inputs_01() throws IOException, InterruptedException, ClassNotFoundException { - testMultipleInputs(new File[] { empty, small, large }, this.canSplitInputs() ? 2 : 3, EMPTY_SIZE + SMALL_SIZE - + LARGE_SIZE); - } - - /** - * tuples test with multiple inputs - * - * @throws IOException - * @throws ClassNotFoundException - * @throws InterruptedException - */ - @Test - public final void multiple_inputs_02() throws IOException, InterruptedException, ClassNotFoundException { - testMultipleInputs(new File[] { folder.getRoot() }, this.canSplitInputs() ? 4 : 5, EMPTY_SIZE + SMALL_SIZE - + LARGE_SIZE + (MIXED_SIZE / 2)); - } - - protected final void testSplitInputs(Configuration config, File[] inputs, int expectedSplits, int expectedTuples) - throws IOException, InterruptedException { - // Set up fake job - InputFormat<LongWritable, T> inputFormat = this.getInputFormat(); - Job job = Job.getInstance(config); - job.setInputFormatClass(inputFormat.getClass()); - for (File input : inputs) { - this.addInputPath(input, job.getConfiguration(), job); - } - JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID()); - Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length); - - // Check splits - List<InputSplit> splits = inputFormat.getSplits(context); - Assert.assertEquals(expectedSplits, splits.size()); - - // Check tuples - int count = 0; - for (InputSplit split : splits) { - // Validate split - Assert.assertTrue(this.isValidSplit(split, config)); - - // Read split - TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID()); - RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext); - reader.initialize(split, taskContext); - count += this.countTuples(reader); - } - Assert.assertEquals(expectedTuples, count); - } - - /** - * Determines whether an input split is valid - * - * @param split - * Input split - * @return True if a valid split, false otherwise - * @throws IOException - */ - protected boolean isValidSplit(InputSplit split, Configuration config) throws IOException { - return split instanceof FileSplit; - } - - /** - * Indicates whether inputs can be split, defaults to true - * - * @return Whether inputs can be split - */ - protected boolean canSplitInputs() { - return true; - } - - /** - * Tests for input splitting - * - * @throws IOException - * @throws InterruptedException - * @throws ClassNotFoundException - */ - @Test - public final void split_input_01() throws IOException, InterruptedException, ClassNotFoundException { - Assume.assumeTrue(this.canSplitInputs()); - - Configuration config = this.prepareConfiguration(); - config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); - Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE)); - this.testSplitInputs(config, new File[] { small }, 100, SMALL_SIZE); - } - - /** - * Tests for input splitting - * - * @throws IOException - * @throws InterruptedException - * @throws ClassNotFoundException - */ - @Test - public final void split_input_02() throws IOException, InterruptedException, 
ClassNotFoundException { - Assume.assumeTrue(this.canSplitInputs()); - - Configuration config = this.prepareConfiguration(); - config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); - config.setLong(NLineInputFormat.LINES_PER_MAP, 10); - Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE)); - this.testSplitInputs(config, new File[] { small }, 10, SMALL_SIZE); - } - - /** - * Tests for input splitting - * - * @throws IOException - * @throws InterruptedException - * @throws ClassNotFoundException - */ - @Test - public final void split_input_03() throws IOException, InterruptedException, ClassNotFoundException { - Assume.assumeTrue(this.canSplitInputs()); - - Configuration config = this.prepareConfiguration(); - config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); - config.setLong(NLineInputFormat.LINES_PER_MAP, 100); - Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE)); - this.testSplitInputs(config, new File[] { large }, 100, LARGE_SIZE); - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java deleted file mode 100644 index 78d7f33..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.jena.hadoop.rdf.io.input; - -import java.io.IOException; -import java.io.OutputStream; -import java.nio.charset.Charset; - -import org.apache.jena.hadoop.rdf.types.QuadWritable; - -import com.hp.hpl.jena.sparql.core.Quad; - -/** - * Abstract tests for Quad input formats - * - * - */ -public abstract class AbstractQuadsInputFormatTests extends AbstractNodeTupleInputFormatTests<Quad, QuadWritable> { - - private static final Charset utf8 = Charset.forName("utf-8"); - - @Override - protected void generateTuples(OutputStream output, int num) throws IOException { - for (int i = 0; i < num; i++) { - output.write(("<http://subjects/" + i + "> <http://predicate> \"" + i + "\" <http://graphs/" + i + "> .\n").getBytes(utf8)); - } - output.flush(); - output.close(); - } - - @Override - protected void generateBadTuples(OutputStream output, int num) throws IOException { - for (int i = 0; i < num; i++) { - output.write("<http://broken\n".getBytes(utf8)); - } - output.flush(); - output.close(); - } - - @Override - protected void generateMixedTuples(OutputStream output, int num) throws IOException { - boolean bad = false; - for (int i = 0; i < num; i++, bad = !bad) { - if (bad) { - output.write("<http://broken\n".getBytes(utf8)); - } else { - output.write(("<http://subjects/" + i + "> <http://predicate> \"" + i + "\" <http://graphs/" + i + "> .\n").getBytes(utf8)); - } - } - output.flush(); - output.close(); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractTriplesInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractTriplesInputFormatTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractTriplesInputFormatTests.java deleted file mode 100644 index 65a9889..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractTriplesInputFormatTests.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.jena.hadoop.rdf.io.input; - -import java.io.IOException; -import java.io.OutputStream; -import java.nio.charset.Charset; - -import org.apache.jena.hadoop.rdf.types.TripleWritable; - -import com.hp.hpl.jena.graph.Triple; - -/** - * Abstract tests for Triple input formats - * - * - * - */ -public abstract class AbstractTriplesInputFormatTests extends AbstractNodeTupleInputFormatTests<Triple, TripleWritable> { - - private static final Charset utf8 = Charset.forName("utf-8"); - - @Override - protected void generateTuples(OutputStream output, int num) throws IOException { - for (int i = 0; i < num; i++) { - output.write(("<http://subjects/" + i + "> <http://predicate> \"" + i + "\" .\n").getBytes(utf8)); - } - output.flush(); - output.close(); - } - - @Override - protected void generateBadTuples(OutputStream output, int num) throws IOException { - byte[] junk = "<http://broken\n".getBytes(utf8); - for (int i = 0; i < num; i++) { - output.write(junk); - } - output.flush(); - output.close(); - } - - @Override - protected void generateMixedTuples(OutputStream output, int num) throws IOException { - boolean bad = false; - for (int i = 0; i < num; i++, bad = !bad) { - if (bad) { - output.write("<http://broken\n".getBytes(utf8)); - } else { - output.write(("<http://subjects/" + i + "> <http://predicate> \"" + i + "\" .\n").getBytes(utf8)); - } - } - output.flush(); - output.close(); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileQuadInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileQuadInputFormatTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileQuadInputFormatTests.java deleted file mode 100644 index 0b6cfde..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileQuadInputFormatTests.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-package org.apache.jena.hadoop.rdf.io.input;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.charset.Charset;
-
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.riot.Lang;
-import org.apache.jena.riot.RDFDataMgr;
-import org.apache.jena.riot.RDFWriterRegistry;
-
-import com.hp.hpl.jena.query.Dataset;
-import com.hp.hpl.jena.query.DatasetFactory;
-import com.hp.hpl.jena.rdf.model.Model;
-import com.hp.hpl.jena.rdf.model.ModelFactory;
-import com.hp.hpl.jena.rdf.model.Property;
-import com.hp.hpl.jena.rdf.model.Resource;
-import com.hp.hpl.jena.sparql.core.Quad;
-
-/**
- * Abstract tests for Quad input formats
- *
- *
- *
- */
-public abstract class AbstractWholeFileQuadInputFormatTests extends AbstractNodeTupleInputFormatTests<Quad, QuadWritable> {
-
-    private static final Charset utf8 = Charset.forName("utf-8");
-
-    @Override
-    protected boolean canSplitInputs() {
-        return false;
-    }
-
-    private void writeTuples(Dataset ds, OutputStream output) {
-        RDFDataMgr.write(output, ds, RDFWriterRegistry.defaultSerialization(this.getRdfLanguage()));
-    }
-
-    /**
-     * Gets the RDF language to write out generated tuples in
-     *
-     * @return RDF language
-     */
-    protected abstract Lang getRdfLanguage();
-
-    private void writeGoodTuples(OutputStream output, int num) throws IOException {
-        Dataset ds = DatasetFactory.createMem();
-        Model m = ModelFactory.createDefaultModel();
-        Resource currSubj = m.createResource("http://example.org/subjects/0");
-        Property predicate = m.createProperty("http://example.org/predicate");
-        for (int i = 0; i < num; i++) {
-            if (i % 100 == 0) {
-                ds.addNamedModel("http://example.org/graphs/" + (i / 100), m);
-                m = ModelFactory.createDefaultModel();
-            }
-            if (i % 10 == 0) {
-                currSubj = m.createResource("http://example.org/subjects/" + (i / 10));
-            }
-            m.add(currSubj, predicate, m.createTypedLiteral(i));
-        }
-        if (!m.isEmpty()) {
-            ds.addNamedModel("http://example.org/graphs/extra", m);
-        }
-        this.writeTuples(ds, output);
-    }
-
-    @Override
-    protected final void generateTuples(OutputStream output, int num) throws IOException {
-        this.writeGoodTuples(output, num);
-        output.close();
-    }
-
-    @Override
-    protected final void generateMixedTuples(OutputStream output, int num) throws IOException {
-        // Write good data
-        this.writeGoodTuples(output, num / 2);
-
-        // Write junk data
-        byte[] junk = "junk data\n".getBytes(utf8);
-        for (int i = 0; i < num / 2; i++) {
-            output.write(junk);
-        }
-
-        output.flush();
-        output.close();
-    }
-
-    @Override
-    protected final void generateBadTuples(OutputStream output, int num) throws IOException {
-        byte[] junk = "junk data\n".getBytes(utf8);
-        for (int i = 0; i < num; i++) {
-            output.write(junk);
-        }
-        output.flush();
-        output.close();
-    }
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java
deleted file mode 100644
index b68d662..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.charset.Charset;
-
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.Lang;
-import org.apache.jena.riot.RDFDataMgr;
-
-import com.hp.hpl.jena.graph.Triple;
-import com.hp.hpl.jena.rdf.model.Model;
-import com.hp.hpl.jena.rdf.model.ModelFactory;
-import com.hp.hpl.jena.rdf.model.Property;
-import com.hp.hpl.jena.rdf.model.Resource;
-
-/**
- * Abstract tests for Triple input formats
- *
- *
- *
- */
-public abstract class AbstractWholeFileTripleInputFormatTests extends AbstractNodeTupleInputFormatTests<Triple, TripleWritable> {
-
-    private static final Charset utf8 = Charset.forName("utf-8");
-
-    @Override
-    protected boolean canSplitInputs() {
-        return false;
-    }
-
-    private void writeTuples(Model m, OutputStream output) {
-        RDFDataMgr.write(output, m, this.getRdfLanguage());
-    }
-
-    /**
-     * Gets the RDF language to write out generated tuples in
-     *
-     * @return RDF language
-     */
-    protected abstract Lang getRdfLanguage();
-
-    @Override
-    protected final void generateTuples(OutputStream output, int num) throws IOException {
-        Model m = ModelFactory.createDefaultModel();
-        Resource currSubj = m.createResource("http://example.org/subjects/0");
-        Property predicate = m.createProperty("http://example.org/predicate");
-        for (int i = 0; i < num; i++) {
-            if (i % 10 == 0) {
-                currSubj = m.createResource("http://example.org/subjects/" + (i / 10));
-            }
-            m.add(currSubj, predicate, m.createTypedLiteral(i));
-        }
-        this.writeTuples(m, output);
-        output.close();
-    }
-
-    @Override
-    protected final void generateMixedTuples(OutputStream output, int num) throws IOException {
-        // Write good data
-        Model m = ModelFactory.createDefaultModel();
-        Resource currSubj = m.createResource("http://example.org/subjects/0");
-        Property predicate = m.createProperty("http://example.org/predicate");
-        for (int i = 0; i < num / 2; i++) {
-            if (i % 10 == 0) {
-                currSubj = m.createResource("http://example.org/subjects/" + (i / 10));
-            }
-            m.add(currSubj, predicate, m.createTypedLiteral(i));
-        }
-        this.writeTuples(m, output);
-
-        // Write junk data
-        byte[] junk = "junk data\n".getBytes(utf8);
-        for (int i = 0; i < num / 2; i++) {
-            output.write(junk);
-        }
-
-        output.flush();
-        output.close();
-    }
-
-    @Override
-    protected final void generateBadTuples(OutputStream output, int num) throws IOException {
-        byte[] junk = "junk data\n".getBytes(utf8);
-        for (int i = 0; i < num; i++) {
-            output.write(junk);
-        }
-        output.flush();
-        output.close();
-    }
-}
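The whole-file harness above already generates, serialises and corrupts the test data, so a concrete subclass again only binds the three abstract methods. A minimal sketch for a Turtle-based test, where TurtleInputTest and TurtleInputFormat are hypothetical names not taken from this diff:

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.mapreduce.InputFormat;
    import org.apache.jena.hadoop.rdf.types.TripleWritable;
    import org.apache.jena.riot.Lang;

    public class TurtleInputTest extends AbstractWholeFileTripleInputFormatTests {

        @Override
        protected Lang getRdfLanguage() {
            // Turtle is an assumption; any whole-file triple syntax would work
            return Lang.TURTLE;
        }

        @Override
        protected String getFileExtension() {
            return ".ttl";
        }

        @Override
        protected InputFormat<LongWritable, TripleWritable> getInputFormat() {
            // Hypothetical concrete whole-file input format under test
            return new TurtleInputFormat();
        }
    }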
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/AbstractBlankNodeTests.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/AbstractBlankNodeTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/AbstractBlankNodeTests.java deleted file mode 100644 index 4bb0939..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/AbstractBlankNodeTests.java +++ /dev/null @@ -1,636 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.jena.hadoop.rdf.io.input.bnodes; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.nio.file.StandardCopyOption; -import java.nio.file.attribute.FileAttribute; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.mapreduce.TaskType; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.task.JobContextImpl; -import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; -import org.apache.jena.hadoop.rdf.io.RdfIOConstants; -import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; -import org.apache.jena.riot.system.ParserProfile; -import org.apache.log4j.BasicConfigurator; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.BeforeClass; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.hp.hpl.jena.graph.Node; -import com.hp.hpl.jena.graph.NodeFactory; - -/** - * Test case that embodies the scenario described in JENA-820 - */ -@SuppressWarnings("unused") -public abstract class AbstractBlankNodeTests<T, TValue extends AbstractNodeTupleWritable<T>> { - - private static final Logger LOGGER = 
LoggerFactory.getLogger(AbstractBlankNodeTests.class);
-
-    @BeforeClass
-    public static void setup() {
-        // Enable if you need to diagnose test failures
-        // Useful since it includes printing the file names of the temporary
-        // files being used
-        // BasicConfigurator.resetConfiguration();
-        // BasicConfigurator.configure();
-    }
-
-    /**
-     * Gets the extension for the initial input files
-     *
-     * @return Extension including the {@code .}
-     */
-    protected abstract String getInitialInputExtension();
-
-    /**
-     * Creates a tuple
-     *
-     * @param s
-     *            Subject
-     * @param p
-     *            Predicate
-     * @param o
-     *            Object
-     * @return Tuple
-     */
-    protected abstract T createTuple(Node s, Node p, Node o);
-
-    /**
-     * Writes out the given tuples to the given file
-     *
-     * @param f
-     *            File
-     * @param tuples
-     *            Tuples
-     * @throws FileNotFoundException
-     */
-    protected abstract void writeTuples(File f, List<T> tuples) throws FileNotFoundException;
-
-    /**
-     * Creates the input format for reading the initial inputs
-     *
-     * @return Input format
-     */
-    protected abstract InputFormat<LongWritable, TValue> createInitialInputFormat();
-
-    /**
-     * Creates the output format for writing the intermediate output
-     *
-     * @return Output format
-     */
-    protected abstract OutputFormat<LongWritable, TValue> createIntermediateOutputFormat();
-
-    /**
-     * Creates the input format for reading the intermediate outputs back in
-     *
-     * @return Input format
-     */
-    protected abstract InputFormat<LongWritable, TValue> createIntermediateInputFormat();
-
-    /**
-     * Gets the subject of the tuple
-     *
-     * @param value
-     *            Tuple
-     * @return Subject
-     */
-    protected abstract Node getSubject(T value);
-
-    /**
-     * Gets whether the format being tested respects the RIOT
-     * {@link ParserProfile}
-     *
-     * @return True if parser profile is respected, false otherwise
-     */
-    protected boolean respectsParserProfile() {
-        return true;
-    }
-
-    /**
-     * Gets whether the format being tested preserves blank node identity
-     *
-     * @return True if identity is preserved, false otherwise
-     */
-    protected boolean preservesBlankNodeIdentity() {
-        return false;
-    }
-
-    /**
-     * Test that starts with two blank nodes with the same identity in a single
-     * file, splits them over two files and checks that we can work around
-     * JENA-820 successfully by setting the
-     * {@link RdfIOConstants#GLOBAL_BNODE_IDENTITY} flag for our subsequent job
-     *
-     * @throws IOException
-     * @throws InterruptedException
-     */
-    @Test
-    public final void blank_node_divergence_01() throws IOException, InterruptedException {
-        Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
-        Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());
-
-        // Temporary files
-        File a = File.createTempFile("bnode_divergence", getInitialInputExtension());
-        File intermediateOutputDir = Files.createTempDirectory("bnode_divergence", new FileAttribute[0]).toFile();
-
-        try {
-            // Prepare the input data
-            // Two mentions of the same blank node in the same file
-            List<T> tuples = new ArrayList<>();
-            Node bnode = NodeFactory.createAnon();
-            Node pred = NodeFactory.createURI("http://example.org/predicate");
-            tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
-            tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
-            writeTuples(a, tuples);
-
-            // Set up fake job which will process the file as a single split
-            Configuration config = new Configuration(true);
-            InputFormat<LongWritable, TValue>
inputFormat = createInitialInputFormat(); - Job job = Job.getInstance(config); - job.setInputFormatClass(inputFormat.getClass()); - NLineInputFormat.setNumLinesPerSplit(job, 100); - FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath())); - FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath())); - JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID()); - - // Get the splits - List<InputSplit> splits = inputFormat.getSplits(context); - Assert.assertEquals(1, splits.size()); - - for (InputSplit split : splits) { - // Initialize the input reading - TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), - createAttemptID(1, 1, 1)); - RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext); - reader.initialize(split, inputTaskContext); - - // Copy the input to the output - each triple goes to a separate - // output file - // This is how we force multiple files to be produced - int taskID = 1; - while (reader.nextKeyValue()) { - // Prepare the output writing - OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat(); - TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), - createAttemptID(1, ++taskID, 1)); - RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext); - - writer.write(reader.getCurrentKey(), reader.getCurrentValue()); - writer.close(outputTaskContext); - } - } - - // Promote outputs from temporary status - promoteInputs(intermediateOutputDir); - - // Now we need to create a subsequent job that reads the - // intermediate outputs - // As described in JENA-820 at this point the blank nodes are - // consistent, however when we read them from different files they - // by default get treated as different nodes and so the blank nodes - // diverge which is incorrect and undesirable behaviour in - // multi-stage pipelines - System.out.println(intermediateOutputDir.getAbsolutePath()); - job = Job.getInstance(config); - inputFormat = createIntermediateInputFormat(); - job.setInputFormatClass(inputFormat.getClass()); - FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath())); - - // Enabling this flag works around the JENA-820 issue - job.getConfiguration().setBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, true); - context = new JobContextImpl(job.getConfiguration(), job.getJobID()); - - // Get the splits - splits = inputFormat.getSplits(context); - Assert.assertEquals(2, splits.size()); - - // Expect to end up with a single blank node - Set<Node> nodes = new HashSet<Node>(); - for (InputSplit split : splits) { - TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), - new TaskAttemptID()); - RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext); - reader.initialize(split, inputTaskContext); - - while (reader.nextKeyValue()) { - nodes.add(getSubject(reader.getCurrentValue().get())); - } - } - // Nodes should not have diverged - Assert.assertEquals(1, nodes.size()); - - } finally { - a.delete(); - deleteDirectory(intermediateOutputDir); - } - } - - /** - * Test that starts with two blank nodes with the same identity in a single - * file, splits them over two files and shows that they diverge in the - * subsequent job when the JENA-820 workaround is not enabled - * - * @throws IOException - * @throws InterruptedException - */ - @Test - public void 
blank_node_divergence_02() throws IOException, InterruptedException { - Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile()); - Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity()); - - // Temporary files - File a = File.createTempFile("bnode_divergence", getInitialInputExtension()); - File intermediateOutputDir = Files.createTempDirectory("bnode_divergence", new FileAttribute[0]).toFile(); - - try { - // Prepare the input data - // Two mentions of the same blank node in the same file - List<T> tuples = new ArrayList<>(); - Node bnode = NodeFactory.createAnon(); - Node pred = NodeFactory.createURI("http://example.org/predicate"); - tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first"))); - tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second"))); - writeTuples(a, tuples); - - // Set up fake job which will process the file as a single split - Configuration config = new Configuration(true); - InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat(); - Job job = Job.getInstance(config); - job.setInputFormatClass(inputFormat.getClass()); - NLineInputFormat.setNumLinesPerSplit(job, 100); - FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath())); - FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath())); - JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID()); - - // Get the splits - List<InputSplit> splits = inputFormat.getSplits(context); - Assert.assertEquals(1, splits.size()); - - for (InputSplit split : splits) { - // Initialize the input reading - TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), - createAttemptID(1, 1, 1)); - RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext); - reader.initialize(split, inputTaskContext); - - // Copy the input to the output - each triple goes to a separate - // output file - // This is how we force multiple files to be produced - int taskID = 1; - while (reader.nextKeyValue()) { - // Prepare the output writing - OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat(); - TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), - createAttemptID(1, ++taskID, 1)); - RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext); - - writer.write(reader.getCurrentKey(), reader.getCurrentValue()); - writer.close(outputTaskContext); - } - } - - // Promote outputs from temporary status - promoteInputs(intermediateOutputDir); - - // Now we need to create a subsequent job that reads the - // intermediate outputs - // As described in JENA-820 at this point the blank nodes are - // consistent, however when we read them from different files they - // by default get treated as different nodes and so the blank nodes - // diverge which is incorrect and undesirable behaviour in - // multi-stage pipelines. However it is the default behaviour - // because when we start from external inputs we want them to be - // file scoped. 
-            System.out.println(intermediateOutputDir.getAbsolutePath());
-            job = Job.getInstance(config);
-            inputFormat = createIntermediateInputFormat();
-            job.setInputFormatClass(inputFormat.getClass());
-            FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
-
-            // Make sure JENA-820 flag is disabled
-            job.getConfiguration().setBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, false);
-            context = new JobContextImpl(job.getConfiguration(), job.getJobID());
-
-            // Get the splits
-            splits = inputFormat.getSplits(context);
-            Assert.assertEquals(2, splits.size());
-
-            // Expect the blank nodes to have diverged into two distinct nodes
-            Set<Node> nodes = new HashSet<Node>();
-            for (InputSplit split : splits) {
-                TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
-                        new TaskAttemptID());
-                RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
-                reader.initialize(split, inputTaskContext);
-
-                while (reader.nextKeyValue()) {
-                    nodes.add(getSubject(reader.getCurrentValue().get()));
-                }
-            }
-            // Nodes should have diverged
-            Assert.assertEquals(2, nodes.size());
-
-        } finally {
-            a.delete();
-            deleteDirectory(intermediateOutputDir);
-        }
-    }
-
-    /**
-     * Test that starts with two blank nodes in two different files and checks
-     * that writing them to a single file does not conflate them
-     *
-     * @throws IOException
-     * @throws InterruptedException
-     */
-    @Test
-    public void blank_node_identity_01() throws IOException, InterruptedException {
-        Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
-        Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());
-
-        // Temporary files
-        File a = File.createTempFile("bnode_identity", getInitialInputExtension());
-        File b = File.createTempFile("bnode_identity", getInitialInputExtension());
-        File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();
-
-        try {
-            // Prepare the input data
-            // Different blank nodes in different files
-            List<T> tuples = new ArrayList<>();
-            Node bnode1 = NodeFactory.createAnon();
-            Node bnode2 = NodeFactory.createAnon();
-            Node pred = NodeFactory.createURI("http://example.org/predicate");
-
-            tuples.add(createTuple(bnode1, pred, NodeFactory.createLiteral("first")));
-            writeTuples(a, tuples);
-
-            tuples.clear();
-            tuples.add(createTuple(bnode2, pred, NodeFactory.createLiteral("second")));
-            writeTuples(b, tuples);
-
-            // Set up fake job which will process the two files
-            Configuration config = new Configuration(true);
-            InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
-            Job job = Job.getInstance(config);
-            job.setInputFormatClass(inputFormat.getClass());
-            NLineInputFormat.setNumLinesPerSplit(job, 100);
-            FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
-            FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
-            JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
-
-            // Get the splits
-            List<InputSplit> splits = inputFormat.getSplits(context);
-            Assert.assertEquals(2, splits.size());
-
-            // Prepare the output writing - putting all output to a single file
-            OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
-            TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(
-                    1, 2, 1));
-            RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);
-
-            for (InputSplit split : splits) {
-                // Initialize the input reading
-                TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
-                        createAttemptID(1, 1, 1));
-                RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
-                reader.initialize(split, inputTaskContext);
-
-                // Copy the input to the output - all triples go to a single
-                // output
-                while (reader.nextKeyValue()) {
-                    writer.write(reader.getCurrentKey(), reader.getCurrentValue());
-                }
-            }
-            writer.close(outputTaskContext);
-
-            // Promote outputs from temporary status
-            promoteInputs(intermediateOutputDir);
-
-            // Now we need to create a subsequent job that reads the
-            // intermediate outputs
-            // The Blank nodes should have been given separate identities so we
-            // should not be conflating them, this is the opposite problem to
-            // that described in JENA-820
-            System.out.println(intermediateOutputDir.getAbsolutePath());
-            job = Job.getInstance(config);
-            inputFormat = createIntermediateInputFormat();
-            job.setInputFormatClass(inputFormat.getClass());
-            NLineInputFormat.setNumLinesPerSplit(job, 100);
-            FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
-            context = new JobContextImpl(job.getConfiguration(), job.getJobID());
-
-            // Get the splits
-            splits = inputFormat.getSplits(context);
-            Assert.assertEquals(1, splits.size());
-
-            // Expect to end up with two distinct blank nodes
-            Set<Node> nodes = new HashSet<Node>();
-            for (InputSplit split : splits) {
-                TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
-                        new TaskAttemptID());
-                RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
-                reader.initialize(split, inputTaskContext);
-
-                while (reader.nextKeyValue()) {
-                    nodes.add(getSubject(reader.getCurrentValue().get()));
-                }
-            }
-            // Nodes must not have converged
-            Assert.assertEquals(2, nodes.size());
-
-        } finally {
-            a.delete();
-            b.delete();
-            deleteDirectory(intermediateOutputDir);
-        }
-    }
-
-    /**
-     * Test that starts with the same blank node in two different files and
-     * checks that writing them to a single file does not conflate them
-     *
-     * @throws IOException
-     * @throws InterruptedException
-     */
-    @Test
-    public void blank_node_identity_02() throws IOException, InterruptedException {
-        Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
-        Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());
-
-        // Temporary files
-        File a = File.createTempFile("bnode_identity", getInitialInputExtension());
-        File b = File.createTempFile("bnode_identity", getInitialInputExtension());
-        File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();
-
-        try {
-            // Prepare the input data
-            // Same blank node but in different files so must be treated as
-            // different blank nodes and not converge
-            List<T> tuples = new ArrayList<>();
-            Node bnode = NodeFactory.createAnon();
-            Node pred = NodeFactory.createURI("http://example.org/predicate");
-
-            tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
-            writeTuples(a, tuples);
-
-            tuples.clear();
-            tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
-            writeTuples(b, tuples);
-
-            // Set up fake job which will process the two files
-            Configuration config = new Configuration(true);
-            InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
-            Job job = Job.getInstance(config);
-            job.setInputFormatClass(inputFormat.getClass());
-            NLineInputFormat.setNumLinesPerSplit(job, 100);
-            FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
-            FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
-            JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
-
-            // Get the splits
-            List<InputSplit> splits = inputFormat.getSplits(context);
-            Assert.assertEquals(2, splits.size());
-
-            // Prepare the output writing - putting all output to a single file
-            OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
-            TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(
-                    1, 2, 1));
-            RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);
-
-            for (InputSplit split : splits) {
-                // Initialize the input reading
-                TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
-                        createAttemptID(1, 1, 1));
-                RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
-                reader.initialize(split, inputTaskContext);
-
-                // Copy the input to the output - all triples go to a single
-                // output
-                while (reader.nextKeyValue()) {
-                    writer.write(reader.getCurrentKey(), reader.getCurrentValue());
-                }
-            }
-            writer.close(outputTaskContext);
-
-            // Promote outputs from temporary status
-            promoteInputs(intermediateOutputDir);
-
-            // Now we need to create a subsequent job that reads the
-            // intermediate outputs
-            // The Blank nodes should have been given separate identities so we
-            // should not be conflating them, this is the opposite problem to
-            // that described in JENA-820
-            System.out.println(intermediateOutputDir.getAbsolutePath());
-            job = Job.getInstance(config);
-            inputFormat = createIntermediateInputFormat();
-            job.setInputFormatClass(inputFormat.getClass());
-            NLineInputFormat.setNumLinesPerSplit(job, 100);
-            FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
-            context = new JobContextImpl(job.getConfiguration(), job.getJobID());
-
-            // Get the splits
-            splits = inputFormat.getSplits(context);
-            Assert.assertEquals(1, splits.size());
-
-            // Expect to end up with two distinct blank nodes
-            Set<Node> nodes = new HashSet<Node>();
-            for (InputSplit split : splits) {
-                TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
-                        new TaskAttemptID());
-                RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
-                reader.initialize(split, inputTaskContext);
-
-                while (reader.nextKeyValue()) {
-                    nodes.add(getSubject(reader.getCurrentValue().get()));
-                }
-            }
-            // Nodes must not have converged
-            Assert.assertEquals(2, nodes.size());
-
-        } finally {
-            a.delete();
-            b.delete();
-            deleteDirectory(intermediateOutputDir);
-        }
-    }
-
-    private TaskAttemptID createAttemptID(int jobID, int taskID, int id) {
-        return new TaskAttemptID("outputTest", jobID, TaskType.MAP, taskID, id);
-    }
-
-    private void promoteInputs(File baseDir) throws IOException {
-        for (File f : baseDir.listFiles()) {
-            if (f.isDirectory()) {
-                promoteInputs(baseDir, f);
-            }
-        }
-    }
-
-    private void promoteInputs(File targetDir, File dir) throws IOException {
-        java.nio.file.Path target = Paths.get(targetDir.toURI());
-        for (File f :
dir.listFiles()) { - if (f.isDirectory()) { - promoteInputs(targetDir, f); - } else { - LOGGER.debug("Moving {} to {}", f.getAbsolutePath(), target.resolve(f.getName())); - Files.move(Paths.get(f.toURI()), target.resolve(f.getName()), StandardCopyOption.REPLACE_EXISTING); - } - } - - // Remove defunct sub-directory - dir.delete(); - } - - private void deleteDirectory(File dir) throws IOException { - for (File f : dir.listFiles()) { - if (f.isFile()) - f.delete(); - if (f.isDirectory()) - deleteDirectory(f); - } - dir.delete(); - } -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/AbstractTripleBlankNodeTests.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/AbstractTripleBlankNodeTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/AbstractTripleBlankNodeTests.java deleted file mode 100644 index bbd6742..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/AbstractTripleBlankNodeTests.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-package org.apache.jena.hadoop.rdf.io.input.bnodes;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.util.List;
-
-import org.apache.jena.atlas.io.IO;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.Lang;
-import org.apache.jena.riot.RDFDataMgr;
-
-import com.hp.hpl.jena.graph.Graph;
-import com.hp.hpl.jena.graph.Node;
-import com.hp.hpl.jena.graph.Triple;
-import com.hp.hpl.jena.sparql.graph.GraphFactory;
-
-/**
- * Abstract blank node tests for triple formats
- */
-public abstract class AbstractTripleBlankNodeTests extends AbstractBlankNodeTests<Triple, TripleWritable> {
-
-    /**
-     * Gets the language to use
-     *
-     * @return Language
-     */
-    protected abstract Lang getLanguage();
-
-    @Override
-    protected Triple createTuple(Node s, Node p, Node o) {
-        return new Triple(s, p, o);
-    }
-
-    @Override
-    protected void writeTuples(File f, List<Triple> tuples) throws FileNotFoundException {
-        Graph g = GraphFactory.createGraphMem();
-        for (Triple t : tuples) {
-            g.add(t);
-        }
-        // Make sure the output stream is closed once the graph is written
-        FileOutputStream output = new FileOutputStream(f);
-        try {
-            RDFDataMgr.write(output, g, getLanguage());
-        } finally {
-            IO.closeSilent(output);
-        }
-    }
-
-    @Override
-    protected Node getSubject(Triple value) {
-        return value.getSubject();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/JsonLdTripleBlankNodeTest.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/JsonLdTripleBlankNodeTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/JsonLdTripleBlankNodeTest.java
deleted file mode 100644
index f234127..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/JsonLdTripleBlankNodeTest.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.jena.hadoop.rdf.io.input.bnodes;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.jena.hadoop.rdf.io.input.jsonld.JsonLDTripleInputFormat;
-import org.apache.jena.hadoop.rdf.io.output.jsonld.JsonLDTripleOutputFormat;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.Lang;
-
-/**
- * Tests blank node divergence when using the {@link JsonLDTripleInputFormat}
- */
-public class JsonLdTripleBlankNodeTest extends AbstractTripleBlankNodeTests {
-
-    @Override
-    protected Lang getLanguage() {
-        return Lang.JSONLD;
-    }
-
-    @Override
-    protected String getInitialInputExtension() {
-        return ".jsonld";
-    }
-
-    @Override
-    protected InputFormat<LongWritable, TripleWritable> createInitialInputFormat() {
-        return new JsonLDTripleInputFormat();
-    }
-
-    @Override
-    protected OutputFormat<LongWritable, TripleWritable> createIntermediateOutputFormat() {
-        return new JsonLDTripleOutputFormat<>();
-    }
-
-    @Override
-    protected InputFormat<LongWritable, TripleWritable> createIntermediateInputFormat() {
-        return new JsonLDTripleInputFormat();
-    }
-
-    @Override
-    protected boolean respectsParserProfile() {
-        return false;
-    }
-}

http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/NTriplesBlankNodeTest.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/NTriplesBlankNodeTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/NTriplesBlankNodeTest.java
deleted file mode 100644
index 4c350c7..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/NTriplesBlankNodeTest.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.jena.hadoop.rdf.io.input.bnodes;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.jena.hadoop.rdf.io.input.ntriples.NTriplesInputFormat;
-import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesOutputFormat;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.Lang;
-
-/**
- * Tests blank node divergence when using the {@link NTriplesInputFormat}
- */
-public class NTriplesBlankNodeTest extends AbstractTripleBlankNodeTests {
-
-    @Override
-    protected Lang getLanguage() {
-        return Lang.NTRIPLES;
-    }
-
-    @Override
-    protected String getInitialInputExtension() {
-        return ".nt";
-    }
-
-    @Override
-    protected InputFormat<LongWritable, TripleWritable> createInitialInputFormat() {
-        return new NTriplesInputFormat();
-    }
-
-    @Override
-    protected OutputFormat<LongWritable, TripleWritable> createIntermediateOutputFormat() {
-        return new NTriplesOutputFormat<>();
-    }
-
-    @Override
-    protected InputFormat<LongWritable, TripleWritable> createIntermediateInputFormat() {
-        return new NTriplesInputFormat();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/RdfJsonBlankNodeTest.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/RdfJsonBlankNodeTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/RdfJsonBlankNodeTest.java
deleted file mode 100644
index 2be1e0e..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/RdfJsonBlankNodeTest.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.jena.hadoop.rdf.io.input.bnodes;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.jena.hadoop.rdf.io.input.rdfjson.RdfJsonInputFormat;
-import org.apache.jena.hadoop.rdf.io.output.rdfjson.RdfJsonOutputFormat;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.Lang;
-
-/**
- * Tests blank node divergence when using the {@link RdfJsonInputFormat}
- */
-public class RdfJsonBlankNodeTest extends AbstractTripleBlankNodeTests {
-
-    @Override
-    protected Lang getLanguage() {
-        return Lang.RDFJSON;
-    }
-
-    @Override
-    protected String getInitialInputExtension() {
-        return ".rj";
-    }
-
-    @Override
-    protected InputFormat<LongWritable, TripleWritable> createInitialInputFormat() {
-        return new RdfJsonInputFormat();
-    }
-
-    @Override
-    protected OutputFormat<LongWritable, TripleWritable> createIntermediateOutputFormat() {
-        return new RdfJsonOutputFormat<>();
-    }
-
-    @Override
-    protected InputFormat<LongWritable, TripleWritable> createIntermediateInputFormat() {
-        return new RdfJsonInputFormat();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/RdfThriftBlankNodeTest.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/RdfThriftBlankNodeTest.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/RdfThriftBlankNodeTest.java
deleted file mode 100644
index d6f32a2..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/bnodes/RdfThriftBlankNodeTest.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.jena.hadoop.rdf.io.input.bnodes;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.jena.hadoop.rdf.io.input.thrift.ThriftTripleInputFormat;
-import org.apache.jena.hadoop.rdf.io.output.thrift.ThriftTripleOutputFormat;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.Lang;
-import org.apache.jena.riot.RDFLanguages;
-
-/**
- * Tests blank node divergence when using the {@link ThriftTripleInputFormat}
- */
-public class RdfThriftBlankNodeTest extends AbstractTripleBlankNodeTests {
-
-    @Override
-    protected Lang getLanguage() {
-        return RDFLanguages.THRIFT;
-    }
-
-    @Override
-    protected String getInitialInputExtension() {
-        return ".trdf";
-    }
-
-    @Override
-    protected InputFormat<LongWritable, TripleWritable> createInitialInputFormat() {
-        return new ThriftTripleInputFormat();
-    }
-
-    @Override
-    protected OutputFormat<LongWritable, TripleWritable> createIntermediateOutputFormat() {
-        return new ThriftTripleOutputFormat<>();
-    }
-
-    @Override
-    protected InputFormat<LongWritable, TripleWritable> createIntermediateInputFormat() {
-        return new ThriftTripleInputFormat();
-    }
-
-    @Override
-    protected boolean respectsParserProfile() {
-        return false;
-    }
-
-    @Override
-    protected boolean preservesBlankNodeIdentity() {
-        return true;
-    }
-}
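
The per-format tests above all instantiate the same template: pick a Lang, a file extension, and matching input/output formats. As a point of reference, a variant for a further syntax such as Turtle would be a minimal sketch along the following lines; it is hypothetical and not part of this commit, it assumes the TurtleInputFormat and TurtleOutputFormat classes from the elephas turtle input/output packages, and it relies on the base class defaults for respectsParserProfile() and preservesBlankNodeIdentity().

package org.apache.jena.hadoop.rdf.io.input.bnodes;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.OutputFormat;
// Assumed to exist in jena-elephas-io; verify before use
import org.apache.jena.hadoop.rdf.io.input.turtle.TurtleInputFormat;
import org.apache.jena.hadoop.rdf.io.output.turtle.TurtleOutputFormat;
import org.apache.jena.hadoop.rdf.types.TripleWritable;
import org.apache.jena.riot.Lang;

/**
 * Hypothetical sketch: blank node tests for Turtle input (not part of this
 * commit)
 */
public class TurtleBlankNodeTest extends AbstractTripleBlankNodeTests {

    @Override
    protected Lang getLanguage() {
        return Lang.TURTLE;
    }

    @Override
    protected String getInitialInputExtension() {
        return ".ttl";
    }

    @Override
    protected InputFormat<LongWritable, TripleWritable> createInitialInputFormat() {
        return new TurtleInputFormat();
    }

    @Override
    protected OutputFormat<LongWritable, TripleWritable> createIntermediateOutputFormat() {
        return new TurtleOutputFormat<>();
    }

    @Override
    protected InputFormat<LongWritable, TripleWritable> createIntermediateInputFormat() {
        return new TurtleInputFormat();
    }
}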

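The deleted tests exercise both sides of the JENA-820 behaviour by toggling RdfIOConstants.GLOBAL_BNODE_IDENTITY on the job configuration: disabled, blank node labels are file-scoped and may diverge; enabled, they are kept stable across inputs. A minimal driver-side sketch of opting in to global blank node identity follows; only the flag itself comes from the code above, while the class name, job name, input format choice, and input path are illustrative assumptions, and RdfIOConstants is assumed to live at org.apache.jena.hadoop.rdf.io as elsewhere in this module.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
import org.apache.jena.hadoop.rdf.io.input.ntriples.NTriplesInputFormat;

public class BNodeIdentityDriver {
    public static void main(String[] args) throws Exception {
        Configuration config = new Configuration(true);
        // Opt in to the JENA-820 behaviour: blank node labels are kept
        // stable across all input files instead of being made file-scoped
        config.setBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, true);

        Job job = Job.getInstance(config, "bnode-identity"); // name is illustrative
        job.setInputFormatClass(NTriplesInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path("/data/input")); // path is illustrative
        // ... mapper/reducer/output setup as usual ...
    }
}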