http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractNodeTupleInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractNodeTupleInputFormatTests.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractNodeTupleInputFormatTests.java index bfaff01..155d10a 100644 --- a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractNodeTupleInputFormatTests.java +++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractNodeTupleInputFormatTests.java @@ -1,611 +1,611 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.jena.hadoop.rdf.io.input; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.List; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.FileSplit; -import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; -import org.apache.hadoop.mapreduce.task.JobContextImpl; -import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; -import org.apache.jena.hadoop.rdf.io.HadoopIOConstants; -import org.apache.jena.hadoop.rdf.io.RdfIOConstants; -import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; -import org.junit.* ; -import org.junit.rules.TemporaryFolder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Abstract node tuple input format tests - * - * - * - * @param <TValue> - * @param <T> - */ -public abstract class AbstractNodeTupleInputFormatTests<TValue, T extends AbstractNodeTupleWritable<TValue>> { - - private static final Logger LOG = LoggerFactory.getLogger(AbstractNodeTupleInputFormatTests.class); - - protected static final int EMPTY_SIZE = 0, SMALL_SIZE = 100, LARGE_SIZE = 10000, BAD_SIZE = 100, MIXED_SIZE = 100; - protected static final String EMPTY = "empty"; - protected static final String SMALL = "small"; - protected static final String LARGE = "large"; - protected static final String BAD = "bad"; - protected static final String MIXED = "mixed"; - - /** - 
* Temporary folder for the tests - */ - @Rule - public TemporaryFolder folder = new TemporaryFolder(); - - protected File empty, small, large, bad, mixed; - - /** - * Prepares the inputs for the tests - * - * @throws IOException - */ - @Before - public void beforeTest() throws IOException { - this.prepareInputs(); - } - - /** - * Cleans up the inputs after each test - */ - @After - public void afterTest() { - // Should be unnecessary since JUnit will clean up the temporary folder - // anyway but best to do this regardless - if (empty != null) - empty.delete(); - if (small != null) - small.delete(); - if (large != null) - large.delete(); - if (bad != null) - bad.delete(); - if (mixed != null) - mixed.delete(); - } - - /** - * Prepares a fresh configuration - * - * @return Configuration - */ - protected Configuration prepareConfiguration() { - Configuration config = new Configuration(true); - // Nothing else to do - return config; - } - - /** - * Prepares the inputs - * - * @throws IOException - */ - protected void prepareInputs() throws IOException { - String ext = this.getFileExtension(); - empty = folder.newFile(EMPTY + ext); - this.generateTuples(empty, EMPTY_SIZE); - small = folder.newFile(SMALL + ext); - this.generateTuples(small, SMALL_SIZE); - large = folder.newFile(LARGE + ext); - this.generateTuples(large, LARGE_SIZE); - bad = folder.newFile(BAD + ext); - this.generateBadTuples(bad, BAD_SIZE); - mixed = folder.newFile(MIXED + ext); - this.generateMixedTuples(mixed, MIXED_SIZE); - } - - /** - * Gets the extra file extension to add to the filenames - * - * @return File extension - */ - protected abstract String getFileExtension(); - - /** - * Generates tuples used for tests - * - * @param f - * File - * @param num - * Number of tuples to generate - * @throws IOException - */ - protected final void generateTuples(File f, int num) throws IOException { - this.generateTuples(this.getOutputStream(f), num); - } - - /** - * Gets the output stream to use for 
generating tuples - * - * @param f - * File - * @return Output Stream - * @throws IOException - */ - protected OutputStream getOutputStream(File f) throws IOException { - return new FileOutputStream(f, false); - } - - /** - * Generates tuples used for tests - * - * @param output - * Output Stream to write to - * @param num - * Number of tuples to generate - * @throws IOException - */ - protected abstract void generateTuples(OutputStream output, int num) throws IOException; - - /** - * Generates bad tuples used for tests - * - * @param f - * File - * @param num - * Number of bad tuples to generate - * @throws IOException - */ - protected final void generateBadTuples(File f, int num) throws IOException { - this.generateBadTuples(this.getOutputStream(f), num); - } - - /** - * Generates bad tuples used for tests - * - * @param output - * Output Stream to write to - * @param num - * Number of bad tuples to generate - * @throws IOException - */ - protected abstract void generateBadTuples(OutputStream output, int num) throws IOException; - - /** - * Generates a mixture of good and bad tuples used for tests - * - * @param f - * File - * @param num - * Number of tuples to generate, they should be a 50/50 mix of - * good and bad tuples - * @throws IOException - */ - protected final void generateMixedTuples(File f, int num) throws IOException { - this.generateMixedTuples(this.getOutputStream(f), num); - } - - /** - * Generates a mixture of good and bad tuples used for tests - * - * @param output - * Output Stream to write to - * @param num - * Number of tuples to generate, they should be a 50/50 mix of - * good and bad tuples - * @throws IOException - */ - protected abstract void generateMixedTuples(OutputStream output, int num) throws IOException; - - /** - * Adds an input path to the job configuration - * - * @param f - * File - * @param config - * Configuration - * @param job - * Job - * @throws IOException - */ - protected void addInputPath(File f, Configuration config, 
Job job) throws IOException { - FileSystem fs = FileSystem.getLocal(config); - Path inputPath = fs.makeQualified(new Path(f.getAbsolutePath())); - FileInputFormat.addInputPath(job, inputPath); - } - - protected final int countTuples(RecordReader<LongWritable, T> reader) throws IOException, InterruptedException { - int count = 0; - - // Check initial progress - LOG.info(String.format("Initial Reported Progress %f", reader.getProgress())); - float progress = reader.getProgress(); - if (Float.compare(0.0f, progress) == 0) { - Assert.assertEquals(0.0d, reader.getProgress(), 0.0d); - } else if (Float.compare(1.0f, progress) == 0) { - // If reader is reported 1.0 straight away then we expect there to - // be no key values - Assert.assertEquals(1.0d, reader.getProgress(), 0.0d); - Assert.assertFalse(reader.nextKeyValue()); - } else { - Assert.fail(String.format( - "Expected progress of 0.0 or 1.0 before reader has been accessed for first time but got %f", - progress)); - } - - // Count tuples - boolean debug = LOG.isDebugEnabled(); - while (reader.nextKeyValue()) { - count++; - progress = reader.getProgress(); - if (debug) - LOG.debug(String.format("Current Reported Progress %f", progress)); - Assert.assertTrue(String.format("Progress should be in the range 0.0 < p <= 1.0 but got %f", progress), - progress > 0.0f && progress <= 1.0f); - } - reader.close(); - LOG.info(String.format("Got %d tuples from this record reader", count)); - - // Check final progress - LOG.info(String.format("Final Reported Progress %f", reader.getProgress())); - Assert.assertEquals(1.0d, reader.getProgress(), 0.0d); - - return count; - } - - protected final void checkTuples(RecordReader<LongWritable, T> reader, int expected) throws IOException, - InterruptedException { - Assert.assertEquals(expected, this.countTuples(reader)); - } - - /** - * Runs a test with a single input - * - * @param input - * Input - * @param expectedTuples - * Expected tuples - * @throws IOException - * @throws 
InterruptedException - */ - protected final void testSingleInput(File input, int expectedSplits, int expectedTuples) throws IOException, - InterruptedException { - // Prepare configuration - Configuration config = this.prepareConfiguration(); - this.testSingleInput(config, input, expectedSplits, expectedTuples); - } - - /** - * Runs a test with a single input - * - * @param config - * Configuration - * @param input - * Input - * @param expectedTuples - * Expected tuples - * @throws IOException - * @throws InterruptedException - */ - protected final void testSingleInput(Configuration config, File input, int expectedSplits, int expectedTuples) - throws IOException, InterruptedException { - // Set up fake job - InputFormat<LongWritable, T> inputFormat = this.getInputFormat(); - Job job = Job.getInstance(config); - job.setInputFormatClass(inputFormat.getClass()); - this.addInputPath(input, job.getConfiguration(), job); - JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID()); - Assert.assertEquals(1, FileInputFormat.getInputPaths(context).length); - NLineInputFormat.setNumLinesPerSplit(job, LARGE_SIZE); - - // Check splits - List<InputSplit> splits = inputFormat.getSplits(context); - Assert.assertEquals(expectedSplits, splits.size()); - - // Check tuples - for (InputSplit split : splits) { - TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID()); - RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext); - reader.initialize(split, taskContext); - this.checkTuples(reader, expectedTuples); - } - } - - protected abstract InputFormat<LongWritable, T> getInputFormat(); - - /** - * Basic tuples input test - * - * @throws IOException - * @throws InterruptedException - */ - @Test - public final void single_input_01() throws IOException, InterruptedException { - testSingleInput(empty, this.canSplitInputs() ? 
0 : 1, EMPTY_SIZE); - } - - /** - * Basic tuples input test - * - * @throws IOException - * @throws InterruptedException - */ - @Test - public final void single_input_02() throws IOException, InterruptedException { - testSingleInput(small, 1, SMALL_SIZE); - } - - /** - * Basic tuples input test - * - * @throws IOException - * @throws InterruptedException - */ - @Test - public final void single_input_03() throws IOException, InterruptedException { - testSingleInput(large, 1, LARGE_SIZE); - } - - /** - * Basic tuples input test - * - * @throws IOException - * @throws InterruptedException - */ - @Test - public final void single_input_04() throws IOException, InterruptedException { - testSingleInput(bad, 1, 0); - } - - /** - * Basic tuples input test - * - * @throws IOException - * @throws InterruptedException - */ - @Test - public final void single_input_05() throws IOException, InterruptedException { - // JSON-LD overrides this because in JSON-LD parsing a bad document gives no triples. - int x = single_input_05_expected() ; - testSingleInput(mixed, 1, x); - } - - /** Results exected for test single_input_05 */ - protected int single_input_05_expected() { - return MIXED_SIZE / 2 ; - } - - /** - * Tests behaviour when ignoring bad tuples is disabled - * - * @throws InterruptedException - * @throws IOException - */ - @Test(expected = IOException.class) - public final void fail_on_bad_input_01() throws IOException, InterruptedException { - Configuration config = this.prepareConfiguration(); - config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); - Assert.assertFalse(config.getBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, true)); - testSingleInput(config, bad, 1, 0); - } - - /** - * Tests behaviour when ignoring bad tuples is disabled - * - * @throws InterruptedException - * @throws IOException - */ - @Test(expected = IOException.class) - public final void fail_on_bad_input_02() throws IOException, InterruptedException { - Configuration config = 
this.prepareConfiguration(); - config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); - Assert.assertFalse(config.getBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, true)); - testSingleInput(config, mixed, 1, MIXED_SIZE / 2); - } - - /** - * Runs a multiple input test - * - * @param inputs - * Inputs - * @param expectedSplits - * Number of splits expected - * @param expectedTuples - * Number of tuples expected - * @throws IOException - * @throws InterruptedException - */ - protected final void testMultipleInputs(File[] inputs, int expectedSplits, int expectedTuples) throws IOException, - InterruptedException { - // Prepare configuration and inputs - Configuration config = this.prepareConfiguration(); - - // Set up fake job - InputFormat<LongWritable, T> inputFormat = this.getInputFormat(); - Job job = Job.getInstance(config); - job.setInputFormatClass(inputFormat.getClass()); - for (File input : inputs) { - this.addInputPath(input, job.getConfiguration(), job); - } - JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID()); - Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length); - NLineInputFormat.setNumLinesPerSplit(job, expectedTuples); - - // Check splits - List<InputSplit> splits = inputFormat.getSplits(context); - Assert.assertEquals(expectedSplits, splits.size()); - - // Check tuples - int count = 0; - for (InputSplit split : splits) { - TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID()); - RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext); - reader.initialize(split, taskContext); - count += this.countTuples(reader); - } - Assert.assertEquals(expectedTuples, count); - } - - /** - * tuples test with multiple inputs - * - * @throws IOException - * @throws InterruptedException - */ - @Test - public final void multiple_inputs_01() throws IOException, InterruptedException { - testMultipleInputs(new File[] { 
empty, small, large }, this.canSplitInputs() ? 2 : 3, EMPTY_SIZE + SMALL_SIZE - + LARGE_SIZE); - } - - /** - * tuples test with multiple inputs - * - * @throws IOException - * @throws InterruptedException - */ - @Test - public final void multiple_inputs_02() throws IOException, InterruptedException { - int expectedTriples = multiple_inputs_02_expected() ; - testMultipleInputs(new File[] { folder.getRoot() }, this.canSplitInputs() ? 4 : 5, expectedTriples); - } - - /** Results exected for test multiple_inputs_02. - * JSON_LD has different characteristics on bad documents. - * See {@link #single_input_05}. - */ - protected int multiple_inputs_02_expected() { - return EMPTY_SIZE + SMALL_SIZE + LARGE_SIZE + (MIXED_SIZE / 2) ; - } - - protected final void testSplitInputs(Configuration config, File[] inputs, int expectedSplits, int expectedTuples) - throws IOException, InterruptedException { - // Set up fake job - InputFormat<LongWritable, T> inputFormat = this.getInputFormat(); - Job job = Job.getInstance(config); - job.setInputFormatClass(inputFormat.getClass()); - for (File input : inputs) { - this.addInputPath(input, job.getConfiguration(), job); - } - JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID()); - Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length); - - // Check splits - List<InputSplit> splits = inputFormat.getSplits(context); - Assert.assertEquals(expectedSplits, splits.size()); - - // Check tuples - int count = 0; - for (InputSplit split : splits) { - // Validate split - Assert.assertTrue(this.isValidSplit(split, config)); - - // Read split - TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID()); - RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext); - reader.initialize(split, taskContext); - count += this.countTuples(reader); - } - Assert.assertEquals(expectedTuples, count); - } - - /** - * Determines whether 
an input split is valid - * - * @param split - * Input split - * @return True if a valid split, false otherwise - */ - protected boolean isValidSplit(InputSplit split, Configuration config) { - return split instanceof FileSplit; - } - - /** - * Indicates whether inputs can be split, defaults to true - * - * @return Whether inputs can be split - */ - protected boolean canSplitInputs() { - return true; - } - - /** - * Tests for input splitting - * - * @throws IOException - * @throws InterruptedException - */ - @Test - public final void split_input_01() throws IOException, InterruptedException { - Assume.assumeTrue(this.canSplitInputs()); - - Configuration config = this.prepareConfiguration(); - config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); - Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE)); - this.testSplitInputs(config, new File[] { small }, 100, SMALL_SIZE); - } - - /** - * Tests for input splitting - * - * @throws IOException - * @throws InterruptedException - */ - @Test - public final void split_input_02() throws IOException, InterruptedException { - Assume.assumeTrue(this.canSplitInputs()); - - Configuration config = this.prepareConfiguration(); - config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); - config.setLong(NLineInputFormat.LINES_PER_MAP, 10); - Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE)); - this.testSplitInputs(config, new File[] { small }, 10, SMALL_SIZE); - } - - /** - * Tests for input splitting - * - * @throws IOException - * @throws InterruptedException - */ - @Test - public final void split_input_03() throws IOException, InterruptedException { - Assume.assumeTrue(this.canSplitInputs()); - - Configuration config = this.prepareConfiguration(); - config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); - config.setLong(NLineInputFormat.LINES_PER_MAP, 100); - 
Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE)); - this.testSplitInputs(config, new File[] { large }, 100, LARGE_SIZE); - } +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.io.input; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.InputFormat; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; +import org.apache.hadoop.mapreduce.task.JobContextImpl; +import 
org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; +import org.apache.jena.hadoop.rdf.io.HadoopIOConstants; +import org.apache.jena.hadoop.rdf.io.RdfIOConstants; +import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; +import org.junit.* ; +import org.junit.rules.TemporaryFolder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Abstract node tuple input format tests + * + * + * + * @param <TValue> + * @param <T> + */ +public abstract class AbstractNodeTupleInputFormatTests<TValue, T extends AbstractNodeTupleWritable<TValue>> { + + private static final Logger LOG = LoggerFactory.getLogger(AbstractNodeTupleInputFormatTests.class); + + protected static final int EMPTY_SIZE = 0, SMALL_SIZE = 100, LARGE_SIZE = 10000, BAD_SIZE = 100, MIXED_SIZE = 100; + protected static final String EMPTY = "empty"; + protected static final String SMALL = "small"; + protected static final String LARGE = "large"; + protected static final String BAD = "bad"; + protected static final String MIXED = "mixed"; + + /** + * Temporary folder for the tests + */ + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + protected File empty, small, large, bad, mixed; + + /** + * Prepares the inputs for the tests + * + * @throws IOException + */ + @Before + public void beforeTest() throws IOException { + this.prepareInputs(); + } + + /** + * Cleans up the inputs after each test + */ + @After + public void afterTest() { + // Should be unnecessary since JUnit will clean up the temporary folder + // anyway but best to do this regardless + if (empty != null) + empty.delete(); + if (small != null) + small.delete(); + if (large != null) + large.delete(); + if (bad != null) + bad.delete(); + if (mixed != null) + mixed.delete(); + } + + /** + * Prepares a fresh configuration + * + * @return Configuration + */ + protected Configuration prepareConfiguration() { + Configuration config = new Configuration(true); + // Nothing else to do + return config; + } + + 
/** + * Prepares the inputs + * + * @throws IOException + */ + protected void prepareInputs() throws IOException { + String ext = this.getFileExtension(); + empty = folder.newFile(EMPTY + ext); + this.generateTuples(empty, EMPTY_SIZE); + small = folder.newFile(SMALL + ext); + this.generateTuples(small, SMALL_SIZE); + large = folder.newFile(LARGE + ext); + this.generateTuples(large, LARGE_SIZE); + bad = folder.newFile(BAD + ext); + this.generateBadTuples(bad, BAD_SIZE); + mixed = folder.newFile(MIXED + ext); + this.generateMixedTuples(mixed, MIXED_SIZE); + } + + /** + * Gets the extra file extension to add to the filenames + * + * @return File extension + */ + protected abstract String getFileExtension(); + + /** + * Generates tuples used for tests + * + * @param f + * File + * @param num + * Number of tuples to generate + * @throws IOException + */ + protected final void generateTuples(File f, int num) throws IOException { + this.generateTuples(this.getOutputStream(f), num); + } + + /** + * Gets the output stream to use for generating tuples + * + * @param f + * File + * @return Output Stream + * @throws IOException + */ + protected OutputStream getOutputStream(File f) throws IOException { + return new FileOutputStream(f, false); + } + + /** + * Generates tuples used for tests + * + * @param output + * Output Stream to write to + * @param num + * Number of tuples to generate + * @throws IOException + */ + protected abstract void generateTuples(OutputStream output, int num) throws IOException; + + /** + * Generates bad tuples used for tests + * + * @param f + * File + * @param num + * Number of bad tuples to generate + * @throws IOException + */ + protected final void generateBadTuples(File f, int num) throws IOException { + this.generateBadTuples(this.getOutputStream(f), num); + } + + /** + * Generates bad tuples used for tests + * + * @param output + * Output Stream to write to + * @param num + * Number of bad tuples to generate + * @throws IOException + */ + 
protected abstract void generateBadTuples(OutputStream output, int num) throws IOException; + + /** + * Generates a mixture of good and bad tuples used for tests + * + * @param f + * File + * @param num + * Number of tuples to generate, they should be a 50/50 mix of + * good and bad tuples + * @throws IOException + */ + protected final void generateMixedTuples(File f, int num) throws IOException { + this.generateMixedTuples(this.getOutputStream(f), num); + } + + /** + * Generates a mixture of good and bad tuples used for tests + * + * @param output + * Output Stream to write to + * @param num + * Number of tuples to generate, they should be a 50/50 mix of + * good and bad tuples + * @throws IOException + */ + protected abstract void generateMixedTuples(OutputStream output, int num) throws IOException; + + /** + * Adds an input path to the job configuration + * + * @param f + * File + * @param config + * Configuration + * @param job + * Job + * @throws IOException + */ + protected void addInputPath(File f, Configuration config, Job job) throws IOException { + FileSystem fs = FileSystem.getLocal(config); + Path inputPath = fs.makeQualified(new Path(f.getAbsolutePath())); + FileInputFormat.addInputPath(job, inputPath); + } + + protected final int countTuples(RecordReader<LongWritable, T> reader) throws IOException, InterruptedException { + int count = 0; + + // Check initial progress + LOG.info(String.format("Initial Reported Progress %f", reader.getProgress())); + float progress = reader.getProgress(); + if (Float.compare(0.0f, progress) == 0) { + Assert.assertEquals(0.0d, reader.getProgress(), 0.0d); + } else if (Float.compare(1.0f, progress) == 0) { + // If reader is reported 1.0 straight away then we expect there to + // be no key values + Assert.assertEquals(1.0d, reader.getProgress(), 0.0d); + Assert.assertFalse(reader.nextKeyValue()); + } else { + Assert.fail(String.format( + "Expected progress of 0.0 or 1.0 before reader has been accessed for first time but 
got %f", + progress)); + } + + // Count tuples + boolean debug = LOG.isDebugEnabled(); + while (reader.nextKeyValue()) { + count++; + progress = reader.getProgress(); + if (debug) + LOG.debug(String.format("Current Reported Progress %f", progress)); + Assert.assertTrue(String.format("Progress should be in the range 0.0 < p <= 1.0 but got %f", progress), + progress > 0.0f && progress <= 1.0f); + } + reader.close(); + LOG.info(String.format("Got %d tuples from this record reader", count)); + + // Check final progress + LOG.info(String.format("Final Reported Progress %f", reader.getProgress())); + Assert.assertEquals(1.0d, reader.getProgress(), 0.0d); + + return count; + } + + protected final void checkTuples(RecordReader<LongWritable, T> reader, int expected) throws IOException, + InterruptedException { + Assert.assertEquals(expected, this.countTuples(reader)); + } + + /** + * Runs a test with a single input + * + * @param input + * Input + * @param expectedTuples + * Expected tuples + * @throws IOException + * @throws InterruptedException + */ + protected final void testSingleInput(File input, int expectedSplits, int expectedTuples) throws IOException, + InterruptedException { + // Prepare configuration + Configuration config = this.prepareConfiguration(); + this.testSingleInput(config, input, expectedSplits, expectedTuples); + } + + /** + * Runs a test with a single input + * + * @param config + * Configuration + * @param input + * Input + * @param expectedTuples + * Expected tuples + * @throws IOException + * @throws InterruptedException + */ + protected final void testSingleInput(Configuration config, File input, int expectedSplits, int expectedTuples) + throws IOException, InterruptedException { + // Set up fake job + InputFormat<LongWritable, T> inputFormat = this.getInputFormat(); + Job job = Job.getInstance(config); + job.setInputFormatClass(inputFormat.getClass()); + this.addInputPath(input, job.getConfiguration(), job); + JobContext context = new 
JobContextImpl(job.getConfiguration(), job.getJobID()); + Assert.assertEquals(1, FileInputFormat.getInputPaths(context).length); + NLineInputFormat.setNumLinesPerSplit(job, LARGE_SIZE); + + // Check splits + List<InputSplit> splits = inputFormat.getSplits(context); + Assert.assertEquals(expectedSplits, splits.size()); + + // Check tuples + for (InputSplit split : splits) { + TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID()); + RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext); + reader.initialize(split, taskContext); + this.checkTuples(reader, expectedTuples); + } + } + + protected abstract InputFormat<LongWritable, T> getInputFormat(); + + /** + * Basic tuples input test + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public final void single_input_01() throws IOException, InterruptedException { + testSingleInput(empty, this.canSplitInputs() ? 0 : 1, EMPTY_SIZE); + } + + /** + * Basic tuples input test + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public final void single_input_02() throws IOException, InterruptedException { + testSingleInput(small, 1, SMALL_SIZE); + } + + /** + * Basic tuples input test + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public final void single_input_03() throws IOException, InterruptedException { + testSingleInput(large, 1, LARGE_SIZE); + } + + /** + * Basic tuples input test + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public final void single_input_04() throws IOException, InterruptedException { + testSingleInput(bad, 1, 0); + } + + /** + * Basic tuples input test + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public final void single_input_05() throws IOException, InterruptedException { + // JSON-LD overrides this because in JSON-LD parsing a bad document gives no triples. 
+ int x = single_input_05_expected() ; + testSingleInput(mixed, 1, x); + } + + /** Results exected for test single_input_05 */ + protected int single_input_05_expected() { + return MIXED_SIZE / 2 ; + } + + /** + * Tests behaviour when ignoring bad tuples is disabled + * + * @throws InterruptedException + * @throws IOException + */ + @Test(expected = IOException.class) + public final void fail_on_bad_input_01() throws IOException, InterruptedException { + Configuration config = this.prepareConfiguration(); + config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); + Assert.assertFalse(config.getBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, true)); + testSingleInput(config, bad, 1, 0); + } + + /** + * Tests behaviour when ignoring bad tuples is disabled + * + * @throws InterruptedException + * @throws IOException + */ + @Test(expected = IOException.class) + public final void fail_on_bad_input_02() throws IOException, InterruptedException { + Configuration config = this.prepareConfiguration(); + config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); + Assert.assertFalse(config.getBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, true)); + testSingleInput(config, mixed, 1, MIXED_SIZE / 2); + } + + /** + * Runs a multiple input test + * + * @param inputs + * Inputs + * @param expectedSplits + * Number of splits expected + * @param expectedTuples + * Number of tuples expected + * @throws IOException + * @throws InterruptedException + */ + protected final void testMultipleInputs(File[] inputs, int expectedSplits, int expectedTuples) throws IOException, + InterruptedException { + // Prepare configuration and inputs + Configuration config = this.prepareConfiguration(); + + // Set up fake job + InputFormat<LongWritable, T> inputFormat = this.getInputFormat(); + Job job = Job.getInstance(config); + job.setInputFormatClass(inputFormat.getClass()); + for (File input : inputs) { + this.addInputPath(input, job.getConfiguration(), job); + } + JobContext context 
= new JobContextImpl(job.getConfiguration(), job.getJobID()); + Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length); + NLineInputFormat.setNumLinesPerSplit(job, expectedTuples); + + // Check splits + List<InputSplit> splits = inputFormat.getSplits(context); + Assert.assertEquals(expectedSplits, splits.size()); + + // Check tuples + int count = 0; + for (InputSplit split : splits) { + TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID()); + RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext); + reader.initialize(split, taskContext); + count += this.countTuples(reader); + } + Assert.assertEquals(expectedTuples, count); + } + + /** + * tuples test with multiple inputs + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public final void multiple_inputs_01() throws IOException, InterruptedException { + testMultipleInputs(new File[] { empty, small, large }, this.canSplitInputs() ? 2 : 3, EMPTY_SIZE + SMALL_SIZE + + LARGE_SIZE); + } + + /** + * tuples test with multiple inputs + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public final void multiple_inputs_02() throws IOException, InterruptedException { + int expectedTriples = multiple_inputs_02_expected() ; + testMultipleInputs(new File[] { folder.getRoot() }, this.canSplitInputs() ? 4 : 5, expectedTriples); + } + + /** Results expected for test multiple_inputs_02. + * JSON_LD has different characteristics on bad documents. + * See {@link #single_input_05}. 
+ */ + protected int multiple_inputs_02_expected() { + return EMPTY_SIZE + SMALL_SIZE + LARGE_SIZE + (MIXED_SIZE / 2) ; + } + + protected final void testSplitInputs(Configuration config, File[] inputs, int expectedSplits, int expectedTuples) + throws IOException, InterruptedException { + // Set up fake job + InputFormat<LongWritable, T> inputFormat = this.getInputFormat(); + Job job = Job.getInstance(config); + job.setInputFormatClass(inputFormat.getClass()); + for (File input : inputs) { + this.addInputPath(input, job.getConfiguration(), job); + } + JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID()); + Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length); + + // Check splits + List<InputSplit> splits = inputFormat.getSplits(context); + Assert.assertEquals(expectedSplits, splits.size()); + + // Check tuples + int count = 0; + for (InputSplit split : splits) { + // Validate split + Assert.assertTrue(this.isValidSplit(split, config)); + + // Read split + TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID()); + RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext); + reader.initialize(split, taskContext); + count += this.countTuples(reader); + } + Assert.assertEquals(expectedTuples, count); + } + + /** + * Determines whether an input split is valid + * + * @param split + * Input split + * @return True if a valid split, false otherwise + */ + protected boolean isValidSplit(InputSplit split, Configuration config) { + return split instanceof FileSplit; + } + + /** + * Indicates whether inputs can be split, defaults to true + * + * @return Whether inputs can be split + */ + protected boolean canSplitInputs() { + return true; + } + + /** + * Tests for input splitting + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public final void split_input_01() throws IOException, InterruptedException { + 
Assume.assumeTrue(this.canSplitInputs()); + + Configuration config = this.prepareConfiguration(); + config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); + Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE)); + this.testSplitInputs(config, new File[] { small }, 100, SMALL_SIZE); + } + + /** + * Tests for input splitting + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public final void split_input_02() throws IOException, InterruptedException { + Assume.assumeTrue(this.canSplitInputs()); + + Configuration config = this.prepareConfiguration(); + config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); + config.setLong(NLineInputFormat.LINES_PER_MAP, 10); + Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE)); + this.testSplitInputs(config, new File[] { small }, 10, SMALL_SIZE); + } + + /** + * Tests for input splitting + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public final void split_input_03() throws IOException, InterruptedException { + Assume.assumeTrue(this.canSplitInputs()); + + Configuration config = this.prepareConfiguration(); + config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false); + config.setLong(NLineInputFormat.LINES_PER_MAP, 100); + Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE)); + this.testSplitInputs(config, new File[] { large }, 100, LARGE_SIZE); + } } \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java index febed20..ec2a40d 100644 --- a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java +++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java @@ -1,37 +1,37 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.hadoop.rdf.io.input; import java.io.IOException; import java.io.OutputStream; -import java.nio.charset.Charset; - -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.sparql.core.Quad ; +import java.nio.charset.Charset; + +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.sparql.core.Quad ; /** * Abstract tests for Quad input formats * * */ -public abstract class AbstractQuadsInputFormatTests extends AbstractNodeTupleInputFormatTests<Quad, QuadWritable> { - +public abstract class AbstractQuadsInputFormatTests extends AbstractNodeTupleInputFormatTests<Quad, QuadWritable> { + private static final Charset utf8 = Charset.forName("utf-8"); @Override http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractTriplesInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractTriplesInputFormatTests.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractTriplesInputFormatTests.java index edd99c7..1f1f652 100644 --- a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractTriplesInputFormatTests.java +++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractTriplesInputFormatTests.java @@ -1,29 +1,29 @@ -/* - * Licensed to the Apache Software Foundation (ASF) 
under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.jena.hadoop.rdf.io.input; import java.io.IOException; -import java.io.OutputStream; -import java.nio.charset.Charset; - -import org.apache.jena.graph.Triple ; -import org.apache.jena.hadoop.rdf.types.TripleWritable; +import java.io.OutputStream; +import java.nio.charset.Charset; + +import org.apache.jena.graph.Triple ; +import org.apache.jena.hadoop.rdf.types.TripleWritable; /** * Abstract tests for Triple input formats @@ -31,8 +31,8 @@ import org.apache.jena.hadoop.rdf.types.TripleWritable; * * */ -public abstract class AbstractTriplesInputFormatTests extends AbstractNodeTupleInputFormatTests<Triple, TripleWritable> { - +public abstract class AbstractTriplesInputFormatTests extends AbstractNodeTupleInputFormatTests<Triple, TripleWritable> { + private static final Charset utf8 = Charset.forName("utf-8"); @Override @@ -45,7 +45,7 @@ public abstract class AbstractTriplesInputFormatTests extends AbstractNodeTupleI } @Override - protected void generateBadTuples(OutputStream output, int num) throws IOException { + protected void generateBadTuples(OutputStream output, int num) throws IOException { byte[] junk = "<http://broken\n".getBytes(utf8); for (int i = 0; i < num; i++) { output.write(junk); http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileQuadInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileQuadInputFormatTests.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileQuadInputFormatTests.java index 8181148..50a468b 100644 --- a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileQuadInputFormatTests.java +++ 
b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileQuadInputFormatTests.java @@ -1,38 +1,38 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.jena.hadoop.rdf.io.input; import java.io.IOException; -import java.io.OutputStream; -import java.nio.charset.Charset; - -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.query.Dataset ; -import org.apache.jena.query.DatasetFactory ; -import org.apache.jena.rdf.model.Model ; -import org.apache.jena.rdf.model.ModelFactory ; -import org.apache.jena.rdf.model.Property ; -import org.apache.jena.rdf.model.Resource ; +import java.io.OutputStream; +import java.nio.charset.Charset; + +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.query.Dataset ; +import org.apache.jena.query.DatasetFactory ; +import org.apache.jena.rdf.model.Model ; +import org.apache.jena.rdf.model.ModelFactory ; +import org.apache.jena.rdf.model.Property ; +import org.apache.jena.rdf.model.Resource ; import org.apache.jena.riot.Lang; import org.apache.jena.riot.RDFDataMgr; import org.apache.jena.riot.RDFWriterRegistry; -import org.apache.jena.sparql.core.Quad ; +import org.apache.jena.sparql.core.Quad ; /** * Abstract tests for Quad input formats @@ -40,8 +40,8 @@ import org.apache.jena.sparql.core.Quad ; * * */ -public abstract class AbstractWholeFileQuadInputFormatTests extends AbstractNodeTupleInputFormatTests<Quad, QuadWritable> { - +public abstract class AbstractWholeFileQuadInputFormatTests extends AbstractNodeTupleInputFormatTests<Quad, QuadWritable> { + private static final Charset utf8 = Charset.forName("utf-8"); @Override @@ -92,7 +92,7 @@ public abstract class AbstractWholeFileQuadInputFormatTests extends AbstractNode // Write good data this.writeGoodTuples(output, num / 2); - // Write junk data + // Write junk data byte[] junk = "junk data\n".getBytes(utf8); for (int i = 0; i < num / 2; i++) { output.write(junk); @@ -103,7 +103,7 @@ public abstract class AbstractWholeFileQuadInputFormatTests extends AbstractNode } @Override - protected final void generateBadTuples(OutputStream output, int num) throws 
IOException { + protected final void generateBadTuples(OutputStream output, int num) throws IOException { byte[] junk = "junk data\n".getBytes(utf8); for (int i = 0; i < num; i++) { output.write(junk); http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java index 0a9d0f9..da3789a 100644 --- a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java +++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java @@ -1,33 +1,33 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.hadoop.rdf.io.input; import java.io.IOException; -import java.io.OutputStream; -import java.nio.charset.Charset; - -import org.apache.jena.graph.Triple ; -import org.apache.jena.hadoop.rdf.types.TripleWritable; -import org.apache.jena.rdf.model.Model ; -import org.apache.jena.rdf.model.ModelFactory ; -import org.apache.jena.rdf.model.Property ; -import org.apache.jena.rdf.model.Resource ; +import java.io.OutputStream; +import java.nio.charset.Charset; + +import org.apache.jena.graph.Triple ; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.rdf.model.Model ; +import org.apache.jena.rdf.model.ModelFactory ; +import org.apache.jena.rdf.model.Property ; +import org.apache.jena.rdf.model.Resource ; import org.apache.jena.riot.Lang; import org.apache.jena.riot.RDFDataMgr; @@ -37,8 +37,8 @@ import org.apache.jena.riot.RDFDataMgr; * * */ -public abstract class AbstractWholeFileTripleInputFormatTests extends AbstractNodeTupleInputFormatTests<Triple, TripleWritable> { - +public abstract class AbstractWholeFileTripleInputFormatTests extends AbstractNodeTupleInputFormatTests<Triple, TripleWritable> { + private static final Charset utf8 = Charset.forName("utf-8"); @Override @@ -48,7 +48,7 @@ public abstract class 
AbstractWholeFileTripleInputFormatTests extends AbstractNo private void writeTuples(Model m, OutputStream output) { RDFDataMgr.write(output, m, this.getRdfLanguage()); - } + } /** * Gets the RDF language to write out generate tuples in @@ -69,7 +69,7 @@ public abstract class AbstractWholeFileTripleInputFormatTests extends AbstractNo } this.writeTuples(m, output); output.close(); - } + } @Override protected final void generateMixedTuples(OutputStream output, int num) throws IOException { @@ -85,7 +85,7 @@ public abstract class AbstractWholeFileTripleInputFormatTests extends AbstractNo } this.writeTuples(m, output); - // Write junk data + // Write junk data byte[] junk = "junk data\n".getBytes(utf8); for (int i = 0; i < num / 2; i++) { output.write(junk); @@ -96,7 +96,7 @@ public abstract class AbstractWholeFileTripleInputFormatTests extends AbstractNo } @Override - protected final void generateBadTuples(OutputStream output, int num) throws IOException { + protected final void generateBadTuples(OutputStream output, int num) throws IOException { byte[] junk = "junk data\n".getBytes(utf8); for (int i = 0; i < num; i++) { output.write(junk); http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedNodeTupleInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedNodeTupleInputFormatTests.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedNodeTupleInputFormatTests.java index b34aa74..1f18a95 100644 --- a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedNodeTupleInputFormatTests.java +++ 
b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedNodeTupleInputFormatTests.java @@ -1,21 +1,21 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.jena.hadoop.rdf.io.input.compressed; import java.io.File; @@ -25,10 +25,10 @@ import java.io.OutputStream; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.jena.hadoop.rdf.io.HadoopIOConstants; -import org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests; -import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; - +import org.apache.jena.hadoop.rdf.io.HadoopIOConstants; +import org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests; +import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; + /** * @@ -37,8 +37,8 @@ import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; * @param <T> */ public abstract class AbstractCompressedNodeTupleInputFormatTests<TValue, T extends AbstractNodeTupleWritable<TValue>> extends - AbstractNodeTupleInputFormatTests<TValue, T> { - + AbstractNodeTupleInputFormatTests<TValue, T> { + @Override protected Configuration prepareConfiguration() { Configuration config = super.prepareConfiguration(); http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedQuadsInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedQuadsInputFormatTests.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedQuadsInputFormatTests.java index 06bdbf4..40f3733 100644 --- a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedQuadsInputFormatTests.java +++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedQuadsInputFormatTests.java 
@@ -1,29 +1,29 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.jena.hadoop.rdf.io.input.compressed; import java.io.IOException; import java.io.OutputStream; -import java.nio.charset.Charset; - -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.sparql.core.Quad ; +import java.nio.charset.Charset; + +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.sparql.core.Quad ; /** * Abstract tests for Quad input formats @@ -32,8 +32,8 @@ import org.apache.jena.sparql.core.Quad ; * */ public abstract class AbstractCompressedQuadsInputFormatTests extends - AbstractCompressedNodeTupleInputFormatTests<Quad, QuadWritable> { - + AbstractCompressedNodeTupleInputFormatTests<Quad, QuadWritable> { + private static final Charset utf8 = Charset.forName("utf-8"); @Override http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedTriplesInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedTriplesInputFormatTests.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedTriplesInputFormatTests.java index 675ccab..f5e3d5a 100644 --- a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedTriplesInputFormatTests.java +++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedTriplesInputFormatTests.java @@ -1,29 +1,29 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.jena.hadoop.rdf.io.input.compressed; import java.io.IOException; import java.io.OutputStream; -import java.nio.charset.Charset; - -import org.apache.jena.graph.Triple ; -import org.apache.jena.hadoop.rdf.types.TripleWritable; +import java.nio.charset.Charset; + +import org.apache.jena.graph.Triple ; +import org.apache.jena.hadoop.rdf.types.TripleWritable; /** * Abstract tests for Triple input formats @@ -32,8 +32,8 @@ import org.apache.jena.hadoop.rdf.types.TripleWritable; * */ public abstract class AbstractCompressedTriplesInputFormatTests extends - AbstractCompressedNodeTupleInputFormatTests<Triple, TripleWritable> { - + AbstractCompressedNodeTupleInputFormatTests<Triple, TripleWritable> { + private static final Charset utf8 = Charset.forName("utf-8"); @Override http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileQuadInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileQuadInputFormatTests.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileQuadInputFormatTests.java index ecd4616..029203b 100644 --- a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileQuadInputFormatTests.java +++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileQuadInputFormatTests.java @@ -1,45 +1,45 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.jena.hadoop.rdf.io.input.compressed; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.nio.charset.Charset; - +import java.nio.charset.Charset; + import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.jena.hadoop.rdf.io.HadoopIOConstants; -import org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests; -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.query.Dataset ; -import org.apache.jena.query.DatasetFactory ; -import org.apache.jena.rdf.model.Model ; -import org.apache.jena.rdf.model.ModelFactory ; -import org.apache.jena.rdf.model.Property ; -import org.apache.jena.rdf.model.Resource ; +import org.apache.jena.hadoop.rdf.io.HadoopIOConstants; +import org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.query.Dataset ; +import org.apache.jena.query.DatasetFactory ; +import org.apache.jena.rdf.model.Model ; +import org.apache.jena.rdf.model.ModelFactory ; +import org.apache.jena.rdf.model.Property ; +import org.apache.jena.rdf.model.Resource ; import org.apache.jena.riot.Lang; import org.apache.jena.riot.RDFDataMgr; -import org.apache.jena.riot.RDFWriterRegistry; -import org.apache.jena.sparql.core.Quad ; +import org.apache.jena.riot.RDFWriterRegistry; +import org.apache.jena.sparql.core.Quad ; /** * Abstract tests for compressed whole file quad formats @@ -47,8 +47,8 @@ import org.apache.jena.sparql.core.Quad ; * */ public abstract class AbstractCompressedWholeFileQuadInputFormatTests extends - AbstractNodeTupleInputFormatTests<Quad, QuadWritable> { - + AbstractNodeTupleInputFormatTests<Quad, QuadWritable> { + private static final Charset utf8 = Charset.forName("utf-8"); @Override @@ -127,7 +127,7 @@ public abstract 
class AbstractCompressedWholeFileQuadInputFormatTests extends // Write good data this.writeGoodTuples(output, num / 2); - // Write junk data + // Write junk data byte[] junk = "junk data\n".getBytes(utf8); for (int i = 0; i < num / 2; i++) { output.write(junk); @@ -138,7 +138,7 @@ public abstract class AbstractCompressedWholeFileQuadInputFormatTests extends } @Override - protected final void generateBadTuples(OutputStream output, int num) throws IOException { + protected final void generateBadTuples(OutputStream output, int num) throws IOException { byte[] junk = "junk data\n".getBytes(utf8); for (int i = 0; i < num; i++) { output.write(junk); http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileTripleInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileTripleInputFormatTests.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileTripleInputFormatTests.java index a55729c..f6454ea 100644 --- a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileTripleInputFormatTests.java +++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileTripleInputFormatTests.java @@ -1,143 +1,143 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.input.compressed; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.nio.charset.Charset; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.jena.graph.Triple ; -import org.apache.jena.hadoop.rdf.io.HadoopIOConstants; -import org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests; -import org.apache.jena.hadoop.rdf.types.TripleWritable; -import org.apache.jena.rdf.model.Model ; -import org.apache.jena.rdf.model.ModelFactory ; -import org.apache.jena.rdf.model.Property ; -import org.apache.jena.rdf.model.Resource ; -import org.apache.jena.riot.Lang; -import org.apache.jena.riot.RDFDataMgr; - -/** - * Abstract tests for compressed whole file triple formats - * - * - */ -public abstract class AbstractCompressedWholeFileTripleInputFormatTests extends - AbstractNodeTupleInputFormatTests<Triple, TripleWritable> { - - private static final Charset utf8 = Charset.forName("utf-8"); - - @Override - protected Configuration prepareConfiguration() { - Configuration config = super.prepareConfiguration(); - config.set(HadoopIOConstants.IO_COMPRESSION_CODECS, this.getCompressionCodec().getClass().getCanonicalName()); - return config; - } - - @Override - protected OutputStream getOutputStream(File f) throws IOException { - CompressionCodec codec = this.getCompressionCodec(); - if (codec instanceof Configurable) { - 
((Configurable) codec).setConf(this.prepareConfiguration()); - } - FileOutputStream fileOutput = new FileOutputStream(f, false); - return codec.createOutputStream(fileOutput); - } - - /** - * Gets the compression codec to use - * - * @return Compression codec - */ - protected abstract CompressionCodec getCompressionCodec(); - - /** - * Indicates whether inputs can be split, defaults to false for compressed - * input tests - */ - @Override - protected boolean canSplitInputs() { - return false; - } - - private void writeTuples(Model m, OutputStream output) { - RDFDataMgr.write(output, m, this.getRdfLanguage()); - } - - /** - * Gets the RDF language to write out generated tuples in - * - * @return RDF language - */ - protected abstract Lang getRdfLanguage(); - - @Override - protected final void generateTuples(OutputStream output, int num) throws IOException { - Model m = ModelFactory.createDefaultModel(); - Resource currSubj = m.createResource("http://example.org/subjects/0"); - Property predicate = m.createProperty("http://example.org/predicate"); - for (int i = 0; i < num; i++) { - if (i % 10 == 0) { - currSubj = m.createResource("http://example.org/subjects/" + (i / 10)); - } - m.add(currSubj, predicate, m.createTypedLiteral(i)); - } - this.writeTuples(m, output); - output.close(); - } - - @Override - protected final void generateMixedTuples(OutputStream output, int num) throws IOException { - // Write good data - Model m = ModelFactory.createDefaultModel(); - Resource currSubj = m.createResource("http://example.org/subjects/0"); - Property predicate = m.createProperty("http://example.org/predicate"); - for (int i = 0; i < num / 2; i++) { - if (i % 10 == 0) { - currSubj = m.createResource("http://example.org/subjects/" + (i / 10)); - } - m.add(currSubj, predicate, m.createTypedLiteral(i)); - } - this.writeTuples(m, output); - - // Write junk data - byte[] junk = "junk data\n".getBytes(utf8); - for (int i = 0; i < num / 2; i++) { - output.write(junk); - } - - 
output.flush(); - output.close(); - } - - @Override - protected final void generateBadTuples(OutputStream output, int num) throws IOException { - byte[] junk = "junk data\n".getBytes(utf8); - for (int i = 0; i < num; i++) { - output.write(junk); - } - output.flush(); - output.close(); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.io.input.compressed; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.Charset; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.jena.graph.Triple ; +import org.apache.jena.hadoop.rdf.io.HadoopIOConstants; +import org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.rdf.model.Model ; +import org.apache.jena.rdf.model.ModelFactory ; +import org.apache.jena.rdf.model.Property ; +import org.apache.jena.rdf.model.Resource ; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RDFDataMgr; + +/** + * Abstract tests for compressed whole file triple formats + * + * + */ +public abstract class AbstractCompressedWholeFileTripleInputFormatTests extends + AbstractNodeTupleInputFormatTests<Triple, TripleWritable> { + + private static final Charset utf8 = Charset.forName("utf-8"); + + @Override + protected Configuration prepareConfiguration() { + Configuration config = super.prepareConfiguration(); + config.set(HadoopIOConstants.IO_COMPRESSION_CODECS, this.getCompressionCodec().getClass().getCanonicalName()); + return config; + } + + @Override + protected OutputStream getOutputStream(File f) throws IOException { + CompressionCodec codec = this.getCompressionCodec(); + if (codec instanceof Configurable) { + ((Configurable) codec).setConf(this.prepareConfiguration()); + } + FileOutputStream fileOutput = new FileOutputStream(f, false); + return codec.createOutputStream(fileOutput); + } + + /** + * Gets the compression codec to use + * + * @return Compression codec + */ + protected abstract CompressionCodec getCompressionCodec(); + + /** + * Indicates whether inputs can be split, defaults to false for compressed + * input 
tests + */ + @Override + protected boolean canSplitInputs() { + return false; + } + + private void writeTuples(Model m, OutputStream output) { + RDFDataMgr.write(output, m, this.getRdfLanguage()); + } + + /** + * Gets the RDF language to write out generated tuples in + * + * @return RDF language + */ + protected abstract Lang getRdfLanguage(); + + @Override + protected final void generateTuples(OutputStream output, int num) throws IOException { + Model m = ModelFactory.createDefaultModel(); + Resource currSubj = m.createResource("http://example.org/subjects/0"); + Property predicate = m.createProperty("http://example.org/predicate"); + for (int i = 0; i < num; i++) { + if (i % 10 == 0) { + currSubj = m.createResource("http://example.org/subjects/" + (i / 10)); + } + m.add(currSubj, predicate, m.createTypedLiteral(i)); + } + this.writeTuples(m, output); + output.close(); + } + + @Override + protected final void generateMixedTuples(OutputStream output, int num) throws IOException { + // Write good data + Model m = ModelFactory.createDefaultModel(); + Resource currSubj = m.createResource("http://example.org/subjects/0"); + Property predicate = m.createProperty("http://example.org/predicate"); + for (int i = 0; i < num / 2; i++) { + if (i % 10 == 0) { + currSubj = m.createResource("http://example.org/subjects/" + (i / 10)); + } + m.add(currSubj, predicate, m.createTypedLiteral(i)); + } + this.writeTuples(m, output); + + // Write junk data + byte[] junk = "junk data\n".getBytes(utf8); + for (int i = 0; i < num / 2; i++) { + output.write(junk); + } + + output.flush(); + output.close(); + } + + @Override + protected final void generateBadTuples(OutputStream output, int num) throws IOException { + byte[] junk = "junk data\n".getBytes(utf8); + for (int i = 0; i < num; i++) { + output.write(junk); + } + output.flush(); + output.close(); + } +}
