http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/thrift/ThriftTripleInputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/thrift/ThriftTripleInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/thrift/ThriftTripleInputFormat.java deleted file mode 100644 index b60380d..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/thrift/ThriftTripleInputFormat.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.input.thrift; - -import java.io.IOException; - -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat; -import org.apache.jena.hadoop.rdf.io.input.readers.thrift.ThriftTripleReader; -import org.apache.jena.hadoop.rdf.types.TripleWritable; - -public class ThriftTripleInputFormat extends AbstractWholeFileInputFormat<LongWritable, TripleWritable> { - - @Override - public RecordReader<LongWritable, TripleWritable> createRecordReader(InputSplit split, TaskAttemptContext context) - throws IOException, InterruptedException { - return new ThriftTripleReader(); - } - -}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trig/TriGInputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trig/TriGInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trig/TriGInputFormat.java deleted file mode 100644 index 0b36e93..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trig/TriGInputFormat.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.input.trig; - -import java.io.IOException; - -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat; -import org.apache.jena.hadoop.rdf.io.input.readers.trig.TriGReader; -import org.apache.jena.hadoop.rdf.types.QuadWritable; - - -/** - * Input format for TriG - * - * - * - */ -public class TriGInputFormat extends AbstractWholeFileInputFormat<LongWritable, QuadWritable> { - - @Override - public RecordReader<LongWritable, QuadWritable> createRecordReader(InputSplit split, TaskAttemptContext context) - throws IOException, InterruptedException { - return new TriGReader(); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trix/TriXInputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trix/TriXInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trix/TriXInputFormat.java deleted file mode 100644 index 723c5c3..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trix/TriXInputFormat.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.input.trix; - -import java.io.IOException; - -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat; -import org.apache.jena.hadoop.rdf.io.input.readers.trix.TriXReader; -import org.apache.jena.hadoop.rdf.types.QuadWritable; - -/** - * Input format for TriX - */ -public class TriXInputFormat extends AbstractWholeFileInputFormat<LongWritable, QuadWritable> { - - @Override - public RecordReader<LongWritable, QuadWritable> createRecordReader(InputSplit split, TaskAttemptContext context) - throws IOException, InterruptedException { - return new TriXReader(); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/turtle/TurtleInputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/turtle/TurtleInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/turtle/TurtleInputFormat.java deleted file mode 100644 index c7771b6..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/turtle/TurtleInputFormat.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.input.turtle; - -import java.io.IOException; - -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat; -import org.apache.jena.hadoop.rdf.io.input.readers.turtle.TurtleReader; -import org.apache.jena.hadoop.rdf.types.TripleWritable; - - -/** - * Turtle input format - * - * - * - */ -public class TurtleInputFormat extends AbstractWholeFileInputFormat<LongWritable, TripleWritable> { - - @Override - public RecordReader<LongWritable, TripleWritable> createRecordReader(InputSplit split, TaskAttemptContext context) - throws IOException, InterruptedException { - return new TurtleReader(); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/BlockInputStream.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/BlockInputStream.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/BlockInputStream.java deleted file mode 100644 index a9e692e..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/BlockInputStream.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.input.util; - -import java.io.IOException; -import java.io.InputStream; - -/** - * A block input stream which can is a wrapper around another input stream which - * restricts reading to a specific number of bytes and can report the number of - * bytes read - * <p> - * The class assumes that the underlying input stream has already been seeked to - * the appropriate start point - * </p> - * - * - * - */ -public final class BlockInputStream extends TrackedInputStream { - - private long limit = Long.MAX_VALUE; - - /** - * Creates a new tracked input stream - * - * @param input - * Input stream to track - * @param limit - * Maximum number of bytes to read from the stream - */ - public BlockInputStream(InputStream input, long limit) { - super(input); - if (limit < 0) - throw new IllegalArgumentException("limit must be >= 0"); - this.limit = limit; - } - - @Override - public int read() throws IOException { - if (this.bytesRead >= this.limit) { - return -1; - } - return super.read(); - } - - @Override - public int available() throws IOException { - if (this.bytesRead >= this.limit) { - return 0; - } - return super.available(); - } - - @Override - public int read(byte[] b, int off, int len) throws IOException { - if (len == 0) { - return 0; - } else if (this.bytesRead >= this.limit) { - return -1; - } else if (len > this.limit - this.bytesRead) { - len = (int) (this.limit - this.bytesRead); - } - return super.read(b, off, len); - } - - @Override - public long skip(long n) throws IOException { - if (n == 0) { - return 0; - } else if (this.bytesRead >= this.limit) { - return -1; - } else if (n > this.limit - this.bytesRead) { - n = this.limit - this.bytesRead; - } - return super.skip(n); - } -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/RdfIOUtils.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/RdfIOUtils.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/RdfIOUtils.java deleted file mode 100644 index 372b22c..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/RdfIOUtils.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.input.util; - -import java.util.UUID; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.jena.hadoop.rdf.io.RdfIOConstants; -import org.apache.jena.riot.lang.LabelToNode; -import org.apache.jena.riot.system.ErrorHandlerFactory; -import org.apache.jena.riot.system.IRIResolver; -import org.apache.jena.riot.system.ParserProfile; -import org.apache.jena.riot.system.ParserProfileBase; -import org.apache.jena.riot.system.Prologue; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * RDF IO utility functions - * - * - * - */ -public class RdfIOUtils { - private static final Logger LOGGER = LoggerFactory.getLogger(RdfIOUtils.class); - - /** - * Private constructor prevents instantiation - */ - private RdfIOUtils() { - } - - /** - * Creates a parser profile for the given job context - * - * @param context - * Context - * @param path - * File path - * @return Parser profile - */ - public static ParserProfile createParserProfile(JobContext context, Path path) { - Prologue prologue = new Prologue(null, IRIResolver.createNoResolve()); - UUID seed = RdfIOUtils.getSeed(context, path); - LabelToNode labelMapping = LabelToNode.createScopeByDocumentHash(seed); - return new ParserProfileBase(prologue, ErrorHandlerFactory.errorHandlerStd, labelMapping); - } - - /** - * Selects a seed for use in generating blank node identifiers - * - * @param context - * Job Context - * @param path - * File path - * @return Seed - */ - public static UUID getSeed(JobContext context, Path path) { - // This is to ensure that blank node allocation policy is constant when - // subsequent MapReduce jobs need that - String jobId = context.getJobID().toString(); - if (jobId == null) { - jobId = String.valueOf(System.currentTimeMillis()); - LOGGER.warn( - "Job ID was not set, using current milliseconds of {}. Sequence of MapReduce jobs must carefully handle blank nodes.", - jobId); - } - - if (!context.getConfiguration().getBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, false)) { - // Using normal file scoped blank node allocation - LOGGER.debug("Generating Blank Node Seed from Job Details (ID={}, Input Path={})", jobId, path); - - // Form a reproducible seed for the run - return new UUID(jobId.hashCode(), path.hashCode()); - } else { - // Using globally scoped blank node allocation - LOGGER.warn( - "Using globally scoped blank node allocation policy from Job Details (ID={}) - this is unsafe if your RDF inputs did not originate from a previous job", - jobId); - - return new UUID(jobId.hashCode(), 0); - } - } -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackableInputStream.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackableInputStream.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackableInputStream.java deleted file mode 100644 index 92e2df5..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackableInputStream.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.input.util; - -import java.io.InputStream; - -/** - * An input stream that tracks the number of bytes read - * - * - * - */ -public abstract class TrackableInputStream extends InputStream { - - /** - * Gets the number of bytes read - * - * @return Number of bytes read - */ - public abstract long getBytesRead(); - -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStream.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStream.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStream.java deleted file mode 100644 index e51a866..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStream.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.input.util; - -import java.io.IOException; -import java.io.InputStream; - -/** - * A tracked input stream which can is a wrapper around another input stream and - * can report the number of bytes read - * - * - * - */ -public class TrackedInputStream extends TrackableInputStream { - - protected InputStream input; - protected long bytesRead = 0, lastMark; - - /** - * Creates a new tracked input stream - * - * @param input - * Input stream to track - */ - public TrackedInputStream(InputStream input) { - if (input == null) - throw new NullPointerException("Input cannot be null"); - this.input = input; - } - - @Override - public int read() throws IOException { - int read = this.input.read(); - if (read >= 0) - this.bytesRead++; - return read; - } - - @Override - public long getBytesRead() { - return this.bytesRead; - } - - @Override - public void close() throws IOException { - this.input.close(); - } - - @Override - public int available() throws IOException { - return this.input.available(); - } - - @Override - public synchronized void mark(int readlimit) { - this.input.mark(readlimit); - this.lastMark = this.bytesRead; - } - - @Override - public boolean markSupported() { - return this.input.markSupported(); - } - - @Override - public int read(byte[] b, int off, int len) throws IOException { - if (len == 0) return 0; - int read = this.input.read(b, off, len); - if (read > 0) - this.bytesRead += read; - return read; - } - - @Override - public int read(byte[] b) throws IOException { - return this.read(b, 0, b.length); - } - - @Override - public synchronized void reset() throws IOException { - this.input.reset(); - this.bytesRead = this.lastMark; - } - - @Override - public long skip(long n) throws IOException { - if (n == 0) - return 0; - long skipped = 0; - byte[] buffer = new byte[16]; - int readSize = Math.min(buffer.length, n > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) n); - int read; - do { - if (n - skipped > readSize) { - read = this.input.read(buffer, 0, readSize); - } else { - read = this.input.read(buffer, 0, (int) (n - skipped)); - } - if (read > 0) { - this.bytesRead += read; - skipped += read; - } - } while (skipped < n && read >= 0); - - return skipped; - } -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedQuadsStream.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedQuadsStream.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedQuadsStream.java deleted file mode 100644 index 845c709..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedQuadsStream.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.input.util; - -import org.apache.jena.riot.lang.PipedRDFIterator; - -import com.hp.hpl.jena.graph.Triple; -import com.hp.hpl.jena.sparql.core.Quad; - -/** - * A tracked piped quads stream - * - * - * - */ -public class TrackedPipedQuadsStream extends TrackedPipedRDFStream<Quad> { - - /** - * Creates a new stream - * - * @param sink - * Sink - * @param input - * Input stream - */ - public TrackedPipedQuadsStream(PipedRDFIterator<Quad> sink, TrackableInputStream input) { - super(sink, input); - } - - @Override - public void triple(Triple triple) { - // Triples are discarded - } - - @Override - public void quad(Quad quad) { - this.receive(quad); - } -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedRDFStream.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedRDFStream.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedRDFStream.java deleted file mode 100644 index 6e910be..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedRDFStream.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.input.util; - -import java.util.LinkedList; -import java.util.Queue; - -import org.apache.jena.riot.lang.PipedRDFIterator; -import org.apache.jena.riot.lang.PipedRDFStream; - -/** - * A tracked piped RDF stream - * - * - * - * @param <T> - * Type corresponding to a supported RDF primitive - */ -public abstract class TrackedPipedRDFStream<T> extends PipedRDFStream<T> { - - private TrackableInputStream input; - private Queue<Long> positions = new LinkedList<Long>(); - - protected TrackedPipedRDFStream(PipedRDFIterator<T> sink, TrackableInputStream input) { - super(sink); - this.input = input; - } - - @Override - protected void receive(T t) { - // Track positions the input stream is at as we receive inputs - synchronized (this.positions) { - this.positions.add(this.input.getBytesRead()); - } - super.receive(t); - } - - /** - * Gets the next position - * - * @return Position - */ - public Long getPosition() { - synchronized (this.positions) { - return this.positions.poll(); - } - } -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedTriplesStream.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedTriplesStream.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedTriplesStream.java deleted file mode 100644 index 2040c4f..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedTriplesStream.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.input.util; - -import org.apache.jena.riot.lang.PipedRDFIterator; - -import com.hp.hpl.jena.graph.Triple; -import com.hp.hpl.jena.sparql.core.Quad; - -/** - * A tracked piped triples stream - * - * - * - */ -public class TrackedPipedTriplesStream extends TrackedPipedRDFStream<Triple> { - - /** - * Creates a tracked triples stream - * - * @param sink - * Sink - * @param input - * Input stream - */ - public TrackedPipedTriplesStream(PipedRDFIterator<Triple> sink, TrackableInputStream input) { - super(sink, input); - } - - @Override - public void triple(Triple triple) { - receive(triple); - } - - @Override - public void quad(Quad quad) { - // Quads are discarded - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractBatchedNodeTupleOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractBatchedNodeTupleOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractBatchedNodeTupleOutputFormat.java deleted file mode 100644 index 02fbf9c..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractBatchedNodeTupleOutputFormat.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output; - -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.RdfIOConstants; -import org.apache.jena.hadoop.rdf.io.output.writers.AbstractBatchedNodeTupleWriter; -import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; - - -/** - * Abstract output format for formats that use a - * {@link AbstractBatchedNodeTupleWriter} as their writer - * - * - * - * @param <TKey> - * Key type - * @param <TTuple> - * Tuple type - * @param <TValue> - * Writable tuple type i.e. the value type - */ -public abstract class AbstractBatchedNodeTupleOutputFormat<TKey, TTuple, TValue extends AbstractNodeTupleWritable<TTuple>> extends - AbstractNodeTupleOutputFormat<TKey, TTuple, TValue> { - - @Override - protected RecordWriter<TKey, TValue> getRecordWriter(Writer writer, Configuration config, Path outputPath) { - long batchSize = config.getLong(RdfIOConstants.OUTPUT_BATCH_SIZE, RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE); - return this.getRecordWriter(writer, batchSize); - } - - protected abstract RecordWriter<TKey, TValue> getRecordWriter(Writer writer, long batchSize); - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeOutputFormat.java deleted file mode 100644 index cfc98bd..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeOutputFormat.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output; - -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.jena.hadoop.rdf.types.NodeWritable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * Abstract output format which takes pairs with Node keys and arbitrary values - * and writes them as a simple line based text file - * - * - * - * @param <TValue> Value type - */ -public abstract class AbstractNodeOutputFormat<TValue> extends FileOutputFormat<NodeWritable, TValue> { - - private static final Logger LOG = LoggerFactory.getLogger(AbstractNodeOutputFormat.class); - - @Override - public RecordWriter<NodeWritable, TValue> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { - Configuration config = context.getConfiguration(); - boolean isCompressed = getCompressOutput(context); - CompressionCodec codec = null; - String extension = this.getFileExtension(); - if (isCompressed) { - Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class); - codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, config); - extension += codec.getDefaultExtension(); - } - Path file = getDefaultWorkFile(context, extension); - LOG.info("Writing output to file " + file); - FileSystem fs = file.getFileSystem(config); - if (!isCompressed) { - FSDataOutputStream fileOut = fs.create(file, false); - return this.getRecordWriter(new OutputStreamWriter(fileOut), config); - } else { - FSDataOutputStream fileOut = fs.create(file, false); - return this.getRecordWriter(new OutputStreamWriter(codec.createOutputStream(fileOut)), config); - } - } - - /** - * Gets the file extension to use for output - * - * @return File extension including the '.' - */ - protected String getFileExtension() { - return ".nodes"; - } - - /** - * Gets the record writer to use - * - * @param writer - * Writer to write output to - * @param config - * Configuration - * @return Record writer - */ - protected abstract RecordWriter<NodeWritable, TValue> getRecordWriter(Writer writer, Configuration config); -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormat.java deleted file mode 100644 index c4a34f5..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormat.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output; - -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * An abstract implementation of an output format for line based tuple formats - * where the key is ignored and only the tuple values will be output - * - * - * @param <TKey> - * Key type - * @param <TValue> - * Tuple value type - * @param <T> - * Writable node tuple type - * - */ -public abstract class AbstractNodeTupleOutputFormat<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends - FileOutputFormat<TKey, T> { - - private static final Logger LOG = LoggerFactory.getLogger(AbstractNodeTupleOutputFormat.class); - - @Override - public RecordWriter<TKey, T> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { - Configuration config = context.getConfiguration(); - boolean isCompressed = getCompressOutput(context); - CompressionCodec codec = null; - - // Build the output file path - String extension = this.getFileExtension(); - if (isCompressed) { - // Add compression extension if applicable - Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class); - codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, config); - extension += codec.getDefaultExtension(); - } - Path file = getDefaultWorkFile(context, extension); - LOG.info("Writing output to file " + file); - - // Open the file appropriately and create a record writer for it - FileSystem fs = file.getFileSystem(config); - if (!isCompressed) { - FSDataOutputStream fileOut = fs.create(file, false); - return this.getRecordWriter(new OutputStreamWriter(fileOut), config, file); - } else { - FSDataOutputStream fileOut = fs.create(file, false); - return this.getRecordWriter(new OutputStreamWriter(codec.createOutputStream(fileOut)), config, file); - } - } - - /** - * Gets the file extension to use for output - * - * @return File extension including the '.' - */ - protected abstract String getFileExtension(); - - /** - * Gets the record writer to use - * - * @param writer - * Writer to write output to - * @param config - * Configuration - * @param outputPath - * Output path being written to - * @return Record writer - * @throws IOException - * May be thrown if a record writer cannot be obtained for any - * reason - */ - protected abstract RecordWriter<TKey, T> getRecordWriter(Writer writer, Configuration config, Path outputPath) - throws IOException; - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractStreamRdfNodeTupleOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractStreamRdfNodeTupleOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractStreamRdfNodeTupleOutputFormat.java deleted file mode 100644 index 30999ae..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractStreamRdfNodeTupleOutputFormat.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output; - -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; -import org.apache.jena.riot.system.StreamRDF; - -/** - * Abstract output format for formats that use the RIOT {@link StreamRDF} API to - * stream the writes - * - * @param <TKey> - * Key type - * @param <TTuple> - * Tuple type - * @param <TValue> - * Writable tuple type i.e. the value type - */ -public abstract class AbstractStreamRdfNodeTupleOutputFormat<TKey, TTuple, TValue extends AbstractNodeTupleWritable<TTuple>> - extends AbstractNodeTupleOutputFormat<TKey, TTuple, TValue> { - - @Override - protected RecordWriter<TKey, TValue> getRecordWriter(Writer writer, Configuration config, Path outputPath) { - return getRecordWriter(getStream(writer, config), writer, config); - } - - /** - * Gets a writer which provides a bridge between the {@link RecordWriter} - * and {@link StreamRDF} APIs - * - * @param stream - * RDF Stream - * @param writer - * Writer - * @param config - * Configuration - * @return Record Writer - */ - protected abstract RecordWriter<TKey, TValue> getRecordWriter(StreamRDF stream, Writer writer, Configuration config); - - /** - * Gets a {@link StreamRDF} to which the tuples to be output should be - * passed - * - * @param writer - * Writer - * @param config - * Configuration - * @return RDF Stream - */ - protected abstract StreamRDF getStream(Writer writer, Configuration config); -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/QuadsOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/QuadsOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/QuadsOutputFormat.java deleted file mode 100644 index cc9fe2f..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/QuadsOutputFormat.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.jena.hadoop.rdf.io.output; - -import java.io.IOException; -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.registry.HadoopRdfIORegistry; -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.riot.Lang; -import org.apache.jena.riot.RDFLanguages; - -import com.hp.hpl.jena.sparql.core.Quad; - -/** - * An output format for RDF quads that dynamically selects the appropriate quad - * writer to use based on the file extension of the output file. - * <p> - * For example this is useful when the output format may be controlled by a user - * supplied filename i.e. the desired RDF output format is not precisely known - * in advance - * </p> - * - * @param <TKey> - * Key type - */ -public abstract class QuadsOutputFormat<TKey> extends AbstractNodeTupleOutputFormat<TKey, Quad, QuadWritable> { - - @Override - protected RecordWriter<TKey, QuadWritable> getRecordWriter(Writer writer, Configuration config, Path outputPath) - throws IOException { - Lang lang = RDFLanguages.filenameToLang(outputPath.getName()); - if (lang == null) - throw new IOException("There is no registered RDF language for the output file " + outputPath.toString()); - - if (!RDFLanguages.isQuads(lang)) - throw new IOException( - lang.getName() - + " is not a RDF quads format, perhaps you wanted TriplesOutputFormat or TriplesOrQuadsOutputFormat instead?"); - - // This will throw an appropriate error if the language does not support - // writing quads - return HadoopRdfIORegistry.<TKey> createQuadWriter(lang, writer, config); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/TriplesOrQuadsOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/TriplesOrQuadsOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/TriplesOrQuadsOutputFormat.java deleted file mode 100644 index 3eaf0d7..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/TriplesOrQuadsOutputFormat.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.jena.hadoop.rdf.io.output; - -import java.io.IOException; -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.output.writers.QuadsToTriplesWriter; -import org.apache.jena.hadoop.rdf.io.registry.HadoopRdfIORegistry; -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.riot.Lang; -import org.apache.jena.riot.RDFLanguages; - -import com.hp.hpl.jena.sparql.core.Quad; - -/** - * An output format for RDF triples/quads that dynamically selects the - * appropriate triple/quad writer to use based on the file extension of the - * output file. - * <p> - * For example this is useful when the output format may be controlled by a user - * supplied filename i.e. the desired RDF output format is not precisely known - * in advance. - * </p> - * <h3>Warning</h3> - * <p> - * Where the format is determined to be triples the quads are converted into - * triples are thus will lose any graph information that might be carried. - * </p> - * - * @param <TKey> - * Key type - */ -public abstract class TriplesOrQuadsOutputFormat<TKey> extends AbstractNodeTupleOutputFormat<TKey, Quad, QuadWritable> { - - @Override - protected RecordWriter<TKey, QuadWritable> getRecordWriter(Writer writer, Configuration config, Path outputPath) - throws IOException { - Lang lang = RDFLanguages.filenameToLang(outputPath.getName()); - if (lang == null) - throw new IOException("There is no registered RDF language for the output file " + outputPath.toString()); - - if (!RDFLanguages.isQuads(lang) && !RDFLanguages.isTriples(lang)) - throw new IOException(lang.getName() + " is not a RDF triples/quads format"); - - if (HadoopRdfIORegistry.hasQuadWriter(lang)) { - // Supports quads directly - return HadoopRdfIORegistry.<TKey> createQuadWriter(lang, writer, config); - } else { - // Try to create a triples writer and wrap downwards from quads - // This will throw an error if a triple writer is not available - return new QuadsToTriplesWriter<TKey>(HadoopRdfIORegistry.<TKey> createTripleWriter(lang, writer, config)); - } - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/TriplesOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/TriplesOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/TriplesOutputFormat.java deleted file mode 100644 index d9d4189..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/TriplesOutputFormat.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.jena.hadoop.rdf.io.output; - -import java.io.IOException; -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.registry.HadoopRdfIORegistry; -import org.apache.jena.hadoop.rdf.types.TripleWritable; -import org.apache.jena.riot.Lang; -import org.apache.jena.riot.RDFLanguages; - -import com.hp.hpl.jena.graph.Triple; - -/** - * An output format for RDF triples that dynamically selects the appropriate triple - * writer to use based on the file extension of the output file. - * <p> - * For example this is useful when the output format may be controlled by a user - * supplied filename i.e. the desired RDF output format is not precisely known - * in advance - * </p> - * - * @param <TKey> - * Key type - */ -public abstract class TriplesOutputFormat<TKey> extends AbstractNodeTupleOutputFormat<TKey, Triple, TripleWritable> { - - @Override - protected RecordWriter<TKey, TripleWritable> getRecordWriter(Writer writer, Configuration config, Path outputPath) throws IOException { - Lang lang = RDFLanguages.filenameToLang(outputPath.getName()); - if (lang == null) - throw new IOException("There is no registered RDF language for the output file " + outputPath.toString()); - - if (!RDFLanguages.isTriples(lang)) throw new IOException( - lang.getName() - + " is not a RDF triples format, perhaps you wanted QuadsOutputFormat or TriplesOrQuadsOutputFormat instead?"); - - // This will throw an appropriate error if the language does not support writing triples - return HadoopRdfIORegistry.<TKey>createTripleWriter(lang, writer, config); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLDQuadOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLDQuadOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLDQuadOutputFormat.java deleted file mode 100644 index 8f4797a..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLDQuadOutputFormat.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output.jsonld; - -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.writers.jsonld.JsonLDQuadWriter; -import org.apache.jena.hadoop.rdf.types.QuadWritable; - -import com.hp.hpl.jena.sparql.core.Quad; - -public class JsonLDQuadOutputFormat<TKey> extends AbstractNodeTupleOutputFormat<TKey, Quad, QuadWritable> { - - @Override - protected String getFileExtension() { - return ".jsonld"; - } - - @Override - protected RecordWriter<TKey, QuadWritable> getRecordWriter(Writer writer, Configuration config, Path outputPath) { - return new JsonLDQuadWriter<TKey>(writer); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLDTripleOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLDTripleOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLDTripleOutputFormat.java deleted file mode 100644 index a8cbeac..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/jsonld/JsonLDTripleOutputFormat.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output.jsonld; - -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.writers.jsonld.JsonLDTripleWriter; -import org.apache.jena.hadoop.rdf.types.TripleWritable; - -import com.hp.hpl.jena.graph.Triple; - -public class JsonLDTripleOutputFormat<TKey> extends AbstractNodeTupleOutputFormat<TKey, Triple, TripleWritable> { - - @Override - protected String getFileExtension() { - return ".jsonld"; - } - - @Override - protected RecordWriter<TKey, TripleWritable> getRecordWriter(Writer writer, Configuration config, Path outputPath) { - return new JsonLDTripleWriter<TKey>(writer); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/nquads/NQuadsOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/nquads/NQuadsOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/nquads/NQuadsOutputFormat.java deleted file mode 100644 index a8ab017..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/nquads/NQuadsOutputFormat.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output.nquads; - -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.writers.nquads.NQuadsWriter; -import org.apache.jena.hadoop.rdf.types.QuadWritable; - -import com.hp.hpl.jena.sparql.core.Quad; - -/** - * NQuads output format - * - * - * - * @param <TKey> - * Key type - */ -public class NQuadsOutputFormat<TKey> extends AbstractNodeTupleOutputFormat<TKey, Quad, QuadWritable> { - - @Override - protected RecordWriter<TKey, QuadWritable> getRecordWriter(Writer writer, Configuration config, Path outputPath) { - return new NQuadsWriter<TKey>(writer); - } - - @Override - protected String getFileExtension() { - return ".nq"; - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesNodeOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesNodeOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesNodeOutputFormat.java deleted file mode 100644 index 56935bb..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesNodeOutputFormat.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output.ntriples; - -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.output.AbstractNodeOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.writers.ntriples.NTriplesNodeWriter; -import org.apache.jena.hadoop.rdf.types.NodeWritable; - - -/** - * NTriples based node output format - * - * - * - * @param <TValue> - * Value type - */ -public class NTriplesNodeOutputFormat<TValue> extends AbstractNodeOutputFormat<TValue> { - - @Override - protected RecordWriter<NodeWritable, TValue> getRecordWriter(Writer writer, Configuration config) { - return new NTriplesNodeWriter<TValue>(writer); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesOutputFormat.java deleted file mode 100644 index 51b9b75..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesOutputFormat.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output.ntriples; - -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.writers.ntriples.NTriplesWriter; -import org.apache.jena.hadoop.rdf.types.TripleWritable; - -import com.hp.hpl.jena.graph.Triple; - -/** - * NTriples output format - * - * - * @param <TKey> - * - */ -public class NTriplesOutputFormat<TKey> extends AbstractNodeTupleOutputFormat<TKey, Triple, TripleWritable> { - - @Override - protected RecordWriter<TKey, TripleWritable> getRecordWriter(Writer writer, Configuration config, Path outputPath) { - return new NTriplesWriter<TKey>(writer); - } - - @Override - protected String getFileExtension() { - return ".nt"; - } - - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfjson/RdfJsonOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfjson/RdfJsonOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfjson/RdfJsonOutputFormat.java deleted file mode 100644 index e5fa114..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfjson/RdfJsonOutputFormat.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output.rdfjson; - -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.writers.rdfjson.RdfJsonWriter; -import org.apache.jena.hadoop.rdf.types.TripleWritable; - -import com.hp.hpl.jena.graph.Triple; - -/** - * RDF/JSON output format - * - * - * - * @param <TKey> - * Key type - */ -public class RdfJsonOutputFormat<TKey> extends AbstractNodeTupleOutputFormat<TKey, Triple, TripleWritable> { - - @Override - protected String getFileExtension() { - return ".rj"; - } - - @Override - protected RecordWriter<TKey, TripleWritable> getRecordWriter(Writer writer, Configuration config, Path outputPath) { - return new RdfJsonWriter<TKey>(writer); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfxml/RdfXmlOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfxml/RdfXmlOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfxml/RdfXmlOutputFormat.java deleted file mode 100644 index 6c9a9ea..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfxml/RdfXmlOutputFormat.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output.rdfxml; - -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.writers.rdfxml.RdfXmlWriter; -import org.apache.jena.hadoop.rdf.types.TripleWritable; - -import com.hp.hpl.jena.graph.Triple; - -/** - * RDF/XML output format - * - * - * - * @param <TKey> - * Key type - */ -public class RdfXmlOutputFormat<TKey> extends AbstractNodeTupleOutputFormat<TKey, Triple, TripleWritable> { - - @Override - protected String getFileExtension() { - return ".rdf"; - } - - @Override - protected RecordWriter<TKey, TripleWritable> getRecordWriter(Writer writer, Configuration config, Path outputPath) { - return new RdfXmlWriter<TKey>(writer); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftQuadOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftQuadOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftQuadOutputFormat.java deleted file mode 100644 index bd07bff..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftQuadOutputFormat.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output.thrift; - -import java.io.Writer; -import java.nio.charset.Charset; - -import org.apache.commons.io.output.WriterOutputStream; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.output.AbstractStreamRdfNodeTupleOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.writers.StreamRdfQuadWriter; -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.riot.system.StreamRDF; -import org.apache.jena.riot.thrift.StreamRDF2Thrift; -import com.hp.hpl.jena.sparql.core.Quad; - -public class ThriftQuadOutputFormat<TKey> extends AbstractStreamRdfNodeTupleOutputFormat<TKey, Quad, QuadWritable> { - - @Override - protected String getFileExtension() { - return ".trdf"; - } - - @Override - protected RecordWriter<TKey, QuadWritable> getRecordWriter(StreamRDF stream, Writer writer, Configuration config) { - return new StreamRdfQuadWriter<TKey>(stream, writer); - } - - @Override - protected StreamRDF getStream(Writer writer, Configuration config) { - return new StreamRDF2Thrift(new WriterOutputStream(writer, Charset.forName("utf-8")), false); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftTripleOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftTripleOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftTripleOutputFormat.java deleted file mode 100644 index 73e40bc..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/thrift/ThriftTripleOutputFormat.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output.thrift; - -import java.io.Writer; -import java.nio.charset.Charset; - -import org.apache.commons.io.output.WriterOutputStream; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.output.AbstractStreamRdfNodeTupleOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.writers.StreamRdfTripleWriter; -import org.apache.jena.hadoop.rdf.types.TripleWritable; -import org.apache.jena.riot.system.StreamRDF; -import org.apache.jena.riot.thrift.StreamRDF2Thrift; - -import com.hp.hpl.jena.graph.Triple; - -public class ThriftTripleOutputFormat<TKey> extends AbstractStreamRdfNodeTupleOutputFormat<TKey, Triple, TripleWritable> { - - @Override - protected String getFileExtension() { - return ".trdf"; - } - - @Override - protected RecordWriter<TKey, TripleWritable> getRecordWriter(StreamRDF stream, Writer writer, Configuration config) { - return new StreamRdfTripleWriter<TKey>(stream, writer); - } - - @Override - protected StreamRDF getStream(Writer writer, Configuration config) { - return new StreamRDF2Thrift(new WriterOutputStream(writer, Charset.forName("utf-8")), false); - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputFormat.java deleted file mode 100644 index 6f33e29..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputFormat.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output.trig; - -import java.io.Writer; - -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.output.AbstractBatchedNodeTupleOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.writers.trig.BatchedTriGWriter; -import org.apache.jena.hadoop.rdf.types.QuadWritable; - -import com.hp.hpl.jena.sparql.core.Quad; - -/** - * Output format for TriG that uses a batched approach, note that this will - * produce invalid data where blank nodes span batches so it is typically better - * to use the {@link TriGOutputFormat} instead - * - * - * - * @param <TKey> - * Key type - */ -public class BatchedTriGOutputFormat<TKey> extends - AbstractBatchedNodeTupleOutputFormat<TKey, Quad, QuadWritable> { - - @Override - protected RecordWriter<TKey, QuadWritable> getRecordWriter(Writer writer, - long batchSize) { - return new BatchedTriGWriter<TKey>(writer, batchSize); - } - - @Override - protected String getFileExtension() { - return ".trig"; - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trig/TriGOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trig/TriGOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trig/TriGOutputFormat.java deleted file mode 100644 index 0047095..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trig/TriGOutputFormat.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output.trig; - -import java.io.Writer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.output.AbstractStreamRdfNodeTupleOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.writers.StreamRdfQuadWriter; -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.riot.system.StreamRDF; -import org.apache.jena.riot.writer.WriterStreamRDFBlocks; - -import com.hp.hpl.jena.sparql.core.Quad; - -/** - * Output format for TriG - * - * - * - * @param <TKey> - * Key type - */ -public class TriGOutputFormat<TKey> extends AbstractStreamRdfNodeTupleOutputFormat<TKey, Quad, QuadWritable> { - - @Override - protected RecordWriter<TKey, QuadWritable> getRecordWriter(StreamRDF stream, Writer writer, Configuration config) { - return new StreamRdfQuadWriter<TKey>(stream, writer); - } - - @Override - protected StreamRDF getStream(Writer writer, Configuration config) { - return new WriterStreamRDFBlocks(writer); - } - - @Override - protected String getFileExtension() { - return ".trig"; - } - -} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputFormat.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputFormat.java deleted file mode 100644 index c67b3da..0000000 --- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputFormat.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.io.output.trix; - -import java.io.Writer; -import java.nio.charset.Charset; - -import org.apache.commons.io.output.WriterOutputStream; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.jena.hadoop.rdf.io.output.AbstractStreamRdfNodeTupleOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.writers.StreamRdfQuadWriter; -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.riot.system.StreamRDF; -import org.apache.jena.riot.writer.StreamWriterTriX; -import com.hp.hpl.jena.sparql.core.Quad; - -/** - * Output format for TriX - * - * @param <TKey> - * Key type - */ -public class TriXOutputFormat<TKey> extends AbstractStreamRdfNodeTupleOutputFormat<TKey, Quad, QuadWritable> { - - @Override - protected RecordWriter<TKey, QuadWritable> getRecordWriter(StreamRDF stream, Writer writer, Configuration config) { - return new StreamRdfQuadWriter<TKey>(stream, writer); - } - - @Override - protected StreamRDF getStream(Writer writer, Configuration config) { - return new StreamWriterTriX(new WriterOutputStream(writer, Charset.forName("utf-8"))); - } - - @Override - protected String getFileExtension() { - return ".trix"; - } - -}
