http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/turtle/TurtleReader.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/turtle/TurtleReader.java index b3fb377,b3fb377..b0417f6 --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/turtle/TurtleReader.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/turtle/TurtleReader.java @@@ -1,24 -1,24 +1,24 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.input.readers.turtle; --import org.apache.jena.hadoop.rdf.io.input.readers.AbstractWholeFileTripleReader; ++import org.apache.jena.hadoop.rdf.io.input.readers.AbstractWholeFileTripleReader; import org.apache.jena.riot.Lang; /**
http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trig/TriGInputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trig/TriGInputFormat.java index e9785ea,e9785ea..e4b4c40 --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trig/TriGInputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trig/TriGInputFormat.java @@@ -1,30 -1,30 +1,30 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.input.trig; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; --import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat; --import org.apache.jena.hadoop.rdf.io.input.readers.trig.TriGReader; --import org.apache.jena.hadoop.rdf.types.QuadWritable; ++import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat; ++import org.apache.jena.hadoop.rdf.io.input.readers.trig.TriGReader; ++import org.apache.jena.hadoop.rdf.types.QuadWritable; /** http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trix/TriXInputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trix/TriXInputFormat.java index 1759958,1759958..9f11fe9 --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trix/TriXInputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/trix/TriXInputFormat.java @@@ -1,39 -1,39 +1,39 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --package org.apache.jena.hadoop.rdf.io.input.trix; -- --import org.apache.hadoop.io.LongWritable; --import org.apache.hadoop.mapreduce.InputSplit; --import org.apache.hadoop.mapreduce.RecordReader; --import org.apache.hadoop.mapreduce.TaskAttemptContext; --import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat; --import org.apache.jena.hadoop.rdf.io.input.readers.trix.TriXReader; --import org.apache.jena.hadoop.rdf.types.QuadWritable; -- --/** -- * Input format for TriX -- */ --public class TriXInputFormat extends AbstractWholeFileInputFormat<LongWritable, QuadWritable> { -- -- @Override -- public RecordReader<LongWritable, QuadWritable> createRecordReader(InputSplit split, TaskAttemptContext context) { -- return new TriXReader(); -- } -- --} ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.jena.hadoop.rdf.io.input.trix; ++ ++import org.apache.hadoop.io.LongWritable; ++import org.apache.hadoop.mapreduce.InputSplit; ++import org.apache.hadoop.mapreduce.RecordReader; ++import org.apache.hadoop.mapreduce.TaskAttemptContext; ++import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat; ++import org.apache.jena.hadoop.rdf.io.input.readers.trix.TriXReader; ++import org.apache.jena.hadoop.rdf.types.QuadWritable; ++ ++/** ++ * Input format for TriX ++ */ ++public class TriXInputFormat extends AbstractWholeFileInputFormat<LongWritable, QuadWritable> { ++ ++ @Override ++ public RecordReader<LongWritable, QuadWritable> createRecordReader(InputSplit split, TaskAttemptContext context) { ++ return new TriXReader(); ++ } ++ ++} http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/turtle/TurtleInputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/turtle/TurtleInputFormat.java index 2734f24,2734f24..983b21e --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/turtle/TurtleInputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/turtle/TurtleInputFormat.java @@@ -1,30 -1,30 +1,30 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.input.turtle; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; --import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat; --import org.apache.jena.hadoop.rdf.io.input.readers.turtle.TurtleReader; --import org.apache.jena.hadoop.rdf.types.TripleWritable; ++import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat; ++import org.apache.jena.hadoop.rdf.io.input.readers.turtle.TurtleReader; ++import org.apache.jena.hadoop.rdf.types.TripleWritable; /** http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/BlockInputStream.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/BlockInputStream.java index a9e692e,a9e692e..9dd4ccd --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/BlockInputStream.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/BlockInputStream.java @@@ -1,21 -1,21 +1,21 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.input.util; import java.io.IOException; http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/RdfIOUtils.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/RdfIOUtils.java index 26fe661,26fe661..6ba14d7 --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/RdfIOUtils.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/RdfIOUtils.java @@@ -1,97 -1,97 +1,97 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --package org.apache.jena.hadoop.rdf.io.input.util; -- --import java.util.UUID; -- --import org.apache.hadoop.fs.Path; --import org.apache.hadoop.mapreduce.JobContext; --import org.apache.jena.hadoop.rdf.io.RdfIOConstants; --import org.apache.jena.riot.lang.LabelToNode; --import org.apache.jena.riot.system.* ; --import org.slf4j.Logger; --import org.slf4j.LoggerFactory; -- --/** -- * RDF IO utility functions -- * -- * -- * -- */ --public class RdfIOUtils { -- private static final Logger LOGGER = LoggerFactory.getLogger(RdfIOUtils.class); -- -- /** -- * Private constructor prevents instantiation -- */ -- private RdfIOUtils() { -- } -- -- /** -- * Creates a parser profile for the given job context -- * -- * @param context -- * Context -- * @param path -- * File path -- * @return Parser profile -- */ -- public static ParserProfile createParserProfile(JobContext context, Path path) { -- Prologue prologue = new Prologue(PrefixMapFactory.createForInput(), IRIResolver.createNoResolve()); -- UUID seed = RdfIOUtils.getSeed(context, path); -- LabelToNode labelMapping = LabelToNode.createScopeByDocumentHash(seed); -- return new ParserProfileBase(prologue, ErrorHandlerFactory.errorHandlerStd, RiotLib.factoryRDF(labelMapping)); -- } -- -- /** -- * Selects a seed for use in generating blank node identifiers -- * -- * @param context -- * Job Context -- * @param path -- * File path -- * @return Seed -- */ -- public static UUID getSeed(JobContext context, Path path) { -- // This is to ensure that blank node allocation policy is constant when -- // subsequent MapReduce jobs need that -- String jobId = context.getJobID().toString(); -- if (jobId == null) { -- jobId = String.valueOf(System.currentTimeMillis()); -- LOGGER.warn( -- "Job ID was not set, using current milliseconds of {}. Sequence of MapReduce jobs must carefully handle blank nodes.", -- jobId); -- } -- -- if (!context.getConfiguration().getBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, false)) { -- // Using normal file scoped blank node allocation -- LOGGER.debug("Generating Blank Node Seed from Job Details (ID={}, Input Path={})", jobId, path); -- -- // Form a reproducible seed for the run -- return new UUID(jobId.hashCode(), path.hashCode()); -- } else { -- // Using globally scoped blank node allocation -- LOGGER.warn( -- "Using globally scoped blank node allocation policy from Job Details (ID={}) - this is unsafe if your RDF inputs did not originate from a previous job", -- jobId); -- -- return new UUID(jobId.hashCode(), 0); -- } -- } --} ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.jena.hadoop.rdf.io.input.util; ++ ++import java.util.UUID; ++ ++import org.apache.hadoop.fs.Path; ++import org.apache.hadoop.mapreduce.JobContext; ++import org.apache.jena.hadoop.rdf.io.RdfIOConstants; ++import org.apache.jena.riot.lang.LabelToNode; ++import org.apache.jena.riot.system.* ; ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; ++ ++/** ++ * RDF IO utility functions ++ * ++ * ++ * ++ */ ++public class RdfIOUtils { ++ private static final Logger LOGGER = LoggerFactory.getLogger(RdfIOUtils.class); ++ ++ /** ++ * Private constructor prevents instantiation ++ */ ++ private RdfIOUtils() { ++ } ++ ++ /** ++ * Creates a parser profile for the given job context ++ * ++ * @param context ++ * Context ++ * @param path ++ * File path ++ * @return Parser profile ++ */ ++ public static ParserProfile createParserProfile(JobContext context, Path path) { ++ Prologue prologue = new Prologue(PrefixMapFactory.createForInput(), IRIResolver.createNoResolve()); ++ UUID seed = RdfIOUtils.getSeed(context, path); ++ LabelToNode labelMapping = LabelToNode.createScopeByDocumentHash(seed); ++ return new ParserProfileBase(prologue, ErrorHandlerFactory.errorHandlerStd, RiotLib.factoryRDF(labelMapping)); ++ } ++ ++ /** ++ * Selects a seed for use in generating blank node identifiers ++ * ++ * @param context ++ * Job Context ++ * @param path ++ * File path ++ * @return Seed ++ */ ++ public static UUID getSeed(JobContext context, Path path) { ++ // This is to ensure that blank node allocation policy is constant when ++ // subsequent MapReduce jobs need that ++ String jobId = context.getJobID().toString(); ++ if (jobId == null) { ++ jobId = String.valueOf(System.currentTimeMillis()); ++ LOGGER.warn( ++ "Job ID was not set, using current milliseconds of {}. Sequence of MapReduce jobs must carefully handle blank nodes.", ++ jobId); ++ } ++ ++ if (!context.getConfiguration().getBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, false)) { ++ // Using normal file scoped blank node allocation ++ LOGGER.debug("Generating Blank Node Seed from Job Details (ID={}, Input Path={})", jobId, path); ++ ++ // Form a reproducible seed for the run ++ return new UUID(jobId.hashCode(), path.hashCode()); ++ } else { ++ // Using globally scoped blank node allocation ++ LOGGER.warn( ++ "Using globally scoped blank node allocation policy from Job Details (ID={}) - this is unsafe if your RDF inputs did not originate from a previous job", ++ jobId); ++ ++ return new UUID(jobId.hashCode(), 0); ++ } ++ } ++} http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackableInputStream.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackableInputStream.java index 92e2df5,92e2df5..cc0455c --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackableInputStream.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackableInputStream.java @@@ -1,21 -1,21 +1,21 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.input.util; import java.io.InputStream; http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStream.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStream.java index e51a866,e51a866..73b7aa1 --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStream.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStream.java @@@ -1,21 -1,21 +1,21 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.input.util; import java.io.IOException; http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedQuadsStream.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedQuadsStream.java index 3759496,3759496..e78d884 --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedQuadsStream.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedQuadsStream.java @@@ -1,26 -1,26 +1,26 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.input.util; --import org.apache.jena.graph.Triple ; ++import org.apache.jena.graph.Triple ; import org.apache.jena.riot.lang.PipedRDFIterator; --import org.apache.jena.sparql.core.Quad ; ++import org.apache.jena.sparql.core.Quad ; /** * A tracked piped quads stream http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedRDFStream.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedRDFStream.java index 6e910be,6e910be..4af12e6 --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedRDFStream.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedRDFStream.java @@@ -1,21 -1,21 +1,21 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.input.util; import java.util.LinkedList; http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedTriplesStream.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedTriplesStream.java index 1095fa8,1095fa8..d37cee1 --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedTriplesStream.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedTriplesStream.java @@@ -1,26 -1,26 +1,26 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.input.util; --import org.apache.jena.graph.Triple ; ++import org.apache.jena.graph.Triple ; import org.apache.jena.riot.lang.PipedRDFIterator; --import org.apache.jena.sparql.core.Quad ; ++import org.apache.jena.sparql.core.Quad ; /** * A tracked piped triples stream http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractBatchedNodeTupleOutputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractBatchedNodeTupleOutputFormat.java index 02fbf9c,02fbf9c..175d9ff --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractBatchedNodeTupleOutputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractBatchedNodeTupleOutputFormat.java @@@ -1,31 -1,31 +1,31 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.output; import java.io.Writer; import org.apache.hadoop.conf.Configuration; --import org.apache.hadoop.fs.Path; ++import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.RecordWriter; --import org.apache.jena.hadoop.rdf.io.RdfIOConstants; --import org.apache.jena.hadoop.rdf.io.output.writers.AbstractBatchedNodeTupleWriter; --import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; ++import org.apache.jena.hadoop.rdf.io.RdfIOConstants; ++import org.apache.jena.hadoop.rdf.io.output.writers.AbstractBatchedNodeTupleWriter; ++import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; /** http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeOutputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeOutputFormat.java index aec3309,aec3309..64b0d1b --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeOutputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeOutputFormat.java @@@ -1,21 -1,21 +1,21 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.output; import java.io.IOException; @@@ -32,7 -32,7 +32,7 @@@ import org.apache.hadoop.mapreduce.Reco import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.ReflectionUtils; --import org.apache.jena.hadoop.rdf.types.NodeWritable; ++import org.apache.jena.hadoop.rdf.types.NodeWritable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormat.java index fd86f00,fd86f00..b98d652 --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormat.java @@@ -1,109 -1,109 +1,109 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --package org.apache.jena.hadoop.rdf.io.output; -- --import java.io.IOException; --import java.io.OutputStreamWriter; --import java.io.Writer; -- --import org.apache.hadoop.conf.Configuration; --import org.apache.hadoop.fs.FSDataOutputStream; --import org.apache.hadoop.fs.FileSystem; --import org.apache.hadoop.fs.Path; --import org.apache.hadoop.io.compress.CompressionCodec; --import org.apache.hadoop.io.compress.GzipCodec; --import org.apache.hadoop.mapreduce.RecordWriter; --import org.apache.hadoop.mapreduce.TaskAttemptContext; --import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; --import org.apache.hadoop.util.ReflectionUtils; --import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; --import org.slf4j.Logger; --import org.slf4j.LoggerFactory; -- --/** -- * An abstract implementation of an output format for line based tuple formats -- * where the key is ignored and only the tuple values will be output -- * -- * -- * @param <TKey> -- * Key type -- * @param <TValue> -- * Tuple value type -- * @param <T> -- * Writable node tuple type -- * -- */ --public abstract class AbstractNodeTupleOutputFormat<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends -- FileOutputFormat<TKey, T> { -- -- private static final Logger LOG = LoggerFactory.getLogger(AbstractNodeTupleOutputFormat.class); -- -- @Override -- public RecordWriter<TKey, T> getRecordWriter(TaskAttemptContext context) throws IOException { -- Configuration config = context.getConfiguration(); -- boolean isCompressed = getCompressOutput(context); -- CompressionCodec codec = null; -- -- // Build the output file path -- String extension = this.getFileExtension(); -- if (isCompressed) { -- // Add compression extension if applicable -- Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class); -- codec = ReflectionUtils.newInstance(codecClass, config); -- extension += codec.getDefaultExtension(); -- } -- Path file = getDefaultWorkFile(context, extension); -- LOG.info("Writing output to file " + file); -- -- // Open the file appropriately and create a record writer for it -- FileSystem fs = file.getFileSystem(config); -- if (!isCompressed) { -- FSDataOutputStream fileOut = fs.create(file, false); -- return this.getRecordWriter(new OutputStreamWriter(fileOut), config, file); -- } else { -- FSDataOutputStream fileOut = fs.create(file, false); -- return this.getRecordWriter(new OutputStreamWriter(codec.createOutputStream(fileOut)), config, file); -- } -- } -- -- /** -- * Gets the file extension to use for output -- * -- * @return File extension including the '.' -- */ -- protected abstract String getFileExtension(); -- -- /** -- * Gets the record writer to use -- * -- * @param writer -- * Writer to write output to -- * @param config -- * Configuration -- * @param outputPath -- * Output path being written to -- * @return Record writer -- * @throws IOException -- * May be thrown if a record writer cannot be obtained for any -- * reason -- */ -- protected abstract RecordWriter<TKey, T> getRecordWriter(Writer writer, Configuration config, Path outputPath) -- throws IOException; -- --} ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.jena.hadoop.rdf.io.output; ++ ++import java.io.IOException; ++import java.io.OutputStreamWriter; ++import java.io.Writer; ++ ++import org.apache.hadoop.conf.Configuration; ++import org.apache.hadoop.fs.FSDataOutputStream; ++import org.apache.hadoop.fs.FileSystem; ++import org.apache.hadoop.fs.Path; ++import org.apache.hadoop.io.compress.CompressionCodec; ++import org.apache.hadoop.io.compress.GzipCodec; ++import org.apache.hadoop.mapreduce.RecordWriter; ++import org.apache.hadoop.mapreduce.TaskAttemptContext; ++import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; ++import org.apache.hadoop.util.ReflectionUtils; ++import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; ++ ++/** ++ * An abstract implementation of an output format for line based tuple formats ++ * where the key is ignored and only the tuple values will be output ++ * ++ * ++ * @param <TKey> ++ * Key type ++ * @param <TValue> ++ * Tuple value type ++ * @param <T> ++ * Writable node tuple type ++ * ++ */ ++public abstract class AbstractNodeTupleOutputFormat<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends ++ FileOutputFormat<TKey, T> { ++ ++ private static final Logger LOG = LoggerFactory.getLogger(AbstractNodeTupleOutputFormat.class); ++ ++ @Override ++ public RecordWriter<TKey, T> getRecordWriter(TaskAttemptContext context) throws IOException { ++ Configuration config = context.getConfiguration(); ++ boolean isCompressed = getCompressOutput(context); ++ CompressionCodec codec = null; ++ ++ // Build the output file path ++ String extension = this.getFileExtension(); ++ if (isCompressed) { ++ // Add compression extension if applicable ++ Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class); ++ codec = ReflectionUtils.newInstance(codecClass, config); ++ extension += codec.getDefaultExtension(); ++ } ++ Path file = getDefaultWorkFile(context, extension); ++ LOG.info("Writing output to file " + file); ++ ++ // Open the file appropriately and create a record writer for it ++ FileSystem fs = file.getFileSystem(config); ++ if (!isCompressed) { ++ FSDataOutputStream fileOut = fs.create(file, false); ++ return this.getRecordWriter(new OutputStreamWriter(fileOut), config, file); ++ } else { ++ FSDataOutputStream fileOut = fs.create(file, false); ++ return this.getRecordWriter(new OutputStreamWriter(codec.createOutputStream(fileOut)), config, file); ++ } ++ } ++ ++ /** ++ * Gets the file extension to use for output ++ * ++ * @return File extension including the '.' ++ */ ++ protected abstract String getFileExtension(); ++ ++ /** ++ * Gets the record writer to use ++ * ++ * @param writer ++ * Writer to write output to ++ * @param config ++ * Configuration ++ * @param outputPath ++ * Output path being written to ++ * @return Record writer ++ * @throws IOException ++ * May be thrown if a record writer cannot be obtained for any ++ * reason ++ */ ++ protected abstract RecordWriter<TKey, T> getRecordWriter(Writer writer, Configuration config, Path outputPath) ++ throws IOException; ++ ++} http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/nquads/NQuadsOutputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/nquads/NQuadsOutputFormat.java index 926a708,926a708..5d2c10b --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/nquads/NQuadsOutputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/nquads/NQuadsOutputFormat.java @@@ -1,32 -1,32 +1,32 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.output.nquads; import java.io.Writer; -- ++ import org.apache.hadoop.conf.Configuration; --import org.apache.hadoop.fs.Path; ++import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.RecordWriter; --import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; --import org.apache.jena.hadoop.rdf.io.output.writers.nquads.NQuadsWriter; --import org.apache.jena.hadoop.rdf.types.QuadWritable; --import org.apache.jena.sparql.core.Quad ; ++import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; ++import org.apache.jena.hadoop.rdf.io.output.writers.nquads.NQuadsWriter; ++import org.apache.jena.hadoop.rdf.types.QuadWritable; ++import org.apache.jena.sparql.core.Quad ; /** * NQuads output format http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesNodeOutputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesNodeOutputFormat.java index 56935bb,56935bb..a33b32e --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesNodeOutputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesNodeOutputFormat.java @@@ -1,30 -1,30 +1,30 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.output.ntriples; import java.io.Writer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.RecordWriter; --import org.apache.jena.hadoop.rdf.io.output.AbstractNodeOutputFormat; --import org.apache.jena.hadoop.rdf.io.output.writers.ntriples.NTriplesNodeWriter; --import org.apache.jena.hadoop.rdf.types.NodeWritable; ++import org.apache.jena.hadoop.rdf.io.output.AbstractNodeOutputFormat; ++import org.apache.jena.hadoop.rdf.io.output.writers.ntriples.NTriplesNodeWriter; ++import org.apache.jena.hadoop.rdf.types.NodeWritable; /** http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesOutputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesOutputFormat.java index 65a92c6,65a92c6..65e1665 --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesOutputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/ntriples/NTriplesOutputFormat.java @@@ -1,32 -1,32 +1,32 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.output.ntriples; import java.io.Writer; -- ++ import org.apache.hadoop.conf.Configuration; --import org.apache.hadoop.fs.Path; ++import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.RecordWriter; --import org.apache.jena.graph.Triple ; --import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; --import org.apache.jena.hadoop.rdf.io.output.writers.ntriples.NTriplesWriter; --import org.apache.jena.hadoop.rdf.types.TripleWritable; ++import org.apache.jena.graph.Triple ; ++import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; ++import org.apache.jena.hadoop.rdf.io.output.writers.ntriples.NTriplesWriter; ++import org.apache.jena.hadoop.rdf.types.TripleWritable; /** * NTriples output format http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfjson/RdfJsonOutputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfjson/RdfJsonOutputFormat.java index 3b1e340,3b1e340..13e996b --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfjson/RdfJsonOutputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfjson/RdfJsonOutputFormat.java @@@ -1,32 -1,32 +1,32 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.output.rdfjson; import java.io.Writer; -- ++ import org.apache.hadoop.conf.Configuration; --import org.apache.hadoop.fs.Path; ++import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.RecordWriter; --import org.apache.jena.graph.Triple ; --import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; --import org.apache.jena.hadoop.rdf.io.output.writers.rdfjson.RdfJsonWriter; --import org.apache.jena.hadoop.rdf.types.TripleWritable; ++import org.apache.jena.graph.Triple ; ++import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; ++import org.apache.jena.hadoop.rdf.io.output.writers.rdfjson.RdfJsonWriter; ++import org.apache.jena.hadoop.rdf.types.TripleWritable; /** * RDF/JSON output format http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfxml/RdfXmlOutputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfxml/RdfXmlOutputFormat.java index 24cc752,24cc752..8955e4c --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfxml/RdfXmlOutputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/rdfxml/RdfXmlOutputFormat.java @@@ -1,32 -1,32 +1,32 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.output.rdfxml; import java.io.Writer; -- ++ import org.apache.hadoop.conf.Configuration; --import org.apache.hadoop.fs.Path; ++import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.RecordWriter; --import org.apache.jena.graph.Triple ; --import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; --import org.apache.jena.hadoop.rdf.io.output.writers.rdfxml.RdfXmlWriter; --import org.apache.jena.hadoop.rdf.types.TripleWritable; ++import org.apache.jena.graph.Triple ; ++import org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat; ++import org.apache.jena.hadoop.rdf.io.output.writers.rdfxml.RdfXmlWriter; ++import org.apache.jena.hadoop.rdf.types.TripleWritable; /** * RDF/XML output format http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputFormat.java index 9d37f9e,9d37f9e..48a8694 --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputFormat.java @@@ -1,53 -1,53 +1,53 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --package org.apache.jena.hadoop.rdf.io.output.trig; -- --import java.io.Writer; -- --import org.apache.hadoop.mapreduce.RecordWriter; --import org.apache.jena.hadoop.rdf.io.output.AbstractBatchedNodeTupleOutputFormat; --import org.apache.jena.hadoop.rdf.io.output.writers.trig.BatchedTriGWriter; --import org.apache.jena.hadoop.rdf.types.QuadWritable; --import org.apache.jena.sparql.core.Quad ; -- --/** -- * Output format for TriG that uses a batched approach, note that this will -- * produce invalid data where blank nodes span batches so it is typically better -- * to use the {@link TriGOutputFormat} instead -- * -- * -- * -- * @param <TKey> -- * Key type -- */ --public class BatchedTriGOutputFormat<TKey> extends -- AbstractBatchedNodeTupleOutputFormat<TKey, Quad, QuadWritable> { -- -- @Override -- protected RecordWriter<TKey, QuadWritable> getRecordWriter(Writer writer, -- long batchSize) { -- return new BatchedTriGWriter<TKey>(writer, batchSize); -- } -- -- @Override -- protected String getFileExtension() { -- return ".trig"; -- } -- --} ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.jena.hadoop.rdf.io.output.trig; ++ ++import java.io.Writer; ++ ++import org.apache.hadoop.mapreduce.RecordWriter; ++import org.apache.jena.hadoop.rdf.io.output.AbstractBatchedNodeTupleOutputFormat; ++import org.apache.jena.hadoop.rdf.io.output.writers.trig.BatchedTriGWriter; ++import org.apache.jena.hadoop.rdf.types.QuadWritable; ++import org.apache.jena.sparql.core.Quad ; ++ ++/** ++ * Output format for TriG that uses a batched approach, note that this will ++ * produce invalid data where blank nodes span batches so it is typically better ++ * to use the {@link TriGOutputFormat} instead ++ * ++ * ++ * ++ * @param <TKey> ++ * Key type ++ */ ++public class BatchedTriGOutputFormat<TKey> extends ++ AbstractBatchedNodeTupleOutputFormat<TKey, Quad, QuadWritable> { ++ ++ @Override ++ protected RecordWriter<TKey, QuadWritable> getRecordWriter(Writer writer, ++ long batchSize) { ++ return new BatchedTriGWriter<TKey>(writer, batchSize); ++ } ++ ++ @Override ++ protected String getFileExtension() { ++ return ".trig"; ++ } ++ ++} http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputFormat.java index 1493d7f,1493d7f..3d528bc --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputFormat.java @@@ -1,49 -1,49 +1,49 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --package org.apache.jena.hadoop.rdf.io.output.turtle; -- --import java.io.Writer; -- --import org.apache.hadoop.mapreduce.RecordWriter; --import org.apache.jena.graph.Triple ; --import org.apache.jena.hadoop.rdf.io.output.AbstractBatchedNodeTupleOutputFormat; --import org.apache.jena.hadoop.rdf.io.output.writers.turtle.BatchedTurtleWriter; --import org.apache.jena.hadoop.rdf.types.TripleWritable; -- --/** -- * Output format for Turtle that uses a batched approach, note that this will -- * produce invalid data where blank nodes span batches so it is typically better -- * to use the {@link TurtleOutputFormat} instead -- * -- * @param <TKey> -- * Key type -- */ --public class BatchedTurtleOutputFormat<TKey> extends AbstractBatchedNodeTupleOutputFormat<TKey, Triple, TripleWritable> { -- -- @Override -- protected RecordWriter<TKey, TripleWritable> getRecordWriter(Writer writer, long batchSize) { -- return new BatchedTurtleWriter<TKey>(writer, batchSize); -- } -- -- @Override -- protected String getFileExtension() { -- return ".ttl"; -- } -- --} ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.jena.hadoop.rdf.io.output.turtle; ++ ++import java.io.Writer; ++ ++import org.apache.hadoop.mapreduce.RecordWriter; ++import org.apache.jena.graph.Triple ; ++import org.apache.jena.hadoop.rdf.io.output.AbstractBatchedNodeTupleOutputFormat; ++import org.apache.jena.hadoop.rdf.io.output.writers.turtle.BatchedTurtleWriter; ++import org.apache.jena.hadoop.rdf.types.TripleWritable; ++ ++/** ++ * Output format for Turtle that uses a batched approach, note that this will ++ * produce invalid data where blank nodes span batches so it is typically better ++ * to use the {@link TurtleOutputFormat} instead ++ * ++ * @param <TKey> ++ * Key type ++ */ ++public class BatchedTurtleOutputFormat<TKey> extends AbstractBatchedNodeTupleOutputFormat<TKey, Triple, TripleWritable> { ++ ++ @Override ++ protected RecordWriter<TKey, TripleWritable> getRecordWriter(Writer writer, long batchSize) { ++ return new BatchedTurtleWriter<TKey>(writer, batchSize); ++ } ++ ++ @Override ++ protected String getFileExtension() { ++ return ".ttl"; ++ } ++ ++} http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleOutputFormat.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleOutputFormat.java index 85ac0c5,85ac0c5..f46fb0c --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleOutputFormat.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleOutputFormat.java @@@ -1,55 -1,55 +1,55 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --package org.apache.jena.hadoop.rdf.io.output.turtle; -- --import java.io.Writer; -- --import org.apache.hadoop.conf.Configuration; --import org.apache.hadoop.mapreduce.RecordWriter; --import org.apache.jena.graph.Triple ; --import org.apache.jena.hadoop.rdf.io.output.AbstractStreamRdfNodeTupleOutputFormat; --import org.apache.jena.hadoop.rdf.io.output.writers.StreamRdfTripleWriter; --import org.apache.jena.hadoop.rdf.types.TripleWritable; --import org.apache.jena.riot.system.StreamRDF; --import org.apache.jena.riot.writer.WriterStreamRDFBlocks; -- --/** -- * Turtle output format -- * -- * @param <TKey> -- * Key type -- */ --public class TurtleOutputFormat<TKey> extends AbstractStreamRdfNodeTupleOutputFormat<TKey, Triple, TripleWritable> { -- -- @Override -- protected String getFileExtension() { -- return ".ttl"; -- } -- -- @Override -- protected RecordWriter<TKey, TripleWritable> getRecordWriter(StreamRDF stream, Writer writer, Configuration config) { -- return new StreamRdfTripleWriter<TKey>(stream, writer); -- } -- -- @Override -- protected StreamRDF getStream(Writer writer, Configuration config) { -- return new WriterStreamRDFBlocks(writer); -- } -- --} ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.jena.hadoop.rdf.io.output.turtle; ++ ++import java.io.Writer; ++ ++import org.apache.hadoop.conf.Configuration; ++import org.apache.hadoop.mapreduce.RecordWriter; ++import org.apache.jena.graph.Triple ; ++import org.apache.jena.hadoop.rdf.io.output.AbstractStreamRdfNodeTupleOutputFormat; ++import org.apache.jena.hadoop.rdf.io.output.writers.StreamRdfTripleWriter; ++import org.apache.jena.hadoop.rdf.types.TripleWritable; ++import org.apache.jena.riot.system.StreamRDF; ++import org.apache.jena.riot.writer.WriterStreamRDFBlocks; ++ ++/** ++ * Turtle output format ++ * ++ * @param <TKey> ++ * Key type ++ */ ++public class TurtleOutputFormat<TKey> extends AbstractStreamRdfNodeTupleOutputFormat<TKey, Triple, TripleWritable> { ++ ++ @Override ++ protected String getFileExtension() { ++ return ".ttl"; ++ } ++ ++ @Override ++ protected RecordWriter<TKey, TripleWritable> getRecordWriter(StreamRDF stream, Writer writer, Configuration config) { ++ return new StreamRdfTripleWriter<TKey>(stream, writer); ++ } ++ ++ @Override ++ protected StreamRDF getStream(Writer writer, Configuration config) { ++ return new WriterStreamRDFBlocks(writer); ++ } ++ ++} http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/writers/AbstractBatchedNodeTupleWriter.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/writers/AbstractBatchedNodeTupleWriter.java index cb51b6e,cb51b6e..ac09463 --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/writers/AbstractBatchedNodeTupleWriter.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/writers/AbstractBatchedNodeTupleWriter.java @@@ -1,21 -1,21 +1,21 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.output.writers; import java.io.IOException; @@@ -23,7 -23,7 +23,7 @@@ import java.io.Writer import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; --import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; ++import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; http://git-wip-us.apache.org/repos/asf/jena/blob/4b5cd267/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/writers/AbstractBatchedQuadWriter.java ---------------------------------------------------------------------- diff --cc jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/writers/AbstractBatchedQuadWriter.java index 8c8df50,8c8df50..22b7771 --- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/writers/AbstractBatchedQuadWriter.java +++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/writers/AbstractBatchedQuadWriter.java @@@ -1,35 -1,35 +1,35 @@@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one -- * or more contributor license agreements. See the NOTICE file -- * distributed with this work for additional information -- * regarding copyright ownership. The ASF licenses this file -- * to you under the Apache License, Version 2.0 (the -- * "License"); you may not use this file except in compliance -- * with the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ package org.apache.jena.hadoop.rdf.io.output.writers; import java.io.Writer; import java.util.List; -- ++ import org.apache.commons.collections.IteratorUtils; --import org.apache.jena.graph.Node ; --import org.apache.jena.hadoop.rdf.types.QuadWritable; ++import org.apache.jena.graph.Node ; ++import org.apache.jena.hadoop.rdf.types.QuadWritable; import org.apache.jena.riot.Lang; import org.apache.jena.riot.RDFDataMgr; import org.apache.jena.riot.RDFWriterRegistry; --import org.apache.jena.sparql.core.DatasetGraph ; --import org.apache.jena.sparql.core.DatasetGraphFactory ; --import org.apache.jena.sparql.core.Quad ; ++import org.apache.jena.sparql.core.DatasetGraph ; ++import org.apache.jena.sparql.core.DatasetGraphFactory ; ++import org.apache.jena.sparql.core.Quad ; /** * Abstract batched record writer for quad formats
