http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/AbstractQuadsOnlyWriterFactory.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/AbstractQuadsOnlyWriterFactory.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/AbstractQuadsOnlyWriterFactory.java new file mode 100644 index 0000000..0cf137e --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/AbstractQuadsOnlyWriterFactory.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.io.registry.writers; + +import java.io.IOException; +import java.io.Writer; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.jena.hadoop.rdf.io.registry.WriterFactory; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; + +/** + * Abstract writer factory for languages that only support quads + */ +public abstract class AbstractQuadsOnlyWriterFactory implements WriterFactory { + + private Lang lang; + private Collection<Lang> alternateLangs = Collections.unmodifiableList(Collections.<Lang> emptyList()); + + public AbstractQuadsOnlyWriterFactory(Lang lang) { + this(lang, (Collection<Lang>) null); + } + + public AbstractQuadsOnlyWriterFactory(Lang lang, Lang... altLangs) { + this(lang, Arrays.asList(altLangs)); + } + + public AbstractQuadsOnlyWriterFactory(Lang lang, Collection<Lang> altLangs) { + this.lang = lang; + if (altLangs != null) + this.alternateLangs = Collections.unmodifiableCollection(altLangs); + } + + @Override + public final Lang getPrimaryLanguage() { + return this.lang; + } + + @Override + public final Collection<Lang> getAlternativeLanguages() { + return this.alternateLangs; + } + + @Override + public final boolean canWriteQuads() { + return true; + } + + @Override + public final boolean canWriteTriples() { + return false; + } + + @Override + public abstract <TKey> RecordWriter<TKey, QuadWritable> createQuadWriter(Writer writer, Configuration config) + throws IOException; + + @Override + public final <TKey> RecordWriter<TKey, TripleWritable> createTripleWriter(Writer writer, Configuration config) + throws IOException { + throw new IOException(this.lang.getName() + " does not support writing triples"); + } + +}
http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/AbstractTriplesOnlyWriterFactory.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/AbstractTriplesOnlyWriterFactory.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/AbstractTriplesOnlyWriterFactory.java new file mode 100644 index 0000000..e45c3da --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/AbstractTriplesOnlyWriterFactory.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jena.hadoop.rdf.io.registry.writers; + +import java.io.IOException; +import java.io.Writer; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.jena.hadoop.rdf.io.registry.WriterFactory; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; + +/** + * Abstract writer factory for languages that only support triples + */ +public abstract class AbstractTriplesOnlyWriterFactory implements WriterFactory { + + private Lang lang; + private Collection<Lang> alternateLangs = Collections.unmodifiableList(Collections.<Lang> emptyList()); + + public AbstractTriplesOnlyWriterFactory(Lang lang) { + this(lang, (Collection<Lang>) null); + } + + public AbstractTriplesOnlyWriterFactory(Lang lang, Lang... altLangs) { + this(lang, Arrays.asList(altLangs)); + } + + public AbstractTriplesOnlyWriterFactory(Lang lang, Collection<Lang> altLangs) { + this.lang = lang; + if (altLangs != null) + this.alternateLangs = Collections.unmodifiableCollection(altLangs); + } + + @Override + public final Lang getPrimaryLanguage() { + return this.lang; + } + + @Override + public final Collection<Lang> getAlternativeLanguages() { + return this.alternateLangs; + } + + @Override + public final boolean canWriteQuads() { + return false; + } + + @Override + public final boolean canWriteTriples() { + return true; + } + + @Override + public final <TKey> RecordWriter<TKey, QuadWritable> createQuadWriter(Writer writer, Configuration config) + throws IOException { + throw new IOException(this.lang.getName() + " does not support writing quads"); + } + + @Override + public abstract <TKey> RecordWriter<TKey, TripleWritable> createTripleWriter(Writer writer, Configuration config) + throws IOException; + +} 
http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/AbstractWriterFactory.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/AbstractWriterFactory.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/AbstractWriterFactory.java new file mode 100644 index 0000000..669b9c4 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/AbstractWriterFactory.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.io.registry.writers; + +import java.io.IOException; +import java.io.Writer; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.jena.hadoop.rdf.io.registry.WriterFactory; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; + +/** + * Abstract writer factory for languages that support triples and quads + */ +public abstract class AbstractWriterFactory implements WriterFactory { + + private Lang lang; + private Collection<Lang> alternateLangs = Collections.unmodifiableList(Collections.<Lang>emptyList()); + + public AbstractWriterFactory(Lang lang) { + this(lang, (Collection<Lang>)null); + } + + public AbstractWriterFactory(Lang lang, Lang...altLangs) { + this(lang, Arrays.asList(altLangs)); + } + + public AbstractWriterFactory(Lang lang, Collection<Lang> altLangs) { + this.lang = lang; + if (altLangs != null) + this.alternateLangs = Collections.unmodifiableCollection(altLangs); + } + + @Override + public final Lang getPrimaryLanguage() { + return this.lang; + } + + @Override + public final Collection<Lang> getAlternativeLanguages() { + return this.alternateLangs; + } + + @Override + public final boolean canWriteQuads() { + return true; + } + + @Override + public final boolean canWriteTriples() { + return true; + } + + @Override + public abstract <TKey> RecordWriter<TKey, QuadWritable> createQuadWriter(Writer writer, Configuration config) throws IOException; + + @Override + public abstract <TKey> RecordWriter<TKey, TripleWritable> createTripleWriter(Writer writer, Configuration config) throws IOException; + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/JsonLDWriterFactory.java 
---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/JsonLDWriterFactory.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/JsonLDWriterFactory.java new file mode 100644 index 0000000..89e93ed --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/JsonLDWriterFactory.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jena.hadoop.rdf.io.registry.writers; + +import java.io.IOException; +import java.io.Writer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.jena.hadoop.rdf.io.output.writers.jsonld.JsonLDQuadWriter; +import org.apache.jena.hadoop.rdf.io.output.writers.jsonld.JsonLDTripleWriter; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; + +/** + * + */ +public class JsonLDWriterFactory extends AbstractWriterFactory { + + public JsonLDWriterFactory() { + super(Lang.JSONLD); + } + + @Override + public <TKey> RecordWriter<TKey, QuadWritable> createQuadWriter(Writer writer, Configuration config) + throws IOException { + return new JsonLDQuadWriter<>(writer); + } + + @Override + public <TKey> RecordWriter<TKey, TripleWritable> createTripleWriter(Writer writer, Configuration config) + throws IOException { + return new JsonLDTripleWriter<>(writer); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/NQuadsWriterFactory.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/NQuadsWriterFactory.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/NQuadsWriterFactory.java new file mode 100644 index 0000000..abbbd0f --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/NQuadsWriterFactory.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jena.hadoop.rdf.io.registry.writers; + +import java.io.IOException; +import java.io.Writer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.jena.hadoop.rdf.io.output.writers.nquads.NQuadsWriter; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.riot.Lang; + +/** + * + */ +public class NQuadsWriterFactory extends AbstractQuadsOnlyWriterFactory { + + public NQuadsWriterFactory() { + super(Lang.NQUADS, Lang.NQ); + } + + @Override + public <TKey> RecordWriter<TKey, QuadWritable> createQuadWriter(Writer writer, Configuration config) + throws IOException { + return new NQuadsWriter<TKey>(writer); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/NTriplesWriterFactory.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/NTriplesWriterFactory.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/NTriplesWriterFactory.java new file mode 100644 index 0000000..88c9551 --- /dev/null +++ 
b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/NTriplesWriterFactory.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jena.hadoop.rdf.io.registry.writers; + +import java.io.IOException; +import java.io.Writer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.jena.hadoop.rdf.io.output.writers.ntriples.NTriplesWriter; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; + +/** + * + */ +public class NTriplesWriterFactory extends AbstractTriplesOnlyWriterFactory { + + public NTriplesWriterFactory() { + super(Lang.NTRIPLES, Lang.NT); + } + + @Override + public <TKey> RecordWriter<TKey, TripleWritable> createTripleWriter(Writer writer, Configuration config) + throws IOException { + return new NTriplesWriter<TKey>(writer); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/RdfJsonWriterFactory.java ---------------------------------------------------------------------- diff --git 
a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/RdfJsonWriterFactory.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/RdfJsonWriterFactory.java new file mode 100644 index 0000000..8252422 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/RdfJsonWriterFactory.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jena.hadoop.rdf.io.registry.writers; + +import java.io.IOException; +import java.io.Writer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.jena.hadoop.rdf.io.output.writers.rdfjson.RdfJsonWriter; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; + +/** + * + */ +public class RdfJsonWriterFactory extends AbstractTriplesOnlyWriterFactory { + + public RdfJsonWriterFactory() { + super(Lang.RDFJSON); + } + + @Override + public <TKey> RecordWriter<TKey, TripleWritable> createTripleWriter(Writer writer, Configuration config) + throws IOException { + return new RdfJsonWriter<TKey>(writer); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/RdfXmlWriterFactory.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/RdfXmlWriterFactory.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/RdfXmlWriterFactory.java new file mode 100644 index 0000000..b4ac8e3 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/RdfXmlWriterFactory.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jena.hadoop.rdf.io.registry.writers; + +import java.io.IOException; +import java.io.Writer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.jena.hadoop.rdf.io.output.writers.rdfxml.RdfXmlWriter; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; + +/** + * + */ +public class RdfXmlWriterFactory extends AbstractTriplesOnlyWriterFactory { + + public RdfXmlWriterFactory() { + super(Lang.RDFXML); + } + + @Override + public <TKey> RecordWriter<TKey, TripleWritable> createTripleWriter(Writer writer, Configuration config) + throws IOException { + return new RdfXmlWriter<TKey>(writer); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/ThriftWriterFactory.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/ThriftWriterFactory.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/ThriftWriterFactory.java new file mode 100644 index 0000000..757472c --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/ThriftWriterFactory.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jena.hadoop.rdf.io.registry.writers; + +import java.io.IOException; +import java.io.Writer; +import java.nio.charset.Charset; + +import org.apache.commons.io.output.WriterOutputStream; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.jena.hadoop.rdf.io.output.writers.StreamRdfQuadWriter; +import org.apache.jena.hadoop.rdf.io.output.writers.StreamRdfTripleWriter; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.RDFLanguages; +import org.apache.jena.riot.thrift.StreamRDF2Thrift; + +/** + * + */ +public class ThriftWriterFactory extends AbstractWriterFactory { + + public ThriftWriterFactory() { + super(RDFLanguages.THRIFT); + } + + @Override + public <TKey> RecordWriter<TKey, QuadWritable> createQuadWriter(Writer writer, Configuration config) + throws IOException { + return new StreamRdfQuadWriter<TKey>(new StreamRDF2Thrift(new WriterOutputStream(writer, Charset.forName("utf-8")), + false), writer); + } + + @Override + public <TKey> RecordWriter<TKey, TripleWritable> createTripleWriter(Writer writer, Configuration config) + throws IOException { + return new StreamRdfTripleWriter<TKey>(new 
StreamRDF2Thrift(new WriterOutputStream(writer, Charset.forName("utf-8")), + false), writer); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/TriGWriterFactory.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/TriGWriterFactory.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/TriGWriterFactory.java new file mode 100644 index 0000000..6d8b08a --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/TriGWriterFactory.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jena.hadoop.rdf.io.registry.writers; + +import java.io.IOException; +import java.io.Writer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.jena.hadoop.rdf.io.output.writers.StreamRdfQuadWriter; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.writer.WriterStreamRDFBlocks; + +/** + * + */ +public class TriGWriterFactory extends AbstractQuadsOnlyWriterFactory { + + public TriGWriterFactory() { + super(Lang.TRIG); + } + + @Override + public <TKey> RecordWriter<TKey, QuadWritable> createQuadWriter(Writer writer, Configuration config) + throws IOException { + return new StreamRdfQuadWriter<TKey>(new WriterStreamRDFBlocks(writer), writer); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/TriXWriterFactory.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/TriXWriterFactory.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/TriXWriterFactory.java new file mode 100644 index 0000000..0e1b7b2 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/TriXWriterFactory.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jena.hadoop.rdf.io.registry.writers; + +import java.io.IOException; +import java.io.Writer; +import java.nio.charset.Charset; + +import org.apache.commons.io.output.WriterOutputStream; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.jena.hadoop.rdf.io.output.writers.StreamRdfQuadWriter; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.writer.StreamWriterTriX; + +/** + * + */ +public class TriXWriterFactory extends AbstractQuadsOnlyWriterFactory { + + public TriXWriterFactory() { + super(Lang.TRIX); + } + + @Override + public <TKey> RecordWriter<TKey, QuadWritable> createQuadWriter(Writer writer, Configuration config) + throws IOException { + return new StreamRdfQuadWriter<>(new StreamWriterTriX(new WriterOutputStream(writer, Charset.forName("utf-8"))), writer); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/TurtleWriterFactory.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/TurtleWriterFactory.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/TurtleWriterFactory.java new file mode 100644 index 0000000..c837f12 --- /dev/null +++ 
b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/registry/writers/TurtleWriterFactory.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jena.hadoop.rdf.io.registry.writers; + +import java.io.IOException; +import java.io.Writer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.jena.hadoop.rdf.io.output.writers.StreamRdfTripleWriter; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.writer.WriterStreamRDFBlocks; + +/** + * + */ +public class TurtleWriterFactory extends AbstractTriplesOnlyWriterFactory { + + public TurtleWriterFactory() { + super(Lang.TURTLE, Lang.TTL, Lang.N3); + } + + @Override + public <TKey> RecordWriter<TKey, TripleWritable> createTripleWriter(Writer writer, Configuration config) + throws IOException { + return new StreamRdfTripleWriter<>(new WriterStreamRDFBlocks(writer), writer); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/resources/META-INF/services/org.apache.jena.hadoop.rdf.io.registry.ReaderFactory 
---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/resources/META-INF/services/org.apache.jena.hadoop.rdf.io.registry.ReaderFactory b/jena-hadoop-rdf/jena-elephas-io/src/main/resources/META-INF/services/org.apache.jena.hadoop.rdf.io.registry.ReaderFactory new file mode 100644 index 0000000..ec0e48a --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/main/resources/META-INF/services/org.apache.jena.hadoop.rdf.io.registry.ReaderFactory @@ -0,0 +1,10 @@ +# Default Reader Factory implementations +org.apache.jena.hadoop.rdf.io.registry.readers.JsonLDReaderFactory +org.apache.jena.hadoop.rdf.io.registry.readers.NQuadsReaderFactory +org.apache.jena.hadoop.rdf.io.registry.readers.NTriplesReaderFactory +org.apache.jena.hadoop.rdf.io.registry.readers.RdfJsonReaderFactory +org.apache.jena.hadoop.rdf.io.registry.readers.RdfXmlReaderFactory +org.apache.jena.hadoop.rdf.io.registry.readers.ThriftReaderFactory +org.apache.jena.hadoop.rdf.io.registry.readers.TriGReaderFactory +org.apache.jena.hadoop.rdf.io.registry.readers.TriXReaderFactory +org.apache.jena.hadoop.rdf.io.registry.readers.TurtleReaderFactory \ No newline at end of file http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/main/resources/META-INF/services/org.apache.jena.hadoop.rdf.io.registry.WriterFactory ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/resources/META-INF/services/org.apache.jena.hadoop.rdf.io.registry.WriterFactory b/jena-hadoop-rdf/jena-elephas-io/src/main/resources/META-INF/services/org.apache.jena.hadoop.rdf.io.registry.WriterFactory new file mode 100644 index 0000000..164880d --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/main/resources/META-INF/services/org.apache.jena.hadoop.rdf.io.registry.WriterFactory @@ -0,0 +1,10 @@ +# Default Writer Factory implementations 
+org.apache.jena.hadoop.rdf.io.registry.writers.JsonLDWriterFactory +org.apache.jena.hadoop.rdf.io.registry.writers.NQuadsWriterFactory +org.apache.jena.hadoop.rdf.io.registry.writers.NTriplesWriterFactory +org.apache.jena.hadoop.rdf.io.registry.writers.RdfJsonWriterFactory +org.apache.jena.hadoop.rdf.io.registry.writers.RdfXmlWriterFactory +org.apache.jena.hadoop.rdf.io.registry.writers.ThriftWriterFactory +org.apache.jena.hadoop.rdf.io.registry.writers.TriGWriterFactory +org.apache.jena.hadoop.rdf.io.registry.writers.TriXWriterFactory +org.apache.jena.hadoop.rdf.io.registry.writers.TurtleWriterFactory \ No newline at end of file http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/RdfTriplesInputTestMapper.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/RdfTriplesInputTestMapper.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/RdfTriplesInputTestMapper.java new file mode 100644 index 0000000..5762fb7 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/RdfTriplesInputTestMapper.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
package org.apache.jena.hadoop.rdf.io;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.jena.hadoop.rdf.types.TripleWritable;
import org.apache.log4j.Logger;

/**
 * A test mapper which takes in line based RDF triple input and just produces
 * triples, logging each line number and triple it sees along the way.
 */
public class RdfTriplesInputTestMapper extends Mapper<LongWritable, TripleWritable, NullWritable, TripleWritable> {

    private static final Logger LOGGER = Logger.getLogger(RdfTriplesInputTestMapper.class);

    /**
     * Logs the input position and triple then emits the triple unchanged
     * under a null key.
     */
    @Override
    protected void map(LongWritable key, TripleWritable value, Context context)
            throws IOException, InterruptedException {
        // String concatenation invokes toString() on both writables, so the
        // logged message is identical to building it explicitly
        LOGGER.info("Line " + key + " => " + value);
        context.write(NullWritable.get(), value);
    }

}
package org.apache.jena.hadoop.rdf.io.input;

/**
 * Abstract tests for blocked quad input formats.
 * <p>
 * Unlike plain whole file quad inputs, blocked inputs can be split, hence
 * this overrides {@link #canSplitInputs()} to return true.
 */
public abstract class AbstractBlockedQuadInputFormatTests extends AbstractWholeFileQuadInputFormatTests {

    @Override
    protected boolean canSplitInputs() {
        // Blocked formats are splittable even though they are parsed as
        // whole-file style inputs
        return true;
    }
}
package org.apache.jena.hadoop.rdf.io.input;

/**
 * Abstract tests for blocked triple input formats.
 * <p>
 * Unlike plain whole file triple inputs, blocked inputs can be split, hence
 * this overrides {@link #canSplitInputs()} to return true.
 */
public abstract class AbstractBlockedTripleInputFormatTests extends AbstractWholeFileTripleInputFormatTests {

    @Override
    protected boolean canSplitInputs() {
        // Blocked formats are splittable even though they are parsed as
        // whole-file style inputs
        return true;
    }
}
package org.apache.jena.hadoop.rdf.io.input;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.jena.hadoop.rdf.io.HadoopIOConstants;
import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
import org.junit.After;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Abstract node tuple input format tests.
 * <p>
 * Concrete subclasses supply the input format under test plus generators
 * for good, bad and mixed tuple data; this class then provides a battery of
 * single-input, multiple-input, bad-input and split tests over fresh
 * temporary files created before each test.
 *
 * @param <TValue>
 *            Tuple type (e.g. Triple or Quad)
 * @param <T>
 *            Writable wrapper type for the tuple type
 */
public abstract class AbstractNodeTupleInputFormatTests<TValue, T extends AbstractNodeTupleWritable<TValue>> {

    private static final Logger LOG = LoggerFactory.getLogger(AbstractNodeTupleInputFormatTests.class);

    // Sizes (in tuples) and base filenames for the generated test inputs
    protected static final int EMPTY_SIZE = 0, SMALL_SIZE = 100, LARGE_SIZE = 10000, BAD_SIZE = 100, MIXED_SIZE = 100;
    protected static final String EMPTY = "empty";
    protected static final String SMALL = "small";
    protected static final String LARGE = "large";
    protected static final String BAD = "bad";
    protected static final String MIXED = "mixed";

    /**
     * Temporary folder for the tests
     */
    @Rule
    public TemporaryFolder folder = new TemporaryFolder();

    // Input files regenerated by prepareInputs() before every test
    protected File empty, small, large, bad, mixed;

    /**
     * Prepares the inputs for the tests
     *
     * @throws IOException
     */
    @Before
    public void beforeTest() throws IOException {
        this.prepareInputs();
    }

    /**
     * Cleans up the inputs after each test
     */
    @After
    public void afterTest() {
        // Should be unnecessary since JUnit will clean up the temporary folder
        // anyway but best to do this regardless
        if (empty != null)
            empty.delete();
        if (small != null)
            small.delete();
        if (large != null)
            large.delete();
        if (bad != null)
            bad.delete();
        if (mixed != null)
            mixed.delete();
    }

    /**
     * Prepares a fresh configuration; subclasses may override to inject
     * format-specific settings.
     *
     * @return Configuration
     */
    protected Configuration prepareConfiguration() {
        Configuration config = new Configuration(true);
        // Nothing else to do
        return config;
    }

    /**
     * Prepares the inputs, generating one file per test scenario using the
     * subclass-provided tuple generators.
     *
     * @throws IOException
     */
    protected void prepareInputs() throws IOException {
        String ext = this.getFileExtension();
        empty = folder.newFile(EMPTY + ext);
        this.generateTuples(empty, EMPTY_SIZE);
        small = folder.newFile(SMALL + ext);
        this.generateTuples(small, SMALL_SIZE);
        large = folder.newFile(LARGE + ext);
        this.generateTuples(large, LARGE_SIZE);
        bad = folder.newFile(BAD + ext);
        this.generateBadTuples(bad, BAD_SIZE);
        mixed = folder.newFile(MIXED + ext);
        this.generateMixedTuples(mixed, MIXED_SIZE);
    }

    /**
     * Gets the extra file extension to add to the filenames
     *
     * @return File extension
     */
    protected abstract String getFileExtension();

    /**
     * Generates tuples used for tests
     *
     * @param f
     *            File
     * @param num
     *            Number of tuples to generate
     * @throws IOException
     */
    protected final void generateTuples(File f, int num) throws IOException {
        this.generateTuples(this.getOutputStream(f), num);
    }

    /**
     * Gets the output stream to use for generating tuples
     *
     * @param f
     *            File
     * @return Output Stream
     * @throws IOException
     */
    protected OutputStream getOutputStream(File f) throws IOException {
        // Truncate any existing content (append = false)
        return new FileOutputStream(f, false);
    }

    /**
     * Generates tuples used for tests
     *
     * @param output
     *            Output Stream to write to
     * @param num
     *            Number of tuples to generate
     * @throws IOException
     */
    protected abstract void generateTuples(OutputStream output, int num) throws IOException;

    /**
     * Generates bad tuples used for tests
     *
     * @param f
     *            File
     * @param num
     *            Number of bad tuples to generate
     * @throws IOException
     */
    protected final void generateBadTuples(File f, int num) throws IOException {
        this.generateBadTuples(this.getOutputStream(f), num);
    }

    /**
     * Generates bad tuples used for tests
     *
     * @param output
     *            Output Stream to write to
     * @param num
     *            Number of bad tuples to generate
     * @throws IOException
     */
    protected abstract void generateBadTuples(OutputStream output, int num) throws IOException;

    /**
     * Generates a mixture of good and bad tuples used for tests
     *
     * @param f
     *            File
     * @param num
     *            Number of tuples to generate, they should be a 50/50 mix of
     *            good and bad tuples
     * @throws IOException
     */
    protected final void generateMixedTuples(File f, int num) throws IOException {
        this.generateMixedTuples(this.getOutputStream(f), num);
    }

    /**
     * Generates a mixture of good and bad tuples used for tests
     *
     * @param output
     *            Output Stream to write to
     * @param num
     *            Number of tuples to generate, they should be a 50/50 mix of
     *            good and bad tuples
     * @throws IOException
     */
    protected abstract void generateMixedTuples(OutputStream output, int num) throws IOException;

    /**
     * Adds an input path to the job configuration
     *
     * @param f
     *            File
     * @param config
     *            Configuration
     * @param job
     *            Job
     * @throws IOException
     */
    protected void addInputPath(File f, Configuration config, Job job) throws IOException {
        FileSystem fs = FileSystem.getLocal(config);
        Path inputPath = fs.makeQualified(new Path(f.getAbsolutePath()));
        FileInputFormat.addInputPath(job, inputPath);
    }

    /**
     * Counts the tuples produced by a record reader, asserting sensible
     * progress reporting along the way: progress must be exactly 0.0 or 1.0
     * before the first read, within (0.0, 1.0] during reading and 1.0 once
     * the reader is exhausted.
     *
     * @param reader
     *            Record reader to drain
     * @return Number of tuples read
     * @throws IOException
     * @throws InterruptedException
     */
    protected final int countTuples(RecordReader<LongWritable, T> reader) throws IOException, InterruptedException {
        int count = 0;

        // Check initial progress
        LOG.info(String.format("Initial Reported Progress %f", reader.getProgress()));
        float progress = reader.getProgress();
        if (Float.compare(0.0f, progress) == 0) {
            Assert.assertEquals(0.0d, reader.getProgress(), 0.0d);
        } else if (Float.compare(1.0f, progress) == 0) {
            // If reader is reported 1.0 straight away then we expect there to
            // be no key values
            Assert.assertEquals(1.0d, reader.getProgress(), 0.0d);
            Assert.assertFalse(reader.nextKeyValue());
        } else {
            Assert.fail(String.format(
                    "Expected progress of 0.0 or 1.0 before reader has been accessed for first time but got %f",
                    progress));
        }

        // Count tuples
        boolean debug = LOG.isDebugEnabled();
        while (reader.nextKeyValue()) {
            count++;
            progress = reader.getProgress();
            if (debug)
                LOG.debug(String.format("Current Reported Progress %f", progress));
            Assert.assertTrue(String.format("Progress should be in the range 0.0 < p <= 1.0 but got %f", progress),
                    progress > 0.0f && progress <= 1.0f);
        }
        reader.close();
        LOG.info(String.format("Got %d tuples from this record reader", count));

        // Check final progress
        // NOTE(review): getProgress() is queried after close() here - assumes
        // readers keep reporting 1.0 once closed
        LOG.info(String.format("Final Reported Progress %f", reader.getProgress()));
        Assert.assertEquals(1.0d, reader.getProgress(), 0.0d);

        return count;
    }

    /**
     * Drains a record reader and asserts it yields exactly the expected
     * number of tuples.
     *
     * @param reader
     *            Record reader to drain
     * @param expected
     *            Expected tuple count
     * @throws IOException
     * @throws InterruptedException
     */
    protected final void checkTuples(RecordReader<LongWritable, T> reader, int expected) throws IOException,
            InterruptedException {
        Assert.assertEquals(expected, this.countTuples(reader));
    }

    /**
     * Runs a test with a single input
     *
     * @param input
     *            Input
     * @param expectedSplits
     *            Expected number of splits
     * @param expectedTuples
     *            Expected tuples
     * @throws IOException
     * @throws InterruptedException
     */
    protected final void testSingleInput(File input, int expectedSplits, int expectedTuples) throws IOException,
            InterruptedException {
        // Prepare configuration
        Configuration config = this.prepareConfiguration();
        this.testSingleInput(config, input, expectedSplits, expectedTuples);
    }

    /**
     * Runs a test with a single input
     *
     * @param config
     *            Configuration
     * @param input
     *            Input
     * @param expectedSplits
     *            Expected number of splits
     * @param expectedTuples
     *            Expected tuples per split
     * @throws IOException
     * @throws InterruptedException
     */
    protected final void testSingleInput(Configuration config, File input, int expectedSplits, int expectedTuples)
            throws IOException, InterruptedException {
        // Set up fake job
        InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        this.addInputPath(input, job.getConfiguration(), job);
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        Assert.assertEquals(1, FileInputFormat.getInputPaths(context).length);
        // Large enough that splittable formats produce a single split here
        NLineInputFormat.setNumLinesPerSplit(job, LARGE_SIZE);

        // Check splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(expectedSplits, splits.size());

        // Check tuples
        for (InputSplit split : splits) {
            TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
            reader.initialize(split, taskContext);
            this.checkTuples(reader, expectedTuples);
        }
    }

    /**
     * Gets the input format under test.
     *
     * @return Input format instance
     */
    protected abstract InputFormat<LongWritable, T> getInputFormat();

    /**
     * Basic tuples input test
     *
     * @throws IOException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    @Test
    public final void single_input_01() throws IOException, InterruptedException, ClassNotFoundException {
        // An empty file yields 0 splits for splittable formats, 1 otherwise
        testSingleInput(empty, this.canSplitInputs() ? 0 : 1, EMPTY_SIZE);
    }

    /**
     * Basic tuples input test
     *
     * @throws IOException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    @Test
    public final void single_input_02() throws IOException, InterruptedException, ClassNotFoundException {
        testSingleInput(small, 1, SMALL_SIZE);
    }

    /**
     * Basic tuples input test
     *
     * @throws IOException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    @Test
    public final void single_input_03() throws IOException, InterruptedException, ClassNotFoundException {
        testSingleInput(large, 1, LARGE_SIZE);
    }

    /**
     * Basic tuples input test
     *
     * @throws IOException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    @Test
    public final void single_input_04() throws IOException, InterruptedException, ClassNotFoundException {
        // Bad tuples are ignored by default so an all-bad input yields 0
        testSingleInput(bad, 1, 0);
    }

    /**
     * Basic tuples input test
     *
     * @throws IOException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    @Test
    public final void single_input_05() throws IOException, InterruptedException, ClassNotFoundException {
        // Mixed input is a 50/50 mix so only half the tuples are recovered
        testSingleInput(mixed, 1, MIXED_SIZE / 2);
    }

    /**
     * Tests behaviour when ignoring bad tuples is disabled
     *
     * @throws InterruptedException
     * @throws IOException
     */
    @Test(expected = IOException.class)
    public final void fail_on_bad_input_01() throws IOException, InterruptedException {
        Configuration config = this.prepareConfiguration();
        config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false);
        Assert.assertFalse(config.getBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, true));
        testSingleInput(config, bad, 1, 0);
    }

    /**
     * Tests behaviour when ignoring bad tuples is disabled
     *
     * @throws InterruptedException
     * @throws IOException
     */
    @Test(expected = IOException.class)
    public final void fail_on_bad_input_02() throws IOException, InterruptedException {
        Configuration config = this.prepareConfiguration();
        config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false);
        Assert.assertFalse(config.getBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, true));
        testSingleInput(config, mixed, 1, MIXED_SIZE / 2);
    }

    /**
     * Runs a multiple input test
     *
     * @param inputs
     *            Inputs
     * @param expectedSplits
     *            Number of splits expected
     * @param expectedTuples
     *            Number of tuples expected across all splits
     * @throws IOException
     * @throws InterruptedException
     */
    protected final void testMultipleInputs(File[] inputs, int expectedSplits, int expectedTuples) throws IOException,
            InterruptedException {
        // Prepare configuration and inputs
        Configuration config = this.prepareConfiguration();

        // Set up fake job
        InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        for (File input : inputs) {
            this.addInputPath(input, job.getConfiguration(), job);
        }
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length);
        NLineInputFormat.setNumLinesPerSplit(job, expectedTuples);

        // Check splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(expectedSplits, splits.size());

        // Check tuples - totals are summed across splits since tuples may be
        // distributed between them in any fashion
        int count = 0;
        for (InputSplit split : splits) {
            TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
            reader.initialize(split, taskContext);
            count += this.countTuples(reader);
        }
        Assert.assertEquals(expectedTuples, count);
    }

    /**
     * tuples test with multiple inputs
     *
     * @throws IOException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    @Test
    public final void multiple_inputs_01() throws IOException, InterruptedException, ClassNotFoundException {
        // Splittable formats produce no split for the empty input, hence 2 vs 3
        testMultipleInputs(new File[] { empty, small, large }, this.canSplitInputs() ? 2 : 3, EMPTY_SIZE + SMALL_SIZE
                + LARGE_SIZE);
    }

    /**
     * tuples test with multiple inputs
     *
     * @throws IOException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    @Test
    public final void multiple_inputs_02() throws IOException, InterruptedException, ClassNotFoundException {
        // Uses the whole temporary folder (all 5 generated files) as input;
        // the bad file contributes nothing and mixed contributes half
        testMultipleInputs(new File[] { folder.getRoot() }, this.canSplitInputs() ? 4 : 5, EMPTY_SIZE + SMALL_SIZE
                + LARGE_SIZE + (MIXED_SIZE / 2));
    }

    /**
     * Runs a split input test, validating each produced split before
     * reading it and checking the total tuples recovered.
     *
     * @param config
     *            Configuration
     * @param inputs
     *            Inputs
     * @param expectedSplits
     *            Number of splits expected
     * @param expectedTuples
     *            Number of tuples expected across all splits
     * @throws IOException
     * @throws InterruptedException
     */
    protected final void testSplitInputs(Configuration config, File[] inputs, int expectedSplits, int expectedTuples)
            throws IOException, InterruptedException {
        // Set up fake job
        InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        for (File input : inputs) {
            this.addInputPath(input, job.getConfiguration(), job);
        }
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length);

        // Check splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(expectedSplits, splits.size());

        // Check tuples
        int count = 0;
        for (InputSplit split : splits) {
            // Validate split
            Assert.assertTrue(this.isValidSplit(split, config));

            // Read split
            TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
            reader.initialize(split, taskContext);
            count += this.countTuples(reader);
        }
        Assert.assertEquals(expectedTuples, count);
    }

    /**
     * Determines whether an input split is valid
     *
     * @param split
     *            Input split
     * @param config
     *            Configuration
     * @return True if a valid split, false otherwise
     * @throws IOException
     */
    protected boolean isValidSplit(InputSplit split, Configuration config) throws IOException {
        return split instanceof FileSplit;
    }

    /**
     * Indicates whether inputs can be split, defaults to true
     *
     * @return Whether inputs can be split
     */
    protected boolean canSplitInputs() {
        return true;
    }

    /**
     * Tests for input splitting
     *
     * @throws IOException
     * @throws InterruptedException
     * @throws ClassNotFoundException
     */
    @Test
    public final void split_input_01() throws IOException, InterruptedException, ClassNotFoundException {
        Assume.assumeTrue(this.canSplitInputs());

        Configuration config = this.prepareConfiguration();
        config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false);
        Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE));
        // Default lines-per-map is 1, so the small (100 tuple) input should
        // yield 100 splits
        this.testSplitInputs(config, new File[] { small }, 100, SMALL_SIZE);
    }

    /**
     * Tests for input splitting
     *
     * @throws IOException
     * @throws InterruptedException
     * @throws ClassNotFoundException
     */
    @Test
    public final void split_input_02() throws IOException, InterruptedException, ClassNotFoundException {
        Assume.assumeTrue(this.canSplitInputs());

        Configuration config = this.prepareConfiguration();
        config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false);
        config.setLong(NLineInputFormat.LINES_PER_MAP, 10);
        Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE));
        // 100 tuples at 10 lines per map = 10 splits
        this.testSplitInputs(config, new File[] { small }, 10, SMALL_SIZE);
    }

    /**
     * Tests for input splitting
     *
     * @throws IOException
     * @throws InterruptedException
     * @throws ClassNotFoundException
     */
    @Test
    public final void split_input_03() throws IOException, InterruptedException, ClassNotFoundException {
        Assume.assumeTrue(this.canSplitInputs());

        Configuration config = this.prepareConfiguration();
        config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false);
        config.setLong(NLineInputFormat.LINES_PER_MAP, 100);
        Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE));
        // 10000 tuples at 100 lines per map = 100 splits
        this.testSplitInputs(config, new File[] { large }, 100, LARGE_SIZE);
    }
}
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java new file mode 100644 index 0000000..78d7f33 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.jena.hadoop.rdf.io.input;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;

import org.apache.jena.hadoop.rdf.types.QuadWritable;

import com.hp.hpl.jena.sparql.core.Quad;

/**
 * Abstract tests for Quad input formats, generating line based NQuads style
 * test data.
 */
public abstract class AbstractQuadsInputFormatTests extends AbstractNodeTupleInputFormatTests<Quad, QuadWritable> {

    private static final Charset UTF8 = Charset.forName("utf-8");

    // Builds a single well-formed quad line for index i
    private static byte[] goodTuple(int i) {
        return ("<http://subjects/" + i + "> <http://predicate> \"" + i + "\" <http://graphs/" + i + "> .\n")
                .getBytes(UTF8);
    }

    // Builds a single malformed line (unterminated IRI)
    private static byte[] badTuple() {
        return "<http://broken\n".getBytes(UTF8);
    }

    @Override
    protected void generateTuples(OutputStream output, int num) throws IOException {
        for (int i = 0; i < num; i++) {
            output.write(goodTuple(i));
        }
        output.flush();
        output.close();
    }

    @Override
    protected void generateBadTuples(OutputStream output, int num) throws IOException {
        for (int i = 0; i < num; i++) {
            output.write(badTuple());
        }
        output.flush();
        output.close();
    }

    @Override
    protected void generateMixedTuples(OutputStream output, int num) throws IOException {
        // Alternate good/bad starting with a good tuple (odd indices are bad)
        for (int i = 0; i < num; i++) {
            output.write((i & 1) == 1 ? badTuple() : goodTuple(i));
        }
        output.flush();
        output.close();
    }

}
package org.apache.jena.hadoop.rdf.io.input;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;

import org.apache.jena.hadoop.rdf.types.TripleWritable;

import com.hp.hpl.jena.graph.Triple;

/**
 * Abstract tests for Triple input formats, generating line based NTriples
 * style test data.
 */
public abstract class AbstractTriplesInputFormatTests extends AbstractNodeTupleInputFormatTests<Triple, TripleWritable> {

    private static final Charset UTF8 = Charset.forName("utf-8");

    // Builds a single well-formed triple line for index i
    private static byte[] goodTuple(int i) {
        return ("<http://subjects/" + i + "> <http://predicate> \"" + i + "\" .\n").getBytes(UTF8);
    }

    // Builds a single malformed line (unterminated IRI)
    private static byte[] badTuple() {
        return "<http://broken\n".getBytes(UTF8);
    }

    @Override
    protected void generateTuples(OutputStream output, int num) throws IOException {
        for (int i = 0; i < num; i++) {
            output.write(goodTuple(i));
        }
        output.flush();
        output.close();
    }

    @Override
    protected void generateBadTuples(OutputStream output, int num) throws IOException {
        for (int i = 0; i < num; i++) {
            output.write(badTuple());
        }
        output.flush();
        output.close();
    }

    @Override
    protected void generateMixedTuples(OutputStream output, int num) throws IOException {
        // Alternate good/bad starting with a good tuple (odd indices are bad)
        for (int i = 0; i < num; i++) {
            output.write((i & 1) == 1 ? badTuple() : goodTuple(i));
        }
        output.flush();
        output.close();
    }

}
+ */ + +package org.apache.jena.hadoop.rdf.io.input; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.Charset; + +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RDFDataMgr; +import org.apache.jena.riot.RDFWriterRegistry; + +import com.hp.hpl.jena.query.Dataset; +import com.hp.hpl.jena.query.DatasetFactory; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * Abstract tests for Quad input formats + * + * + * + */ +public abstract class AbstractWholeFileQuadInputFormatTests extends AbstractNodeTupleInputFormatTests<Quad, QuadWritable> { + + private static final Charset utf8 = Charset.forName("utf-8"); + + @Override + protected boolean canSplitInputs() { + return false; + } + + private void writeTuples(Dataset ds, OutputStream output) { + RDFDataMgr.write(output, ds, RDFWriterRegistry.defaultSerialization(this.getRdfLanguage())); + } + + /** + * Gets the RDF language to write out generate tuples in + * + * @return RDF language + */ + protected abstract Lang getRdfLanguage(); + + private void writeGoodTuples(OutputStream output, int num) throws IOException { + Dataset ds = DatasetFactory.createMem(); + Model m = ModelFactory.createDefaultModel(); + Resource currSubj = m.createResource("http://example.org/subjects/0"); + Property predicate = m.createProperty("http://example.org/predicate"); + for (int i = 0; i < num; i++) { + if (i % 100 == 0) { + ds.addNamedModel("http://example.org/graphs/" + (i / 100), m); + m = ModelFactory.createDefaultModel(); + } + if (i % 10 == 0) { + currSubj = m.createResource("http://example.org/subjects/" + (i / 10)); + } + m.add(currSubj, predicate, m.createTypedLiteral(i)); + } + if (!m.isEmpty()) { + ds.addNamedModel("http://example.org/graphs/extra", m); + } 
+ this.writeTuples(ds, output); + } + + @Override + protected final void generateTuples(OutputStream output, int num) throws IOException { + this.writeGoodTuples(output, num); + output.close(); + } + + @Override + protected final void generateMixedTuples(OutputStream output, int num) throws IOException { + // Write good data + this.writeGoodTuples(output, num / 2); + + // Write junk data + byte[] junk = "junk data\n".getBytes(utf8); + for (int i = 0; i < num / 2; i++) { + output.write(junk); + } + + output.flush(); + output.close(); + } + + @Override + protected final void generateBadTuples(OutputStream output, int num) throws IOException { + byte[] junk = "junk data\n".getBytes(utf8); + for (int i = 0; i < num; i++) { + output.write(junk); + } + output.flush(); + output.close(); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java ---------------------------------------------------------------------- diff --git a/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java new file mode 100644 index 0000000..b68d662 --- /dev/null +++ b/jena-hadoop-rdf/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.hadoop.rdf.io.input;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

import org.apache.jena.hadoop.rdf.types.TripleWritable;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;

import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.Resource;

/**
 * Abstract tests for Triple input formats that must consume whole files (the
 * serialization chosen by {@link #getRdfLanguage()} is not line-splittable).
 */
public abstract class AbstractWholeFileTripleInputFormatTests extends AbstractNodeTupleInputFormatTests<Triple, TripleWritable> {

    // UTF-8 is guaranteed present on every JVM; StandardCharsets avoids the
    // runtime charset-name lookup that Charset.forName("utf-8") performs.
    private static final Charset utf8 = StandardCharsets.UTF_8;

    // Non-RDF bytes used to corrupt the input; encoded once.
    private static final byte[] JUNK = "junk data\n".getBytes(utf8);

    @Override
    protected boolean canSplitInputs() {
        // Whole-file formats cannot be split into smaller input splits
        return false;
    }

    /**
     * Serializes the model to the output in the test's RDF language.
     *
     * @param m Model to write
     * @param output Destination stream
     */
    private void writeTuples(Model m, OutputStream output) {
        RDFDataMgr.write(output, m, this.getRdfLanguage());
    }

    /**
     * Gets the RDF language to write out generated tuples in
     *
     * @return RDF language
     */
    protected abstract Lang getRdfLanguage();

    /**
     * Builds a model containing {@code num} triples; subjects rotate every 10
     * tuples. Extracted to remove the duplication between
     * {@link #generateTuples(OutputStream, int)} and
     * {@link #generateMixedTuples(OutputStream, int)}.
     *
     * @param num Number of triples to generate
     * @return Populated model
     */
    private static Model buildGoodTuples(int num) {
        Model m = ModelFactory.createDefaultModel();
        Resource currSubj = m.createResource("http://example.org/subjects/0");
        Property predicate = m.createProperty("http://example.org/predicate");
        for (int i = 0; i < num; i++) {
            if (i % 10 == 0) {
                currSubj = m.createResource("http://example.org/subjects/" + (i / 10));
            }
            m.add(currSubj, predicate, m.createTypedLiteral(i));
        }
        return m;
    }

    /**
     * Writes {@code num} well-formed triples to the output.
     *
     * @param output Destination stream; closed on completion
     * @param num Number of triples to generate
     * @throws IOException If writing fails
     */
    @Override
    protected final void generateTuples(OutputStream output, int num) throws IOException {
        this.writeTuples(buildGoodTuples(num), output);
        output.close();
    }

    /**
     * Writes roughly half good triples followed by half junk bytes (for odd
     * {@code num} one line fewer than {@code num} total is produced).
     *
     * @param output Destination stream; flushed and closed on completion
     * @param num Approximate total number of lines to generate
     * @throws IOException If writing fails
     */
    @Override
    protected final void generateMixedTuples(OutputStream output, int num) throws IOException {
        // Write good data
        this.writeTuples(buildGoodTuples(num / 2), output);

        // Write junk data
        for (int i = 0; i < num / 2; i++) {
            output.write(JUNK);
        }

        output.flush();
        output.close();
    }

    /**
     * Writes {@code num} lines of junk to the output.
     *
     * @param output Destination stream; flushed and closed on completion
     * @param num Number of junk lines to generate
     * @throws IOException If writing fails
     */
    @Override
    protected final void generateBadTuples(OutputStream output, int num) throws IOException {
        for (int i = 0; i < num; i++) {
            output.write(JUNK);
        }
        output.flush();
        output.close();
    }
}
