http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/dao/accumulo.rya/src/test/java/mvm/rya/accumulo/mr/upgrade/UpgradeObjectSerializationTest.java ---------------------------------------------------------------------- diff --git a/dao/accumulo.rya/src/test/java/mvm/rya/accumulo/mr/upgrade/UpgradeObjectSerializationTest.java b/dao/accumulo.rya/src/test/java/mvm/rya/accumulo/mr/upgrade/UpgradeObjectSerializationTest.java deleted file mode 100644 index b138292..0000000 --- a/dao/accumulo.rya/src/test/java/mvm/rya/accumulo/mr/upgrade/UpgradeObjectSerializationTest.java +++ /dev/null @@ -1,119 +0,0 @@ -package mvm.rya.accumulo.mr.upgrade; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import mvm.rya.api.resolver.impl.*; -import org.junit.Test; - -import static mvm.rya.accumulo.mr.upgrade.Upgrade322Tool.UpgradeObjectSerialization; -import static org.junit.Assert.*; - -public class UpgradeObjectSerializationTest { - - @Test - public void testBooleanUpgrade() throws Exception { - String object = "true"; - final UpgradeObjectSerialization upgradeObjectSerialization - = new UpgradeObjectSerialization(); - final String upgrade = upgradeObjectSerialization - .upgrade(object, BooleanRyaTypeResolver.BOOLEAN_LITERAL_MARKER); - - assertEquals("1", upgrade); - } - - @Test - public void testBooleanUpgradeFalse() throws Exception { - String object = "false"; - final UpgradeObjectSerialization upgradeObjectSerialization - = new UpgradeObjectSerialization(); - final String upgrade = upgradeObjectSerialization - .upgrade(object, BooleanRyaTypeResolver.BOOLEAN_LITERAL_MARKER); - - assertEquals("0", upgrade); - } - - @Test - public void testByteUpgradeLowest() throws Exception { - String object = "-127"; - final UpgradeObjectSerialization upgradeObjectSerialization - = new UpgradeObjectSerialization(); - final String upgrade = upgradeObjectSerialization - .upgrade(object, ByteRyaTypeResolver.LITERAL_MARKER); - - assertEquals("81", upgrade); - } - - @Test - public void testByteUpgradeHighest() throws Exception { - String object = "127"; - final UpgradeObjectSerialization upgradeObjectSerialization - = new UpgradeObjectSerialization(); - final String upgrade = upgradeObjectSerialization - .upgrade(object, ByteRyaTypeResolver.LITERAL_MARKER); - - assertEquals("7f", upgrade); - } - - @Test - public void testLongUpgrade() throws Exception { - String object = "00000000000000000010"; - final UpgradeObjectSerialization upgradeObjectSerialization - = new UpgradeObjectSerialization(); - final String upgrade = upgradeObjectSerialization - .upgrade(object, LongRyaTypeResolver.LONG_LITERAL_MARKER); - - assertEquals("800000000000000a", upgrade); - } - - @Test - public void testIntUpgrade() throws Exception { - String object = "00000000010"; - final UpgradeObjectSerialization upgradeObjectSerialization - = new UpgradeObjectSerialization(); - final String upgrade = upgradeObjectSerialization - .upgrade(object, IntegerRyaTypeResolver.INTEGER_LITERAL_MARKER); - - assertEquals("8000000a", upgrade); - } - - @Test - public void testDateTimeUpgrade() throws Exception { - String object = "9223370726404375807"; - final UpgradeObjectSerialization upgradeObjectSerialization - = new UpgradeObjectSerialization(); - final String upgrade = upgradeObjectSerialization - .upgrade(object, DateTimeRyaTypeResolver.DATETIME_LITERAL_MARKER); - - assertEquals("800001311cee3b00", upgrade); - } - - @Test - public void testDoubleUpgrade() throws Exception { - String object = "00001 1.0"; - final UpgradeObjectSerialization upgradeObjectSerialization - = new UpgradeObjectSerialization(); - final String upgrade = upgradeObjectSerialization - .upgrade(object, DoubleRyaTypeResolver.DOUBLE_LITERAL_MARKER); - - assertEquals("c024000000000000", upgrade); - } -}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/dao/accumulo.rya/src/test/resources/namedgraphs.trig ---------------------------------------------------------------------- diff --git a/dao/accumulo.rya/src/test/resources/namedgraphs.trig b/dao/accumulo.rya/src/test/resources/namedgraphs.trig deleted file mode 100644 index b647632..0000000 --- a/dao/accumulo.rya/src/test/resources/namedgraphs.trig +++ /dev/null @@ -1,7 +0,0 @@ -@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . -@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . -@prefix swp: <http://www.w3.org/2004/03/trix/swp-1/> . -@prefix dc: <http://purl.org/dc/elements/1.1/> . -@prefix ex: <http://www.example.org/vocabulary#> . -@prefix : <http://www.example.org/exampleDocument#> . -:G1 { :Monica ex:name "Monica Murphy" . } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/dao/accumulo.rya/src/test/resources/test.ntriples ---------------------------------------------------------------------- diff --git a/dao/accumulo.rya/src/test/resources/test.ntriples b/dao/accumulo.rya/src/test/resources/test.ntriples deleted file mode 100644 index 26a0a17..0000000 --- a/dao/accumulo.rya/src/test/resources/test.ntriples +++ /dev/null @@ -1 +0,0 @@ -<urn:lubm:rdfts#GraduateStudent01> <urn:lubm:rdfts#hasFriend> <urn:lubm:rdfts#GraduateStudent02> . \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullFreeTextIndexer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullFreeTextIndexer.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullFreeTextIndexer.java deleted file mode 100644 index 147050f..0000000 --- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullFreeTextIndexer.java +++ /dev/null @@ -1,102 +0,0 @@ -package mvm.rya.accumulo.mr; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import info.aduna.iteration.CloseableIteration; - -import java.io.IOException; -import java.util.Set; - -import mvm.rya.accumulo.experimental.AbstractAccumuloIndexer; -import mvm.rya.api.RdfCloudTripleStoreConfiguration; -import mvm.rya.api.domain.RyaStatement; -import mvm.rya.indexing.FreeTextIndexer; -import mvm.rya.indexing.StatementConstraints; - -import org.apache.accumulo.core.client.Connector; -import org.apache.hadoop.conf.Configuration; -import org.openrdf.model.Statement; -import org.openrdf.model.URI; -import org.openrdf.query.QueryEvaluationException; - -public class NullFreeTextIndexer extends AbstractAccumuloIndexer implements FreeTextIndexer { - - @Override - public String getTableName() { - return null; - } - - @Override - public void storeStatement(RyaStatement statement) throws IOException { - } - - @Override - public Configuration getConf() { - return null; - } - - @Override - public void setConf(Configuration arg0) { - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryText(String query, StatementConstraints contraints) - throws IOException { - return null; - } - - @Override - public Set<URI> getIndexablePredicates() { - return null; - } - - @Override - public void init() { - // TODO Auto-generated method stub - - } - - @Override - public void setConnector(Connector connector) { - // TODO Auto-generated method stub - - } - - @Override - public void destroy() { - // TODO Auto-generated method stub - - } - - @Override - public void purge(RdfCloudTripleStoreConfiguration configuration) { - // TODO Auto-generated method stub - - } - - @Override - public void dropAndDestroy() { - // TODO Auto-generated method stub - - } - -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullGeoIndexer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullGeoIndexer.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullGeoIndexer.java deleted file mode 100644 index fe26f6f..0000000 --- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullGeoIndexer.java +++ /dev/null @@ -1,153 +0,0 @@ -package mvm.rya.accumulo.mr; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import info.aduna.iteration.CloseableIteration; - -import java.io.IOException; -import java.util.Set; - -import mvm.rya.accumulo.experimental.AbstractAccumuloIndexer; -import mvm.rya.api.RdfCloudTripleStoreConfiguration; -import mvm.rya.api.domain.RyaStatement; -import mvm.rya.indexing.GeoIndexer; -import mvm.rya.indexing.StatementConstraints; - -import org.apache.accumulo.core.client.Connector; -import org.apache.hadoop.conf.Configuration; -import org.openrdf.model.Statement; -import org.openrdf.model.URI; -import org.openrdf.query.QueryEvaluationException; - -import com.vividsolutions.jts.geom.Geometry; - -public class NullGeoIndexer extends AbstractAccumuloIndexer implements GeoIndexer { - - @Override - public String getTableName() { - - return null; - } - - @Override - public void storeStatement(RyaStatement statement) throws IOException { - - - } - - @Override - public Configuration getConf() { - - return null; - } - - @Override - public void setConf(Configuration arg0) { - - - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryEquals(Geometry query, StatementConstraints contraints) { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryDisjoint(Geometry query, StatementConstraints contraints) { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryIntersects(Geometry query, StatementConstraints contraints) { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryTouches(Geometry query, StatementConstraints contraints) { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryCrosses(Geometry query, StatementConstraints contraints) { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryWithin(Geometry query, StatementConstraints contraints) { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryContains(Geometry query, StatementConstraints contraints) { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryOverlaps(Geometry query, StatementConstraints contraints) { - - return null; - } - - @Override - public Set<URI> getIndexablePredicates() { - - return null; - } - - @Override - public void init() { - // TODO Auto-generated method stub - - } - - @Override - public void setConnector(Connector connector) { - // TODO Auto-generated method stub - - } - - @Override - public void destroy() { - // TODO Auto-generated method stub - - } - - @Override - public void purge(RdfCloudTripleStoreConfiguration configuration) { - // TODO Auto-generated method stub - - } - - @Override - public void dropAndDestroy() { - // TODO Auto-generated method stub - - } - -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullTemporalIndexer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullTemporalIndexer.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullTemporalIndexer.java deleted file mode 100644 index cbe36b8..0000000 --- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullTemporalIndexer.java +++ /dev/null @@ -1,186 +0,0 @@ -package mvm.rya.accumulo.mr; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -import info.aduna.iteration.CloseableIteration; - -import java.io.IOException; -import java.util.Collection; -import java.util.Set; - -import mvm.rya.accumulo.experimental.AbstractAccumuloIndexer; -import mvm.rya.api.RdfCloudTripleStoreConfiguration; -import mvm.rya.api.domain.RyaStatement; -import mvm.rya.api.domain.RyaURI; -import mvm.rya.indexing.StatementConstraints; -import mvm.rya.indexing.TemporalIndexer; -import mvm.rya.indexing.TemporalInstant; -import mvm.rya.indexing.TemporalInterval; - -import org.apache.accumulo.core.client.Connector; -import org.apache.hadoop.conf.Configuration; -import org.openrdf.model.Statement; -import org.openrdf.model.URI; -import org.openrdf.query.QueryEvaluationException; - -/** - * Temporal Indexer that does nothing, like when disabled. - * - */ -public class NullTemporalIndexer extends AbstractAccumuloIndexer implements TemporalIndexer { - - @Override - public String getTableName() { - - return null; - } - - @Override - public void storeStatement(RyaStatement statement) throws IOException { - - - } - - @Override - public Configuration getConf() { - - return null; - } - - @Override - public void setConf(Configuration arg0) { - - - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryInstantEqualsInstant(TemporalInstant queryInstant, - StatementConstraints contraints) throws QueryEvaluationException { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryInstantBeforeInstant(TemporalInstant queryInstant, - StatementConstraints contraints) throws QueryEvaluationException { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryInstantAfterInstant(TemporalInstant queryInstant, - StatementConstraints contraints) throws QueryEvaluationException { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryInstantBeforeInterval(TemporalInterval givenInterval, - StatementConstraints contraints) throws QueryEvaluationException { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryInstantAfterInterval(TemporalInterval givenInterval, - StatementConstraints contraints) throws QueryEvaluationException { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryInstantInsideInterval(TemporalInterval givenInterval, - StatementConstraints contraints) throws QueryEvaluationException { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryInstantHasBeginningInterval(TemporalInterval queryInterval, - StatementConstraints contraints) throws QueryEvaluationException { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryInstantHasEndInterval(TemporalInterval queryInterval, - StatementConstraints contraints) throws QueryEvaluationException { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryIntervalEquals(TemporalInterval query, - StatementConstraints contraints) throws QueryEvaluationException { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryIntervalBefore(TemporalInterval query, - StatementConstraints contraints) throws QueryEvaluationException { - - return null; - } - - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryIntervalAfter(TemporalInterval query, StatementConstraints contraints) - throws QueryEvaluationException { - - return null; - } - - @Override - public Set<URI> getIndexablePredicates() { - - return null; - } - - @Override - public void init() { - // TODO Auto-generated method stub - - } - - @Override - public void setConnector(Connector connector) { - // TODO Auto-generated method stub - - } - - @Override - public void destroy() { - // TODO Auto-generated method stub - - } - - @Override - public void purge(RdfCloudTripleStoreConfiguration configuration) { - // TODO Auto-generated method stub - - } - - @Override - public void dropAndDestroy() { - // TODO Auto-generated method stub - - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/RyaOutputFormat.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/RyaOutputFormat.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/RyaOutputFormat.java deleted file mode 100644 index 7e690f4..0000000 --- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/RyaOutputFormat.java +++ /dev/null @@ -1,329 +0,0 @@ -package mvm.rya.accumulo.mr; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import java.io.Closeable; -import java.io.Flushable; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; - -import mvm.rya.accumulo.AccumuloRdfConfiguration; -import mvm.rya.accumulo.AccumuloRyaDAO; -import mvm.rya.accumulo.mr.utils.MRUtils; -import mvm.rya.api.domain.RyaStatement; -import mvm.rya.api.persist.RyaDAOException; -import mvm.rya.api.resolver.RdfToRyaConversions; -import mvm.rya.indexing.FreeTextIndexer; -import mvm.rya.indexing.GeoIndexer; -import mvm.rya.indexing.StatementSerializer; -import mvm.rya.indexing.TemporalIndexer; -import mvm.rya.indexing.accumulo.ConfigUtils; -import mvm.rya.indexing.accumulo.freetext.AccumuloFreeTextIndexer; -import mvm.rya.indexing.accumulo.geo.GeoMesaGeoIndexer; -import mvm.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer; - -import org.apache.accumulo.core.client.AccumuloException; -import org.apache.accumulo.core.client.AccumuloSecurityException; -import org.apache.accumulo.core.client.Connector; -import org.apache.accumulo.core.client.TableExistsException; -import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.data.Mutation; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; -import org.apache.log4j.Logger; -import org.geotools.feature.SchemaException; -import org.openrdf.model.Statement; - -/** - * Hadoop Map/Reduce class to use Rya, the {@link GeoIndexer}, the {@link FreeTextIndexer}, and the {@link TemporalIndexer} as the sink of {@link Statement} data. - * wrapped in an {@link StatementWritable} objects. This {@link OutputFormat} ignores the Keys and only writes the Values to Rya. - * - * The user must specify connection parameters for Rya, {@link GeoIndexer}, {@link FreeTextIndexer}, and {@link TemporalIndexer}. - */ -public class RyaOutputFormat extends OutputFormat<Writable, StatementWritable> { - private static final Logger logger = Logger.getLogger(RyaOutputFormat.class); - - private static final String PREFIX = RyaOutputFormat.class.getSimpleName(); - private static final String MAX_MUTATION_BUFFER_SIZE = PREFIX + ".maxmemory"; - private static final String ENABLE_FREETEXT = PREFIX + ".freetext.enable"; - private static final String ENABLE_GEO = PREFIX + ".geo.enable"; - private static final String ENABLE_TEMPORAL = PREFIX + ".temporal.enable";; - - - @Override - public void checkOutputSpecs(JobContext jobContext) throws IOException, InterruptedException { - Configuration conf = jobContext.getConfiguration(); - - // make sure that all of the indexers can connect - getGeoIndexer(conf); - getFreeTextIndexer(conf); - getTemporalIndexer(conf); - getRyaIndexer(conf); - } - - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { - // copied from AccumuloOutputFormat - return new NullOutputFormat<Text, Mutation>().getOutputCommitter(context); - } - - @Override - public RecordWriter<Writable, StatementWritable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { - return new RyaRecordWriter(context); - } - - private static GeoIndexer getGeoIndexer(Configuration conf) throws IOException { - if (!conf.getBoolean(ENABLE_GEO, true)) { - return new NullGeoIndexer(); - } - - GeoMesaGeoIndexer geo = new GeoMesaGeoIndexer(); - geo.setConf(conf); - return geo; - - } - - private static FreeTextIndexer getFreeTextIndexer(Configuration conf) throws IOException { - if (!conf.getBoolean(ENABLE_FREETEXT, true)) { - return new NullFreeTextIndexer(); - } - - AccumuloFreeTextIndexer freeText = new AccumuloFreeTextIndexer(); - freeText.setConf(conf); - return freeText; - - } - - private static TemporalIndexer getTemporalIndexer(Configuration conf) throws IOException { - if (!conf.getBoolean(ENABLE_TEMPORAL, true)) { - return new NullTemporalIndexer(); - } - AccumuloTemporalIndexer temporal = new AccumuloTemporalIndexer(); - temporal.setConf(conf); - return temporal; - } - - private static AccumuloRyaDAO getRyaIndexer(Configuration conf) throws IOException { - try { - AccumuloRyaDAO ryaIndexer = new AccumuloRyaDAO(); - Connector conn = ConfigUtils.getConnector(conf); - ryaIndexer.setConnector(conn); - - AccumuloRdfConfiguration ryaConf = new AccumuloRdfConfiguration(); - - String tablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY, null); - if (tablePrefix != null) { - ryaConf.setTablePrefix(tablePrefix); - } - ryaConf.setDisplayQueryPlan(false); - ryaIndexer.setConf(ryaConf); - ryaIndexer.init(); - return ryaIndexer; - } catch (AccumuloException e) { - logger.error("Cannot create RyaIndexer", e); - throw new IOException(e); - } catch (AccumuloSecurityException e) { - logger.error("Cannot create RyaIndexer", e); - throw new IOException(e); - } catch (RyaDAOException e) { - logger.error("Cannot create RyaIndexer", e); - throw new IOException(e); - } - } - - public static class RyaRecordWriter extends RecordWriter<Writable, StatementWritable> implements Closeable, Flushable { - private static final Logger logger = Logger.getLogger(RyaRecordWriter.class); - - private FreeTextIndexer freeTextIndexer; - private GeoIndexer geoIndexer; - private TemporalIndexer temporalIndexer; - private AccumuloRyaDAO ryaIndexer; - - private static final long ONE_MEGABYTE = 1024L * 1024L; - private static final long AVE_STATEMENT_SIZE = 100L; - - private long bufferSizeLimit; - private long bufferCurrentSize = 0; - - private ArrayList<RyaStatement> buffer; - - public RyaRecordWriter(TaskAttemptContext context) throws IOException { - this(context.getConfiguration()); - } - - public RyaRecordWriter(Configuration conf) throws IOException { - // set up the buffer - bufferSizeLimit = conf.getLong(MAX_MUTATION_BUFFER_SIZE, ONE_MEGABYTE); - int bufferCapacity = (int) (bufferSizeLimit / AVE_STATEMENT_SIZE); - buffer = new ArrayList<RyaStatement>(bufferCapacity); - - // set up the indexers - freeTextIndexer = getFreeTextIndexer(conf); - geoIndexer = getGeoIndexer(conf); - temporalIndexer = getTemporalIndexer(conf); - ryaIndexer = getRyaIndexer(conf); - - // update fields used for metrics - startTime = System.currentTimeMillis(); - lastCommitFinishTime = startTime; - } - - @Override - public void flush() throws IOException { - flushBuffer(); - } - - @Override - public void close() throws IOException { - close(null); - } - - @Override - public void close(TaskAttemptContext paramTaskAttemptContext) throws IOException { - // close everything. log errors - try { - flush(); - } catch (IOException e) { - logger.error("Error flushing the buffer on RyaOutputFormat Close", e); - } - try { - if (geoIndexer != null) - geoIndexer.close(); - } catch (IOException e) { - logger.error("Error closing the geoIndexer on RyaOutputFormat Close", e); - } - try { - if (freeTextIndexer != null) - freeTextIndexer.close(); - } catch (IOException e) { - logger.error("Error closing the freetextIndexer on RyaOutputFormat Close", e); - } - try { - if (temporalIndexer != null) - temporalIndexer.close(); - } catch (IOException e) { - logger.error("Error closing the temporalIndexer on RyaOutputFormat Close", e); - } - try { - ryaIndexer.destroy(); - } catch (RyaDAOException e) { - logger.error("Error closing RyaDAO on RyaOutputFormat Close", e); - } - } - - public void write(Statement statement) throws IOException, InterruptedException { - write(null, new StatementWritable(statement)); - } - - @Override - public void write(Writable key, StatementWritable value) throws IOException, InterruptedException { - buffer.add(RdfToRyaConversions.convertStatement(value)); - - bufferCurrentSize += StatementSerializer.writeStatement(value).length(); - - if (bufferCurrentSize >= bufferSizeLimit) { - flushBuffer(); - } - } - - // fields for storing metrics - private long startTime = 0; - private long lastCommitFinishTime = 0; - private long totalCommitRecords = 0; - - private double totalReadDuration = 0; - private double totalWriteDuration = 0; - - private long commitCount = 0; - - private void flushBuffer() throws IOException { - totalCommitRecords += buffer.size(); - commitCount++; - - long startCommitTime = System.currentTimeMillis(); - - logger.info(String.format("(C-%d) Flushing buffer with %,d objects and %,d bytes", commitCount, buffer.size(), - bufferCurrentSize)); - - double readingDuration = (startCommitTime - lastCommitFinishTime) / 1000.; - totalReadDuration += readingDuration; - double currentReadRate = buffer.size() / readingDuration; - double totalReadRate = totalCommitRecords / totalReadDuration; - - // Print "reading" metrics - logger.info(String.format("(C-%d) (Reading) Duration, Current Rate, Total Rate: %.2f %.2f %.2f ", commitCount, readingDuration, - currentReadRate, totalReadRate)); - - // write to geo - geoIndexer.storeStatements(buffer); - geoIndexer.flush(); - - // write to free text - freeTextIndexer.storeStatements(buffer); - freeTextIndexer.flush(); - - // write to temporal - temporalIndexer.storeStatements(buffer); - temporalIndexer.flush(); - - // write to rya - try { - ryaIndexer.add(buffer.iterator()); - } catch (RyaDAOException e) { - logger.error("Cannot writing statement to Rya", e); - throw new IOException(e); - } - - lastCommitFinishTime = System.currentTimeMillis(); - - double writingDuration = (lastCommitFinishTime - startCommitTime) / 1000.; - totalWriteDuration += writingDuration; - double currentWriteRate = buffer.size() / writingDuration; - double totalWriteRate = totalCommitRecords / totalWriteDuration; - - // Print "writing" stats - logger.info(String.format("(C-%d) (Writing) Duration, Current Rate, Total Rate: %.2f %.2f %.2f ", commitCount, writingDuration, - currentWriteRate, totalWriteRate)); - - double processDuration = writingDuration + readingDuration; - double totalProcessDuration = totalWriteDuration + totalReadDuration; - double currentProcessRate = buffer.size() / processDuration; - double totalProcessRate = totalCommitRecords / (totalProcessDuration); - - // Print "total" stats - logger.info(String.format("(C-%d) (Total) Duration, Current Rate, Total Rate: %.2f %.2f %.2f ", commitCount, processDuration, - currentProcessRate, totalProcessRate)); - - // clear the buffer - buffer.clear(); - bufferCurrentSize = 0L; - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/StatementWritable.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/StatementWritable.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/StatementWritable.java deleted file mode 100644 index 629baf2..0000000 --- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/StatementWritable.java +++ /dev/null @@ -1,86 +0,0 @@ -package mvm.rya.accumulo.mr; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - -import org.apache.hadoop.io.Writable; -import org.openrdf.model.Resource; -import org.openrdf.model.Statement; -import org.openrdf.model.URI; -import org.openrdf.model.Value; - -import mvm.rya.indexing.StatementSerializer; - -/** - * A {@link Writable} wrapper for {@link Statement} objects. - */ -@SuppressWarnings("serial") -public class StatementWritable implements Statement, Writable { - - private Statement statement; - - public StatementWritable(Statement statement) { - setStatement(statement); - } - - public void setStatement(Statement statement) { - this.statement = statement; - } - - public Statement getStatement() { - return statement; - } - - @Override - public void readFields(DataInput paramDataInput) throws IOException { - statement = StatementSerializer.readStatement(paramDataInput.readUTF()); - } - - @Override - public void write(DataOutput paramDataOutput) throws IOException { - paramDataOutput.writeUTF(StatementSerializer.writeStatement(statement)); - } - - @Override - public Resource getSubject() { - return statement.getSubject(); - } - - @Override - public URI getPredicate() { - return statement.getPredicate(); - } - - @Override - public Value getObject() { - return statement.getObject(); - } - - @Override - public Resource getContext() { - return statement.getContext(); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/BulkNtripsInputToolIndexing.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/BulkNtripsInputToolIndexing.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/BulkNtripsInputToolIndexing.java deleted file mode 100644 index ecc2354..0000000 --- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/BulkNtripsInputToolIndexing.java +++ /dev/null @@ -1,227 +0,0 @@ -package mvm.rya.accumulo.mr.fileinput; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import static com.google.common.base.Preconditions.checkNotNull; - -import java.io.IOException; -import java.io.StringReader; - -import mvm.rya.accumulo.AccumuloRdfConfiguration; -import mvm.rya.accumulo.mr.utils.MRUtils; -import mvm.rya.api.resolver.RdfToRyaConversions; -import mvm.rya.indexing.FreeTextIndexer; -import mvm.rya.indexing.GeoIndexer; -import mvm.rya.indexing.accumulo.ConfigUtils; -import mvm.rya.indexing.accumulo.freetext.AccumuloFreeTextIndexer; -import mvm.rya.indexing.accumulo.geo.GeoMesaGeoIndexer; - -import org.apache.accumulo.core.client.AccumuloException; -import org.apache.accumulo.core.client.AccumuloSecurityException; -import org.apache.accumulo.core.client.TableExistsException; -import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.log4j.Logger; -import org.geotools.feature.SchemaException; -import org.openrdf.model.Resource; -import org.openrdf.model.Statement; -import org.openrdf.model.ValueFactory; -import org.openrdf.model.impl.ContextStatementImpl; -import org.openrdf.model.impl.ValueFactoryImpl; -import org.openrdf.rio.ParserConfig; -import org.openrdf.rio.RDFFormat; -import org.openrdf.rio.RDFHandlerException; -import org.openrdf.rio.RDFParseException; -import org.openrdf.rio.RDFParser; -import org.openrdf.rio.Rio; -import org.openrdf.rio.helpers.RDFHandlerBase; - -import com.google.common.base.Preconditions; - -/** - * Take large ntrips files and use MapReduce to ingest into other indexing - */ -public class BulkNtripsInputToolIndexing extends Configured implements Tool { - - private String userName = null; - private String pwd = null; - private String instance = null; - private String zk = null; - - private String format = RDFFormat.NTRIPLES.getName(); - - @Override - public int run(final String[] args) throws Exception { - final Configuration conf = getConf(); - // conf - zk = conf.get(MRUtils.AC_ZK_PROP, zk); - instance = conf.get(MRUtils.AC_INSTANCE_PROP, instance); - userName = conf.get(MRUtils.AC_USERNAME_PROP, userName); - pwd = conf.get(MRUtils.AC_PWD_PROP, pwd); - format = conf.get(MRUtils.FORMAT_PROP, format); - - String auths = conf.get(MRUtils.AC_CV_PROP, ""); - - conf.set(MRUtils.FORMAT_PROP, format); - Preconditions.checkNotNull(zk, MRUtils.AC_ZK_PROP + " not set"); - Preconditions.checkNotNull(instance, MRUtils.AC_INSTANCE_PROP + " not set"); - Preconditions.checkNotNull(userName, MRUtils.AC_USERNAME_PROP + " not set"); - Preconditions.checkNotNull(pwd, MRUtils.AC_PWD_PROP + " not set"); - - // map the config values to free text configu values - conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk); - conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance); - conf.set(ConfigUtils.CLOUDBASE_USER, userName); - conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd); - conf.set(ConfigUtils.CLOUDBASE_AUTHS, auths); - - final String inputDir = args[0]; - - String tablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY, null); - Preconditions.checkNotNull(tablePrefix, MRUtils.TABLE_PREFIX_PROPERTY + " not set"); - - String docTextTable = tablePrefix + "text"; - conf.set(ConfigUtils.FREE_TEXT_DOC_TABLENAME, docTextTable); - - String docTermTable = tablePrefix + "terms"; - conf.set(ConfigUtils.FREE_TEXT_TERM_TABLENAME, docTermTable); - - String geoTable = tablePrefix + "geo"; - conf.set(ConfigUtils.GEO_TABLENAME, geoTable); - - System.out.println("Loading data into tables[freetext, geo]"); - System.out.println("Loading data into tables[" + docTermTable + " " + docTextTable + " " + geoTable + "]"); - - Job job = new Job(new Configuration(conf), "Bulk Ingest load data into Indexing Tables"); - job.setJarByClass(this.getClass()); - - // setting long job - Configuration jobConf = job.getConfiguration(); - jobConf.setBoolean("mapred.map.tasks.speculative.execution", false); - jobConf.setBoolean("mapred.reduce.tasks.speculative.execution", false); - jobConf.set("io.sort.mb", jobConf.get("io.sort.mb", "256")); - jobConf.setBoolean("mapred.compress.map.output", true); - - job.setInputFormatClass(TextInputFormat.class); - - job.setMapperClass(ParseNtripsMapper.class); - - // I'm not actually going to write output. - job.setOutputFormatClass(NullOutputFormat.class); - job.setOutputKeyClass(Text.class); - job.setOutputValueClass(Text.class); - job.setMapOutputKeyClass(Text.class); - job.setMapOutputValueClass(Text.class); - - TextInputFormat.setInputPaths(job, new Path(inputDir)); - - job.setNumReduceTasks(0); - - job.waitForCompletion(true); - - return 0; - } - - public static void main(String[] args) throws Exception { - ToolRunner.run(new Configuration(), new BulkNtripsInputToolIndexing(), args); - } - - public static class ParseNtripsMapper extends Mapper<LongWritable, Text, Text, Text> { - private static final Logger logger = Logger.getLogger(ParseNtripsMapper.class); - - public static final String TABLE_PROPERTY = "parsentripsmapper.table"; - - private RDFParser parser; - private FreeTextIndexer freeTextIndexer; - private GeoIndexer geoIndexer; - private String rdfFormat; - - @Override - protected void setup(final Context context) throws IOException, InterruptedException { - super.setup(context); - Configuration conf = context.getConfiguration(); - - freeTextIndexer = new AccumuloFreeTextIndexer(); - freeTextIndexer.setConf(conf); - geoIndexer = new GeoMesaGeoIndexer(); - geoIndexer.setConf(conf); - final ValueFactory vf = new ValueFactoryImpl(); - - rdfFormat = conf.get(MRUtils.FORMAT_PROP); - checkNotNull(rdfFormat, "Rdf format cannot be null"); - - String namedGraphString = conf.get(MRUtils.NAMED_GRAPH_PROP); - checkNotNull(namedGraphString, MRUtils.NAMED_GRAPH_PROP + " cannot be null"); - - final Resource namedGraph = vf.createURI(namedGraphString); - - parser = Rio.createParser(RDFFormat.valueOf(rdfFormat)); - parser.setParserConfig(new ParserConfig(true, true, true, RDFParser.DatatypeHandling.VERIFY)); - parser.setRDFHandler(new RDFHandlerBase() { - - @Override - public void handleStatement(Statement statement) throws RDFHandlerException { - Statement contextStatement = new ContextStatementImpl(statement.getSubject(), statement - .getPredicate(), statement.getObject(), namedGraph); - try { - freeTextIndexer.storeStatement(RdfToRyaConversions.convertStatement(contextStatement)); - geoIndexer.storeStatement(RdfToRyaConversions.convertStatement(contextStatement)); - } catch (IOException e) { - logger.error("Error creating indexers", e); - } - } - }); - } - - @Override - public void map(LongWritable key, Text value, Context output) throws IOException, InterruptedException { - String rdf = value.toString(); - try { - parser.parse(new StringReader(rdf), ""); - } catch (RDFParseException e) { - System.out.println("Line[" + rdf + "] cannot be formatted with format[" + rdfFormat + "]. Exception[" + e.getMessage() - + "]"); - } catch (Exception e) { - logger.error("error during map", e); - throw new IOException("Exception occurred parsing triple[" + rdf + "]"); - } - } - - @Override - public void cleanup(Context context) { - IOUtils.closeStream(freeTextIndexer); - IOUtils.closeStream(geoIndexer); - } - } - -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/RyaBatchWriterInputTool.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/RyaBatchWriterInputTool.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/RyaBatchWriterInputTool.java deleted file mode 100644 index fb80804..0000000 --- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/RyaBatchWriterInputTool.java +++ /dev/null @@ -1,243 +0,0 @@ -package mvm.rya.accumulo.mr.fileinput; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import static com.google.common.base.Preconditions.checkNotNull; - -import java.io.IOException; -import java.io.StringReader; - -import mvm.rya.accumulo.mr.RyaOutputFormat; -import mvm.rya.accumulo.mr.StatementWritable; -import mvm.rya.accumulo.mr.utils.MRUtils; -import mvm.rya.indexing.accumulo.ConfigUtils; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.log4j.Logger; -import org.openrdf.model.Resource; -import org.openrdf.model.Statement; -import org.openrdf.model.URI; -import org.openrdf.model.Value; -import org.openrdf.model.ValueFactory; -import org.openrdf.model.impl.ValueFactoryImpl; -import org.openrdf.rio.ParserConfig; -import org.openrdf.rio.RDFFormat; -import org.openrdf.rio.RDFHandlerException; -import org.openrdf.rio.RDFParseException; -import org.openrdf.rio.RDFParser; -import org.openrdf.rio.Rio; -import org.openrdf.rio.helpers.RDFHandlerBase; - -import com.google.common.base.Preconditions; - -/** - * Take large ntrips files and use MapReduce to ingest into other indexing - */ -public class RyaBatchWriterInputTool extends Configured implements Tool { - private static final Logger logger = Logger.getLogger(RyaBatchWriterInputTool.class); - - @Override - public int run(final String[] args) throws Exception { - String userName = null; - String pwd = null; - String instance = null; - String zk = null; - String format = null; - - final Configuration conf = getConf(); - // conf - zk = conf.get(MRUtils.AC_ZK_PROP, zk); - instance = conf.get(MRUtils.AC_INSTANCE_PROP, instance); - userName = conf.get(MRUtils.AC_USERNAME_PROP, userName); - pwd = conf.get(MRUtils.AC_PWD_PROP, pwd); - format = conf.get(MRUtils.FORMAT_PROP, RDFFormat.NTRIPLES.getName()); - - String auths = conf.get(MRUtils.AC_CV_PROP, ""); - - conf.set(MRUtils.FORMAT_PROP, format); - Preconditions.checkNotNull(zk, MRUtils.AC_ZK_PROP + " not set"); - Preconditions.checkNotNull(instance, MRUtils.AC_INSTANCE_PROP + " not set"); - Preconditions.checkNotNull(userName, MRUtils.AC_USERNAME_PROP + " not set"); - Preconditions.checkNotNull(pwd, MRUtils.AC_PWD_PROP + " not set"); - - // map the config values to free text configure values - conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk); - conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance); - conf.set(ConfigUtils.CLOUDBASE_USER, userName); - conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd); - conf.set(ConfigUtils.CLOUDBASE_AUTHS, auths); - - final String inputDir = args[0]; - - String tablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY, null); - Preconditions.checkNotNull(tablePrefix, MRUtils.TABLE_PREFIX_PROPERTY + " not set"); - - String docTextTable = tablePrefix + "text"; - conf.set(ConfigUtils.FREE_TEXT_DOC_TABLENAME, docTextTable); - - String docTermTable = tablePrefix + "terms"; - conf.set(ConfigUtils.FREE_TEXT_TERM_TABLENAME, docTermTable); - - String geoTable = tablePrefix + "geo"; - conf.set(ConfigUtils.GEO_TABLENAME, geoTable); - - logger.info("Loading data into tables[rya, freetext, geo]"); - logger.info("Loading data into tables[" + docTermTable + " " + docTextTable + " " + geoTable + "]"); - - Job job = new Job(new Configuration(conf), "Batch Writer load data into Rya Core and Indexing Tables"); - job.setJarByClass(this.getClass()); - - // setting long job - Configuration jobConf = job.getConfiguration(); - jobConf.setBoolean("mapred.map.tasks.speculative.execution", false); - - jobConf.setInt("mapred.task.timeout", 1000 * 60 * 60 * 24); // timeout after 1 day - - job.setInputFormatClass(TextInputFormat.class); - - job.setMapperClass(ParseNtripsMapper.class); - - job.setNumReduceTasks(0); - - // Use Rya Output Format - job.setOutputFormatClass(RyaOutputFormat.class); - job.setOutputKeyClass(NullWritable.class); - job.setOutputValueClass(StatementWritable.class); - job.setMapOutputKeyClass(NullWritable.class); - job.setMapOutputValueClass(StatementWritable.class); - - TextInputFormat.setInputPaths(job, new Path(inputDir)); - - job.waitForCompletion(true); - - return 0; - } - - public static void main(String[] args) throws Exception { - ToolRunner.run(new Configuration(), new RyaBatchWriterInputTool(), args); - } - - public static class ParseNtripsMapper extends Mapper<LongWritable, Text, Writable, Statement> { - private static final Logger logger = Logger.getLogger(ParseNtripsMapper.class); - - private RDFParser parser; - private RDFFormat rdfFormat; - - @Override - protected void setup(final Context context) throws IOException, InterruptedException { - super.setup(context); - Configuration conf = context.getConfiguration(); - - final ValueFactory vf = new ValueFactoryImpl(); - - String rdfFormatName = conf.get(MRUtils.FORMAT_PROP); - checkNotNull(rdfFormatName, "Rdf format cannot be null"); - rdfFormat = RDFFormat.valueOf(rdfFormatName); - - String namedGraphString = conf.get(MRUtils.NAMED_GRAPH_PROP); - checkNotNull(namedGraphString, MRUtils.NAMED_GRAPH_PROP + " cannot be null"); - - final Resource namedGraph = vf.createURI(namedGraphString); - - parser = Rio.createParser(rdfFormat); - parser.setParserConfig(new ParserConfig(true, true, true, RDFParser.DatatypeHandling.VERIFY)); - parser.setRDFHandler(new RDFHandlerBase() { - @Override - public void handleStatement(Statement statement) throws RDFHandlerException { - Statement output; - if (rdfFormat.equals(RDFFormat.NTRIPLES)) { - output = new ConextStatementWrapper(statement, namedGraph); - } else { - output = statement; - } - try { - context.write(NullWritable.get(), new StatementWritable(output)); - } catch (IOException e) { - logger.error("Error writing statement", e); - throw new RDFHandlerException(e); - } catch (InterruptedException e) { - logger.error("Error writing statement", e); - throw new RDFHandlerException(e); - } - } - - }); - } - - @Override - public void map(LongWritable key, Text value, Context output) throws IOException, InterruptedException { - String rdf = value.toString(); - try { - parser.parse(new StringReader(rdf), ""); - } catch (RDFParseException e) { - logger.error("Line[" + rdf + "] cannot be formatted with format[" + rdfFormat + "]. Exception[" + e.getMessage() - + "]", e); - } catch (Exception e) { - logger.error("error during map", e); - throw new IOException("Exception occurred parsing triple[" + rdf + "]", e); - } - } - } - - @SuppressWarnings("serial") - private static class ConextStatementWrapper implements Statement { - private Statement statementWithoutConext; - private Resource context; - - public ConextStatementWrapper(Statement statementWithoutConext, Resource context) { - this.statementWithoutConext = statementWithoutConext; - this.context = context; - } - - @Override - public Resource getSubject() { - return statementWithoutConext.getSubject(); - } - - @Override - public URI getPredicate() { - return statementWithoutConext.getPredicate(); - } - - @Override - public Value getObject() { - return statementWithoutConext.getObject(); - } - - @Override - public Resource getContext() { - return context; - } - } - -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.manual/src/site/markdown/_index.md ---------------------------------------------------------------------- diff --git a/extras/rya.manual/src/site/markdown/_index.md b/extras/rya.manual/src/site/markdown/_index.md index bf030a3..6e45779 100644 --- a/extras/rya.manual/src/site/markdown/_index.md +++ b/extras/rya.manual/src/site/markdown/_index.md @@ -28,6 +28,7 @@ - [Evaluation Table](eval.md) - [Pre-computed Joins](loadPrecomputedJoin.md) - [Inferencing](infer.md) +- [MapReduce Interface](mapreduce.md) # Samples - [Typical First Steps](sm-firststeps.md) http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.manual/src/site/markdown/index.md ---------------------------------------------------------------------- diff --git a/extras/rya.manual/src/site/markdown/index.md b/extras/rya.manual/src/site/markdown/index.md index 0748284..2840d10 100644 --- a/extras/rya.manual/src/site/markdown/index.md +++ b/extras/rya.manual/src/site/markdown/index.md @@ -30,6 +30,7 @@ This project contains documentation about the Rya, a scalable RDF triple store o - [Evaluation Table](eval.md) - [Pre-computed Joins](loadPrecomputedJoin.md) - [Inferencing](infer.md) +- [MapReduce Interface](mapreduce.md) # Samples - [Typical First Steps](sm-firststeps.md) http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.manual/src/site/markdown/loaddata.md ---------------------------------------------------------------------- diff --git a/extras/rya.manual/src/site/markdown/loaddata.md b/extras/rya.manual/src/site/markdown/loaddata.md index 2c6bc00..74fb90b 100644 --- a/extras/rya.manual/src/site/markdown/loaddata.md +++ b/extras/rya.manual/src/site/markdown/loaddata.md @@ -96,10 +96,10 @@ Bulk loading data is done through Map Reduce jobs ### Bulk Load RDF data -This Map Reduce job will read a full file into memory and parse it into statements. The statements are saved into the store. Here is an example for storing in Accumulo: +This Map Reduce job will read files into memory and parse them into statements. The statements are saved into the store. Here is an example for storing in Accumulo: ``` -hadoop jar target/accumulo.rya-3.0.4-SNAPSHOT-shaded.jar mvm.rya.accumulo.mr.fileinput.BulkNtripsInputTool -Dac.zk=localhost:2181 -Dac.instance=accumulo -Dac.username=root -Dac.pwd=secret -Drdf.tablePrefix=triplestore_ -Dio.sort.mb=64 /tmp/temp.ntrips +hadoop jar target/rya.mapreduce-3.2.10-SNAPSHOT-shaded.jar mvm.rya.accumulo.mr.RdfFileInputTool -Dac.zk=localhost:2181 -Dac.instance=accumulo -Dac.username=root -Dac.pwd=secret -Drdf.tablePrefix=triplestore_ -Drdf.format=N-Triples /tmp/temp.ntrips ``` Options: @@ -107,9 +107,14 @@ Options: - rdf.tablePrefix : The tables (spo, po, osp) are prefixed with this qualifier. The tables become: (rdf.tablePrefix)spo,(rdf.tablePrefix)po,(rdf.tablePrefix)osp - ac.* : Accumulo connection parameters - rdf.format : See RDFFormat from openrdf, samples include (Trig, N-Triples, RDF/XML) -- io.sort.mb : Higher the value, the faster the job goes. Just remember that you will need this much ram at least per mapper +- sc.use_freetext, sc.use_geo, sc.use_temporal, sc.use_entity : If any of these are set to true, statements will also be + added to the enabled secondary indices. +- sc.freetext.predicates, sc.geo.predicates, sc.temporal.predicates: If the associated indexer is enabled, these options specify + which statements should be sent to that indexer (based on the predicate). If not given, all indexers will attempt to index + all statements. -The argument is the directory/file to load. This file needs to be loaded into HDFS before running. +The argument is the directory/file to load. This file needs to be loaded into HDFS before running. If loading a directory, all files should have the same RDF +format. ## Direct OpenRDF API @@ -139,4 +144,4 @@ conn.commit(); conn.close(); myRepository.shutDown(); -``` \ No newline at end of file +``` http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.manual/src/site/markdown/mapreduce.md ---------------------------------------------------------------------- diff --git a/extras/rya.manual/src/site/markdown/mapreduce.md b/extras/rya.manual/src/site/markdown/mapreduce.md new file mode 100644 index 0000000..fde2231 --- /dev/null +++ b/extras/rya.manual/src/site/markdown/mapreduce.md @@ -0,0 +1,107 @@ +<!-- + +[comment]: # Licensed to the Apache Software Foundation (ASF) under one +[comment]: # or more contributor license agreements. See the NOTICE file +[comment]: # distributed with this work for additional information +[comment]: # regarding copyright ownership. The ASF licenses this file +[comment]: # to you under the Apache License, Version 2.0 (the +[comment]: # "License"); you may not use this file except in compliance +[comment]: # with the License. You may obtain a copy of the License at +[comment]: # +[comment]: # http://www.apache.org/licenses/LICENSE-2.0 +[comment]: # +[comment]: # Unless required by applicable law or agreed to in writing, +[comment]: # software distributed under the License is distributed on an +[comment]: # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +[comment]: # KIND, either express or implied. See the License for the +[comment]: # specific language governing permissions and limitations +[comment]: # under the License. + +--> +# MapReduce Interface + +The rya.mapreduce project contains a set of classes facilitating the use of +Accumulo-backed Rya as the input source or output destination of Hadoop +MapReduce jobs. + +## Writable + +*RyaStatementWritable* wraps a statement in a WritableComparable object, so +triples can be used as keys or values in MapReduce tasks. Statements are +considered equal if they contain equivalent triples and equivalent Accumulo +metadata (visibility, timestamp, etc.). + +## Statement Input + +Input formats are provided for reading triple data from Rya or from RDF files: + +- *RdfFileInputFormat* will read and parse RDF files of any format. Format must + be explicitly specified. Reading and parsing is done asynchronously, enabling + large input files depending on how much information the openrdf parser itself + needs to hold in memory in order to parse the file. (For example, large + N-Triples files can be handled easily, but large XML files might require you + to allocate more memory for the Map task.) Handles multiple files if given a + directory as input, as long as all files are the specified format. Files will + only be split if the format is set to N-Triples or N-Quads; otherwise, the + number of input files will be the number of splits. Output pairs are + `<LongWritable, RyaStatementWritable>`, where the former is the number of the + statement within the input split and the latter is the statement itself. + +- *RyaInputFormat* will read statements directly from a Rya table in Accumulo. + Extends Accumulo's AbstractInputFormat and uses that class's configuration + methods to configure the connection to Accumulo. The table scanned should be + one of the Rya core tables (spo, po, or osp), and whichever is used should be + specified using `RyaInputFormat.setTableLayout`, so the input format can + deserialize the statements correctly. Choice of table may influence + parallelization if the tables are split differently in Accumulo. (The number + of splits in Accumulo will be the number of input splits in Hadoop and + therefore the number of Mappers.) Output pairs are + `<Text, RyaStatementWritable>`, where the former is the Accumulo row ID and + the latter is the statement itself. + +## Statement Output + +An output format is provided for writing triple data to Rya: + +- *RyaOutputFormat* will insert statements into the Rya core tables and/or any + configured secondary indexers. Configuration options include: + * Table prefix: identifies Rya instance + * Default visibility: any statement without a visibility set will be written + with this visibility + * Default context: any statement without a context (named graph) set will be + written with this context + * Enable freetext index, geo index, temporal index, entity index, and core + tables: separate options for configuring exactly which indexers to use. + If using secondary indexers, consider providing configuration variables + "sc.freetext.predicates", "sc.geo.predicates", and "sc.temporal.predicates" + as appropriate; otherwise each indexer will attempt to index every + statement. + Expects input pairs `<Writable, RyaStatementWritable>`. Keys are ignored and + values are written to Rya. + +## Configuration + +*MRUtils* defines constant configuration parameter names used for passing +Accumulo connection information, Rya prefix and table layout, RDF format, +etc., as well as some convenience methods for getting and setting these +values with respect to a given Configuration. + +## Base Tool + +*AbstractAccumuloMRTool* can be used as a base class for Rya MapReduce Tools +using the ToolRunner API. It extracts necessary parameters from the +configuration and provides methods for setting input and/or output formats and +configuring them accordingly. To use, extend this class and implement `run`. +In the run method, call `init` to extract and validate configuration values from +the Hadoop Configuration. Then use `setup*(Input/Output)` methods as needed to +configure input and output for MapReduce jobs using the stored parameters. +(Input and output formats can then be configured directly, if necessary.) + +Expects parameters to be specified in the configuration using the names defined +in MRUtils, or for secondary indexers, the names in +`mvm.rya.indexing.accumulo.ConfigUtils`. + +## Examples + +See the `examples` subpackage for examples of how to use the interface, and the +`tools` subpackage for some individual MapReduce applications. http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.manual/src/site/markdown/sm-firststeps.md ---------------------------------------------------------------------- diff --git a/extras/rya.manual/src/site/markdown/sm-firststeps.md b/extras/rya.manual/src/site/markdown/sm-firststeps.md index 34f995b..431292a 100644 --- a/extras/rya.manual/src/site/markdown/sm-firststeps.md +++ b/extras/rya.manual/src/site/markdown/sm-firststeps.md @@ -42,11 +42,11 @@ See the [Build From Source Section](build-source.md) to get the appropriate arti I find that the best way to load the data is through the Bulk Load Map Reduce job. * Save the RDF Data above onto HDFS. From now on we will refer to this location as `<RDF_HDFS_LOCATION>` -* Move the `accumulo.rya-<version>-job.jar` onto the hadoop cluster +* Move the `rya.mapreduce-<version>-job.jar` onto the hadoop cluster * Bulk load the data. Here is a sample command line: ``` -hadoop jar ../accumulo.rya-2.0.0-SNAPSHOT-job.jar BulkNtripsInputTool -Drdf.tablePrefix=lubm_ -Dcb.username=user -Dcb.pwd=cbpwd -Dcb.instance=instance -Dcb.zk=zookeeperLocation -Drdf.format=N-Triples <RDF_HDFS_LOCATION> +hadoop jar ../rya.mapreduce-3.2.10-SNAPSHOT-job.jar mvm.rya.accumulo.mr.RdfFileInputTool -Drdf.tablePrefix=lubm_ -Dcb.username=user -Dcb.pwd=cbpwd -Dcb.instance=instance -Dcb.zk=zookeeperLocation -Drdf.format=N-Triples <RDF_HDFS_LOCATION> ``` Once the data is loaded, it is actually a good practice to compact your tables. You can do this by opening the accumulo shell `shell` and running the `compact` command on the generated tables. Remember the generated tables will be prefixed by the `rdf.tablePrefix` property you assigned above. The default tablePrefix is `rts`. @@ -77,4 +77,4 @@ This page provides a very simple text box for running queries against the store Remember to update the connection information in the WAR: `WEB-INF/spring/spring-accumulo.xml` -See the [Query data section](querydata.md) for more information. \ No newline at end of file +See the [Query data section](querydata.md) for more information. http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.manual/src/site/site.xml ---------------------------------------------------------------------- diff --git a/extras/rya.manual/src/site/site.xml b/extras/rya.manual/src/site/site.xml index a671d3d..a5fab57 100644 --- a/extras/rya.manual/src/site/site.xml +++ b/extras/rya.manual/src/site/site.xml @@ -45,6 +45,7 @@ under the License. <item name="Evaluation Table" href="eval.html"/> <item name="Pre-computed Joins" href="loadPrecomputedJoin.html"/> <item name="Inferencing" href="infer.html"/> + <item name="MapReduce Interface" href="mapreduce.html"/> </menu> <menu name="Samples"> http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.reasoning/pom.xml ---------------------------------------------------------------------- diff --git a/extras/rya.reasoning/pom.xml b/extras/rya.reasoning/pom.xml index b7b7293..bc00404 100644 --- a/extras/rya.reasoning/pom.xml +++ b/extras/rya.reasoning/pom.xml @@ -43,6 +43,10 @@ under the License. <groupId>org.apache.rya</groupId> <artifactId>rya.sail</artifactId> </dependency> + <dependency> + <groupId>org.apache.rya</groupId> + <artifactId>rya.mapreduce</artifactId> + </dependency> <dependency> <groupId>org.apache.hadoop</groupId> http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/AbstractReasoningTool.java ---------------------------------------------------------------------- diff --git a/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/AbstractReasoningTool.java b/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/AbstractReasoningTool.java index dde83c6..09b4a16 100644 --- a/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/AbstractReasoningTool.java +++ b/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/AbstractReasoningTool.java @@ -22,8 +22,8 @@ package mvm.rya.reasoning.mr; import java.io.IOException; import mvm.rya.accumulo.mr.RyaStatementWritable; -import mvm.rya.accumulo.mr.fileinput.RdfFileInputFormat; -import mvm.rya.accumulo.mr.utils.MRUtils; +import mvm.rya.accumulo.mr.RdfFileInputFormat; +import mvm.rya.accumulo.mr.MRUtils; import mvm.rya.reasoning.Derivation; import mvm.rya.reasoning.Fact; import mvm.rya.reasoning.Schema; http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/ConformanceTest.java ---------------------------------------------------------------------- diff --git a/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/ConformanceTest.java b/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/ConformanceTest.java index 02cce66..0209eff 100644 --- a/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/ConformanceTest.java +++ b/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/ConformanceTest.java @@ -33,7 +33,7 @@ import java.util.List; import java.util.Map; import java.util.Set; -import mvm.rya.accumulo.mr.utils.MRUtils; +import mvm.rya.accumulo.mr.MRUtils; import mvm.rya.reasoning.Fact; import mvm.rya.reasoning.Schema; http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/MRReasoningUtils.java ---------------------------------------------------------------------- diff --git a/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/MRReasoningUtils.java b/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/MRReasoningUtils.java index 3bed4ca..b306ee8 100644 --- a/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/MRReasoningUtils.java +++ b/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/MRReasoningUtils.java @@ -25,7 +25,7 @@ import java.io.IOException; import mvm.rya.accumulo.AccumuloRdfConfiguration; import mvm.rya.accumulo.AccumuloRdfConstants; import mvm.rya.accumulo.AccumuloRyaDAO; -import mvm.rya.accumulo.mr.utils.MRUtils; +import mvm.rya.accumulo.mr.MRUtils; import mvm.rya.api.RdfCloudTripleStoreConstants; import mvm.rya.api.RdfCloudTripleStoreUtils; import mvm.rya.api.domain.RyaStatement; http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/tinkerpop.rya/pom.xml ---------------------------------------------------------------------- diff --git a/extras/tinkerpop.rya/pom.xml b/extras/tinkerpop.rya/pom.xml index 9ccb4b1..b92eb96 100644 --- a/extras/tinkerpop.rya/pom.xml +++ b/extras/tinkerpop.rya/pom.xml @@ -39,6 +39,10 @@ under the License. <groupId>org.apache.rya</groupId> <artifactId>accumulo.rya</artifactId> </dependency> + <dependency> + <groupId>org.apache.rya</groupId> + <artifactId>rya.mapreduce</artifactId> + </dependency> <dependency> <groupId>com.tinkerpop.gremlin</groupId> http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/tinkerpop.rya/src/main/groovy/mvm/rya/blueprints/config/RyaGraphConfiguration.groovy ---------------------------------------------------------------------- diff --git a/extras/tinkerpop.rya/src/main/groovy/mvm/rya/blueprints/config/RyaGraphConfiguration.groovy b/extras/tinkerpop.rya/src/main/groovy/mvm/rya/blueprints/config/RyaGraphConfiguration.groovy index fc3419d..a6e906c 100644 --- a/extras/tinkerpop.rya/src/main/groovy/mvm/rya/blueprints/config/RyaGraphConfiguration.groovy +++ b/extras/tinkerpop.rya/src/main/groovy/mvm/rya/blueprints/config/RyaGraphConfiguration.groovy @@ -29,7 +29,7 @@ import mvm.rya.blueprints.sail.RyaSailGraph import mvm.rya.rdftriplestore.RdfCloudTripleStore import mvm.rya.rdftriplestore.inference.InferenceEngine import org.apache.commons.configuration.Configuration -import static mvm.rya.accumulo.mr.utils.MRUtils.* +import static mvm.rya.accumulo.mr.MRUtils.* import org.apache.commons.configuration.MapConfiguration import mvm.rya.blueprints.sail.RyaSailEdge import mvm.rya.blueprints.sail.RyaSailVertex http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/TstGremlinRya.groovy ---------------------------------------------------------------------- diff --git a/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/TstGremlinRya.groovy b/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/TstGremlinRya.groovy index fe0f4e0..ec8beb8 100644 --- a/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/TstGremlinRya.groovy +++ b/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/TstGremlinRya.groovy @@ -30,7 +30,7 @@ //import mvm.rya.rdftriplestore.RdfCloudTripleStore //import mvm.rya.rdftriplestore.inference.InferenceEngine //import org.apache.accumulo.core.client.ZooKeeperInstance -//import static mvm.rya.accumulo.mr.utils.MRUtils.* +//import static mvm.rya.accumulo.mr.MRUtils.* //import static mvm.rya.api.RdfCloudTripleStoreConfiguration.CONF_QUERYPLAN_FLAG //import static mvm.rya.api.RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX // http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/config/RyaGraphConfigurationTest.groovy ---------------------------------------------------------------------- diff --git a/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/config/RyaGraphConfigurationTest.groovy b/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/config/RyaGraphConfigurationTest.groovy index 9dd0627..c4f5dbb 100644 --- a/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/config/RyaGraphConfigurationTest.groovy +++ b/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/config/RyaGraphConfigurationTest.groovy @@ -30,7 +30,7 @@ import org.openrdf.model.impl.StatementImpl import org.openrdf.model.impl.ValueFactoryImpl import static mvm.rya.api.RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX -import static mvm.rya.accumulo.mr.utils.MRUtils.* +import static mvm.rya.accumulo.mr.MRUtils.* import org.apache.accumulo.core.security.Authorizations import org.apache.accumulo.core.client.Connector import mvm.rya.accumulo.AccumuloRyaDAO http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/sail/RyaSailVertexSequenceTest.groovy ---------------------------------------------------------------------- diff --git a/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/sail/RyaSailVertexSequenceTest.groovy b/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/sail/RyaSailVertexSequenceTest.groovy index c661350..f0d2481 100644 --- a/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/sail/RyaSailVertexSequenceTest.groovy +++ b/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/sail/RyaSailVertexSequenceTest.groovy @@ -23,7 +23,7 @@ import mvm.rya.api.utils.IteratorWrapper import junit.framework.TestCase import mvm.rya.blueprints.config.RyaGraphConfiguration import org.openrdf.model.Statement -import static mvm.rya.accumulo.mr.utils.MRUtils.* +import static mvm.rya.accumulo.mr.MRUtils.* import static mvm.rya.api.RdfCloudTripleStoreConstants.VALUE_FACTORY /** http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/mapreduce/pom.xml ---------------------------------------------------------------------- diff --git a/mapreduce/pom.xml b/mapreduce/pom.xml new file mode 100644 index 0000000..40dd1df --- /dev/null +++ b/mapreduce/pom.xml @@ -0,0 +1,125 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.rya</groupId> + <artifactId>rya-project</artifactId> + <version>3.2.10-SNAPSHOT</version> + </parent> + + <artifactId>rya.mapreduce</artifactId> + <name>Apache Rya MapReduce Tools</name> + + <dependencies> + <dependency> + <groupId>org.apache.rya</groupId> + <artifactId>rya.api</artifactId> + </dependency> + <dependency> + <groupId>org.apache.rya</groupId> + <artifactId>accumulo.rya</artifactId> + </dependency> + <dependency> + <groupId>org.apache.rya</groupId> + <artifactId>rya.indexing</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.accumulo</groupId> + <artifactId>accumulo-core</artifactId> + </dependency> + + <dependency> + <groupId>commons-lang</groupId> + <artifactId>commons-lang</artifactId> + </dependency> + + <dependency> + <groupId>org.openrdf.sesame</groupId> + <artifactId>sesame-rio-ntriples</artifactId> + </dependency> + <dependency> + <groupId>org.openrdf.sesame</groupId> + <artifactId>sesame-rio-nquads</artifactId> + </dependency> + <dependency> + <groupId>org.openrdf.sesame</groupId> + <artifactId>sesame-rio-trig</artifactId> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.mrunit</groupId> + <artifactId>mrunit</artifactId> + <classifier>hadoop2</classifier> + <version>1.1.0</version> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <pluginManagement> + <plugins> + <plugin> + <groupId>org.apache.rat</groupId> + <artifactId>apache-rat-plugin</artifactId> + <configuration> + <excludes> + <!-- RDF data Files --> + <exclude>**/*.ntriples</exclude> + <exclude>**/*.trig</exclude> + </excludes> + </configuration> + </plugin> + </plugins> + </pluginManagement> + </build> + + <profiles> + <profile> + <id>mr</id> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <executions> + <execution> + <configuration> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" /> + </transformers> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + </profile> + </profiles> +</project>
