Repository: incubator-rya Updated Branches: refs/heads/master 7949baa67 -> 33ef52cbb
RYA-255 Provide an example class that demonstrates the Prospector. Closes #144. Project: http://git-wip-us.apache.org/repos/asf/incubator-rya/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-rya/commit/33ef52cb Tree: http://git-wip-us.apache.org/repos/asf/incubator-rya/tree/33ef52cb Diff: http://git-wip-us.apache.org/repos/asf/incubator-rya/diff/33ef52cb Branch: refs/heads/master Commit: 33ef52cbbb585b78c33878c41421ef07761f9032 Parents: 7949baa Author: Kevin Chilton <[email protected]> Authored: Tue Mar 7 16:02:16 2017 -0500 Committer: Caleb Meier <[email protected]> Committed: Fri Sep 29 11:50:46 2017 -0700 ---------------------------------------------------------------------- .../rya/accumulo/AccumuloRdfConfiguration.java | 40 ++++ .../src/main/java/ProspectorExample.java | 193 +++++++++++++++++++ .../src/main/resources/stats_cluster_config.xml | 93 +++++++++ 3 files changed, 326 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/33ef52cb/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java ---------------------------------------------------------------------- diff --git a/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java b/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java index 0200cf7..ed76b4a 100644 --- a/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java +++ b/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java @@ -205,6 +205,13 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration { /** + * @param enabled - {@code true} if the Rya instance is backed by a mock Accumulo; otherwise {@code false}. 
+ */ + public void useMockInstance(boolean enabled) { + super.setBoolean(USE_MOCK_INSTANCE, enabled); // Always overwrite, matching the other setters, so the caller's choice is never silently ignored. + } + + /** * Indicates that a Mock instance of Accumulo is being used to back the Rya instance. * * @return {@code true} if the Rya instance is backed by a mock Accumulo; otherwise {@code false}. @@ -214,6 +221,14 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration { } /** + * @param username - The Accumulo username to store in the configuration object; it is meant to + * be used when connecting a {@link Connector} to Accumulo. + */ + public void setUsername(String username) { + super.set(CLOUDBASE_USER, username); + } + + /** * Get the Accumulo username from the configuration object that is meant to * be used when connecting a {@link Connector} to Accumulo. * @@ -224,6 +239,14 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration { } /** + * @param password - The Accumulo password to store in the configuration object; it is meant to + * be used when connecting a {@link Connector} to Accumulo. + */ + public void setPassword(String password) { + super.set(CLOUDBASE_PASSWORD, password); + } + + /** * Get the Accumulo password from the configuration object that is meant to * be used when connecting a {@link Connector} to Accumulo. * @@ -234,6 +257,14 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration { } /** + * @param instanceName - The Accumulo instance name to store in the configuration object; it is + * meant to be used when connecting a {@link Connector} to Accumulo. + */ + public void setInstanceName(String instanceName) { + super.set(CLOUDBASE_INSTANCE, instanceName); + } + + /** * Get the Accumulo instance name from the configuration object that is * meant to be used when connecting a {@link Connector} to Accumulo. 
* @@ -244,6 +275,15 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration { } /** + * @param zookeepers - A comma delimited list of the names of the Zookeeper servers from + * the configuration object that is meant to be used when connecting a + * {@link Connector} to Accumulo. + */ + public void setZookeepers(String zookeepers) { + super.set(CLOUDBASE_ZOOKEEPERS, zookeepers); + } + + /** * Get a comma delimited list of the names of the Zookeeper servers from * the configuration object that is meant to be used when connecting a * {@link Connector} to Accumulo. http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/33ef52cb/extras/indexingExample/src/main/java/ProspectorExample.java ---------------------------------------------------------------------- diff --git a/extras/indexingExample/src/main/java/ProspectorExample.java b/extras/indexingExample/src/main/java/ProspectorExample.java new file mode 100644 index 0000000..fd0e4f8 --- /dev/null +++ b/extras/indexingExample/src/main/java/ProspectorExample.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import java.util.List; + +import org.apache.hadoop.util.ToolRunner; +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.Level; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; +import org.apache.rya.accumulo.AccumuloRdfConfiguration; +import org.apache.rya.accumulo.utils.ConnectorFactory; +import org.apache.rya.api.persist.RdfEvalStatsDAO.CARDINALITY_OF; +import org.apache.rya.prospector.mr.Prospector; +import org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO; +import org.apache.rya.sail.config.RyaSailFactory; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.ValueFactoryImpl; +import org.openrdf.sail.Sail; +import org.openrdf.sail.SailConnection; + +import com.beust.jcommander.internal.Lists; + +/** + * Demonstrates how you can use the {@link Prospector} to count values that appear within an instance of Rya and + * then use the {@link ProspectorServiceEvalStatsDAO} to fetch those counts. 
+ */ +public class ProspectorExample { + private static final Logger log = Logger.getLogger(ProspectorExample.class); // This demo's own logger; setupLogging() turns it back on after silencing the root logger. + + private static final ValueFactory VALUE_FACTORY = new ValueFactoryImpl(); + + private static final URI ALICE = VALUE_FACTORY.createURI("urn:alice"); + private static final URI BOB = VALUE_FACTORY.createURI("urn:bob"); + private static final URI CHARLIE = VALUE_FACTORY.createURI("urn:charlie"); + + private static final URI WORKS_AT = VALUE_FACTORY.createURI("urn:worksAt"); + private static final URI ADMIRES = VALUE_FACTORY.createURI("urn:admires"); + private static final URI LIVES_WITH = VALUE_FACTORY.createURI("urn:livesWith"); + + private static final URI BURGER_JOINT = VALUE_FACTORY.createURI("urn:burgerJoint"); + private static final URI DONUT_SHOP = VALUE_FACTORY.createURI("urn:donutShop"); + + public static void main(final String[] args) throws Exception { + setupLogging(); + + // Configure Rya to use a mock instance. + final AccumuloRdfConfiguration config = new AccumuloRdfConfiguration(); + config.useMockInstance(true); + config.setTablePrefix("rya_"); + config.setUsername("user"); + config.setPassword("pass"); + config.setInstanceName("accumulo"); + + // Load some data into Rya. 
+ final List<Statement> statements = Lists.newArrayList( + VALUE_FACTORY.createStatement(ALICE, WORKS_AT, BURGER_JOINT), + VALUE_FACTORY.createStatement(ALICE, ADMIRES, BOB), + VALUE_FACTORY.createStatement(BOB, WORKS_AT, DONUT_SHOP), + VALUE_FACTORY.createStatement(CHARLIE, WORKS_AT, DONUT_SHOP), + VALUE_FACTORY.createStatement(CHARLIE, LIVES_WITH, BOB), + VALUE_FACTORY.createStatement(BOB, LIVES_WITH, CHARLIE), + VALUE_FACTORY.createStatement(BOB, LIVES_WITH, ALICE)); + + final Sail sail = RyaSailFactory.getInstance(config); + final SailConnection conn = sail.getConnection(); + log.info("Loading the following statements into a Mock instance of Accumulo Rya:"); + conn.begin(); + for(final Statement statement : statements) { + log.info(" " + statement.toString()); + conn.addStatement(statement.getSubject(), statement.getPredicate(), statement.getObject()); + } + conn.commit(); + conn.close(); + + // Create the table that the Prospector's results will be written to. + ConnectorFactory.connect(config) + .tableOperations() + .create("rya_prospects"); + + // Run the Prospector using the configuration file that is in the resources directory. + log.info(""); + log.info("Running the Map Reduce job that computes the Prospector results."); + ToolRunner.run(new Prospector(), new String[]{ "src/main/resources/stats_cluster_config.xml" }); + + // Print the table that was created by the Prospector. + log.info(""); + log.info("The following cardinalities were written to the Prospector table:"); + final ProspectorServiceEvalStatsDAO dao = ProspectorServiceEvalStatsDAO.make(config); + + // Do each of the Subjects. 
+ double cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECT, Lists.newArrayList(ALICE)); + log.info(" subject: " + ALICE + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECT, Lists.newArrayList(BOB)); + log.info(" subject: " + BOB + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECT, Lists.newArrayList(CHARLIE)); + log.info(" subject: " + CHARLIE + ", cardinality: " + cardinality); + + // Do each of the Predicates. + cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATE, Lists.newArrayList(WORKS_AT)); + log.info(" predicate: " + WORKS_AT + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATE, Lists.newArrayList(ADMIRES)); + log.info(" predicate: " + ADMIRES + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATE, Lists.newArrayList(LIVES_WITH)); + log.info(" predicate: " + LIVES_WITH + ", cardinality: " + cardinality); + + // Do each of the Objects. + cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(BURGER_JOINT)); + log.info(" object: " + BURGER_JOINT + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(DONUT_SHOP)); + log.info(" object: " + DONUT_SHOP + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(ALICE)); + log.info(" object: " + ALICE + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(BOB)); + log.info(" object: " + BOB + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(CHARLIE)); + log.info(" object: " + CHARLIE + ", cardinality: " + cardinality); + + // Do each of the Subject/Predicate pairs. 
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(ALICE, WORKS_AT)); + log.info(" subject/predicate: " + ALICE + "/" + WORKS_AT + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(ALICE, ADMIRES)); + log.info(" subject/predicate: " + ALICE + "/" + ADMIRES + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(BOB, WORKS_AT)); + log.info(" subject/predicate: " + BOB + "/" + WORKS_AT + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(CHARLIE, WORKS_AT)); + log.info(" subject/predicate: " + CHARLIE + "/" + WORKS_AT + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(CHARLIE, LIVES_WITH)); + log.info(" subject/predicate: " + CHARLIE + "/" + LIVES_WITH + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(BOB, LIVES_WITH)); + log.info(" subject/predicate: " + BOB + "/" + LIVES_WITH + ", cardinality: " + cardinality); + + // Do each of the Subject/Object pairs. 
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(ALICE, BURGER_JOINT)); + log.info(" subject/object: " + ALICE + "/" + BURGER_JOINT + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(ALICE, BOB)); + log.info(" subject/object: " + ALICE + "/" + BOB + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(BOB, DONUT_SHOP)); + log.info(" subject/object: " + BOB + "/" + DONUT_SHOP + ", cardinality: " + cardinality); // Label must name the same pair that was just queried. + cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(CHARLIE, DONUT_SHOP)); + log.info(" subject/object: " + CHARLIE + "/" + DONUT_SHOP + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(CHARLIE, BOB)); + log.info(" subject/object: " + CHARLIE + "/" + BOB + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(BOB, CHARLIE)); + log.info(" subject/object: " + BOB + "/" + CHARLIE + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(BOB, ALICE)); + log.info(" subject/object: " + BOB + "/" + ALICE + ", cardinality: " + cardinality); + + // Do each of the Predicate/Object pairs. 
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(WORKS_AT, BURGER_JOINT)); + log.info(" predicate/object: " + WORKS_AT + "/" + BURGER_JOINT + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(ADMIRES, BOB)); + log.info(" predicate/object: " + ADMIRES + "/" + BOB + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(WORKS_AT, DONUT_SHOP)); + log.info(" predicate/object: " + WORKS_AT + "/" + DONUT_SHOP + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(LIVES_WITH, BOB)); + log.info(" predicate/object: " + LIVES_WITH + "/" + BOB + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(LIVES_WITH, CHARLIE)); + log.info(" predicate/object: " + LIVES_WITH + "/" + CHARLIE + ", cardinality: " + cardinality); + cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(LIVES_WITH, ALICE)); + log.info(" predicate/object: " + LIVES_WITH + "/" + ALICE + ", cardinality: " + cardinality); + } + + private static void setupLogging() { + // Turn off all the loggers and customize how they write to the console. + final Logger rootLogger = LogManager.getRootLogger(); + rootLogger.setLevel(Level.OFF); + final ConsoleAppender ca = (ConsoleAppender) rootLogger.getAppender("stdout"); + ca.setLayout(new PatternLayout("%-5p - %m%n")); + + // Turn the logger used by the demo back on. 
+ log.setLevel(Level.INFO); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/33ef52cb/extras/indexingExample/src/main/resources/stats_cluster_config.xml ---------------------------------------------------------------------- diff --git a/extras/indexingExample/src/main/resources/stats_cluster_config.xml b/extras/indexingExample/src/main/resources/stats_cluster_config.xml new file mode 100644 index 0000000..170a0c2 --- /dev/null +++ b/extras/indexingExample/src/main/resources/stats_cluster_config.xml @@ -0,0 +1,93 @@ +<?xml version="1.0"?> + + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. 
+--> + +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<configuration> + <!-- Accumulo info --> + + <property> + <name>mock</name> + <value>true</value> + </property> + <property> + <name>instance</name> + <value>accumulo</value> + </property> + + <!-- User info --> + <property> + <name>username</name> + <value>user</value> + </property> + <property> + <name>password</name> + <value>pass</value> + </property> + + <!-- Rya info --> + <property> + <name>spo.table</name> + <value>rya_spo</value> + </property> + <property> + <name>prospects.table</name> + <value>rya_prospects</value> + </property> + <property> + <name>selectivity.table</name> + <value>rya_selectivity</value> + </property> + <property> + <name>auths</name> + <value>U</value> + </property> + <property> + <name>prospector.auths</name> + <value>U</value> + </property> + + <property> + <name>prospector.intable</name> + <value>rya_spo</value> + </property> + <property> + <name>prospector.outtable</name> + <value>rya_prospects</value> + </property> + + <property> + <name>inputpath</name> + <value>/tmp/RyaStats/JoinSelectStatisticsSumInput</value> + </property> + <property> + <name>outputpath</name> + <value>/tmp/RyaStats/JoinSelectStatisticsSumInput</value> + </property> + <property> + <name>prospects.outputpath</name> + <value>/tmp/RyaStats/ProspectsOutput</value> + </property> + <property> + <name>spo.outputpath</name> + <value>/tmp/RyaStats/SpoOutput</value> + </property> +</configuration>
