[15/51] [partial] incubator-rya git commit: now including the actual project

pujav65 Wed, 28 Oct 2015 12:26:37 -0700

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.manual/src/main/webapp/querydata.conf
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/main/webapp/querydata.conf 
b/extras/rya.manual/src/main/webapp/querydata.conf
new file mode 100644
index 0000000..1f93944
--- /dev/null
+++ b/extras/rya.manual/src/main/webapp/querydata.conf
@@ -0,0 +1,115 @@
+h1. Query Data
+
+There are a few mechanisms to query data
+
+h2. Web JSP endpoint
+
+Open a url to {{http://server/web.rya/sparqlQuery.jsp}}. This simple form can 
run Sparql.
+
+h2. Web REST endpoint
+
+The War sets up a Web REST endpoint at {{http://server/web.rya/queryrdf}} that 
allows GET requests with queries.
+
+For this sample, we will assume you already loaded data from the 
[loaddata.html] tutorial
+
+Save this file somewhere $RDF_DATA
+
+Next, use the following Java code to query data through the REST endpoint:
+{code}
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.URLEncoder;
+
+public class QueryDataServletRun {
+
+    public static void main(String[] args) {
+        try {
+            String query = "select * where {\n" +
+                                "<http://mynamespace/ProductType1> ?p ?o.\n" +
+                                "}";
+
+            String queryenc = URLEncoder.encode(query, "UTF-8");
+
+            URL url = new URL("http://server/rdfTripleStore/queryrdf?query=" + 
queryenc);
+            URLConnection urlConnection = url.openConnection();
+            urlConnection.setDoOutput(true);
+
+            BufferedReader rd = new BufferedReader(new InputStreamReader(
+                    urlConnection.getInputStream()));
+            String line;
+            while ((line = rd.readLine()) != null) {
+                System.out.println(line);
+            }
+            rd.close();
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+}
+{code}
+
+Compile and run the code above, changing the URL to the one your Rdf War is 
running at.
+
+h2. Direct Code
+
+Here is a code snippet for running queries directly against Accumulo through 
the API. You will need at least accumulo.rya.jar, rya.api, rya.sail.impl on the 
classpath and transitive dependencies. I find that Maven is the easiest way to 
get a project dependency tree set up.
+
+{code}
+            Connector connector = new ZooKeeperInstance("cbinstance", 
"zkserver:port").getConnector("cbuser", "cbpassword");
+
+            final RdfCloudTripleStore store = new RdfCloudTripleStore();
+            AccumuloRyaDAO crdfdao = new AccumuloRyaDAO();
+            crdfdao.setConnector(connector);
+
+            AccumuloRdfConfiguration conf = new AccumuloRdfConfiguration();
+            conf.setTablePrefix("rts_");
+            conf.setDisplayQueryPlan(true);
+            crdfdao.setConf(conf);
+            store.setRdfDao(crdfdao);
+
+            ProspectorServiceEvalStatsDAO evalDao = new 
ProspectorServiceEvalStatsDAO(connector, conf);
+            evalDao.init();
+            store.setRdfEvalStatsDAO(evalDao);
+
+            InferenceEngine inferenceEngine = new InferenceEngine();
+            inferenceEngine.setRdfDao(crdfdao);
+            inferenceEngine.setConf(conf);
+            store.setInferenceEngine(inferenceEngine);
+
+            Repository myRepository = new RyaSailRepository(store);
+            myRepository.initialize();
+
+            String query = "select * where {\n" +
+                                "<http://mynamespace/ProductType1> ?p ?o.\n" +
+                                "}";
+            RepositoryConnection conn = myRepository.getConnection();
+            System.out.println(query);
+            TupleQuery tupleQuery = conn.prepareTupleQuery(
+                    QueryLanguage.SPARQL, query);
+            ValueFactory vf = ValueFactoryImpl.getInstance();
+
+            TupleQueryResultHandler writer = new 
SPARQLResultsXMLWriter(System.out);
+            tupleQuery.evaluate(new TupleQueryResultHandler() {
+
+                int count = 0;
+
+                @Override
+                public void startQueryResult(List<String> strings) throws 
TupleQueryResultHandlerException {
+                }
+
+                @Override
+                public void endQueryResult() throws 
TupleQueryResultHandlerException {
+                }
+
+                @Override
+                public void handleSolution(BindingSet bindingSet) throws 
TupleQueryResultHandlerException {
+                    System.out.println(bindingSet);
+                }
+            });
+
+            conn.close();
+            myRepository.shutDown();
+{code}
+


http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.manual/src/main/webapp/quickstart.conf
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/main/webapp/quickstart.conf 
b/extras/rya.manual/src/main/webapp/quickstart.conf
new file mode 100644
index 0000000..ce0d20b
--- /dev/null
+++ b/extras/rya.manual/src/main/webapp/quickstart.conf
@@ -0,0 +1,42 @@
+h1. Quick Start
+
+This tutorial will outline the steps needed to get quickly started with the 
Rya store using the web based endpoint.
+
+h2. Prerequisites
+
+* Columnar Store (either Accumulo or Cloudbase). The tutorial will go forward 
using Accumulo.
+* Rya code (Git: [email protected]:texeltek/rya.git)
+* Maven 2.2 +
+
+h2. Building from Source
+
+Using Git, pull down the latest code from the url above.
+
+Run the command to build the code {{mvn clean install}}
+
+If all goes well, the build should be successful and a war should be produced 
in {{web/web.rya/target/web.rya.war}}
+
+h2. Deployment
+
+(Using tomcat)
+
+Unwar the above war into the webapps directory.
+
+To point the web.rya war to the appropriate Accumulo instance, make a 
properties file {{environment.properties}} and put it in the classpath. Here is 
an example:
+{code}
+instance.name=accumulo  #Accumulo instance name
+instance.zk=localhost:2181  #Accumulo Zookeepers
+instance.username=root  #Accumulo username
+instance.password=secret  #Accumulo pwd
+rya.tableprefix=triplestore_  #Rya Table Prefix
+rya.displayqueryplan=true  #To display the query plan
+{code}
+
+Start the Tomcat server. {{./bin/startup.sh}}
+
+h2. Usage
+
+First, we need to load data. See the [Load Data Section|./loaddata]
+
+Second, we need to query that data. See the [Query Data Section|./querydata]
+

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.manual/src/main/webapp/sm-addauth.conf
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/main/webapp/sm-addauth.conf 
b/extras/rya.manual/src/main/webapp/sm-addauth.conf
new file mode 100644
index 0000000..371d8da
--- /dev/null
+++ b/extras/rya.manual/src/main/webapp/sm-addauth.conf
@@ -0,0 +1,98 @@
+h1. Add Authentication
+
+This tutorial will give a few examples on how to load and query data with 
authentication.
+
+This is only available for Cloudbase and Accumulo because they provide the 
security filters necessary to do row level authentication and visibility.
+
+h2. Load Data with Visibilities
+
+During the Load process, there are a few ways to set the Column Visibility you 
want set on each of the corresponding rdf rows.
+
+h3. Global Visibility
+
+You can set the Column Visibility globally on the RdfCloudTripleStore, and it 
will use that particular value for every row saved.
+
+To do this, once you create and set up the RdfCloudTripleStore, just set the 
property on the store configuration:
+
+{code}
+//setup
+final RdfCloudTripleStore store = new RdfCloudTripleStore();
+AccumuloRyaDAO crdfdao = new AccumuloRyaDAO();
+crdfdao.setConnector(connector);
+
+AccumuloRdfConfiguration conf = new AccumuloRdfConfiguration();
+conf.setTablePrefix("rts_");
+conf.setDisplayQueryPlan(true);
+
+//set global column Visibility
+conf.setCv("AUTH1|AUTH2");
+
+crdfdao.setConf(conf);
+store.setRdfDao(crdfdao);
+{code}
+
+The format is simply the same as the Column Visibility format.
+
+h3. Per triple or document based Visibility
+
+TODO: Not available as of yet
+
+h2. Query Data with Authentication
+
+Attaching an Authentication to the query process is very simple. It requires 
just adding the property {{RdfCloudTripleStoreConfiguration.CONF_QUERY_AUTH}} 
to the query {{BindingSet}}
+Example:
+
+{code}
+//setup
+Connector connector = new ZooKeeperInstance("cbinstance", 
"zkserver:port").getConnector("cbuser", "cbpassword");
+final RdfCloudTripleStore store = new RdfCloudTripleStore();
+AccumuloRyaDAO crdfdao = new AccumuloRyaDAO();
+crdfdao.setConnector(connector);
+
+AccumuloRdfConfiguration conf = new AccumuloRdfConfiguration();
+conf.setTablePrefix("rts_");
+conf.setDisplayQueryPlan(true);
+crdfdao.setConf(conf);
+//set global column Visibility
+conf.setCv("1|2");
+store.setRdfDao(crdfdao);
+
+InferenceEngine inferenceEngine = new InferenceEngine();
+inferenceEngine.setRdfDao(crdfdao);
+inferenceEngine.setConf(conf);
+store.setInferenceEngine(inferenceEngine);
+
+Repository myRepository = new RyaSailRepository(store);
+myRepository.initialize();
+RepositoryConnection conn = myRepository.getConnection();
+
+//define and add statement
+String litdupsNS = "urn:test:litdups#";
+URI cpu = vf.createURI(litdupsNS, "cpu");
+URI loadPerc = vf.createURI(litdupsNS, "loadPerc");
+URI uri1 = vf.createURI(litdupsNS, "uri1");
+conn.add(cpu, loadPerc, uri1);
+conn.commit();
+
+//query with auth
+String query = "select * where {" +
+                "<" + cpu.toString() + "> ?p ?o1." +
+                "}";
+TupleQuery tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, query);
+tupleQuery.setBinding(RdfCloudTripleStoreConfiguration.CONF_QUERY_AUTH, 
vf.createLiteral("2"));
+TupleQueryResult result = tupleQuery.evaluate();
+while(result.hasNext()) {
+    System.out.println(result.next());
+}
+result.close();
+
+//close
+conn.close();
+myRepository.shutDown();
+{code}
+
+Or you can set a global auth using the configuration:
+
+{code}
+conf.setAuth("2")
+{code}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.manual/src/main/webapp/sm-firststeps.conf
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/main/webapp/sm-firststeps.conf 
b/extras/rya.manual/src/main/webapp/sm-firststeps.conf
new file mode 100644
index 0000000..04a56b9
--- /dev/null
+++ b/extras/rya.manual/src/main/webapp/sm-firststeps.conf
@@ -0,0 +1,55 @@
+h1. Typical First Steps
+
+In this tutorial, I will give you a quick overview of some of the first steps 
I perform to get data loaded and ready for query.
+
+h2. Prerequisites
+
+ We are assuming Accumulo 1.4+ usage here.
+
+ * Rya Source Code ({{web.rya.war}})
+ * Accumulo on top of Hadoop 0.20+
+ * RDF Data (in N-Triples format, this format is the easiest to bulk load)
+
+h2. Building Source
+
+Skip this section if you already have the Map Reduce artifact and the WAR
+
+See the [Build From Source Section|./build-source.html] to get the appropriate 
artifacts built
+
+h2. Load Data
+
+I find that the best way to load the data is through the Bulk Load Map Reduce 
job.
+
+# Save the RDF Data above onto HDFS. From now on we will refer to this 
location as <RDF_HDFS_LOCATION>
+# Move the cloudbase.rya-<version>-job.jar onto the hadoop cluster
+# Bulk load the data. Here is a sample command line:
+{code}
+hadoop jar ../cloudbase.rya-2.0.0-SNAPSHOT-job.jar BulkNtripsInputTool 
-Drdf.tablePrefix=lubm_ -Dcb.username=cbuser -Dcb.pwd=cbpwd 
-Dcb.instance=cbinstance -Dcb.zk=zookeeperLocation -Drdf.format=N-Triples 
<RDF_HDFS_LOCATION>
+{code}
+
+Once the data is loaded, it is actually a good practice to compact your 
tables. You can do this by opening the cloudbase shell {{cbshell}} and running 
the {{compact}} command on the generated tables. Remember the generated tables 
will be prefixed by the {{rdf.tablePrefix}} property you assigned above. The 
default tablePrefix is {{rts}}.
+Here is a sample cloudbase shell command:
+{code}
+compact -p lubm_(.*)
+{code}
+
+See the [Load Data Section|./loaddata.html] for more options on loading rdf 
data
+
+h2. Run the Statistics Optimizer
+
+For the best query performance, it is recommended to run the Statistics 
Optimizer to create the Evaluation Statistics table. This job will read through 
your data and gather statistics on the distribution of the dataset. This table 
is then queried before query execution to reorder queries based on the data 
distribution.
+
+See the [Evaluation Statistics Table Section|eval.html] on how to do this.
+
+h2. Query data
+
+I find the easiest way to query is just to use the WAR. Load the WAR into your 
favorite web application container and go to the sparqlQuery.jsp page. Example:
+{code}
+http://localhost:8080/web.rya/sparqlQuery.jsp
+{code}
+
+This page provides a very simple text box for running queries against the 
store and getting data back. (SPARQL queries)
+
+Remember to update the connection information in the WAR: 
{{WEB-INF/spring/spring-cloudbase.xml}}
+
+See the [Query data section|./querydata.html] for more information.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.manual/src/main/webapp/sm-infer.conf
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/main/webapp/sm-infer.conf 
b/extras/rya.manual/src/main/webapp/sm-infer.conf
new file mode 100644
index 0000000..ab25281
--- /dev/null
+++ b/extras/rya.manual/src/main/webapp/sm-infer.conf
@@ -0,0 +1,313 @@
+h1. Inferencing
+
+The Rdf store currently provides simple inferencing. The list of supported 
inferred relationships includes:
+
+* rdfs:subClassOf
+* rdfs:subPropertyOf
+* owl:equivalentProperty
+* owl:inverseOf
+* owl:SymmetricProperty
+* owl:TransitiveProperty (This is currently in beta and will not work for 
every case)
+
+h2. Setup
+
+The Inferencing Engine is a scheduled job that runs every 5 minutes by default 
(this is configurable) to query the relationships in the store and develop the 
inferred graphs necessary to answer inferencing questions.
+
+This also means that if you load a model into the store, it could take up to 5 
minutes for the inferred relationships to be available.
+
+As usual you will need to set up your {{RdfCloudTripleStore}} with the correct 
DAO. Notice that we also add an {{InferenceEngine}} to the store. If this is 
not added, then no inferencing will be done on the queries:
+
+{code}
+//setup
+Connector connector = new ZooKeeperInstance("cbinstance", 
"zkserver:port").getConnector("cbuser", "cbpassword");
+final RdfCloudTripleStore store = new RdfCloudTripleStore();
+AccumuloRyaDAO crdfdao = new AccumuloRyaDAO();
+crdfdao.setConnector(connector);
+
+AccumuloRdfConfiguration conf = new AccumuloRdfConfiguration();
+conf.setTablePrefix("rts_");
+conf.setDisplayQueryPlan(true);
+crdfdao.setConf(conf);
+store.setRdfDao(crdfdao);
+
+ProspectorServiceEvalStatsDAO evalDao = new 
ProspectorServiceEvalStatsDAO(connector, conf);
+evalDao.init();
+store.setRdfEvalStatsDAO(evalDao);
+
+InferenceEngine inferenceEngine = new InferenceEngine();
+inferenceEngine.setRdfDao(crdfdao);
+inferenceEngine.setConf(conf);
+store.setInferenceEngine(inferenceEngine);
+
+Repository myRepository = new RyaSailRepository(store);
+myRepository.initialize();
+RepositoryConnection conn = myRepository.getConnection();
+
+//query code goes here
+
+//close
+conn.close();
+myRepository.shutDown();
+{code}
+
+h2. Samples
+
+We will go through some quick samples on loading inferred relationships, 
seeing and diagnosing the query plan, and checking the data
+
+h3. Rdfs:SubClassOf
+
+First the code, which will load the following subclassof relationship: 
{{UndergraduateStudent subclassof Student subclassof Person}}. Then we will 
load into the tables three triples defining {{UgradA rdf:type 
UndergraduateStudent, StudentB rdf:type Student, PersonC rdf:type Person}}
+
+{code}
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "UndergraduateStudent"), 
RDFS.SUBCLASSOF, vf.createURI(litdupsNS, "Student")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "Student"), 
RDFS.SUBCLASSOF, vf.createURI(litdupsNS, "Person")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "UgradA"), RDF.TYPE, 
vf.createURI(litdupsNS, "UndergraduateStudent")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "StudentB"), RDF.TYPE, 
vf.createURI(litdupsNS, "Student")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "PersonC"), RDF.TYPE, 
vf.createURI(litdupsNS, "Person")));
+conn.commit();
+{code}
+
+Remember that once the model is committed, it may take up to 5 minutes for the 
inferred relationships to be ready, though you can override this property in 
the {{InferenceEngine}}.
+
+We shall run the following query:
+
+{code}
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX lit: <urn:test:litdups#>
+select * where {?s rdf:type lit:Person.}
+{code}
+
+And should get back the following results:
+{code}
+[s=urn:test:litdups#StudentB]
+[s=urn:test:litdups#PersonC]
+[s=urn:test:litdups#UgradA]
+{code}
+
+h4. How it works
+
+Let us look at the query plan:
+{code}
+QueryRoot
+   Projection
+      ProjectionElemList
+         ProjectionElem "s"
+      Join
+         FixedStatementPattern
+            Var (name=79f261ee-e930-4af1-bc09-e637cc0affef)
+            Var (name=c-79f261ee-e930-4af1-bc09-e637cc0affef, 
value=http://www.w3.org/2000/01/rdf-schema#subClassOf)
+            Var (name=-const-2, value=urn:test:litdups#Person, anonymous)
+         DoNotExpandSP
+            Var (name=s)
+            Var (name=-const-1, 
value=http://www.w3.org/1999/02/22-rdf-syntax-ns#type, anonymous)
+            Var (name=79f261ee-e930-4af1-bc09-e637cc0affef)
+{code}
+
+Basically, we first find out (through the InferenceEngine) what triples have 
subclassof with Person. The InferenceEngine will do the graph analysis to 
find that both Student and UndergraduateStudent are Person classes.
+Then this information is joined with the statement pattern {{(?s rdf:type 
?inf)}} where {{?inf}} stands for the results from the InferenceEngine.
+
+h3. Rdfs:SubPropertyOf
+
+SubPropertyOf defines that a property can be an instance of another property. 
For example, a {{gradDegreeFrom subPropertyOf degreeFrom}}.
+
+Also, EquivalentProperty can be thought of as a specialized SubPropertyOf 
relationship where if {{propA equivalentProperty propB}} then that means that 
{{propA subPropertyOf propB AND propB subPropertyOf propA}}.
+
+Sample Code:
+{code}
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "undergradDegreeFrom"), 
RDFS.SUBPROPERTYOF, vf.createURI(litdupsNS, "degreeFrom")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "gradDegreeFrom"), 
RDFS.SUBPROPERTYOF, vf.createURI(litdupsNS, "degreeFrom")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "degreeFrom"), 
RDFS.SUBPROPERTYOF, vf.createURI(litdupsNS, "memberOf")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "memberOf"), 
RDFS.SUBPROPERTYOF, vf.createURI(litdupsNS, "associatedWith")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "UgradA"), 
vf.createURI(litdupsNS, "undergradDegreeFrom"), vf.createURI(litdupsNS, 
"Harvard")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "GradB"), 
vf.createURI(litdupsNS, "gradDegreeFrom"), vf.createURI(litdupsNS, "Yale")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "ProfessorC"), 
vf.createURI(litdupsNS, "memberOf"), vf.createURI(litdupsNS, "Harvard")));
+conn.commit();
+{code}
+
+With query:
+{code}
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX lit: <urn:test:litdups#>
+select * where {?s lit:memberOf lit:Harvard.}
+{code}
+
+Will return results:
+{code}
+[s=urn:test:litdups#UgradA]
+[s=urn:test:litdups#ProfessorC]
+{code}
+
+Since UgradA has undergradDegreeFrom Harvard and ProfessorC is memberOf 
Harvard.
+
+h4. How it works
+
+This is very similar to the subClassOf relationship above. Basically the 
InferenceEngine provides the properties that are in a subPropertyOf relationship 
with memberOf, and the second part of the Join checks to see if those properties 
are predicates with object "Harvard".
+
+Query Plan:
+{code}
+QueryRoot
+   Projection
+      ProjectionElemList
+         ProjectionElem "s"
+      Join
+         FixedStatementPattern
+            Var (name=0bad69f3-4769-4293-8318-e828b23dc52a)
+            Var (name=c-0bad69f3-4769-4293-8318-e828b23dc52a, 
value=http://www.w3.org/2000/01/rdf-schema#subPropertyOf)
+            Var (name=-const-1, value=urn:test:litdups#memberOf, anonymous)
+         DoNotExpandSP
+            Var (name=s)
+            Var (name=0bad69f3-4769-4293-8318-e828b23dc52a)
+            Var (name=-const-2, value=urn:test:litdups#Harvard, anonymous)
+{code}
+
+h3. InverseOf
+
+InverseOf defines a property that is an inverse relation of another property. 
For example, a student who has a {{degreeFrom}} a University also means that 
the University {{hasAlumnus}} student.
+
+Code:
+{code}
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "degreeFrom"), 
OWL.INVERSEOF, vf.createURI(litdupsNS, "hasAlumnus")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "UgradA"), 
vf.createURI(litdupsNS, "degreeFrom"), vf.createURI(litdupsNS, "Harvard")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "GradB"), 
vf.createURI(litdupsNS, "degreeFrom"), vf.createURI(litdupsNS, "Harvard")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "Harvard"), 
vf.createURI(litdupsNS, "hasAlumnus"), vf.createURI(litdupsNS, "AlumC")));
+conn.commit();
+{code}
+
+Query:
+{code}
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX lit: <urn:test:litdups#>
+select * where {lit:Harvard lit:hasAlumnus ?s.}
+{code}
+
+Result:
+{code}
+[s=urn:test:litdups#AlumC]
+[s=urn:test:litdups#GradB]
+[s=urn:test:litdups#UgradA]
+{code}
+
+h4. How it works
+
+The query planner will expand the statement pattern {{Harvard hasAlumnus ?s}} 
to a Union between {{Harvard hasAlumnus ?s}} and {{?s degreeFrom Harvard}}.
+
+As a caveat, it is important to note that in general Union queries do not have 
the best performance, so having a property that has an inverseOf and 
subPropertyOf, could cause a query plan that might take long depending on how 
the query planner orders the joins.
+
+Query Plan
+{code}
+QueryRoot
+   Projection
+      ProjectionElemList
+         ProjectionElem "s"
+      InferUnion
+         StatementPattern
+            Var (name=-const-1, value=urn:test:litdups#Harvard, anonymous)
+            Var (name=-const-2, value=urn:test:litdups#hasAlumnus, anonymous)
+            Var (name=s)
+         StatementPattern
+            Var (name=s)
+            Var (name=-const-2, value=urn:test:litdups#degreeFrom)
+            Var (name=-const-1, value=urn:test:litdups#Harvard, anonymous)
+{code}
+
+h3. SymmetricProperty
+
+SymmetricProperty defines a relationship where, for example, if Bob is a 
friendOf Jeff, then Jeff is a friendOf Bob. (Hopefully)
+
+Code:
+{code}
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "friendOf"), RDF.TYPE, 
OWL.SYMMETRICPROPERTY));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "Bob"), 
vf.createURI(litdupsNS, "friendOf"), vf.createURI(litdupsNS, "Jeff")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "James"), 
vf.createURI(litdupsNS, "friendOf"), vf.createURI(litdupsNS, "Jeff")));
+conn.commit();
+{code}
+
+Query:
+{code}
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX lit: <urn:test:litdups#>
+select * where {?s lit:friendOf lit:Bob.}
+{code}
+
+Results:
+{code}
+[s=urn:test:litdups#Jeff]
+{code}
+
+h4. How it works
+
+The query planner will recognize that {{friendOf}} is a SymmetricProperty and 
devise a Union to find the specified relationship and inverse.
+
+Query Plan:
+{code}
+QueryRoot
+   Projection
+      ProjectionElemList
+         ProjectionElem "s"
+      InferUnion
+         StatementPattern
+            Var (name=s)
+            Var (name=-const-1, value=urn:test:litdups#friendOf, anonymous)
+            Var (name=-const-2, value=urn:test:litdups#Bob, anonymous)
+         StatementPattern
+            Var (name=-const-2, value=urn:test:litdups#Bob, anonymous)
+            Var (name=-const-1, value=urn:test:litdups#friendOf, anonymous)
+            Var (name=s)
+{code}
+
+h3. TransitiveProperty
+
+TransitiveProperty provides a transitive relationship between resources. For 
example, if Queens is subRegionOf NYC and NYC is subRegionOf NY, then Queens is 
transitively a subRegionOf NY.
+
+Code:
+{code}
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "subRegionOf"), RDF.TYPE, 
OWL.TRANSITIVEPROPERTY));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "Queens"), 
vf.createURI(litdupsNS, "subRegionOf"), vf.createURI(litdupsNS, "NYC")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "NYC"), 
vf.createURI(litdupsNS, "subRegionOf"), vf.createURI(litdupsNS, "NY")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "NY"), 
vf.createURI(litdupsNS, "subRegionOf"), vf.createURI(litdupsNS, "US")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "US"), 
vf.createURI(litdupsNS, "subRegionOf"), vf.createURI(litdupsNS, 
"NorthAmerica")));
+conn.add(new StatementImpl(vf.createURI(litdupsNS, "NorthAmerica"), 
vf.createURI(litdupsNS, "subRegionOf"), vf.createURI(litdupsNS, "World")));
+conn.commit();
+{code}
+
+Query:
+{code}
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX lit: <urn:test:litdups#>
+select * where {?s lit:subRegionOf lit:NorthAmerica.}
+{code}
+
+Results:
+{code}
+[s=urn:test:litdups#Queens]
+[s=urn:test:litdups#NYC]
+[s=urn:test:litdups#NY]
+[s=urn:test:litdups#US]
+{code}
+
+h4. How it works
+
+The TransitiveProperty relationship works by running recursive queries till 
all the results are returned.
+
+It is important to note that certain TransitiveProperty relationships will not 
work:
+* Open ended property: ?s subRegionOf ?o (At least one of the properties must 
be filled or will be filled as the query gets answered)
+* Closed property: Queens subRegionOf NY (At least one of the properties must 
be empty)
+
+We are working on fixing these issues.
+
+Query Plan:
+{code}
+QueryRoot
+   Projection
+      ProjectionElemList
+         ProjectionElem "s"
+      TransitivePropertySP
+         Var (name=s)
+         Var (name=-const-1, value=urn:test:litdups#subRegionOf, anonymous)
+         Var (name=-const-2, value=urn:test:litdups#NorthAmerica, anonymous)
+{code}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.manual/src/main/webapp/sm-namedgraph.conf
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/main/webapp/sm-namedgraph.conf 
b/extras/rya.manual/src/main/webapp/sm-namedgraph.conf
new file mode 100644
index 0000000..ce2a0da
--- /dev/null
+++ b/extras/rya.manual/src/main/webapp/sm-namedgraph.conf
@@ -0,0 +1,129 @@
+h1. Named Graphs
+
+Named graphs are supported simply in the Rdf Store in a few ways. OpenRdf 
supports sending {{contexts}} as each triple is saved.
+
+h2. Simple Named Graph Load and Query
+
+Here is a very simple example of using the API to Insert data in named graphs 
and querying with Sparql
+
+First we will define a Trig document to load
+Trig document
+{code}
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix swp: <http://www.w3.org/2004/03/trix/swp-1/> .
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+@prefix ex: <http://www.example.org/vocabulary#> .
+@prefix : <http://www.example.org/exampleDocument#> .
+:G1 { :Monica ex:name "Monica Murphy" .
+      :Monica ex:homepage <http://www.monicamurphy.org> .
+      :Monica ex:email <mailto:[email protected]> .
+      :Monica ex:hasSkill ex:Management }
+
+:G2 { :Monica rdf:type ex:Person .
+      :Monica ex:hasSkill ex:Programming }
+
+:G4 { :Phobe ex:name "Phobe Buffet" }
+
+:G3 { :G1 swp:assertedBy _:w1 .
+      _:w1 swp:authority :Chris .
+      _:w1 dc:date "2003-10-02"^^xsd:date .
+      :G2 swp:quotedBy _:w2 .
+      :G4 swp:assertedBy _:w2 .
+      _:w2 dc:date "2003-09-03"^^xsd:date .
+      _:w2 swp:authority :Tom .
+      :Chris rdf:type ex:Person .
+      :Chris ex:email <mailto:[email protected]>.
+      :Tom rdf:type ex:Person .
+      :Tom ex:email <mailto:[email protected]>}
+{code}
+
+We will assume that this file is saved on your classpath somewhere at 
{{<TRIG_FILE>}}
+
+Load data through API:
+{code}
+InputStream stream = 
Thread.currentThread().getContextClassLoader().getResourceAsStream("namedgraphs.trig");
+RepositoryConnection conn = repository.getConnection();
+conn.add(stream, "", RDFFormat.TRIG);
+conn.commit();
+{code}
+
+Now that the data is loaded we can easily query it. For example, we will query 
to find what {{hasSkill}} is defined in graph G2, and relate that to someone 
defined in G1.
+
+Query:
+{code}
+PREFIX  ex:  <http://www.example.org/exampleDocument#>
+PREFIX  voc:  <http://www.example.org/vocabulary#>
+PREFIX  foaf:  <http://xmlns.com/foaf/0.1/>
+PREFIX  rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
+
+SELECT *
+WHERE
+{
+  GRAPH ex:G1
+  {
+    ?m voc:name ?name ;
+       voc:homepage ?hp .
+  } .
+ GRAPH ex:G2
+  {
+    ?m voc:hasSkill ?skill .
+  } .
+}
+{code}
+
+Results:
+{code}
+[hp=http://www.monicamurphy.org;m=http://www.example.org/exampleDocument#Monica;skill=http://www.example.org/vocabulary#Programming;name="Monica
 Murphy"]
+{code}
+
+Here is the Query Plan as well:
+{code}
+QueryRoot
+   Projection
+      ProjectionElemList
+         ProjectionElem "m"
+         ProjectionElem "name"
+         ProjectionElem "hp"
+         ProjectionElem "skill"
+      Join
+         Join
+            StatementPattern FROM NAMED CONTEXT
+               Var (name=m)
+               Var (name=-const-2, 
value=http://www.example.org/vocabulary#name, anonymous)
+               Var (name=name)
+               Var (name=-const-1, 
value=http://www.example.org/exampleDocument#G1, anonymous)
+            StatementPattern FROM NAMED CONTEXT
+               Var (name=m)
+               Var (name=-const-3, 
value=http://www.example.org/vocabulary#homepage, anonymous)
+               Var (name=hp)
+               Var (name=-const-1, 
value=http://www.example.org/exampleDocument#G1, anonymous)
+         StatementPattern FROM NAMED CONTEXT
+            Var (name=m)
+            Var (name=-const-5, 
value=http://www.example.org/vocabulary#hasSkill, anonymous)
+            Var (name=skill)
+            Var (name=-const-4, 
value=http://www.example.org/exampleDocument#G2, anonymous)
+{code}
+
+h2. Inserting named graph data through Sparql
+
+The new Sparql update standard provides another way to insert data, even into 
named graphs.
+
+First the insert update:
+{code}
+PREFIX dc: <http://purl.org/dc/elements/1.1/>
+PREFIX ex: <http://example/addresses#>
+INSERT DATA
+{
+    GRAPH ex:G1 {
+        <http://example/book3> dc:title    "A new book" ;
+                               dc:creator  "A.N.Other" .
+    }
+}
+{code}
+
+To perform this update, it requires different code than querying the data 
directly:
+{code}
+Update update = conn.prepareUpdate(QueryLanguage.SPARQL, insert);
+update.execute();
+{code}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.manual/src/main/webapp/sm-simpleaqr.conf
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/main/webapp/sm-simpleaqr.conf 
b/extras/rya.manual/src/main/webapp/sm-simpleaqr.conf
new file mode 100644
index 0000000..7616197
--- /dev/null
+++ b/extras/rya.manual/src/main/webapp/sm-simpleaqr.conf
@@ -0,0 +1,54 @@
+h1. Simple Add Query and Remove of Statements
+
+This quick tutorial will give a small example on how to add, query, and remove 
statements from Cloudbase
+
+h2. Code
+
+{code}
+//setup
+Connector connector = new ZooKeeperInstance("cbinstance", 
"zkserver:port").getConnector("cbuser", "cbpassword");
+final RdfCloudTripleStore store = new RdfCloudTripleStore();
+AccumuloRyaDAO crdfdao = new AccumuloRyaDAO();
+crdfdao.setConnector(connector);
+
+AccumuloRdfConfiguration conf = new AccumuloRdfConfiguration();
+conf.setTablePrefix("rts_");
+conf.setDisplayQueryPlan(true);
+crdfdao.setConf(conf);
+store.setRdfDao(crdfdao);
+
+ProspectorServiceEvalStatsDAO evalDao = new 
ProspectorServiceEvalStatsDAO(connector, conf);
+evalDao.init();
+store.setRdfEvalStatsDAO(evalDao);
+
+InferenceEngine inferenceEngine = new InferenceEngine();
+inferenceEngine.setRdfDao(crdfdao);
+inferenceEngine.setConf(conf);
+store.setInferenceEngine(inferenceEngine);
+
+Repository myRepository = new RyaSailRepository(store);
+myRepository.initialize();
+RepositoryConnection conn = myRepository.getConnection();
+
+//define and add statement
+String litdupsNS = "urn:test:litdups#";
+URI cpu = vf.createURI(litdupsNS, "cpu");
+URI loadPerc = vf.createURI(litdupsNS, "loadPerc");
+URI uri1 = vf.createURI(litdupsNS, "uri1");
+conn.add(cpu, loadPerc, uri1);
+conn.commit();
+
+//query for all statements that have subject=cpu and pred=loadPerc (wildcard 
object)
+RepositoryResult<Statement> result = conn.getStatements(cpu, loadPerc, null, 
true);
+while(result.hasNext()) {
+    System.out.println(result.next());
+}
+result.close();
+
+//remove statement
+conn.remove(cpu, loadPerc, uri1);
+
+//close
+conn.close();
+myRepository.shutDown();
+{code}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.manual/src/main/webapp/sm-sparqlquery.conf
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/main/webapp/sm-sparqlquery.conf 
b/extras/rya.manual/src/main/webapp/sm-sparqlquery.conf
new file mode 100644
index 0000000..985e21a
--- /dev/null
+++ b/extras/rya.manual/src/main/webapp/sm-sparqlquery.conf
@@ -0,0 +1,58 @@
+h1. Simple Sparql Query
+
+This quick tutorial will give a small example on how to query data with Sparql
+
+h2. Code
+
+{code}
+//setup
+Connector connector = new ZooKeeperInstance("cbinstance", 
"zkserver:port").getConnector("cbuser", "cbpassword");
+final RdfCloudTripleStore store = new RdfCloudTripleStore();
+AccumuloRyaDAO crdfdao = new AccumuloRyaDAO();
+crdfdao.setConnector(connector);
+
+AccumuloRdfConfiguration conf = new AccumuloRdfConfiguration();
+conf.setTablePrefix("rts_");
+conf.setDisplayQueryPlan(true);
+crdfdao.setConf(conf);
+store.setRdfDao(crdfdao);
+
+ProspectorServiceEvalStatsDAO evalDao = new 
ProspectorServiceEvalStatsDAO(connector, conf);
+evalDao.init();
+store.setRdfEvalStatsDAO(evalDao);
+
+InferenceEngine inferenceEngine = new InferenceEngine();
+inferenceEngine.setRdfDao(crdfdao);
+inferenceEngine.setConf(conf);
+store.setInferenceEngine(inferenceEngine);
+
+Repository myRepository = new RyaSailRepository(store);
+myRepository.initialize();
+RepositoryConnection conn = myRepository.getConnection();
+
+//define and add statements
+String litdupsNS = "urn:test:litdups#";
+URI cpu = vf.createURI(litdupsNS, "cpu");
+URI loadPerc = vf.createURI(litdupsNS, "loadPerc");
+URI uri1 = vf.createURI(litdupsNS, "uri1");
+URI pred2 = vf.createURI(litdupsNS, "pred2");
+URI uri2 = vf.createURI(litdupsNS, "uri2");
+conn.add(cpu, loadPerc, uri1);
+conn.commit();
+
+//query using sparql
+String query = "select * where {" +
+                "?x <" + loadPerc.stringValue() + "> ?o1." +
+                "?x <" + pred2.stringValue() + "> ?o2." +
+                "}";
+TupleQuery tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, query);
+TupleQueryResult result = tupleQuery.evaluate();
+while(result.hasNext()) {
+      System.out.println(result.next());
+}
+result.close();
+
+//close
+conn.close();
+myRepository.shutDown();
+{code}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.manual/src/main/webapp/sm-updatedata.conf
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/main/webapp/sm-updatedata.conf 
b/extras/rya.manual/src/main/webapp/sm-updatedata.conf
new file mode 100644
index 0000000..b3e3a5b
--- /dev/null
+++ b/extras/rya.manual/src/main/webapp/sm-updatedata.conf
@@ -0,0 +1,56 @@
+h1. Sparql Update
+
+OpenRDF supports the Sparql Update functionality. Here are a few samples:
+
+Remember, you have to use {{RepositoryConnection.prepareUpdate(..)}} to 
perform these updates.
+
+Insert:
+{code}
+PREFIX dc: <http://purl.org/dc/elements/1.1/>
+INSERT DATA
+{ <http://example/book3> dc:title    "A new book" ;
+                         dc:creator  "A.N.Other" .
+}
+{code}
+
+Delete:
+{code}
+PREFIX dc: <http://purl.org/dc/elements/1.1/>
+DELETE DATA
+{ <http://example/book3> dc:title    "A new book" ;
+                         dc:creator  "A.N.Other" .
+}
+{code}
+
+Update:
+{code}
+PREFIX dc: <http://purl.org/dc/elements/1.1/>
+DELETE { ?book dc:title ?title }
+INSERT { ?book dc:title "A newer book".         ?book dc:add "Additional Info" 
}
+WHERE
+  { ?book dc:creator "A.N.Other" .
+  }
+{code}
+
+Insert Named Graph:
+{code}
+PREFIX dc: <http://purl.org/dc/elements/1.1/>
+PREFIX ex: <http://example/addresses#>
+INSERT DATA
+{ GRAPH ex:G1 {
+<http://example/book3> dc:title    "A new book" ;
+                         dc:creator  "A.N.Other" .
+}
+}
+{code}
+
+Update Named Graph:
+{code}
+PREFIX dc: <http://purl.org/dc/elements/1.1/>
+WITH <http://example/addresses#G1>
+DELETE { ?book dc:title ?title }
+INSERT { ?book dc:title "A newer book".         ?book dc:add "Additional Info" 
}
+WHERE
+  { ?book dc:creator "A.N.Other" .
+  }
+{code}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/pom.xml
----------------------------------------------------------------------
diff --git a/extras/rya.prospector/pom.xml b/extras/rya.prospector/pom.xml
new file mode 100644
index 0000000..ddb28dd
--- /dev/null
+++ b/extras/rya.prospector/pom.xml
@@ -0,0 +1,131 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <groupId>mvm.rya</groupId>
+        <artifactId>rya.extras</artifactId>
+        <version>3.2.9</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+    <name>${project.groupId}.${project.artifactId}</name>
+    <artifactId>rya.prospector</artifactId>
+    <dependencies>
+        <dependency>
+            <groupId>mvm.rya</groupId>
+            <artifactId>rya.api</artifactId>
+            <exclusions>
+                               <exclusion>
+                                       <artifactId>mockito-all</artifactId>
+                                       <groupId>org.mockito</groupId>
+                               </exclusion>
+                       </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>commons-lang</groupId>
+            <artifactId>commons-lang</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>mvm.rya</groupId>
+            <artifactId>accumulo.rya</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.codehaus.groovy</groupId>
+            <artifactId>groovy-all</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.mrunit</groupId>
+            <artifactId>mrunit</artifactId>
+            <version>1.1.0</version>
+            <classifier>hadoop2</classifier>
+            <scope>test</scope>
+        </dependency>
+        
+    </dependencies>
+    <build>
+ <plugins>
+<plugin>
+<artifactId>maven-compiler-plugin</artifactId>
+<!-- 2.8.0-01 and later require maven-compiler-plugin 3.1 or higher -->
+<version>3.1</version>
+<configuration>
+<compilerId>groovy-eclipse-compiler</compilerId>
+</configuration>
+<dependencies>
+<dependency>
+<groupId>org.codehaus.groovy</groupId>
+<artifactId>groovy-eclipse-compiler</artifactId>
+<version>2.9.1-01</version>
+</dependency>
+<!-- for 2.8.0-01 and later you must have an explicit dependency on 
groovy-eclipse-batch -->
+<dependency>
+<groupId>org.codehaus.groovy</groupId>
+<artifactId>groovy-eclipse-batch</artifactId>
+<version>2.3.7-01</version>
+</dependency>
+</dependencies>
+</plugin>
+<plugin>
+<groupId>org.codehaus.groovy</groupId>
+<artifactId>groovy-eclipse-compiler</artifactId>
+<version>2.9.1-01</version>
+<extensions>true</extensions>
+</plugin>
+<plugin>
+<groupId>org.apache.maven.plugins</groupId>
+<artifactId>maven-shade-plugin</artifactId>
+<executions>
+<execution>
+<configuration>
+<shadedArtifactAttached>true</shadedArtifactAttached>
+<shadedClassifierName>map-reduce</shadedClassifierName>
+<transformers>
+<transformer 
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"
 />
+</transformers>
+</configuration>
+</execution>
+</executions>
+</plugin>
+</plugins>
+     </build>
+    <profiles>
+        <profile>
+            <id>accumulo</id>
+            <activation>
+                <activeByDefault>true</activeByDefault>
+            </activation>
+            <dependencies>
+                <dependency>
+                    <groupId>org.apache.accumulo</groupId>
+                    <artifactId>accumulo-core</artifactId>
+                    <optional>true</optional>
+                </dependency>
+                <dependency>
+                    <groupId>mvm.rya</groupId>
+                    <artifactId>accumulo.iterators</artifactId>
+                    <optional>true</optional>
+                </dependency>
+            </dependencies>
+        </profile>
+        <profile>
+            <id>cloudbase</id>
+            <activation>
+                <activeByDefault>false</activeByDefault>
+            </activation>
+            <dependencies>
+                <dependency>
+                    <groupId>com.texeltek</groupId>
+                    <artifactId>accumulo-cloudbase-shim</artifactId>
+                    <optional>true</optional>
+                </dependency>
+                <dependency>
+                    <groupId>mvm.rya</groupId>
+                    <artifactId>cloudbase.iterators</artifactId>
+                    <optional>true</optional>
+                </dependency>
+            </dependencies>
+        </profile>
+    </profiles>
+</project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/domain/IndexEntry.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/domain/IndexEntry.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/domain/IndexEntry.groovy
new file mode 100644
index 0000000..ae08089
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/domain/IndexEntry.groovy
@@ -0,0 +1,57 @@
+package mvm.rya.prospector.domain
+
+/**
+ * A single prospect index entry: the index it belongs to, the indexed data and
+ * its data type, the triple value type, the visibility, a count and a timestamp.
+ * All seven properties take part in toString(), equals() and hashCode().
+ *
+ * Date: 12/5/12
+ * Time: 11:33 AM
+ */
+class IndexEntry {
+    def String index
+    def String data
+    def String dataType
+    def String tripleValueType
+    def String visibility
+    def Long count
+    def Long timestamp
+
+    @Override
+    public String toString() {
+        return "IndexEntry{" +
+                "index='" + index + '\'' +
+                ", data='" + data + '\'' +
+                ", dataType='" + dataType + '\'' +
+                ", tripleValueType=" + tripleValueType +
+                ", visibility='" + visibility + '\'' +
+                ", timestamp='" + timestamp + '\'' +
+                ", count=" + count +
+                '}';
+    }
+
+    /**
+     * Property-wise equality.
+     *
+     * @param o the object to compare with; may be null
+     * @return true only when o is an IndexEntry of the same class whose seven
+     *         properties are all equal to this entry's
+     */
+    boolean equals(o) {
+        if (this.is(o)) return true
+        // Guard against null and call getClass() explicitly: the 'class' property
+        // throws on a null receiver and is resolved as a key lookup on a Map.
+        if (o == null || getClass() != o.getClass()) return false
+
+        IndexEntry that = (IndexEntry) o
+
+        if (count != that.count) return false
+        if (timestamp != that.timestamp) return false
+        if (data != that.data) return false
+        if (dataType != that.dataType) return false
+        if (index != that.index) return false
+        if (tripleValueType != that.tripleValueType) return false
+        if (visibility != that.visibility) return false
+
+        return true
+    }
+
+    /**
+     * Hash code built from the same seven properties equals() compares; a null
+     * property contributes 0.
+     */
+    int hashCode() {
+        int result
+        result = (index != null ? index.hashCode() : 0)
+        result = 31 * result + (data != null ? data.hashCode() : 0)
+        result = 31 * result + (dataType != null ? dataType.hashCode() : 0)
+        result = 31 * result + (tripleValueType != null ? tripleValueType.hashCode() : 0)
+        result = 31 * result + (visibility != null ? visibility.hashCode() : 0)
+        result = 31 * result + (count != null ? count.hashCode() : 0)
+        result = 31 * result + (timestamp != null ? timestamp.hashCode() : 0)
+        return result
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/domain/IntermediateProspect.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/domain/IntermediateProspect.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/domain/IntermediateProspect.groovy
new file mode 100644
index 0000000..f80ac93
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/domain/IntermediateProspect.groovy
@@ -0,0 +1,51 @@
+package mvm.rya.prospector.domain
+
+import org.apache.hadoop.io.WritableComparable
+
+import static mvm.rya.prospector.domain.TripleValueType.*
+
+/**
+ * Map-output key of the prospector job: an index name, the data and its data
+ * type, the triple value type and a visibility string.
+ *
+ * Ordering (compareTo), equality and hashing all use the same five fields.
+ * equals() and hashCode() matter because Hadoop's hash partitioning routes
+ * map output by key.hashCode(): with the inherited identity hash, equal keys
+ * emitted by different tasks could be sent to different reducers (unless the
+ * job installs its own partitioner).
+ *
+ * Date: 12/3/12
+ * Time: 11:15 AM
+ */
+class IntermediateProspect implements WritableComparable<IntermediateProspect> {
+
+    def String index
+    def String data
+    def String dataType
+    def TripleValueType tripleValueType
+    def String visibility
+
+    /**
+     * Orders by index, data, dataType, tripleValueType and visibility, in that
+     * order. All five fields are expected to be non-null.
+     */
+    @Override
+    int compareTo(IntermediateProspect t) {
+        if(!index.equals(t.index))
+            return index.compareTo(t.index);
+        if(!data.equals(t.data))
+            return data.compareTo(t.data);
+        if(!dataType.equals(t.dataType))
+            return dataType.compareTo(t.dataType);
+        if(!tripleValueType.equals(t.tripleValueType))
+            return tripleValueType.compareTo(t.tripleValueType);
+        if(!visibility.equals(t.visibility))
+            return visibility.compareTo(t.visibility);
+        return 0
+    }
+
+    /**
+     * Field-wise equality over the same five fields compareTo() uses.
+     */
+    @Override
+    boolean equals(Object o) {
+        if (this.is(o)) return true
+        if (!(o instanceof IntermediateProspect)) return false
+        IntermediateProspect that = (IntermediateProspect) o
+        return index == that.index &&
+                data == that.data &&
+                dataType == that.dataType &&
+                tripleValueType == that.tripleValueType &&
+                visibility == that.visibility
+    }
+
+    /**
+     * Hash code that is stable across JVM instances, so the same logical key
+     * always maps to the same partition.
+     */
+    @Override
+    int hashCode() {
+        int result = (index != null ? index.hashCode() : 0)
+        result = 31 * result + (data != null ? data.hashCode() : 0)
+        result = 31 * result + (dataType != null ? dataType.hashCode() : 0)
+        // Enum.hashCode() is identity based and differs between JVMs; hash the constant name instead.
+        result = 31 * result + (tripleValueType != null ? tripleValueType.name().hashCode() : 0)
+        result = 31 * result + (visibility != null ? visibility.hashCode() : 0)
+        return result
+    }
+
+    /**
+     * Serializes the five fields as UTF strings; the enum is written by name.
+     */
+    @Override
+    void write(DataOutput dataOutput) {
+        dataOutput.writeUTF(index);
+        dataOutput.writeUTF(data);
+        dataOutput.writeUTF(dataType);
+        dataOutput.writeUTF(tripleValueType.name());
+        dataOutput.writeUTF(visibility);
+    }
+
+    /**
+     * Reads the fields back in the order write() emitted them.
+     */
+    @Override
+    void readFields(DataInput dataInput) {
+        index = dataInput.readUTF()
+        data = dataInput.readUTF()
+        dataType = dataInput.readUTF()
+        tripleValueType = TripleValueType.valueOf(dataInput.readUTF())
+        visibility = dataInput.readUTF()
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/domain/TripleValueType.java
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/domain/TripleValueType.java
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/domain/TripleValueType.java
new file mode 100644
index 0000000..c2f0a0e
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/domain/TripleValueType.java
@@ -0,0 +1,6 @@
+package mvm.rya.prospector.domain;
+
+/**
+ * Kinds of triple value a prospect entry can describe.
+ *
+ * IntermediateProspect serializes these constants by name and orders keys by
+ * their declaration order, so constants must not be renamed or reordered.
+ */
+public enum TripleValueType {
+    subject,
+    predicate,
+    object,
+    entity,
+    subjectpredicate,
+    predicateobject,
+    subjectobject
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/Prospector.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/Prospector.groovy 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/Prospector.groovy
new file mode 100644
index 0000000..54ffcf9
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/Prospector.groovy
@@ -0,0 +1,89 @@
+package mvm.rya.prospector.mr
+
+import mvm.rya.prospector.utils.ProspectorUtils
+import org.apache.accumulo.core.data.Mutation
+import org.apache.accumulo.core.data.Value
+import org.apache.accumulo.core.security.ColumnVisibility
+import org.apache.hadoop.conf.Configured
+import org.apache.hadoop.util.Tool
+import org.apache.hadoop.util.ToolRunner
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
+import org.apache.hadoop.mapreduce.Job
+
+import org.apache.hadoop.io.LongWritable
+import org.apache.commons.lang.time.DateUtils
+
+import mvm.rya.prospector.domain.IntermediateProspect
+
+import com.google.common.collect.Lists
+
+import static mvm.rya.prospector.utils.ProspectorConstants.*
+import static mvm.rya.prospector.utils.ProspectorUtils.*
+
+/**
+ * Hadoop Tool that configures and runs the prospector MapReduce job and, when
+ * the job succeeds, records the prospect time in the output table.
+ *
+ * Date: 12/3/12
+ * Time: 10:57 AM
+ */
+class Prospector extends Configured implements Tool {
+
+    // Captured once at class load; passed to the tasks through the "DATE" job property.
+    private static long NOW = System.currentTimeMillis();
+
+    private Date truncatedDate;
+
+    public static void main(String[] args) {
+        int res = ToolRunner.run(new Prospector(), args);
+        System.exit(res);
+    }
+
+    /**
+     * Runs the job.
+     *
+     * @param args args[0] is the path of a Hadoop configuration resource that must
+     *             define "prospector.intable", "prospector.outtable" and
+     *             "prospector.auths" (comma separated)
+     * @return 0 when the job succeeded, 1 when it failed, -1 when the
+     *         configuration path argument is missing
+     */
+    @Override
+    int run(String[] args) {
+        // args[0] is dereferenced below; report usage instead of failing with an
+        // ArrayIndexOutOfBoundsException when it is absent.
+        if (args == null || args.length < 1) {
+            System.err.println("Usage: " + this.getClass().getSimpleName() + " <configuration file path>")
+            ToolRunner.printGenericCommandUsage(System.err)
+            return -1
+        }
+
+        Configuration conf = getConf();
+
+        truncatedDate = DateUtils.truncate(new Date(NOW), Calendar.MINUTE);
+
+        // The resource must be added before the Job is created from this configuration.
+        Path configurationPath = new Path(args[0]);
+        conf.addResource(configurationPath);
+
+        def inTable = conf.get("prospector.intable")
+        def outTable = conf.get("prospector.outtable")
+        def auths_str = conf.get("prospector.auths")
+        assert inTable != null
+        assert outTable != null
+        assert auths_str != null
+
+        Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
+        job.setJarByClass(this.getClass());
+
+        String[] auths = auths_str.split(",")
+        ProspectorUtils.initMRJob(job, inTable, outTable, auths)
+
+        // Mapper, combiner and reducer read this value back in their setup().
+        job.getConfiguration().setLong("DATE", NOW);
+
+        def performant = conf.get(PERFORMANT)
+        if (Boolean.parseBoolean(performant)) {
+            /**
+             * Apply some performance tuning
+             */
+            ProspectorUtils.addMRPerformance(job.configuration)
+        }
+
+        job.setMapOutputKeyClass(IntermediateProspect.class);
+        job.setMapOutputValueClass(LongWritable.class);
+
+        job.setMapperClass(ProspectorMapper.class);
+        job.setCombinerClass(ProspectorCombiner.class);
+        job.setReducerClass(ProspectorReducer.class);
+        job.waitForCompletion(true);
+
+        int success = job.isSuccessful() ? 0 : 1;
+
+        if (success == 0) {
+            // Record the (minute-truncated) prospect time as a metadata row in the output table.
+            Mutation m = new Mutation(METADATA)
+            m.put(PROSPECT_TIME, getReverseIndexDateTime(truncatedDate), new ColumnVisibility(DEFAULT_VIS), truncatedDate.time, new Value(EMPTY))
+            writeMutations(connector(instance(conf), conf), outTable, [m])
+        }
+
+        return success
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/ProspectorCombiner.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/ProspectorCombiner.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/ProspectorCombiner.groovy
new file mode 100644
index 0000000..65ec71b
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/ProspectorCombiner.groovy
@@ -0,0 +1,42 @@
+package mvm.rya.prospector.mr
+
+import mvm.rya.prospector.plans.IndexWorkPlan
+import mvm.rya.prospector.plans.IndexWorkPlanManager
+import mvm.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager
+import org.apache.commons.lang.time.DateUtils
+import org.apache.hadoop.mapreduce.Reducer
+import mvm.rya.prospector.utils.ProspectorUtils
+
+/**
+ * Combiner of the prospector job: hands each key and its values to the
+ * IndexWorkPlan registered for the key's index and writes out whatever
+ * entries that plan's combine() returns.
+ *
+ * Date: 12/3/12
+ * Time: 11:06 AM
+ */
+class ProspectorCombiner extends Reducer {
+
+    private Date truncatedDate;
+    private IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager()
+    Map<String, IndexWorkPlan> plans
+
+    /**
+     * Reads the job's "DATE" property (falling back to the current time),
+     * truncates it to the minute and builds the plan lookup map.
+     */
+    @Override
+    public void setup(Reducer.Context context) throws IOException, InterruptedException {
+        super.setup(context);
+
+        long jobTime = context.getConfiguration().getLong("DATE", System.currentTimeMillis());
+        truncatedDate = DateUtils.truncate(new Date(jobTime), Calendar.MINUTE);
+
+        this.plans = ProspectorUtils.planMap(manager.getPlans())
+    }
+
+    /**
+     * Combines the values of one key through the matching plan; keys whose
+     * index has no plan, and plans that return null, produce no output.
+     */
+    @Override
+    protected void reduce(def prospect, Iterable values, Reducer.Context context) {
+        def workPlan = plans.get(prospect.index)
+        if (workPlan == null) {
+            return
+        }
+        def combined = workPlan.combine(prospect, values)
+        if (combined == null) {
+            return
+        }
+        for (pair in combined) {
+            context.write(pair.key, pair.value)
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/ProspectorMapper.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/ProspectorMapper.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/ProspectorMapper.groovy
new file mode 100644
index 0000000..1eef226
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/ProspectorMapper.groovy
@@ -0,0 +1,56 @@
+package mvm.rya.prospector.mr
+
+import mvm.rya.accumulo.AccumuloRdfConfiguration
+import mvm.rya.api.RdfCloudTripleStoreConstants
+import mvm.rya.api.domain.RyaStatement
+import mvm.rya.api.resolver.RyaTripleContext
+import mvm.rya.api.resolver.triple.TripleRow
+import mvm.rya.prospector.plans.IndexWorkPlan
+import mvm.rya.prospector.plans.IndexWorkPlanManager
+import mvm.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager
+
+import org.apache.commons.lang.time.DateUtils
+import org.apache.hadoop.mapreduce.Mapper
+
+/**
+ * Mapper of the prospector job: rebuilds a RyaStatement from each input
+ * key/value pair (SPO table layout) and writes out the entries every
+ * registered IndexWorkPlan produces for that statement.
+ *
+ * Date: 12/3/12
+ * Time: 11:06 AM
+ */
+class ProspectorMapper extends Mapper {
+
+    private Date truncatedDate;
+    private RyaTripleContext ryaContext;
+    private IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager()
+    private Collection<IndexWorkPlan> plans = manager.plans
+
+    /**
+     * Creates the triple context from the job configuration and truncates the
+     * job's "DATE" property (or the current time) to the minute.
+     */
+    @Override
+    public void setup(Mapper.Context context) throws IOException, InterruptedException {
+        super.setup(context);
+
+        long jobTime = context.getConfiguration().getLong("DATE", System.currentTimeMillis());
+        ryaContext = RyaTripleContext.getInstance(new AccumuloRdfConfiguration(context.getConfiguration()));
+        truncatedDate = DateUtils.truncate(new Date(jobTime), Calendar.MINUTE);
+    }
+
+    /**
+     * Deserializes one row into a statement and lets each plan map it; plans
+     * that return null contribute nothing.
+     */
+    @Override
+    public void map(def row, def data, Mapper.Context context) {
+        def tripleRow = new TripleRow(
+                row.row.bytes,
+                row.columnFamily.bytes,
+                row.columnQualifier.bytes,
+                row.timestamp,
+                row.columnVisibility.bytes,
+                data.get()
+        )
+        RyaStatement statement = ryaContext.deserializeTriple(RdfCloudTripleStoreConstants.TABLE_LAYOUT.SPO, tripleRow)
+
+        for (workPlan in plans) {
+            def mapped = workPlan.map(statement)
+            if (mapped == null) {
+                continue
+            }
+            for (pair in mapped) {
+                context.write(pair.key, pair.value)
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/ProspectorReducer.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/ProspectorReducer.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/ProspectorReducer.groovy
new file mode 100644
index 0000000..8beabcf
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/mr/ProspectorReducer.groovy
@@ -0,0 +1,38 @@
+package mvm.rya.prospector.mr
+
+import mvm.rya.prospector.plans.IndexWorkPlan
+import mvm.rya.prospector.plans.IndexWorkPlanManager
+import mvm.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager
+import org.apache.commons.lang.time.DateUtils
+import org.apache.hadoop.mapreduce.Reducer
+import mvm.rya.prospector.utils.ProspectorUtils
+
+/**
+ * Reducer of the prospector job: delegates each key and its values to the
+ * IndexWorkPlan registered for the key's index, together with the
+ * minute-truncated job date.
+ *
+ * Date: 12/3/12
+ * Time: 11:06 AM
+ */
+class ProspectorReducer extends Reducer {
+
+    private Date truncatedDate;
+    private IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager()
+    Map<String, IndexWorkPlan> plans
+
+    /**
+     * Reads the job's "DATE" property (falling back to the current time),
+     * truncates it to the minute and builds the plan lookup map.
+     */
+    @Override
+    public void setup(Reducer.Context context) throws IOException, InterruptedException {
+        super.setup(context);
+
+        long jobTime = context.getConfiguration().getLong("DATE", System.currentTimeMillis());
+        truncatedDate = DateUtils.truncate(new Date(jobTime), Calendar.MINUTE);
+
+        this.plans = ProspectorUtils.planMap(manager.getPlans())
+    }
+
+    /**
+     * Passes the key and values to the matching plan's reduce(); keys whose
+     * index has no plan are ignored.
+     */
+    @Override
+    protected void reduce(def prospect, Iterable values, Reducer.Context context) {
+        // Safe navigation: nothing is invoked when no plan is registered for this index.
+        plans.get(prospect.index)?.reduce(prospect, values, truncatedDate, context)
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/IndexWorkPlan.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/IndexWorkPlan.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/IndexWorkPlan.groovy
new file mode 100644
index 0000000..0e53f6d
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/IndexWorkPlan.groovy
@@ -0,0 +1,32 @@
+package mvm.rya.prospector.plans
+
+import mvm.rya.api.domain.RyaStatement
+import mvm.rya.prospector.domain.IntermediateProspect
+import org.apache.hadoop.io.LongWritable
+import org.apache.hadoop.mapreduce.Reducer
+import org.openrdf.model.vocabulary.XMLSchema
+import mvm.rya.prospector.domain.IndexEntry
+
+/**
+ * Contract for one prospector index. ProspectorMapper calls map() for every
+ * statement, ProspectorCombiner calls combine() and ProspectorReducer calls
+ * reduce() for the keys that belong to this plan.
+ *
+ * Date: 12/3/12
+ * Time: 11:12 AM
+ */
+public interface IndexWorkPlan {
+
+    // String form of the xsd:anyURI datatype.
+    public static final String URITYPE = XMLSchema.ANYURI.stringValue()
+    // Shared LongWritable holding 1; presumably the count emitted per occurrence - confirm in implementations.
+    public static final LongWritable ONE = new LongWritable(1)
+    // NUL character; CountPlan uses it to join the parts of a composite value.
+    public static final String DELIM = "\u0000";
+
+    // Produces the key/count entries for one statement; callers treat a null result as "nothing to emit".
+    public Collection<Map.Entry<IntermediateProspect, LongWritable>> 
map(RyaStatement ryaStatement)
+
+    // Combines the counts seen for one key; callers treat a null result as "nothing to emit".
+    public Collection<Map.Entry<IntermediateProspect, LongWritable>> 
combine(IntermediateProspect prospect, Iterable<LongWritable> counts);
+
+    // Final reduction for one key; the reducer passes the minute-truncated job date as timestamp.
+    public void reduce(IntermediateProspect prospect, Iterable<LongWritable> 
counts, Date timestamp, Reducer.Context context)
+
+    // NOTE(review): presumably the index name that keys are matched against (prospect.index) - confirm against ProspectorUtils.planMap.
+    public String getIndexType()
+
+    // NOTE(review): presumably joins the given values into one composite index value - confirm in implementations.
+       public String getCompositeValue(List<String> indices)
+       
+    // Looks up stored index entries; exact parameter semantics are defined by the implementations.
+    public List<IndexEntry> query(def connector, String tableName, List<Long> 
prospectTimes, String type, String index, String dataType, String[] auths)
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/IndexWorkPlanManager.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/IndexWorkPlanManager.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/IndexWorkPlanManager.groovy
new file mode 100644
index 0000000..915b66a
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/IndexWorkPlanManager.groovy
@@ -0,0 +1,10 @@
+package mvm.rya.prospector.plans
+
+/**
+ * Source of the IndexWorkPlan instances used by the prospector mapper,
+ * combiner and reducer.
+ *
+ * Date: 12/3/12
+ * Time: 11:24 AM
+ */
+public interface IndexWorkPlanManager {
+
+    // Returns the available plans.
+    public Collection<IndexWorkPlan> getPlans();
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/impl/CountPlan.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/impl/CountPlan.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/impl/CountPlan.groovy
new file mode 100644
index 0000000..ae64e3b
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/impl/CountPlan.groovy
@@ -0,0 +1,201 @@
+package mvm.rya.prospector.plans.impl
+
+import mvm.rya.api.domain.RyaStatement
+import mvm.rya.prospector.domain.IndexEntry
+import mvm.rya.prospector.domain.IntermediateProspect
+import mvm.rya.prospector.domain.TripleValueType
+import mvm.rya.prospector.plans.IndexWorkPlan
+import mvm.rya.prospector.utils.CustomEntry
+import mvm.rya.prospector.utils.ProspectorUtils
+
+import org.apache.accumulo.core.data.Mutation
+import org.apache.accumulo.core.data.Range
+import org.apache.accumulo.core.data.Value
+import org.apache.accumulo.core.security.Authorizations
+import org.apache.accumulo.core.security.ColumnVisibility
+import org.apache.hadoop.io.LongWritable
+import org.apache.hadoop.io.Text
+import org.apache.hadoop.mapreduce.Reducer
+import org.openrdf.model.util.URIUtil
+import org.openrdf.model.vocabulary.XMLSchema;
+
+import static mvm.rya.prospector.utils.ProspectorConstants.COUNT;
+import mvm.rya.api.RdfCloudTripleStoreConstants
+
+/**
+ * Date: 12/3/12
+ * Time: 12:28 PM
+ */
+class CountPlan implements IndexWorkPlan {
+
+    /**
+     * Turns one statement into seven count entries, each carrying the value ONE:
+     * subject, predicate, object, the three pairwise combinations
+     * (subject/predicate, subject/object, predicate/object) and the subject's namespace.
+     *
+     * @param ryaStatement statement to break down
+     * @return the seven entries, in the order listed above
+     */
+    @Override
+    Collection<Map.Entry<IntermediateProspect, LongWritable>> map(RyaStatement ryaStatement) {
+        def subj = ryaStatement.getSubject()
+        def pred = ryaStatement.getPredicate()
+
+        // Pairwise keys are the two raw values joined with DELIM.
+        def subjPredKey = subj.data + DELIM + pred.data
+        def predObjKey = pred.data + DELIM + ryaStatement.getObject().data
+        def subjObjKey = subj.data + DELIM + ryaStatement.getObject().data
+        def obj = ryaStatement.getObject()
+
+        // Cut one character before the local name starts
+        // (presumably the '#' or '/' separator - confirm against URIUtil).
+        def nsEnd = URIUtil.getLocalNameIndex(subj.data) - 1
+        def namespace = subj.data.substring(0, nsEnd)
+        def vis = new String(ryaStatement.columnVisibility)
+
+        // Every entry differs only in data, data type and triple value type.
+        def countOf = { data, dataType, valueType ->
+            new CustomEntry<IntermediateProspect, LongWritable>(
+                    new IntermediateProspect(index: COUNT,
+                            data: data,
+                            dataType: dataType,
+                            tripleValueType: valueType,
+                            visibility: vis),
+                    ONE)
+        }
+
+        return [
+                countOf(subj.data, URITYPE, TripleValueType.subject),
+                countOf(pred.data, URITYPE, TripleValueType.predicate),
+                countOf(obj.data, obj.dataType.stringValue(), TripleValueType.object),
+                countOf(subjPredKey, XMLSchema.STRING, TripleValueType.subjectpredicate),
+                countOf(subjObjKey, XMLSchema.STRING, TripleValueType.subjectobject),
+                countOf(predObjKey, XMLSchema.STRING, TripleValueType.predicateobject),
+                countOf(namespace, URITYPE, TripleValueType.entity),
+        ]
+    }
+
+    /**
+     * Adds up the partial counts for one prospect.
+     *
+     * @param prospect key the counts belong to
+     * @param counts partial counts to add up
+     * @return a single-element list pairing the prospect with the summed count
+     */
+    @Override
+    Collection<Map.Entry<IntermediateProspect, LongWritable>> combine(IntermediateProspect prospect, Iterable<LongWritable> counts) {
+        long total = 0
+        for (LongWritable partial : counts) {
+            total += partial.get()
+        }
+
+        [new CustomEntry<IntermediateProspect, LongWritable>(prospect, new LongWritable(total))]
+    }
+
+    /**
+     * Sums the counts for one prospect and writes the total as a single mutation.
+     * The row is "valueType DELIM data DELIM reverse-index time"; the column is
+     * COUNT / the prospect's data type, stamped with the given timestamp.
+     *
+     * @param prospect key being reduced
+     * @param counts counts to add up
+     * @param timestamp time of this prospect run
+     * @param context reducer context the mutation is written to
+     */
+    @Override
+    void reduce(IntermediateProspect prospect, Iterable<LongWritable> counts, Date timestamp, Reducer.Context context) {
+        long total = 0
+        for (LongWritable partial : counts) {
+            total += partial.get()
+        }
+
+        def valueTypeName = prospect.tripleValueType.name()
+
+        // not sure if this is the best idea..
+        if (total < 0 && !valueTypeName.equals(TripleValueType.predicate.toString())) {
+            return
+        }
+
+        def rowId = valueTypeName + DELIM + prospect.data + DELIM + ProspectorUtils.getReverseIndexDateTime(timestamp)
+        Mutation countMutation = new Mutation(rowId)
+        countMutation.put(COUNT, prospect.dataType, new ColumnVisibility(prospect.visibility), timestamp.getTime(), new Value("${total}".getBytes()))
+
+        context.write(null, countMutation)
+    }
+
+    /**
+     * @return the index type name of this plan, the COUNT constant
+     */
+    @Override
+    String getIndexType() {
+        COUNT
+    }
+       
+    /**
+     * Joins the given values into one composite key, separated by DELIM.
+     * An empty list is not supported: fetching the first element throws.
+     *
+     * @param indices values to join, in order
+     * @return the joined key
+     */
+    @Override
+    String getCompositeValue(List<String> indices) {
+        def parts = indices.iterator()
+        String composite = parts.next()
+        // each() on the iterator consumes whatever is left after the first element
+        parts.each { part ->
+            composite += DELIM + part
+        }
+        composite
+    }
+
+    /**
+     * Looks up stored counts for one value (or value pair) of the given triple value type.
+     *
+     * With prospectTimes, one exact-row range is scanned per time and at most 1000
+     * entries are returned. Without prospectTimes, the whole row prefix is scanned and
+     * at most one entry is returned.
+     *
+     * NOTE(review): a BatchScanner does not promise ordered results, so "the latest"
+     * in the no-prospectTimes case may not hold - confirm.
+     *
+     * @param connector Accumulo connector (must not be null)
+     * @param tableName prospect table to scan (must not be null)
+     * @param prospectTimes prospect run times to look up, or null for the prefix scan
+     * @param type triple value type name that starts the row (must not be null)
+     * @param compositeIndex value, or DELIM-joined value pair, to look up (must not be null)
+     * @param dataType column qualifier to restrict to, or null for the whole COUNT family
+     * @param auths scan authorizations
+     * @return the matching entries, never more than the limit described above
+     */
+    @Override
+    List<IndexEntry> query(def connector, String tableName, List<Long> prospectTimes, String type, String compositeIndex, String dataType, String[] auths) {
+
+        assert connector != null && tableName != null && type != null && compositeIndex != null
+
+        List<IndexEntry> indexEntries = new ArrayList<IndexEntry>()
+        def bs = connector.createBatchScanner(tableName, new Authorizations(auths), 4)
+        try {
+            def ranges = []
+            int max = 1000; //by default only return 1000 prospects maximum
+            if (prospectTimes != null) {
+                prospectTimes.each { prospect ->
+                    ranges.add(
+                            new Range(type + DELIM + compositeIndex + DELIM + ProspectorUtils.getReverseIndexDateTime(new Date(prospect))))
+                }
+            } else {
+                max = 1; //only return the latest if no prospectTimes given
+                def prefix = type + DELIM + compositeIndex + DELIM;
+                ranges.add(new Range(prefix, prefix + RdfCloudTripleStoreConstants.LAST))
+            }
+            bs.ranges = ranges
+            if (dataType != null) {
+                bs.fetchColumn(new Text(COUNT), new Text(dataType))
+            } else {
+                bs.fetchColumnFamily(new Text(COUNT))
+            }
+
+            // Composite types carry two values in the row; decided once, not per entry.
+            boolean composite = type.equalsIgnoreCase(TripleValueType.subjectpredicate.toString()) ||
+                    type.equalsIgnoreCase(TripleValueType.subjectobject.toString()) ||
+                    type.equalsIgnoreCase(TripleValueType.predicateobject.toString())
+
+            def iter = bs.iterator()
+
+            // Strictly less than max: "<=" let one entry too many through.
+            while (iter.hasNext() && indexEntries.size() < max) {
+                def entry = iter.next()
+                def k = entry.key
+                def v = entry.value
+
+                def rowArr = k.row.toString().split(DELIM)
+                // if it is a composite index, then return the type as a composite index
+                String values = composite ? rowArr[1] + DELIM + rowArr[2] : rowArr[1]
+
+                indexEntries.add(new IndexEntry(data: values,
+                        tripleValueType: rowArr[0],
+                        index: COUNT,
+                        dataType: k.columnQualifier.toString(),
+                        visibility: k.columnVisibility.toString(),
+                        count: Long.parseLong(new String(v.get())),
+                        timestamp: k.timestamp
+                ))
+            }
+        } finally {
+            // Release the scanner even when scanning or parsing fails.
+            bs.close()
+        }
+
+        return indexEntries
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy
new file mode 100644
index 0000000..931f6a7
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy
@@ -0,0 +1,19 @@
+package mvm.rya.prospector.plans.impl
+
+import mvm.rya.prospector.plans.IndexWorkPlan
+import com.google.common.collect.Lists
+import mvm.rya.prospector.plans.IndexWorkPlanManager
+
+/**
+ * Date: 12/3/12
+ * Time: 11:24 AM
+ */
+class ServicesBackedIndexWorkPlanManager implements IndexWorkPlanManager {
+
+    /** Plans found through the ServiceLoader when this manager was constructed. */
+    Collection<IndexWorkPlan> plans
+
+    /**
+     * Loads every IndexWorkPlan implementation the ServiceLoader can find.
+     */
+    ServicesBackedIndexWorkPlanManager() {
+        plans = Lists.newArrayList(ServiceLoader.load(IndexWorkPlan.class).iterator())
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/service/ProspectorService.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/service/ProspectorService.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/service/ProspectorService.groovy
new file mode 100644
index 0000000..eb3a975
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/service/ProspectorService.groovy
@@ -0,0 +1,107 @@
+package mvm.rya.prospector.service
+
+import mvm.rya.prospector.utils.ProspectorUtils
+import org.apache.accumulo.core.data.Key
+import org.apache.accumulo.core.data.Range
+import org.apache.accumulo.core.security.Authorizations
+import org.apache.hadoop.io.Text
+
+import static mvm.rya.prospector.utils.ProspectorConstants.METADATA
+import static mvm.rya.prospector.utils.ProspectorConstants.PROSPECT_TIME
+import mvm.rya.prospector.plans.IndexWorkPlanManager
+import mvm.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager
+import mvm.rya.prospector.plans.IndexWorkPlan
+import mvm.rya.prospector.domain.IndexEntry
+
+/**
+ * Date: 12/5/12
+ * Time: 12:28 PM
+ */
+class ProspectorService {
+
+    def connector
+    String tableName
+
+    IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager()
+    Map<String, IndexWorkPlan> plans
+
+    /**
+     * Binds the service to a prospect table, creating the table if it does not exist yet.
+     *
+     * @param connector Accumulo connector used for all scans
+     * @param tableName name of the prospect table
+     */
+    ProspectorService(def connector, String tableName) {
+        this.connector = connector
+        this.tableName = tableName
+        this.plans = ProspectorUtils.planMap(manager.plans)
+
+        //init
+        def tableOps = connector.tableOperations()
+        if (!tableOps.exists(tableName)) {
+            tableOps.create(tableName)
+        }
+    }
+
+    /**
+     * Lists the timestamps of all recorded prospect runs.
+     *
+     * @param auths scan authorizations
+     * @return iterator over the key timestamps in the METADATA row, PROSPECT_TIME family
+     */
+    public Iterator<Long> getProspects(String[] auths) {
+        def scanner = connector.createScanner(tableName, new Authorizations(auths))
+        scanner.setRange(Range.exact(METADATA))
+        scanner.fetchColumnFamily(new Text(PROSPECT_TIME))
+
+        return timestampsOf(scanner.iterator())
+    }
+
+    /**
+     * Lists the timestamps of prospect runs between two times.
+     * The range starts at endTime's key and stops at beginTime's key, because the
+     * qualifiers are reverse-index date strings.
+     *
+     * @param beginTime earliest time, in milliseconds
+     * @param endTime latest time, in milliseconds
+     * @param auths scan authorizations
+     * @return iterator over the key timestamps found in that range
+     */
+    public Iterator<Long> getProspectsInRange(long beginTime, long endTime, String[] auths) {
+        def scanner = connector.createScanner(tableName, new Authorizations(auths))
+
+        def rangeStart = new Key(METADATA, PROSPECT_TIME, ProspectorUtils.getReverseIndexDateTime(new Date(endTime)), "", Long.MAX_VALUE)
+        def rangeStop = new Key(METADATA, PROSPECT_TIME, ProspectorUtils.getReverseIndexDateTime(new Date(beginTime)), "", 0L)
+        scanner.setRange(new Range(rangeStart, rangeStop))
+
+        return timestampsOf(scanner.iterator())
+    }
+
+    /**
+     * Runs a query against the plan registered for the given index type.
+     *
+     * @param prospectTimes prospect run times to look up, passed through to the plan
+     * @param indexType name of the plan to use (must not be null and must be registered)
+     * @param type triple value type name, passed through to the plan
+     * @param index values the plan combines into one composite lookup key
+     * @param dataType data type filter, passed through to the plan
+     * @param auths scan authorizations
+     * @return whatever the plan's query returns
+     */
+    public List<IndexEntry> query(List<Long> prospectTimes, String indexType, String type, List<String> index, String dataType, String[] auths) {
+        assert indexType != null
+
+        def plan = plans.get(indexType)
+        assert plan != null: "Index Type: ${indexType} does not exist"
+
+        String lookupKey = plan.getCompositeValue(index)
+        return plan.query(connector, tableName, prospectTimes, type, lookupKey, dataType, auths)
+    }
+
+    /**
+     * Wraps a scanner iterator so that it yields each entry's key timestamp.
+     */
+    private Iterator<Long> timestampsOf(def entries) {
+        return new Iterator<Long>() {
+
+            @Override
+            public boolean hasNext() {
+                return entries.hasNext();
+            }
+
+            @Override
+            public Long next() {
+                return entries.next().getKey().getTimestamp();
+            }
+
+            @Override
+            public void remove() {
+                entries.remove();
+            }
+        };
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy
new file mode 100644
index 0000000..4e9c3d1
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy
@@ -0,0 +1,103 @@
+package mvm.rya.prospector.service
+
+import mvm.rya.api.RdfCloudTripleStoreConfiguration
+import mvm.rya.api.persist.RdfEvalStatsDAO
+import mvm.rya.prospector.domain.TripleValueType
+import mvm.rya.prospector.utils.ProspectorConstants
+import org.apache.hadoop.conf.Configuration
+import org.openrdf.model.Resource
+import org.openrdf.model.Value
+
+import mvm.rya.api.persist.RdfEvalStatsDAO.CARDINALITY_OF
+
+/**
+ * An ${@link mvm.rya.api.persist.RdfEvalStatsDAO} that uses the Prospector 
Service underneath return counts.
+ */
+class ProspectorServiceEvalStatsDAO implements RdfEvalStatsDAO<RdfCloudTripleStoreConfiguration> {
+
+    ProspectorService prospectorService
+
+    /** Creates a DAO with no service attached; one must be set before use. */
+    ProspectorServiceEvalStatsDAO() {
+    }
+
+    /**
+     * @param prospectorService service to query
+     * @param conf accepted but not read by this constructor
+     */
+    ProspectorServiceEvalStatsDAO(ProspectorService prospectorService, RdfCloudTripleStoreConfiguration conf) {
+        this.prospectorService = prospectorService
+    }
+
+    /**
+     * Builds the service itself, on the prospect table named after the configured prefix.
+     *
+     * @param connector Accumulo connector handed to the new service
+     * @param conf configuration supplying the table prefix
+     */
+    public ProspectorServiceEvalStatsDAO(def connector, RdfCloudTripleStoreConfiguration conf) {
+        this.prospectorService = new ProspectorService(connector, getProspectTableName(conf))
+    }
+
+    /** Only verifies that a service is attached. */
+    @Override
+    void init() {
+        assert prospectorService != null
+    }
+
+    /** @return true once a service is attached */
+    @Override
+    boolean isInitialized() {
+        prospectorService != null
+    }
+
+    /** Nothing to release. */
+    @Override
+    void destroy() {
+
+    }
+
+    /**
+     * Returns the stored count for the given values.
+     *
+     * @param conf configuration supplying the scan authorizations
+     * @param card which part(s) of the triple the values belong to
+     * @param val values to look up; their string forms build the lookup key
+     * @return the count of the first entry found, or -1 when there is none
+     */
+    @Override
+    public double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List<Value> val) {
+
+        assert conf != null && card != null && val != null
+
+        // Cardinalities without a mapping leave triplePart null.
+        def partByCardinality = [
+                (CARDINALITY_OF.SUBJECT)         : TripleValueType.subject,
+                (CARDINALITY_OF.PREDICATE)       : TripleValueType.predicate,
+                (CARDINALITY_OF.OBJECT)          : TripleValueType.object,
+                (CARDINALITY_OF.SUBJECTPREDICATE): TripleValueType.subjectpredicate,
+                (CARDINALITY_OF.SUBJECTOBJECT)   : TripleValueType.subjectobject,
+                (CARDINALITY_OF.PREDICATEOBJECT) : TripleValueType.predicateobject,
+        ]
+        String triplePart = partByCardinality.get(card)
+
+        String[] auths = conf.getAuths()
+        List<String> indexedValues = new ArrayList<String>()
+        for (Value each : val) {
+            indexedValues.add(each.stringValue())
+        }
+
+        def indexEntries = prospectorService.query(null, ProspectorConstants.COUNT, triplePart, indexedValues, null /** what is the datatype here? */,
+                auths)
+
+        if (indexEntries.size() > 0) {
+            return indexEntries.head().count
+        }
+        return -1
+    }
+
+    /**
+     * Same as the three-argument form; the context is currently ignored.
+     */
+    @Override
+    double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List<Value> val, Resource context) {
+        //TODO: Not sure about the context yet
+        return getCardinality(conf, card, val)
+    }
+
+    /** Does nothing; the configuration is not stored. */
+    @Override
+    public void setConf(RdfCloudTripleStoreConfiguration conf) {
+
+    }
+
+    /** @return always null */
+    @Override
+    RdfCloudTripleStoreConfiguration getConf() {
+        return null
+    }
+
+    /**
+     * @param conf configuration supplying the table prefix
+     * @return the table prefix followed by "prospects"
+     */
+    public static String getProspectTableName(RdfCloudTripleStoreConfiguration conf) {
+        return conf.getTablePrefix() + "prospects";
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/utils/CustomEntry.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/utils/CustomEntry.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/utils/CustomEntry.groovy
new file mode 100644
index 0000000..4d7ae1d
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/utils/CustomEntry.groovy
@@ -0,0 +1,33 @@
+package mvm.rya.prospector.utils
+
+/**
+ * Date: 12/3/12
+ * Time: 12:33 PM
+ */
+class CustomEntry<K, V> implements Map.Entry<K, V> {
+
+    K key;
+    V value;
+
+    /**
+     * @param key entry key
+     * @param value entry value
+     */
+    CustomEntry(K key, V value) {
+        this.key = key
+        this.value = value
+    }
+
+    /** @return the entry key */
+    K getKey() {
+        return key
+    }
+
+    /** @param key new entry key */
+    void setKey(K key) {
+        this.key = key
+    }
+
+    /** @return the entry value */
+    V getValue() {
+        return value
+    }
+
+    /**
+     * Replaces the value.
+     *
+     * @param value new entry value
+     * @return the value held before the call, as Map.Entry requires
+     */
+    V setValue(V value) {
+        V previous = this.value
+        this.value = value
+        return previous
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/utils/ProspectorConstants.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/utils/ProspectorConstants.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/utils/ProspectorConstants.groovy
new file mode 100644
index 0000000..edca753
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/utils/ProspectorConstants.groovy
@@ -0,0 +1,22 @@
+package mvm.rya.prospector.utils
+
+/**
+ * Date: 12/5/12
+ * Time: 10:57 AM
+ *
+ * String and byte-array constants shared by the prospector code: index and
+ * metadata names, plus the names of the configuration properties.
+ */
+class ProspectorConstants {
+    public static final String COUNT = "count" // index type of CountPlan; also the column family its counts are stored under
+    public static final String METADATA = "metadata" // row scanned by ProspectorService to list prospect runs
+    public static final String PROSPECT_TIME = "prospectTime" // column family scanned within the METADATA row
+    public static final String DEFAULT_VIS = "U&FOUO" // presumably a default column visibility expression - TODO confirm against users
+    public static final byte[] EMPTY = new byte [0]; // zero-length byte array
+
+    //config properties
+    public static final String PERFORMANT = "performant"
+
+    public static final String USERNAME = "username" // property read for the Accumulo user name
+    public static final String PASSWORD = "password" // property read for the Accumulo password
+    public static final String INSTANCE = "instance" // property read for the Accumulo instance name
+    public static final String ZOOKEEPERS = "zookeepers" // property read for the ZooKeeper servers
+    public static final String MOCK = "mock" // property parsed as a boolean to choose a mock instance
+}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/utils/ProspectorUtils.groovy
----------------------------------------------------------------------
diff --git 
a/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/utils/ProspectorUtils.groovy
 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/utils/ProspectorUtils.groovy
new file mode 100644
index 0000000..ba90fa2
--- /dev/null
+++ 
b/extras/rya.prospector/src/main/groovy/mvm/rya/prospector/utils/ProspectorUtils.groovy
@@ -0,0 +1,119 @@
+package mvm.rya.prospector.utils
+
+import org.apache.accumulo.core.client.Connector
+import org.apache.accumulo.core.client.Instance
+import org.apache.accumulo.core.client.ZooKeeperInstance
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat
+import org.apache.accumulo.core.client.mock.MockInstance
+import org.apache.accumulo.core.data.Mutation
+import org.apache.accumulo.core.security.Authorizations
+import org.apache.commons.lang.Validate
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.io.Text
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.hadoop.mapreduce.Job
+
+import java.text.SimpleDateFormat
+import mvm.rya.prospector.plans.IndexWorkPlan
+import org.apache.accumulo.core.client.security.tokens.PasswordToken
+
+import static mvm.rya.prospector.utils.ProspectorConstants.*
+
+/**
+ * Date: 12/4/12
+ * Time: 4:24 PM
+ */
+class ProspectorUtils {
+
+    public static final long INDEXED_DATE_SORT_VAL = 999999999999999999L; // 
18 char long, same length as date format pattern below
+    public static final String INDEXED_DATE_FORMAT = "yyyyMMddHHmmsssSSS";
+
+    /**
+     * Formats the date with INDEXED_DATE_FORMAT, reads the digits as a number and
+     * subtracts it from INDEXED_DATE_SORT_VAL, so that later dates give smaller values.
+     *
+     * @param date date to convert (must not be null)
+     * @return the difference as a decimal string
+     */
+    public static String getReverseIndexDateTime(Date date) {
+        Validate.notNull(date)
+        def formatter = new SimpleDateFormat(INDEXED_DATE_FORMAT)
+        long forward = Long.parseLong(formatter.format(date))
+
+        return Long.toString(INDEXED_DATE_SORT_VAL - forward)
+    }
+
+    /**
+     * Indexes the given plans by their index type.
+     *
+     * @param plans plans to index
+     * @return map from each plan's indexType to the plan; a later plan replaces an earlier one of the same type
+     */
+    public static Map<String, IndexWorkPlan> planMap(def plans) {
+        return plans.inject([:]) { byType, plan ->
+            byType[plan.indexType] = plan
+            byType
+        }
+    }
+
+    /**
+     * Configures the job to read from one Accumulo table and write mutations to another.
+     * Connection settings come from the job configuration (USERNAME, PASSWORD, INSTANCE,
+     * ZOOKEEPERS, MOCK).
+     *
+     * @param job job to configure
+     * @param table input table name
+     * @param outtable default output table name
+     * @param auths scan authorizations for the input
+     * @throws IllegalArgumentException when mock is not enabled and no zookeepers are configured
+     */
+    public static void initMRJob(Job job, String table, String outtable, String[] auths) {
+        Configuration jobConf = job.configuration
+        String user = jobConf.get(USERNAME)
+        String pass = jobConf.get(PASSWORD)
+        String instanceName = jobConf.get(INSTANCE)
+        String zkServers = jobConf.get(ZOOKEEPERS)
+        boolean useMock = Boolean.parseBoolean(jobConf.get(MOCK))
+
+        // instance selection, applied to both the input and the output format
+        if (useMock) {
+            AccumuloInputFormat.setMockInstance(job, instanceName)
+            AccumuloOutputFormat.setMockInstance(job, instanceName)
+        } else if (zkServers == null) {
+            throw new IllegalArgumentException("Must specify either mock or zookeepers")
+        } else {
+            AccumuloInputFormat.setZooKeeperInstance(job, instanceName, zkServers)
+            AccumuloOutputFormat.setZooKeeperInstance(job, instanceName, zkServers)
+        }
+
+        // input
+        AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass.getBytes()))
+        AccumuloInputFormat.setInputTableName(job, table)
+        job.setInputFormatClass(AccumuloInputFormat.class)
+        AccumuloInputFormat.setScanAuthorizations(job, new Authorizations(auths))
+
+        // output
+        job.setOutputFormatClass(AccumuloOutputFormat.class)
+        job.setOutputKeyClass(Text.class)
+        job.setOutputValueClass(Mutation.class)
+        AccumuloOutputFormat.setConnectorInfo(job, user, new PasswordToken(pass.getBytes()))
+        AccumuloOutputFormat.setDefaultTableName(job, outtable)
+    }
+
+    /**
+     * Applies a fixed set of MapReduce settings: speculative execution off for maps and
+     * reduces, a 256 MB sort buffer, and gzip-compressed map output.
+     *
+     * @param conf configuration to modify
+     */
+    public static void addMRPerformance(Configuration conf) {
+        ["mapred.map.tasks.speculative.execution", "mapred.reduce.tasks.speculative.execution"].each { String key ->
+            conf.setBoolean(key, false)
+        }
+        conf.set("io.sort.mb", "256")
+        conf.setBoolean("mapred.compress.map.output", true)
+        conf.set("mapred.map.output.compression.codec", GzipCodec.class.getName())
+    }
+
+    /**
+     * Creates the Accumulo instance described by the configuration: a mock instance when
+     * MOCK parses as true, otherwise a ZooKeeper-backed one.
+     *
+     * @param conf configuration to read (must not be null)
+     * @return the instance
+     * @throws IllegalArgumentException when mock is not enabled and no zookeepers are configured
+     */
+    public static Instance instance(Configuration conf) {
+        assert conf != null
+
+        String instanceName = conf.get(INSTANCE)
+        String zkServers = conf.get(ZOOKEEPERS)
+        if (Boolean.parseBoolean(conf.get(MOCK))) {
+            return new MockInstance(instanceName)
+        }
+        if (zkServers == null) {
+            throw new IllegalArgumentException("Must specify either mock or zookeepers")
+        }
+        return new ZooKeeperInstance(instanceName, zkServers)
+    }
+
+    /**
+     * Opens a connector using the user name and password from the configuration.
+     *
+     * @param instance instance to connect to, or null to build one from the configuration
+     * @param conf configuration supplying USERNAME and PASSWORD (and the instance settings
+     *             when instance is null)
+     * @return the connector
+     */
+    public static Connector connector(Instance instance, Configuration conf) {
+        String username = conf.get(USERNAME)
+        String password = conf.get(PASSWORD)
+        if (instance == null) {
+            // Must be class-qualified: a bare instance(conf) resolves to the parameter
+            // named "instance" and becomes instance.call(conf) on a null reference.
+            instance = ProspectorUtils.instance(conf)
+        }
+        return instance.getConnector(username, password)
+    }
+
+    /**
+     * Writes all mutations to the table through a batch writer.
+     *
+     * @param connector connector used to create the batch writer
+     * @param tableName table to write to
+     * @param mutations mutations to add, in iteration order
+     */
+    public static void writeMutations(Connector connector, String tableName, def mutations) {
+        def bw = connector.createBatchWriter(tableName, 10000l, 10000l, 4);
+        try {
+            mutations.each { m ->
+                bw.addMutation(m)
+            }
+            bw.flush()
+        } finally {
+            // Close even when adding or flushing fails, so the writer is not leaked.
+            bw.close()
+        }
+    }
+
+}

[15/51] [partial] incubator-rya git commit: now including the actual project

Reply via email to