RYA-253 Converted the rya.prospector classes from Groovy to Java. Also added documentation to the project. Closes #151
Project: http://git-wip-us.apache.org/repos/asf/incubator-rya/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-rya/commit/bd9b9124 Tree: http://git-wip-us.apache.org/repos/asf/incubator-rya/tree/bd9b9124 Diff: http://git-wip-us.apache.org/repos/asf/incubator-rya/diff/bd9b9124 Branch: refs/heads/master Commit: bd9b91241da484fab81969c2fff3b6bb12974843 Parents: e2a979d Author: Kevin Chilton <kevin.chil...@parsons.com> Authored: Wed Mar 1 17:42:08 2017 -0500 Committer: Caleb Meier <caleb.me...@parsons.com> Committed: Thu Apr 13 06:31:01 2017 -0700 ---------------------------------------------------------------------- .../apache/rya/api/persist/RdfEvalStatsDAO.java | 8 +- .../accumulo/entity/EntityOptimizer.java | 25 +- .../accumulo/entity/EntityTupleSet.java | 64 ++--- extras/rya.prospector/pom.xml | 69 ----- .../rya/prospector/domain/IndexEntry.groovy | 76 ------ .../domain/IntermediateProspect.groovy | 70 ----- .../rya/prospector/domain/TripleValueType.java | 26 -- .../apache/rya/prospector/mr/Prospector.groovy | 108 -------- .../rya/prospector/mr/ProspectorCombiner.groovy | 61 ----- .../rya/prospector/mr/ProspectorMapper.groovy | 75 ------ .../rya/prospector/mr/ProspectorReducer.groovy | 57 ---- .../rya/prospector/plans/IndexWorkPlan.groovy | 51 ---- .../plans/IndexWorkPlanManager.groovy | 29 -- .../rya/prospector/plans/impl/CountPlan.groovy | 220 ---------------- .../ServicesBackedIndexWorkPlanManager.groovy | 38 --- .../prospector/service/ProspectorService.groovy | 126 --------- .../ProspectorServiceEvalStatsDAO.groovy | 122 --------- .../rya/prospector/utils/CustomEntry.groovy | 52 ---- .../prospector/utils/ProspectorConstants.groovy | 41 --- .../rya/prospector/utils/ProspectorUtils.groovy | 138 ---------- .../rya/prospector/domain/IndexEntry.java | 241 +++++++++++++++++ .../prospector/domain/IntermediateProspect.java | 213 +++++++++++++++ .../rya/prospector/domain/TripleValueType.java | 101 +++++++ .../apache/rya/prospector/mr/Prospector.java | 113 
++++++++ .../rya/prospector/mr/ProspectorCombiner.java | 61 +++++ .../rya/prospector/mr/ProspectorMapper.java | 83 ++++++ .../rya/prospector/mr/ProspectorReducer.java | 65 +++++ .../rya/prospector/plans/IndexWorkPlan.java | 115 ++++++++ .../prospector/plans/IndexWorkPlanManager.java | 36 +++ .../rya/prospector/plans/impl/CountPlan.java | 262 +++++++++++++++++++ .../ServicesBackedIndexWorkPlanManager.java | 49 ++++ .../prospector/service/ProspectorService.java | 162 ++++++++++++ .../service/ProspectorServiceEvalStatsDAO.java | 143 ++++++++++ .../rya/prospector/utils/CustomEntry.java | 58 ++++ .../prospector/utils/ProspectorConstants.java | 52 ++++ .../rya/prospector/utils/ProspectorUtils.java | 147 +++++++++++ .../rya/prospector/mr/ProspectorTest.groovy | 178 ------------- .../ProspectorServiceEvalStatsDAOTest.groovy | 182 ------------- .../rya/prospector/mr/ProspectorTest.java | 248 ++++++++++++++++++ .../ProspectorServiceEvalStatsDAOTest.java | 181 +++++++++++++ pom.xml | 55 ---- .../QueryJoinSelectOptimizerTest.java | 4 + 42 files changed, 2383 insertions(+), 1822 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/common/rya.api/src/main/java/org/apache/rya/api/persist/RdfEvalStatsDAO.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/main/java/org/apache/rya/api/persist/RdfEvalStatsDAO.java b/common/rya.api/src/main/java/org/apache/rya/api/persist/RdfEvalStatsDAO.java index b1d46c3..0b63d58 100644 --- a/common/rya.api/src/main/java/org/apache/rya/api/persist/RdfEvalStatsDAO.java +++ b/common/rya.api/src/main/java/org/apache/rya/api/persist/RdfEvalStatsDAO.java @@ -8,9 +8,9 @@ package org.apache.rya.api.persist; * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -24,7 +24,6 @@ package org.apache.rya.api.persist; import java.util.List; import org.apache.rya.api.RdfCloudTripleStoreConfiguration; - import org.openrdf.model.Resource; import org.openrdf.model.Value; @@ -44,9 +43,10 @@ public interface RdfEvalStatsDAO<C extends RdfCloudTripleStoreConfiguration> { public void destroy() throws RdfDAOException; + // XXX returns -1 if no cardinality could be found. public double getCardinality(C conf, CARDINALITY_OF card, List<Value> val) throws RdfDAOException; public double getCardinality(C conf, CARDINALITY_OF card, List<Value> val, Resource context) throws RdfDAOException; - + public void setConf(C conf); public C getConf(); http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityOptimizer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityOptimizer.java b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityOptimizer.java index f3b7183..244493a 100644 --- a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityOptimizer.java +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityOptimizer.java @@ -25,6 +25,11 @@ import java.util.Collection; import java.util.List; import java.util.Set; +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.TableExistsException; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; import 
org.apache.rya.accumulo.AccumuloRdfConfiguration; import org.apache.rya.api.RdfCloudTripleStoreConfiguration; import org.apache.rya.api.persist.joinselect.SelectivityEvalDAO; @@ -33,11 +38,6 @@ import org.apache.rya.joinselect.AccumuloSelectivityEvalDAO; import org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO; import org.apache.rya.rdftriplestore.inference.DoNotExpandSP; import org.apache.rya.rdftriplestore.utils.FixedStatementPattern; - -import org.apache.accumulo.core.client.AccumuloException; -import org.apache.accumulo.core.client.AccumuloSecurityException; -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; import org.openrdf.query.BindingSet; import org.openrdf.query.Dataset; import org.openrdf.query.algebra.Filter; @@ -47,12 +47,15 @@ import org.openrdf.query.algebra.StatementPattern; import org.openrdf.query.algebra.TupleExpr; import org.openrdf.query.algebra.evaluation.QueryOptimizer; import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.HashMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Sets; public class EntityOptimizer implements QueryOptimizer, Configurable { + private static final Logger LOG = LoggerFactory.getLogger(EntityTupleSet.class); private SelectivityEvalDAO<RdfCloudTripleStoreConfiguration> eval; private RdfCloudTripleStoreConfiguration conf; @@ -69,10 +72,8 @@ public class EntityOptimizer implements QueryOptimizer, Configurable { eval = new AccumuloSelectivityEvalDAO(conf, ConfigUtils.getConnector(conf)); ((AccumuloSelectivityEvalDAO)eval).setRdfEvalDAO(new ProspectorServiceEvalStatsDAO(ConfigUtils.getConnector(conf), conf)); eval.init(); - } catch (AccumuloException e) { - e.printStackTrace(); - } catch (AccumuloSecurityException e) { - e.printStackTrace(); + } catch (final AccumuloException | AccumuloSecurityException | TableExistsException e) { + 
LOG.warn("A problem was encountered while constructing the EntityOptimizer.", e); } isEvalDaoSet = true; @@ -103,10 +104,8 @@ public class EntityOptimizer implements QueryOptimizer, Configurable { eval = new AccumuloSelectivityEvalDAO(this.conf, ConfigUtils.getConnector(this.conf)); ((AccumuloSelectivityEvalDAO)eval).setRdfEvalDAO(new ProspectorServiceEvalStatsDAO(ConfigUtils.getConnector(this.conf), this.conf)); eval.init(); - } catch (AccumuloException e) { - e.printStackTrace(); - } catch (AccumuloSecurityException e) { - e.printStackTrace(); + } catch (final AccumuloException | AccumuloSecurityException | TableExistsException e) { + LOG.warn("A problem was encountered while setting the Configuration for the EntityOptimizer.", e); } isEvalDaoSet = true; http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityTupleSet.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityTupleSet.java b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityTupleSet.java index d829a29..42b7bb0 100644 --- a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityTupleSet.java +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/entity/EntityTupleSet.java @@ -1,5 +1,3 @@ -package org.apache.rya.indexing.accumulo.entity; - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -18,15 +16,17 @@ package org.apache.rya.indexing.accumulo.entity; * specific language governing permissions and limitations * under the License. 
*/ - - -import info.aduna.iteration.CloseableIteration; +package org.apache.rya.indexing.accumulo.entity; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Set; +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.Connector; +import org.apache.commons.io.IOUtils; import org.apache.rya.accumulo.AccumuloRdfConfiguration; import org.apache.rya.accumulo.AccumuloRyaDAO; import org.apache.rya.api.RdfCloudTripleStoreConfiguration; @@ -37,11 +37,6 @@ import org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO; import org.apache.rya.rdftriplestore.RdfCloudTripleStore; import org.apache.rya.rdftriplestore.RdfCloudTripleStoreConnection; import org.apache.rya.rdftriplestore.evaluation.ExternalBatchingIterator; - -import org.apache.accumulo.core.client.AccumuloException; -import org.apache.accumulo.core.client.AccumuloSecurityException; -import org.apache.accumulo.core.client.Connector; -import org.apache.commons.io.IOUtils; import org.openrdf.query.BindingSet; import org.openrdf.query.QueryEvaluationException; import org.openrdf.query.algebra.StatementPattern; @@ -49,12 +44,16 @@ import org.openrdf.query.algebra.Var; import org.openrdf.query.algebra.evaluation.QueryBindingSet; import org.openrdf.query.algebra.evaluation.impl.ExternalSet; import org.openrdf.sail.SailException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.beust.jcommander.internal.Sets; import com.google.common.base.Joiner; -public class EntityTupleSet extends ExternalSet implements ExternalBatchingIterator { +import info.aduna.iteration.CloseableIteration; +public class EntityTupleSet extends ExternalSet implements ExternalBatchingIterator { + private static final Logger LOG = LoggerFactory.getLogger(EntityTupleSet.class); private StarQuery starQuery; private RdfCloudTripleStoreConfiguration conf; @@ -97,26 +96,29 @@ 
public class EntityTupleSet extends ExternalSet implements ExternalBatchingItera } catch (AccumuloSecurityException e) { e.printStackTrace(); } - if (conf.isUseStats() && conf.isUseSelectivity()) { - ProspectorServiceEvalStatsDAO evalDao = new ProspectorServiceEvalStatsDAO(accCon, conf); - evalDao.init(); - AccumuloSelectivityEvalDAO ase = new AccumuloSelectivityEvalDAO(conf, accCon); - ase.setRdfEvalDAO(evalDao); - ase.init(); - - cardinality = starQuery.getCardinality(ase); - CardinalityStatementPattern csp = starQuery.getMinCardSp(ase); - - minCard = csp.getCardinality(); - minSp = csp.getSp(); - } else { - // TODO come up with a better default if cardinality is not - // initialized - cardinality = minCard = 1; - minSp = starQuery.getNodes().get(0); + try { + if (conf.isUseStats() && conf.isUseSelectivity()) { + ProspectorServiceEvalStatsDAO evalDao = new ProspectorServiceEvalStatsDAO(accCon, conf); + evalDao.init(); + AccumuloSelectivityEvalDAO ase = new AccumuloSelectivityEvalDAO(conf, accCon); + ase.setRdfEvalDAO(evalDao); + ase.init(); + + cardinality = starQuery.getCardinality(ase); + CardinalityStatementPattern csp = starQuery.getMinCardSp(ase); + + minCard = csp.getCardinality(); + minSp = csp.getSp(); + } else { + // TODO come up with a better default if cardinality is not + // initialized + cardinality = minCard = 1; + minSp = starQuery.getNodes().get(0); + } + } catch(final Exception e) { + LOG.warn("A problem was encountered while initializing the EntityTupleSet.", e); } - } @Override @@ -224,7 +226,7 @@ public class EntityTupleSet extends ExternalSet implements ExternalBatchingItera @Override - public CloseableIteration<BindingSet,QueryEvaluationException> evaluate(final Collection<BindingSet> bindingset) throws QueryEvaluationException { + public CloseableIteration<BindingSet,QueryEvaluationException> evaluate(Collection<BindingSet> bindingset) throws QueryEvaluationException { if(bindingset.size() < 2 && !this.evalOptUsed) { BindingSet bs = new 
QueryBindingSet(); @@ -248,7 +250,7 @@ public class EntityTupleSet extends ExternalSet implements ExternalBatchingItera private RdfCloudTripleStoreConnection getRyaSailConnection() throws AccumuloException, AccumuloSecurityException, SailException { - final RdfCloudTripleStore store = new RdfCloudTripleStore(); + RdfCloudTripleStore store = new RdfCloudTripleStore(); AccumuloRyaDAO crdfdao = new AccumuloRyaDAO(); crdfdao.setConnector(accCon); AccumuloRdfConfiguration acc = new AccumuloRdfConfiguration(conf); http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/pom.xml ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/pom.xml b/extras/rya.prospector/pom.xml index 952ab94..35a9f67 100644 --- a/extras/rya.prospector/pom.xml +++ b/extras/rya.prospector/pom.xml @@ -48,10 +48,6 @@ under the License. <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> </dependency> - <dependency> - <groupId>org.codehaus.groovy</groupId> - <artifactId>groovy-all</artifactId> - </dependency> <dependency> <groupId>org.apache.mrunit</groupId> @@ -75,75 +71,10 @@ under the License. </excludes> </configuration> </plugin> - <!--This plugin's configuration is used to store Eclipse m2e settings only. 
It has no influence on the Maven build itself.--> - <plugin> - <groupId>org.eclipse.m2e</groupId> - <artifactId>lifecycle-mapping</artifactId> - <version>1.0.0</version> - <configuration> - <lifecycleMappingMetadata> - <pluginExecutions> - <pluginExecution> - <pluginExecutionFilter> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-compiler-plugin</artifactId> - <versionRange>[3.2,)</versionRange> - <goals> - <goal>compile</goal> - <goal>testCompile</goal> - </goals> - </pluginExecutionFilter> - <action> - <ignore></ignore> - </action> - </pluginExecution> - <pluginExecution> - <pluginExecutionFilter> - <groupId>org.codehaus.groovy</groupId> - <artifactId>groovy-eclipse-compiler</artifactId> - <versionRange>[2.9.1-01,)</versionRange> - <goals> - <goal>add-groovy-build-paths</goal> - </goals> - </pluginExecutionFilter> - <action> - <ignore></ignore> - </action> - </pluginExecution> - </pluginExecutions> - </lifecycleMappingMetadata> - </configuration> - </plugin> </plugins> </pluginManagement> <plugins> <plugin> - <artifactId>maven-compiler-plugin</artifactId> - <configuration> - <compilerId>groovy-eclipse-compiler</compilerId> - </configuration> - <dependencies> - <dependency> - <groupId>org.codehaus.groovy</groupId> - <artifactId>groovy-eclipse-compiler</artifactId> - <version>2.9.1-01</version> - </dependency> - <!-- for 2.8.0-01 and later you must have an explicit - dependency on groovy-eclipse-batch --> - <dependency> - <groupId>org.codehaus.groovy</groupId> - <artifactId>groovy-eclipse-batch</artifactId> - <version>2.3.7-01</version> - </dependency> - </dependencies> - </plugin> - <plugin> - <groupId>org.codehaus.groovy</groupId> - <artifactId>groovy-eclipse-compiler</artifactId> - <version>2.9.1-01</version> - <extensions>true</extensions> - </plugin> - <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-shade-plugin</artifactId> <executions> 
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IndexEntry.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IndexEntry.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IndexEntry.groovy deleted file mode 100644 index 8b0b670..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IndexEntry.groovy +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.domain - -/** - * Date: 12/5/12 - * Time: 11:33 AM - */ -class IndexEntry { - def String index - def String data - def String dataType - def String tripleValueType - def String visibility - def Long count - def Long timestamp - - @Override - public String toString() { - return "IndexEntry{" + - "index='" + index + '\'' + - ", data='" + data + '\'' + - ", dataType='" + dataType + '\'' + - ", tripleValueType=" + tripleValueType + - ", visibility='" + visibility + '\'' + - ", timestamp='" + timestamp + '\'' + - ", count=" + count + - '}'; - } - - boolean equals(o) { - if (this.is(o)) return true - if (getClass() != o.class) return false - - IndexEntry that = (IndexEntry) o - - if (count != that.count) return false - if (timestamp != that.timestamp) return false - if (data != that.data) return false - if (dataType != that.dataType) return false - if (index != that.index) return false - if (tripleValueType != that.tripleValueType) return false - if (visibility != that.visibility) return false - - return true - } - - int hashCode() { - int result - result = (index != null ? index.hashCode() : 0) - result = 31 * result + (data != null ? data.hashCode() : 0) - result = 31 * result + (dataType != null ? dataType.hashCode() : 0) - result = 31 * result + (tripleValueType != null ? tripleValueType.hashCode() : 0) - result = 31 * result + (visibility != null ? visibility.hashCode() : 0) - result = 31 * result + (count != null ? count.hashCode() : 0) - result = 31 * result + (timestamp != null ? 
timestamp.hashCode() : 0) - return result - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IntermediateProspect.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IntermediateProspect.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IntermediateProspect.groovy deleted file mode 100644 index c5e34c0..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/IntermediateProspect.groovy +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.domain - -import org.apache.hadoop.io.WritableComparable - -import static org.apache.rya.prospector.domain.TripleValueType.* - -/** - * Date: 12/3/12 - * Time: 11:15 AM - */ -class IntermediateProspect implements WritableComparable<IntermediateProspect> { - - def String index - def String data - def String dataType - def TripleValueType tripleValueType - def String visibility - - @Override - int compareTo(IntermediateProspect t) { - if(!index.equals(t.index)) - return index.compareTo(t.index); - if(!data.equals(t.data)) - return data.compareTo(t.data); - if(!dataType.equals(t.dataType)) - return dataType.compareTo(t.dataType); - if(!tripleValueType.equals(t.tripleValueType)) - return tripleValueType.compareTo(t.tripleValueType); - if(!visibility.equals(t.visibility)) - return visibility.compareTo(t.visibility); - return 0 - } - - @Override - void write(DataOutput dataOutput) { - dataOutput.writeUTF(index); - dataOutput.writeUTF(data); - dataOutput.writeUTF(dataType); - dataOutput.writeUTF(tripleValueType.name()); - dataOutput.writeUTF(visibility); - } - - @Override - void readFields(DataInput dataInput) { - index = dataInput.readUTF() - data = dataInput.readUTF() - dataType = dataInput.readUTF() - tripleValueType = TripleValueType.valueOf(dataInput.readUTF()) - visibility = dataInput.readUTF() - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/TripleValueType.java ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/TripleValueType.java b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/TripleValueType.java deleted file mode 100644 index 0c53076..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/domain/TripleValueType.java +++ /dev/null @@ -1,26 +0,0 @@ -package 
org.apache.rya.prospector.domain; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -public enum TripleValueType { - - subject, predicate, object, entity, subjectpredicate, predicateobject, subjectobject -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/Prospector.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/Prospector.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/Prospector.groovy deleted file mode 100644 index c51ecef..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/Prospector.groovy +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.mr - -import org.apache.rya.prospector.utils.ProspectorUtils -import org.apache.accumulo.core.data.Mutation -import org.apache.accumulo.core.data.Value -import org.apache.accumulo.core.security.ColumnVisibility -import org.apache.hadoop.conf.Configured -import org.apache.hadoop.util.Tool -import org.apache.hadoop.util.ToolRunner -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path -import org.apache.hadoop.mapreduce.Job - -import org.apache.hadoop.io.LongWritable -import org.apache.commons.lang.time.DateUtils - -import org.apache.rya.prospector.domain.IntermediateProspect - -import com.google.common.collect.Lists - -import static org.apache.rya.prospector.utils.ProspectorConstants.* -import static org.apache.rya.prospector.utils.ProspectorUtils.* - -/** - * Date: 12/3/12 - * Time: 10:57 AM - */ -class Prospector extends Configured implements Tool { - - private static long NOW = System.currentTimeMillis(); - - private Date truncatedDate; - - public static void main(String[] args) { - int res = ToolRunner.run(new Prospector(), args); - System.exit(res); - } - - @Override - int run(String[] args) { - Configuration conf = getConf(); - - truncatedDate = DateUtils.truncate(new Date(NOW), Calendar.MINUTE); - - Path configurationPath = new Path(args[0]); - conf.addResource(configurationPath); - - def inTable = conf.get("prospector.intable") - def outTable = conf.get("prospector.outtable") - def auths_str = conf.get("prospector.auths") - assert inTable != null - assert outTable != null - 
assert auths_str != null - - Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis()); - job.setJarByClass(this.getClass()); - - String[] auths = auths_str.split(",") - ProspectorUtils.initMRJob(job, inTable, outTable, auths) - - job.getConfiguration().setLong("DATE", NOW); - - def performant = conf.get(PERFORMANT) - if (Boolean.parseBoolean(performant)) { - /** - * Apply some performance tuning - */ - ProspectorUtils.addMRPerformance(job.configuration) - } - - job.setMapOutputKeyClass(IntermediateProspect.class); - job.setMapOutputValueClass(LongWritable.class); - - job.setMapperClass(ProspectorMapper.class); - job.setCombinerClass(ProspectorCombiner.class); - job.setReducerClass(ProspectorReducer.class); - job.waitForCompletion(true); - - int success = job.isSuccessful() ? 0 : 1; - - if (success == 0) { - Mutation m = new Mutation(METADATA) - m.put(PROSPECT_TIME, getReverseIndexDateTime(truncatedDate), new ColumnVisibility(DEFAULT_VIS), truncatedDate.time, new Value(EMPTY)) - writeMutations(connector(instance(conf), conf), outTable, [m]) - } - - return success - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorCombiner.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorCombiner.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorCombiner.groovy deleted file mode 100644 index 784ffd2..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorCombiner.groovy +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.mr - -import org.apache.rya.prospector.plans.IndexWorkPlan -import org.apache.rya.prospector.plans.IndexWorkPlanManager -import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager -import org.apache.commons.lang.time.DateUtils -import org.apache.hadoop.mapreduce.Reducer -import org.apache.rya.prospector.utils.ProspectorUtils - -/** - * Date: 12/3/12 - * Time: 11:06 AM - */ -class ProspectorCombiner extends Reducer { - - private Date truncatedDate; - private IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager() - Map<String, IndexWorkPlan> plans - - @Override - public void setup(Reducer.Context context) throws IOException, InterruptedException { - super.setup(context); - - long now = context.getConfiguration().getLong("DATE", System.currentTimeMillis()); - truncatedDate = DateUtils.truncate(new Date(now), Calendar.MINUTE); - - this.plans = ProspectorUtils.planMap(manager.plans) - } - - @Override - protected void reduce(def prospect, Iterable values, Reducer.Context context) { - def plan = plans.get(prospect.index) - if (plan != null) { - def coll = plan.combine(prospect, values) - if (coll != null) { - coll.each { entry -> - context.write(entry.key, entry.value) - } - } - } - } -} 
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorMapper.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorMapper.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorMapper.groovy deleted file mode 100644 index 36eab60..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorMapper.groovy +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.mr - -import org.apache.rya.accumulo.AccumuloRdfConfiguration -import org.apache.rya.api.RdfCloudTripleStoreConstants -import org.apache.rya.api.domain.RyaStatement -import org.apache.rya.api.resolver.RyaTripleContext -import org.apache.rya.api.resolver.triple.TripleRow -import org.apache.rya.prospector.plans.IndexWorkPlan -import org.apache.rya.prospector.plans.IndexWorkPlanManager -import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager - -import org.apache.commons.lang.time.DateUtils -import org.apache.hadoop.mapreduce.Mapper - -/** - * Date: 12/3/12 - * Time: 11:06 AM - */ -class ProspectorMapper extends Mapper { - - private Date truncatedDate; - private RyaTripleContext ryaContext; - private IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager() - private Collection<IndexWorkPlan> plans = manager.plans - - @Override - public void setup(Mapper.Context context) throws IOException, InterruptedException { - super.setup(context); - - long now = context.getConfiguration().getLong("DATE", System.currentTimeMillis()); - ryaContext = RyaTripleContext.getInstance(new AccumuloRdfConfiguration(context.getConfiguration())); - truncatedDate = DateUtils.truncate(new Date(now), Calendar.MINUTE); - } - - @Override - public void map(def row, def data, Mapper.Context context) { - RyaStatement ryaStatement = ryaContext.deserializeTriple(RdfCloudTripleStoreConstants.TABLE_LAYOUT.SPO, - new TripleRow( - row.row.bytes, - row.columnFamily.bytes, - row.columnQualifier.bytes, - row.timestamp, - row.columnVisibility.bytes, - data.get() - ) - ) - plans.each { plan -> - def coll = plan.map(ryaStatement) - if (coll != null) { - coll.each { entry -> - context.write(entry.key, entry.value) - } - } - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorReducer.groovy 
---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorReducer.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorReducer.groovy deleted file mode 100644 index 1f4352b..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/mr/ProspectorReducer.groovy +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.mr - -import org.apache.rya.prospector.plans.IndexWorkPlan -import org.apache.rya.prospector.plans.IndexWorkPlanManager -import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager -import org.apache.commons.lang.time.DateUtils -import org.apache.hadoop.mapreduce.Reducer -import org.apache.rya.prospector.utils.ProspectorUtils - -/** - * Date: 12/3/12 - * Time: 11:06 AM - */ -class ProspectorReducer extends Reducer { - - private Date truncatedDate; - private IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager() - Map<String, IndexWorkPlan> plans - - @Override - public void setup(Reducer.Context context) throws IOException, InterruptedException { - super.setup(context); - - def conf = context.getConfiguration() - long now = conf.getLong("DATE", System.currentTimeMillis()); - truncatedDate = DateUtils.truncate(new Date(now), Calendar.MINUTE); - - this.plans = ProspectorUtils.planMap(manager.plans) - } - - @Override - protected void reduce(def prospect, Iterable values, Reducer.Context context) { - def plan = plans.get(prospect.index) - if (plan != null) { - plan.reduce(prospect, values, truncatedDate, context) - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlan.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlan.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlan.groovy deleted file mode 100644 index 80316ea..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlan.groovy +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.plans - -import org.apache.rya.api.domain.RyaStatement -import org.apache.rya.prospector.domain.IntermediateProspect -import org.apache.hadoop.io.LongWritable -import org.apache.hadoop.mapreduce.Reducer -import org.openrdf.model.vocabulary.XMLSchema -import org.apache.rya.prospector.domain.IndexEntry - -/** - * Date: 12/3/12 - * Time: 11:12 AM - */ -public interface IndexWorkPlan { - - public static final String URITYPE = XMLSchema.ANYURI.stringValue() - public static final LongWritable ONE = new LongWritable(1) - public static final String DELIM = "\u0000"; - - public Collection<Map.Entry<IntermediateProspect, LongWritable>> map(RyaStatement ryaStatement) - - public Collection<Map.Entry<IntermediateProspect, LongWritable>> combine(IntermediateProspect prospect, Iterable<LongWritable> counts); - - public void reduce(IntermediateProspect prospect, Iterable<LongWritable> counts, Date timestamp, Reducer.Context context) - - public String getIndexType() - - public String getCompositeValue(List<String> indices) - - public List<IndexEntry> query(def connector, String tableName, List<Long> prospectTimes, String type, String index, String dataType, String[] auths) - -} 
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlanManager.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlanManager.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlanManager.groovy deleted file mode 100644 index f1029dc..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/IndexWorkPlanManager.groovy +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.plans - -/** - * Date: 12/3/12 - * Time: 11:24 AM - */ -public interface IndexWorkPlanManager { - - public Collection<IndexWorkPlan> getPlans(); -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/CountPlan.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/CountPlan.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/CountPlan.groovy deleted file mode 100644 index 51527a5..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/CountPlan.groovy +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.plans.impl - -import org.apache.rya.api.domain.RyaStatement -import org.apache.rya.prospector.domain.IndexEntry -import org.apache.rya.prospector.domain.IntermediateProspect -import org.apache.rya.prospector.domain.TripleValueType -import org.apache.rya.prospector.plans.IndexWorkPlan -import org.apache.rya.prospector.utils.CustomEntry -import org.apache.rya.prospector.utils.ProspectorUtils - -import org.apache.accumulo.core.data.Mutation -import org.apache.accumulo.core.data.Range -import org.apache.accumulo.core.data.Value -import org.apache.accumulo.core.security.Authorizations -import org.apache.accumulo.core.security.ColumnVisibility -import org.apache.hadoop.io.LongWritable -import org.apache.hadoop.io.Text -import org.apache.hadoop.mapreduce.Reducer -import org.openrdf.model.util.URIUtil -import org.openrdf.model.vocabulary.XMLSchema; - -import static org.apache.rya.prospector.utils.ProspectorConstants.COUNT; -import org.apache.rya.api.RdfCloudTripleStoreConstants - -/** - * Date: 12/3/12 - * Time: 12:28 PM - */ -class CountPlan implements IndexWorkPlan { - - @Override - Collection<Map.Entry<IntermediateProspect, LongWritable>> map(RyaStatement ryaStatement) { - def subject = ryaStatement.getSubject() - def predicate = ryaStatement.getPredicate() - def subjpred = ryaStatement.getSubject().data + DELIM + ryaStatement.getPredicate().data - def predobj = ryaStatement.getPredicate().data + DELIM + ryaStatement.getObject().data - def subjobj = ryaStatement.getSubject().data + DELIM + ryaStatement.getObject().data - def object = ryaStatement.getObject() - def localIndex = URIUtil.getLocalNameIndex(subject.data) - def namespace = subject.data.substring(0, localIndex - 1) - def visibility = new String(ryaStatement.columnVisibility) - return [ - new CustomEntry<IntermediateProspect, LongWritable>( - new IntermediateProspect(index: COUNT, - data: subject.data, - dataType: URITYPE, - tripleValueType: TripleValueType.subject, - 
visibility: visibility), - ONE), - new CustomEntry<IntermediateProspect, LongWritable>( - new IntermediateProspect(index: COUNT, - data: predicate.data, - dataType: URITYPE, - tripleValueType: TripleValueType.predicate, - visibility: visibility - ), ONE), - new CustomEntry<IntermediateProspect, LongWritable>( - new IntermediateProspect(index: COUNT, - data: object.data, - dataType: object.dataType.stringValue(), - tripleValueType: TripleValueType.object, - visibility: visibility - ), ONE), - new CustomEntry<IntermediateProspect, LongWritable>( - new IntermediateProspect(index: COUNT, - data: subjpred, - dataType: XMLSchema.STRING, - tripleValueType: TripleValueType.subjectpredicate, - visibility: visibility - ), ONE), - new CustomEntry<IntermediateProspect, LongWritable>( - new IntermediateProspect(index: COUNT, - data: subjobj, - dataType: XMLSchema.STRING, - tripleValueType: TripleValueType.subjectobject, - visibility: visibility - ), ONE), - new CustomEntry<IntermediateProspect, LongWritable>( - new IntermediateProspect(index: COUNT, - data: predobj, - dataType: XMLSchema.STRING, - tripleValueType: TripleValueType.predicateobject, - visibility: visibility - ), ONE), - new CustomEntry<IntermediateProspect, LongWritable>( - new IntermediateProspect(index: COUNT, - data: namespace, - dataType: URITYPE, - tripleValueType: TripleValueType.entity, - visibility: visibility - ), ONE), - ] - } - - @Override - Collection<Map.Entry<IntermediateProspect, LongWritable>> combine(IntermediateProspect prospect, Iterable<LongWritable> counts) { - - def iter = counts.iterator() - long sum = 0; - iter.each { lw -> - sum += lw.get() - } - - return [new CustomEntry<IntermediateProspect, LongWritable>(prospect, new LongWritable(sum))] - } - - @Override - void reduce(IntermediateProspect prospect, Iterable<LongWritable> counts, Date timestamp, Reducer.Context context) { - def iter = counts.iterator() - long sum = 0; - iter.each { lw -> - sum += lw.get() - } - - def indexType = 
prospect.tripleValueType.name() - - // not sure if this is the best idea.. - if ((sum >= 0) || - indexType.equals(TripleValueType.predicate.toString())) { - - Mutation m = new Mutation(indexType + DELIM + prospect.data + DELIM + ProspectorUtils.getReverseIndexDateTime(timestamp)) - m.put(COUNT, prospect.dataType, new ColumnVisibility(prospect.visibility), timestamp.getTime(), new Value("${sum}".getBytes())); - - context.write(null, m); - } - } - - @Override - String getIndexType() { - return COUNT - } - - @Override - String getCompositeValue(List<String> indices){ - Iterator<String> indexIt = indices.iterator(); - String compositeIndex = indexIt.next(); - while (indexIt.hasNext()){ - String value = indexIt.next(); - compositeIndex += DELIM + value; - } - return compositeIndex; - } - - @Override - List<IndexEntry> query(def connector, String tableName, List<Long> prospectTimes, String type, String compositeIndex, String dataType, String[] auths) { - - assert connector != null && tableName != null && type != null && compositeIndex != null - - def bs = connector.createBatchScanner(tableName, new Authorizations(auths), 4) - def ranges = [] - int max = 1000; //by default only return 1000 prospects maximum - if (prospectTimes != null) { - prospectTimes.each { prospect -> - ranges.add( - new Range(type + DELIM + compositeIndex + DELIM + ProspectorUtils.getReverseIndexDateTime(new Date(prospect)))) - } - } else { - max = 1; //only return the latest if no prospectTimes given - def prefix = type + DELIM + compositeIndex + DELIM; - ranges.add(new Range(prefix, prefix + RdfCloudTripleStoreConstants.LAST)) - } - bs.ranges = ranges - if (dataType != null) { - bs.fetchColumn(new Text(COUNT), new Text(dataType)) - } else { - bs.fetchColumnFamily(new Text(COUNT)) - } - - List<IndexEntry> indexEntries = new ArrayList<IndexEntry>() - def iter = bs.iterator() - - while (iter.hasNext() && indexEntries.size() <= max) { - def entry = iter.next() - def k = entry.key - def v = entry.value 
- - def rowArr = k.row.toString().split(DELIM) - String values = ""; - // if it is a composite index, then return the type as a composite index - if (type.equalsIgnoreCase(TripleValueType.subjectpredicate.toString()) || - type.equalsIgnoreCase(TripleValueType.subjectobject.toString()) || - type.equalsIgnoreCase(TripleValueType.predicateobject.toString())){ - values =rowArr[1] + DELIM + rowArr[2] - } - else values = rowArr[1] - - indexEntries.add(new IndexEntry(data: values, - tripleValueType: rowArr[0], - index: COUNT, - dataType: k.columnQualifier.toString(), - visibility: k.columnVisibility.toString(), - count: Long.parseLong(new String(v.get())), - timestamp: k.timestamp - )) - } - bs.close() - - return indexEntries - } - -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy deleted file mode 100644 index 07c81af..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/plans/impl/ServicesBackedIndexWorkPlanManager.groovy +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.plans.impl - -import org.apache.rya.prospector.plans.IndexWorkPlan -import com.google.common.collect.Lists -import org.apache.rya.prospector.plans.IndexWorkPlanManager - -/** - * Date: 12/3/12 - * Time: 11:24 AM - */ -class ServicesBackedIndexWorkPlanManager implements IndexWorkPlanManager { - - def Collection<IndexWorkPlan> plans - - ServicesBackedIndexWorkPlanManager() { - def iterator = ServiceLoader.load(IndexWorkPlan.class).iterator(); - plans = Lists.newArrayList(iterator) - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorService.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorService.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorService.groovy deleted file mode 100644 index d72e0e0..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorService.groovy +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.service - -import org.apache.rya.prospector.utils.ProspectorUtils -import org.apache.accumulo.core.data.Key -import org.apache.accumulo.core.data.Range -import org.apache.accumulo.core.security.Authorizations -import org.apache.hadoop.io.Text - -import static org.apache.rya.prospector.utils.ProspectorConstants.METADATA -import static org.apache.rya.prospector.utils.ProspectorConstants.PROSPECT_TIME -import org.apache.rya.prospector.plans.IndexWorkPlanManager -import org.apache.rya.prospector.plans.impl.ServicesBackedIndexWorkPlanManager -import org.apache.rya.prospector.plans.IndexWorkPlan -import org.apache.rya.prospector.domain.IndexEntry - -/** - * Date: 12/5/12 - * Time: 12:28 PM - */ -class ProspectorService { - - def connector - String tableName - - IndexWorkPlanManager manager = new ServicesBackedIndexWorkPlanManager() - Map<String, IndexWorkPlan> plans - - ProspectorService(def connector, String tableName) { - this.connector = connector - this.tableName = tableName - this.plans = ProspectorUtils.planMap(manager.plans) - - //init - def tos = connector.tableOperations() - if(!tos.exists(tableName)) { - tos.create(tableName) - } - } - - public Iterator<Long> getProspects(String[] auths) { - - def scanner = connector.createScanner(tableName, new Authorizations(auths)) - scanner.setRange(Range.exact(METADATA)); - scanner.fetchColumnFamily(new Text(PROSPECT_TIME)); - - def iterator = scanner.iterator(); - - return new Iterator<Long>() { - - - @Override - public boolean hasNext() { - return 
iterator.hasNext(); - } - - @Override - public Long next() { - return iterator.next().getKey().getTimestamp(); - } - - @Override - public void remove() { - iterator.remove(); - } - }; - - } - - public Iterator<Long> getProspectsInRange(long beginTime, long endTime, String[] auths) { - - def scanner = connector.createScanner(tableName, new Authorizations(auths)) - scanner.setRange(new Range( - new Key(METADATA, PROSPECT_TIME, ProspectorUtils.getReverseIndexDateTime(new Date(endTime)), "", Long.MAX_VALUE), - new Key(METADATA, PROSPECT_TIME, ProspectorUtils.getReverseIndexDateTime(new Date(beginTime)), "", 0l) - )) - def iterator = scanner.iterator(); - - return new Iterator<Long>() { - - @Override - public boolean hasNext() { - return iterator.hasNext(); - } - - @Override - public Long next() { - return iterator.next().getKey().getTimestamp(); - } - - @Override - public void remove() { - iterator.remove(); - } - }; - - } - - public List<IndexEntry> query(List<Long> prospectTimes, String indexType, String type, List<String> index, String dataType, String[] auths) { - assert indexType != null - - def plan = plans.get(indexType) - assert plan != null: "Index Type: ${indexType} does not exist" - String compositeIndex = plan.getCompositeValue(index); - - return plan.query(connector, tableName, prospectTimes, type, compositeIndex, dataType, auths) - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy deleted file mode 100644 index 2c2b153..0000000 --- 
a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/service/ProspectorServiceEvalStatsDAO.groovy +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.rya.prospector.service - -import org.apache.rya.api.RdfCloudTripleStoreConfiguration -import org.apache.rya.api.persist.RdfEvalStatsDAO -import org.apache.rya.prospector.domain.TripleValueType -import org.apache.rya.prospector.utils.ProspectorConstants -import org.apache.hadoop.conf.Configuration -import org.openrdf.model.Resource -import org.openrdf.model.Value - -import org.apache.rya.api.persist.RdfEvalStatsDAO.CARDINALITY_OF - -/** - * An ${@link org.apache.rya.api.persist.RdfEvalStatsDAO} that uses the Prospector Service underneath return counts. 
- */ -class ProspectorServiceEvalStatsDAO implements RdfEvalStatsDAO<RdfCloudTripleStoreConfiguration> { - - def ProspectorService prospectorService - - ProspectorServiceEvalStatsDAO() { - } - - ProspectorServiceEvalStatsDAO(ProspectorService prospectorService, RdfCloudTripleStoreConfiguration conf) { - this.prospectorService = prospectorService - } - - public ProspectorServiceEvalStatsDAO(def connector, RdfCloudTripleStoreConfiguration conf) { - this.prospectorService = new ProspectorService(connector, getProspectTableName(conf)) - } - - @Override - void init() { - assert prospectorService != null - } - - @Override - boolean isInitialized() { - return prospectorService != null - } - - @Override - void destroy() { - - } - - @Override - public double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List<Value> val) { - - assert conf != null && card != null && val != null - String triplePart = null; - switch (card) { - case (CARDINALITY_OF.SUBJECT): - triplePart = TripleValueType.subject - break; - case (CARDINALITY_OF.PREDICATE): - triplePart = TripleValueType.predicate - break; - case (CARDINALITY_OF.OBJECT): - triplePart = TripleValueType.object - break; - case (CARDINALITY_OF.SUBJECTPREDICATE): - triplePart = TripleValueType.subjectpredicate - break; - case (CARDINALITY_OF.SUBJECTOBJECT): - triplePart = TripleValueType.subjectobject - break; - case (CARDINALITY_OF.PREDICATEOBJECT): - triplePart = TripleValueType.predicateobject - break; - } - - String[] auths = conf.getAuths() - List<String> indexedValues = new ArrayList<String>(); - Iterator<Value> valueIt = val.iterator(); - while (valueIt.hasNext()){ - indexedValues.add(valueIt.next().stringValue()); - } - - def indexEntries = prospectorService.query(null, ProspectorConstants.COUNT, triplePart, indexedValues, null /** what is the datatype here? */, - auths) - - return indexEntries.size() > 0 ? 
indexEntries.head().count : -1 - } - - @Override - double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List<Value> val, Resource context) { - return getCardinality(conf, card, val) //TODO: Not sure about the context yet - } - - @Override - public void setConf(RdfCloudTripleStoreConfiguration conf) { - - } - - @Override - RdfCloudTripleStoreConfiguration getConf() { - return null - } - - public static String getProspectTableName(RdfCloudTripleStoreConfiguration conf) { - return conf.getTablePrefix() + "prospects"; - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/CustomEntry.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/CustomEntry.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/CustomEntry.groovy deleted file mode 100644 index 9f23c48..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/CustomEntry.groovy +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.utils - -/** - * Date: 12/3/12 - * Time: 12:33 PM - */ -class CustomEntry<K, V> implements Map.Entry<K, V> { - - K key; - V value; - - CustomEntry(K key, V value) { - this.key = key - this.value = value - } - - K getKey() { - return key - } - - void setKey(K key) { - this.key = key - } - - V getValue() { - return value - } - - V setValue(V value) { - this.value = value - this.value - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorConstants.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorConstants.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorConstants.groovy deleted file mode 100644 index 29eac37..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorConstants.groovy +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.rya.prospector.utils - -/** - * Date: 12/5/12 - * Time: 10:57 AM - */ -class ProspectorConstants { - public static final String COUNT = "count" - public static final String METADATA = "metadata" - public static final String PROSPECT_TIME = "prospectTime" - public static final String DEFAULT_VIS = "U&FOUO" - public static final byte[] EMPTY = new byte [0]; - - //config properties - public static final String PERFORMANT = "performant" - - public static final String USERNAME = "username" - public static final String PASSWORD = "password" - public static final String INSTANCE = "instance" - public static final String ZOOKEEPERS = "zookeepers" - public static final String MOCK = "mock" -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorUtils.groovy ---------------------------------------------------------------------- diff --git a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorUtils.groovy b/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorUtils.groovy deleted file mode 100644 index e4142d9..0000000 --- a/extras/rya.prospector/src/main/groovy/org/apache/rya/prospector/utils/ProspectorUtils.groovy +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.rya.prospector.utils

import org.apache.accumulo.core.client.Connector
import org.apache.accumulo.core.client.Instance
import org.apache.accumulo.core.client.ZooKeeperInstance
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat
import org.apache.accumulo.core.client.mock.MockInstance
import org.apache.accumulo.core.data.Mutation
import org.apache.accumulo.core.security.Authorizations
import org.apache.commons.lang.Validate
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.compress.GzipCodec
import org.apache.hadoop.mapreduce.Job

import java.text.SimpleDateFormat
import org.apache.rya.prospector.plans.IndexWorkPlan
import org.apache.accumulo.core.client.security.tokens.PasswordToken

import static org.apache.rya.prospector.utils.ProspectorConstants.*

/**
 * Static helpers for the prospector: reverse-sorted timestamps, MapReduce job
 * wiring against Accumulo, and Accumulo connection/write utilities.
 *
 * Date: 12/4/12
 * Time: 4:24 PM
 */
class ProspectorUtils {

    public static final long INDEXED_DATE_SORT_VAL = 999999999999999999L; // 18 char long, same length as date format pattern below
    // NOTE(review): "sss" renders seconds zero-padded to three digits, which is
    // what makes the pattern 18 characters wide. Presumably intentional; changing
    // it would change every value produced by getReverseIndexDateTime.
    public static final String INDEXED_DATE_FORMAT = "yyyyMMddHHmmsssSSS";

    /**
     * Formats {@code date} with {@link #INDEXED_DATE_FORMAT}, reads the digits as
     * a number and subtracts that number from {@link #INDEXED_DATE_SORT_VAL}.
     *
     * @param date - The date to convert. Must not be null.
     * @return The difference rendered as a decimal string.
     */
    public static String getReverseIndexDateTime(Date date) {
        Validate.notNull(date)
        String formattedDateString = new SimpleDateFormat(INDEXED_DATE_FORMAT).format(date)
        long diff = INDEXED_DATE_SORT_VAL - Long.parseLong(formattedDateString)

        return Long.toString(diff)
    }

    /**
     * Indexes a collection of plans by their {@code indexType} property.
     *
     * @param plans - The plans to index.
     * @return A map from each plan's {@code indexType} to the plan. When two plans
     *   share an index type the later one wins.
     */
    public static Map<String, IndexWorkPlan> planMap(def plans) {
        plans.inject([:]) { map, plan ->
            map.putAt(plan.indexType, plan)
            map
        }
    }

    /**
     * Configures {@code job} to read from one Accumulo table and write
     * {@link Mutation}s to another, using the connection settings stored in the
     * job's configuration under the {@link ProspectorConstants} keys.
     *
     * @param job - The job to configure.
     * @param table - The table the job scans.
     * @param outtable - The default table the job writes to.
     * @param auths - The authorizations used for the scan.
     * @throws IllegalArgumentException If the configuration neither enables the
     *   mock instance nor names any zookeepers.
     */
    public static void initMRJob(Job job, String table, String outtable, String[] auths) {
        Configuration conf = job.configuration
        String username = conf.get(USERNAME)
        String password = conf.get(PASSWORD)
        String instance = conf.get(INSTANCE)
        String zookeepers = conf.get(ZOOKEEPERS)
        String mock = conf.get(MOCK)

        // Point both the input and the output format at the same instance.
        if (Boolean.parseBoolean(mock)) {
            AccumuloInputFormat.setMockInstance(job, instance)
            AccumuloOutputFormat.setMockInstance(job, instance)
        } else if (zookeepers != null) {
            AccumuloInputFormat.setZooKeeperInstance(job, instance, zookeepers)
            AccumuloOutputFormat.setZooKeeperInstance(job, instance, zookeepers)
        } else {
            throw new IllegalArgumentException("Must specify either mock or zookeepers");
        }

        // INPUT
        AccumuloInputFormat.setConnectorInfo(job, username, new PasswordToken(password.getBytes()))
        AccumuloInputFormat.setInputTableName(job, table)
        job.setInputFormatClass(AccumuloInputFormat.class)
        AccumuloInputFormat.setScanAuthorizations(job, new Authorizations(auths))

        // OUTPUT
        job.setOutputFormatClass(AccumuloOutputFormat.class)
        job.setOutputKeyClass(Text.class)
        job.setOutputValueClass(Mutation.class)
        AccumuloOutputFormat.setConnectorInfo(job, username, new PasswordToken(password.getBytes()))
        AccumuloOutputFormat.setDefaultTableName(job, outtable)
    }

    /**
     * Applies a fixed set of MapReduce tuning properties to {@code conf}:
     * speculative execution off for map and reduce tasks, a 256 MB sort buffer,
     * and gzip-compressed map output.
     *
     * @param conf - The configuration to update.
     */
    public static void addMRPerformance(Configuration conf) {
        conf.setBoolean("mapred.map.tasks.speculative.execution", false);
        conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
        conf.set("io.sort.mb", "256");
        conf.setBoolean("mapred.compress.map.output", true);
        conf.set("mapred.map.output.compression.codec", GzipCodec.class.getName());
    }

    /**
     * Creates the Accumulo {@link Instance} described by {@code conf}.
     *
     * @param conf - The configuration holding the instance settings. Must not be null.
     * @return A {@link MockInstance} when the mock flag parses as true, otherwise a
     *   {@link ZooKeeperInstance} for the configured zookeepers.
     * @throws IllegalArgumentException If the configuration neither enables the
     *   mock instance nor names any zookeepers.
     */
    public static Instance instance(Configuration conf) {
        assert conf != null

        String instance_str = conf.get(INSTANCE)
        String zookeepers = conf.get(ZOOKEEPERS)
        String mock = conf.get(MOCK)
        if (Boolean.parseBoolean(mock)) {
            return new MockInstance(instance_str)
        } else if (zookeepers != null) {
            return new ZooKeeperInstance(instance_str, zookeepers)
        } else {
            throw new IllegalArgumentException("Must specify either mock or zookeepers");
        }
    }

    /**
     * Opens a {@link Connector} using the username and password stored in {@code conf}.
     *
     * @param instance - The instance to connect to, or null to build one from
     *   {@code conf} via {@link #instance(Configuration)}.
     * @param conf - The configuration holding the credentials.
     * @return A connector to the instance.
     */
    public static Connector connector(Instance instance, Configuration conf) {
        String username = conf.get(USERNAME)
        String password = conf.get(PASSWORD)
        if (instance == null) {
            // The call must be qualified. A bare instance(conf) binds to the
            // parameter named "instance" (Groovy turns it into instance.call(conf)),
            // which is null on this path, so the fallback could never succeed.
            instance = ProspectorUtils.instance(conf)
        }
        return instance.getConnector(username, password)
    }

    /**
     * Writes every mutation to {@code tableName} through a single batch writer.
     *
     * @param connector - The connector used to create the batch writer.
     * @param tableName - The table that receives the mutations.
     * @param mutations - The mutations to write.
     */
    public static void writeMutations(Connector connector, String tableName, def mutations) {
        def bw = connector.createBatchWriter(tableName, 10000L, 10000L, 4)
        try {
            mutations.each { m ->
                bw.addMutation(m)
            }
            bw.flush()
        } finally {
            // Release the writer even when a mutation is rejected part-way through.
            bw.close()
        }
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/bd9b9124/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IndexEntry.java
----------------------------------------------------------------------
diff --git a/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IndexEntry.java b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IndexEntry.java
new file mode 100644
index 0000000..4d4dfc8
--- /dev/null
+++ b/extras/rya.prospector/src/main/java/org/apache/rya/prospector/domain/IndexEntry.java
@@ -0,0 +1,241 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.prospector.domain; + +import java.util.Objects; + +import org.apache.rya.prospector.mr.Prospector; +import org.apache.rya.prospector.plans.IndexWorkPlan; + +/** + * Represents a count that was the result of a {@link Prospector} run. + */ +public class IndexEntry { + + private final String index; + private final String data; + private final String dataType; + private final String tripleValueType; + private final String visibility; + private final Long count; + private final Long timestamp; + + /** + * Constructs an instance of {@link IndexEntry}. + * + * @param index - Indicates which {@link IndexWorkPlan} the data came from. + * @param data - The information that is being counted. + * @param dataType - The data type of {@code data}. + * @param tripleValueType - Indicates which parts of the RDF Statement are included in {@code data}. + * @param visibility - The visibility of this entry. + * @param count - The number of times the {@code data} appeared within Rya. + * @param timestamp - Identifies which Prospect run this entry belongs to. + */ + public IndexEntry( + final String index, + final String data, + final String dataType, + final String tripleValueType, + final String visibility, + final Long count, + final Long timestamp) { + this.index = index; + this.data = data; + this.dataType = dataType; + this.tripleValueType = tripleValueType; + this.visibility = visibility; + this.count = count; + this.timestamp = timestamp; + } + + /** + * @return Indicates which {@link IndexWorkPlan} the data came from. 
+ */ + public String getIndex() { + return index; + } + + /** + * @return The information that is being counted. + */ + public String getData() { + return data; + } + + /** + * @return The data type of {@code data}. + */ + public String getDataType() { + return dataType; + } + + /** + * @return Indicates which parts of the RDF Statement are included in {@code data}. + */ + public String getTripleValueType() { + return tripleValueType; + } + + /** + * @return The visibility of this entry. + */ + public String getVisibility() { + return visibility; + } + + /** + * @return The number of times the {@code data} appeared within Rya. + */ + public Long getCount() { + return count; + } + + /** + * @return Identifies which Prospect run this entry belongs to. + */ + public Long getTimestamp() { + return timestamp; + } + + @Override + public String toString() { + return "IndexEntry{" + + "index='" + index + '\'' + + ", data='" + data + '\'' + + ", dataType='" + dataType + '\'' + + ", tripleValueType=" + tripleValueType + + ", visibility='" + visibility + '\'' + + ", timestamp='" + timestamp + '\'' + + ", count=" + count + + '}'; + } + + @Override + public int hashCode() { + return Objects.hash(index, data, dataType, tripleValueType, visibility, count, timestamp); + } + + @Override + public boolean equals(Object o) { + if(this == o) { + return true; + } + if(o instanceof IndexEntry) { + final IndexEntry entry = (IndexEntry) o; + return Objects.equals(index, entry.index) && + Objects.equals(data, entry.data) && + Objects.equals(dataType, entry.dataType) && + Objects.equals(tripleValueType, entry.tripleValueType) && + Objects.equals(visibility, entry.visibility) && + Objects.equals(count, entry.count) && + Objects.equals(timestamp, entry.timestamp); + } + return false; + } + + /** + * @return An empty instance of {@link Builder}. + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builds instances of {@link IndexEntry}. 
+ */ + public static final class Builder { + private String index; + private String data; + private String dataType; + private String tripleValueType; + private String visibility; + private Long count; + private Long timestamp; + + /** + * @param index - Indicates which {@link IndexWorkPlan} the data came from. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setIndex(String index) { + this.index = index; + return this; + } + + /** + * @param data - The information that is being counted. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setData(String data) { + this.data = data; + return this; + } + + /** + * @param dataType - The data type of {@code data}. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setDataType(String dataType) { + this.dataType = dataType; + return this; + } + + /** + * @param tripleValueType - Indicates which parts of the RDF Statement are included in {@code data}. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setTripleValueType(String tripleValueType) { + this.tripleValueType = tripleValueType; + return this; + } + + /** + * @param visibility - The visibility of this entry. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setVisibility(String visibility) { + this.visibility = visibility; + return this; + } + + /** + * @param count - The number of times the {@code data} appeared within Rya. + * @return This {@link Builder} so that method invocations may be chained. + */ + public Builder setCount(Long count) { + this.count = count; + return this; + } + + /** + * @param timestamp - Identifies which Prospect run this entry belongs to. + * @return This {@link Builder} so that method invocations may be chained. 
+ */ + public Builder setTimestamp(Long timestamp) { + this.timestamp = timestamp; + return this; + } + + /** + * @return Constructs an instance of {@link IndexEntry} built using this builder's values. + */ + public IndexEntry build() { + return new IndexEntry(index, data, dataType, tripleValueType, visibility, count, timestamp); + } + } +} \ No newline at end of file