http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/IndexPlanValidator/VarConstantIndexListPruner.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/IndexPlanValidator/VarConstantIndexListPruner.java b/extras/indexing/src/main/java/org/apache/rya/indexing/IndexPlanValidator/VarConstantIndexListPruner.java new file mode 100644 index 0000000..577663b --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/IndexPlanValidator/VarConstantIndexListPruner.java @@ -0,0 +1,174 @@ +package mvm.rya.indexing.IndexPlanValidator; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import mvm.rya.indexing.external.tupleSet.ExternalTupleSet; + +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.ValueConstant; +import org.openrdf.query.algebra.Var; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; + +import com.google.common.collect.Maps; +import com.google.common.collect.Sets;

/**
 * Prunes a list of candidate indices by comparing simple statistics of each
 * index expression against the same statistics of the query: the number of
 * statement patterns, the number of filters, and how often each constant
 * occurs. An index is kept only when none of its statistics exceeds the
 * corresponding statistic of the query.
 */
public class VarConstantIndexListPruner implements IndexListPruner {

    /** Occurrence count of every constant found in the query, keyed by the constant's string form. */
    private Map<String, Integer> queryConstantMap;
    /** Number of statement patterns in the query. */
    private int querySpCount;
    /** Number of filters in the query. */
    private int queryFilterCount;

    /**
     * Collects the statistics of the query once, up front.
     *
     * @param te the query expression that candidate indices are compared against
     */
    public VarConstantIndexListPruner(TupleExpr te) {
        ConstantCollector queryStats = new ConstantCollector();
        te.visit(queryStats);

        this.queryConstantMap = queryStats.getConstantMap();
        this.querySpCount = queryStats.getSpCount();
        this.queryFilterCount = queryStats.getFilterCount();
    }

    /**
     * Returns the indices that pass the statistics check, in their original order.
     *
     * @param indexList candidate indices
     * @return a new list holding only the candidates considered relevant
     */
    @Override
    public List<ExternalTupleSet> getRelevantIndices(List<ExternalTupleSet> indexList) {
        List<ExternalTupleSet> relevant = new ArrayList<>();
        for (ExternalTupleSet candidate : indexList) {
            if (isRelevant(candidate.getTupleExpr())) {
                relevant.add(candidate);
            }
        }
        return relevant;
    }

    /**
     * An index is relevant when it has no more statement patterns and no more
     * filters than the query, every constant it uses also occurs in the query,
     * and no constant occurs more often in the index than in the query.
     */
    private boolean isRelevant(TupleExpr index) {
        ConstantCollector indexStats = new ConstantCollector();
        index.visit(indexStats);

        if (indexStats.getSpCount() > querySpCount) {
            return false;
        }
        if (indexStats.getFilterCount() > queryFilterCount) {
            return false;
        }

        Map<String, Integer> indexConstantMap = indexStats.getConstantMap();

        // Every constant of the index must also appear in the query.
        if (!queryConstantMap.keySet().containsAll(indexConstantMap.keySet())) {
            return false;
        }

        // Safe to look up in the query map: the subset check above guarantees the key exists.
        for (Map.Entry<String, Integer> occurrence : indexConstantMap.entrySet()) {
            if (occurrence.getValue() > queryConstantMap.get(occurrence.getKey())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Visitor that counts statement patterns, filters and constant occurrences
     * while walking an expression tree.
     */
    private static class ConstantCollector extends QueryModelVisitorBase<RuntimeException> {

        private Map<String, Integer> constantMap = Maps.newHashMap();
        private int spCount = 0;
        private int filterCount = 0;

        @Override
        public void meet(StatementPattern node) throws RuntimeException {
            spCount++;
            super.meet(node);
        }

        @Override
        public void meet(Filter node) throws RuntimeException {
            filterCount++;
            super.meet(node);
        }

        @Override
        public void meet(Var node) throws RuntimeException {
            // Only constant vars contribute; plain variables are ignored.
            if (node.isConstant()) {
                tally(node.getValue().toString());
            }
        }

        @Override
        public void meet(ValueConstant node) throws RuntimeException {
            tally(node.getValue().toString());
        }

        /** Adds one occurrence of the given constant to the count map. */
        private void tally(String key) {
            Integer seenSoFar = constantMap.get(key);
            constantMap.put(key, seenSoFar == null ? 1 : seenSoFar + 1);
        }

        public Map<String, Integer> getConstantMap() {
            return constantMap;
        }

        public int getSpCount() {
            return spCount;
        }

        public int getFilterCount() {
            return filterCount;
        }
    }
}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/IndexingExpr.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/IndexingExpr.java b/extras/indexing/src/main/java/org/apache/rya/indexing/IndexingExpr.java new file mode 100644 index 0000000..1d4c4bb --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/IndexingExpr.java @@ -0,0 +1,94 @@ +package mvm.rya.indexing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +import java.util.Set; + +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.Var; + +import com.google.common.collect.Sets;

/**
 * Bundles an indexing function URI, the statement pattern it constrains, and
 * the argument values supplied to the function.
 */
public class IndexingExpr {

    private final URI function;
    private final Value[] arguments;
    private final StatementPattern spConstraint;

    /**
     * @param function     URI identifying the indexing function
     * @param spConstraint statement pattern the function applies to
     * @param arguments    argument values for the function; the array is stored as given, not copied
     */
    public IndexingExpr(URI function, StatementPattern spConstraint, Value... arguments) {
        this.function = function;
        this.arguments = arguments;
        this.spConstraint = spConstraint;
    }

    public URI getFunction() {
        return function;
    }

    /** @return the argument array held by this expression (not a copy) */
    public Value[] getArguments() {
        return arguments;
    }

    public StatementPattern getSpConstraint() {
        return spConstraint;
    }

    /**
     * Names of the non-constant variables of the statement pattern constraint.
     *
     * @return a new, mutable set of variable names
     */
    public Set<String> getBindingNames() {
        //resource and match variable for search are already included as standard result-bindings
        Set<String> bindings = Sets.newHashSet();

        for (Var v : spConstraint.getVarList()) {
            if (!v.isConstant()) {
                bindings.add(v.getName());
            }
        }
        return bindings;
    }

    /**
     * Two expressions are equal when function, statement pattern and argument
     * values are all equal.
     */
    @Override
    public boolean equals(Object other) {
        if (!(other instanceof IndexingExpr)) {
            return false;
        }
        IndexingExpr arg = (IndexingExpr) other;
        // Arrays.equals compares the elements; Object.equals on an array only
        // compares identity, which made equal expressions look different.
        return this.function.equals(arg.function)
                && this.spConstraint.equals(arg.spConstraint)
                && java.util.Arrays.equals(this.arguments, arg.arguments);
    }

    /** Hash code derived from the same three components that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        int result = 17;
        result = 31 * result + function.hashCode();
        result = 31 * result + spConstraint.hashCode();
        // Content-based hash so that equal expressions hash alike.
        result = 31 * result + java.util.Arrays.hashCode(arguments);

        return result;
    }

}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/IndexingFunctionRegistry.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/IndexingFunctionRegistry.java b/extras/indexing/src/main/java/org/apache/rya/indexing/IndexingFunctionRegistry.java new file mode 100644 index 0000000..2f2c486 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/IndexingFunctionRegistry.java @@ -0,0 +1,134 @@ +package mvm.rya.indexing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.openrdf.model.URI; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.query.algebra.ValueConstant; +import org.openrdf.query.algebra.ValueExpr; +import org.openrdf.query.algebra.Var; + +import com.google.common.collect.Maps;

/**
 * Static registry that maps search-function URIs to the kind of index
 * (geo, temporal or free text) that serves them.
 */
public class IndexingFunctionRegistry {

    private static final Map<URI, FUNCTION_TYPE> SEARCH_FUNCTIONS = Maps.newHashMap();

    static {

        String TEMPORAL_NS = "tag:rya-rdf.org,2015:temporal#";

        SEARCH_FUNCTIONS.put(new URIImpl(TEMPORAL_NS + "after"), FUNCTION_TYPE.TEMPORAL);
        SEARCH_FUNCTIONS.put(new URIImpl(TEMPORAL_NS + "before"), FUNCTION_TYPE.TEMPORAL);
        SEARCH_FUNCTIONS.put(new URIImpl(TEMPORAL_NS + "equals"), FUNCTION_TYPE.TEMPORAL);
        SEARCH_FUNCTIONS.put(new URIImpl(TEMPORAL_NS + "beforeInterval"), FUNCTION_TYPE.TEMPORAL);
        SEARCH_FUNCTIONS.put(new URIImpl(TEMPORAL_NS + "afterInterval"), FUNCTION_TYPE.TEMPORAL);
        SEARCH_FUNCTIONS.put(new URIImpl(TEMPORAL_NS + "insideInterval"), FUNCTION_TYPE.TEMPORAL);
        SEARCH_FUNCTIONS.put(new URIImpl(TEMPORAL_NS + "hasBeginningInterval"), FUNCTION_TYPE.TEMPORAL);
        SEARCH_FUNCTIONS.put(new URIImpl(TEMPORAL_NS + "hasEndInterval"), FUNCTION_TYPE.TEMPORAL);

        SEARCH_FUNCTIONS.put(new URIImpl("http://rdf.useekm.com/fts#text"), FUNCTION_TYPE.FREETEXT);

        SEARCH_FUNCTIONS.put(GeoConstants.GEO_SF_EQUALS, FUNCTION_TYPE.GEO);
        SEARCH_FUNCTIONS.put(GeoConstants.GEO_SF_DISJOINT, FUNCTION_TYPE.GEO);
        SEARCH_FUNCTIONS.put(GeoConstants.GEO_SF_INTERSECTS, FUNCTION_TYPE.GEO);
        SEARCH_FUNCTIONS.put(GeoConstants.GEO_SF_TOUCHES, FUNCTION_TYPE.GEO);
        SEARCH_FUNCTIONS.put(GeoConstants.GEO_SF_WITHIN, FUNCTION_TYPE.GEO);
        SEARCH_FUNCTIONS.put(GeoConstants.GEO_SF_CONTAINS, FUNCTION_TYPE.GEO);
        SEARCH_FUNCTIONS.put(GeoConstants.GEO_SF_OVERLAPS, FUNCTION_TYPE.GEO);
        SEARCH_FUNCTIONS.put(GeoConstants.GEO_SF_CROSSES, FUNCTION_TYPE.GEO);

    }

    /** Kind of index that evaluates a registered search function. */
    public enum FUNCTION_TYPE {GEO, TEMPORAL, FREETEXT};

    /**
     * @return the registered function URIs; this is the key-set view of the
     *         registry map itself, not a copy
     */
    public static Set<URI> getFunctions() {
        return SEARCH_FUNCTIONS.keySet();
    }

    /**
     * Picks the argument of a search-function call that acts as the result variable.
     *
     * @param function URI of the function being called
     * @param args     arguments of the call
     * @return the result variable, or {@code null} when the function is not
     *         registered or the arguments do not have the expected shape
     */
    public static Var getResultVarFromFunctionCall(URI function, List<ValueExpr> args) {

        FUNCTION_TYPE type = SEARCH_FUNCTIONS.get(function);

        // An unregistered function has no type; switching on null would throw
        // a NullPointerException, so answer "no result var" like the default branch.
        if (type == null) {
            return null;
        }

        switch (type) {
        case GEO:
            return findBinaryResultVar(args);
        case FREETEXT:
            return findLiteralResultVar(args);
        case TEMPORAL:
            return findBinaryResultVar(args);
        default:
            return null;
        }

    }

    /**
     * @param func function URI to look up
     * @return the registered type, or {@code null} when the URI is not registered
     */
    public static FUNCTION_TYPE getFunctionType(URI func) {
        return SEARCH_FUNCTIONS.get(func);
    }

    /** True for a {@link Var} that carries no value. */
    private static boolean isUnboundVariable(ValueExpr expr) {
        return expr instanceof Var && !((Var) expr).hasValue();
    }

    /** True for a {@link ValueConstant} or a {@link Var} that carries a value. */
    private static boolean isConstant(ValueExpr expr) {
        return expr instanceof ValueConstant || (expr instanceof Var && ((Var) expr).hasValue());
    }

    /**
     * Looks at the first two arguments and returns whichever one is an unbound
     * variable when the other is a constant; the order does not matter.
     */
    private static Var findBinaryResultVar(List<ValueExpr> args) {

        if (args.size() >= 2) {
            ValueExpr arg1 = args.get(0);
            ValueExpr arg2 = args.get(1);
            if (isUnboundVariable(arg1) && isConstant(arg2)) {
                return (Var) arg1;
            } else if (isUnboundVariable(arg2) && isConstant(arg1)) {
                return (Var) arg2;
            }
        }
        return null;
    }

    /**
     * Returns the first argument when it is an unbound variable and the second
     * is a constant; unlike the binary case the order is fixed.
     */
    private static Var findLiteralResultVar(List<ValueExpr> args) {
        if (args.size() >= 2) {
            ValueExpr arg1 = args.get(0);
            ValueExpr arg2 = args.get(1);
            if (isUnboundVariable(arg1) && isConstant(arg2)) {
                return (Var) arg1;
            }
        }
        return null;
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/IteratorFactory.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/IteratorFactory.java b/extras/indexing/src/main/java/org/apache/rya/indexing/IteratorFactory.java new file mode 100644 index 0000000..eb88d99 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/IteratorFactory.java @@ -0,0 +1,159 @@ +package mvm.rya.indexing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + +import info.aduna.iteration.CloseableIteration; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.NoSuchElementException; +import java.util.Set; + +import org.openrdf.model.Resource; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.query.BindingSet; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.Var; +import org.openrdf.query.impl.MapBindingSet;

//Given StatementPattern constraint and SearchFunction associated with an Indexing Node,
//creates appropriate StatementConstraints object from StatementPattern constraint and
//binding set and then uses SearchFunction to delegate query to appropriate index.
//Resulting iterator over statements is then converted to an iterator over binding sets
public class IteratorFactory {

    /**
     * Builds a lazily evaluated iteration of binding sets for one indexed statement pattern.
     * The search is not run until the first call to {@code hasNext()} or {@code next()}.
     *
     * @param match          statement pattern whose variables name the produced bindings
     * @param bindings       bindings already known; used as constraints and merged into every result
     * @param queryText      search text handed to the search function
     * @param searchFunction function that performs the index search
     * @return an iteration over the binding sets derived from the matching statements
     */
    public static CloseableIteration<BindingSet, QueryEvaluationException> getIterator(final StatementPattern match,
            final BindingSet bindings, final String queryText, final SearchFunction searchFunction) {
        return new CloseableIteration<BindingSet, QueryEvaluationException>() {

            private boolean isClosed = false;
            private CloseableIteration<Statement, QueryEvaluationException> statementIt = null;

            private String subjectBinding = match.getSubjectVar().getName();
            private String predicateBinding = match.getPredicateVar().getName();
            private String objectBinding = match.getObjectVar().getName();
            private String contextBinding = null;

            /** Translates the pattern and the incoming bindings into constraints and runs the search. */
            private void performQuery() throws QueryEvaluationException {

                StatementConstraints constraints = new StatementConstraints();

                // get the context (i.e. named graph) of the statement and use that in the query
                QueryModelNode parentNode = match.getSubjectVar().getParentNode();
                if (parentNode instanceof StatementPattern) {
                    StatementPattern parentStatement = (StatementPattern) parentNode;
                    Var contextVar = parentStatement.getContextVar();
                    if (contextVar != null) {
                        contextBinding = contextVar.getName();
                        Resource context = (Resource) contextVar.getValue();
                        constraints.setContext(context);
                    }
                }

                // get the subject constraint
                if (match.getSubjectVar().isConstant()) {
                    // get the subject binding from the filter/statement pair
                    Resource subject = (Resource) match.getSubjectVar().getValue();
                    constraints.setSubject(subject);
                } else if (bindings.hasBinding(subjectBinding)) {
                    // get the subject binding from the passed in bindings (eg from other statements/parts of the tree)
                    Resource subject = (Resource) bindings.getValue(subjectBinding);
                    constraints.setSubject(subject);
                }

                // get the predicate constraint
                if (match.getPredicateVar().isConstant()) {
                    // get the predicate binding from the filter/statement pair
                    Set<URI> predicates = new HashSet<URI>(getPredicateRestrictions(match.getPredicateVar()));
                    constraints.setPredicates(predicates);
                } else if (bindings.hasBinding(predicateBinding)) {
                    // get the predicate binding from the passed in bindings (eg from other statements/parts of the tree)
                    URI predicateUri = (URI) bindings.getValue(predicateBinding);
                    Set<URI> predicates = Collections.singleton(predicateUri);
                    constraints.setPredicates(predicates);
                }

                statementIt = searchFunction.performSearch(queryText, constraints);
            }

            @Override
            public boolean hasNext() throws QueryEvaluationException {
                // A closed iteration has nothing more to offer. Checking this first
                // avoids starting a search after close() and avoids touching the
                // already closed underlying iteration.
                if (isClosed) {
                    return false;
                }
                if (statementIt == null) {
                    performQuery();
                }
                return statementIt.hasNext();
            }

            @Override
            public BindingSet next() throws QueryEvaluationException {
                // Test the closed flag before hasNext() so a closed iteration never triggers a search.
                if (isClosed || !hasNext()) {
                    throw new NoSuchElementException();
                }

                Statement statement = statementIt.next();

                // Variables whose name starts with "-const" are skipped and get no binding.
                MapBindingSet bset = new MapBindingSet();
                if (!subjectBinding.startsWith("-const")) {
                    bset.addBinding(subjectBinding, statement.getSubject());
                }
                if (!predicateBinding.startsWith("-const")) {
                    bset.addBinding(predicateBinding, statement.getPredicate());
                }
                if (!objectBinding.startsWith("-const")) {
                    bset.addBinding(objectBinding, statement.getObject());
                }
                if (contextBinding != null && !contextBinding.startsWith("-const")) {
                    bset.addBinding(contextBinding, statement.getContext());
                }

                // merge with other bindings.
                for (String name : bindings.getBindingNames()) {
                    bset.addBinding(name, bindings.getValue(name));
                }

                return bset;
            }

            @Override
            public void remove() throws QueryEvaluationException {
                throw new UnsupportedOperationException();
            }

            @Override
            public void close() throws QueryEvaluationException {
                if (statementIt != null) {
                    statementIt.close();
                }
                isClosed = true;
            }

        };

    }

    /**
     * @param predicate predicate variable of a statement pattern
     * @return a singleton holding the variable's value when it has one, otherwise an empty collection
     */
    public static Collection<URI> getPredicateRestrictions(Var predicate) {
        if (predicate.hasValue()) {
            return Collections.singleton((URI) predicate.getValue());
        }
        return Collections.emptyList();
    }
}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/KeyParts.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/KeyParts.java b/extras/indexing/src/main/java/org/apache/rya/indexing/KeyParts.java new file mode 100644 index 0000000..2dd7a73 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/KeyParts.java @@ -0,0 +1,333 @@ +package mvm.rya.indexing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; + +import org.apache.accumulo.core.data.Value; +import org.apache.commons.codec.binary.StringUtils; +import org.apache.hadoop.io.Text; +import org.openrdf.model.Resource; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.model.impl.ContextStatementImpl; +import org.openrdf.model.impl.StatementImpl; +import org.openrdf.model.impl.URIImpl;

/**
 * Store and format the various temporal index keys.
 * Row keys are in these two forms, where [x] denotes x is optional:
 *      rowkey = constraintPrefix datetime
 *      rowkey = datetime 0x/00 uniquesuffix
 *      constraintPrefix = 0x/00 hash([subject][predicate])
 *      uniquesuffix = some bytes to make it unique, like hash(statement).
 *
 * The instance is in one of two modes depending on the constructor:
 *     storage mode -- construct with a triple statement, get an iterator of keys to store.
 *     query mode   -- construct with a statement and query constraints, get the key prefix to search.
 *
 * This has the flavor of an immutable object.
 * This is independent of the underlying database engine.
 *
 * @author David.Lotts
 *
 */
public class KeyParts implements Iterable<KeyParts> {
    private static final String CQ_S_P_AT = "spo";
    private static final String CQ_P_AT = "po";
    private static final String CQ_S_AT = "so";
    private static final String CQ_O_AT = "o";
    public static final String CQ_BEGIN = "begin";
    public static final String CQ_END = "end";

    public static final byte[] HASH_PREFIX = new byte[] {0};
    public static final byte[] HASH_PREFIX_FOLLOWING = new byte[] {1};

    public final Text cf;
    public final Text cq;
    public final Text constraintPrefix; // subject and/or predicate
    final Text storeKey; // subject and/or predicate
    final private TemporalInstant instant;
    final private Statement statement;
    final private boolean queryMode;

    /**
     * Query-mode instance: carries a constraint prefix and an instant, but no statement and no store key.
     */
    KeyParts(final Text constraintPrefix, final TemporalInstant instant, final String cf, final String cq) {
        queryMode = true; // query mode
        storeKey = null;
        statement = null;
        this.constraintPrefix = constraintPrefix;
        this.instant = instant;
        this.cf = new Text(cf);
        this.cq = new Text(cq);
    }

    /**
     * This is the value to index: the serialized statement as UTF-8 bytes.
     * @return the value to store
     */
    public Value getValue() {
        assert statement!=null;
        return new Value(StringUtils.getBytesUtf8(StatementSerializer.writeStatement(statement)));
    }

    /**
     * Store-mode instance for a statement and its instant. It holds no key
     * itself; iterate over it to obtain the keys to store.
     */
    public KeyParts(final Statement statement, final TemporalInstant instant2) {
        queryMode = false; // store mode
        storeKey = null;
        constraintPrefix = null;
        this.statement = statement;
        instant = instant2;
        cf = null;
        cq = null;
    }

    /**
     * Store-mode instance holding one finished row key; these are what {@link #iterator()} yields.
     * Note that such an instance has no instant.
     */
    private KeyParts(final Text keyText, final Text cf, final Text cq, final Statement statement) {
        queryMode = false; // store mode
        constraintPrefix = null;
        this.statement = statement;
        instant = null;
        storeKey = keyText;
        this.cf = cf;
        this.cq = cq;
    }

    /**
     * Yields one store key per indexing strategy, in the order o, spo, po, so.
     *
     * @return the key iterator; {@code null} in query mode when assertions are disabled
     */
    @Override
    public Iterator<KeyParts> iterator() {
        final String[] strategies = new String[] {
                CQ_O_AT, CQ_S_P_AT, CQ_P_AT, CQ_S_AT
        } ;  // CQ_END?
        assert !queryMode : "iterator for queryMode is not immplemented" ;
        if (queryMode) {
            // NOTE(review): returning null breaks for-each loops; kept because callers may rely on it.
            return null;
        }

        // if (!queryMode)
        return new Iterator<KeyParts>() {
            int nextStrategy = 0;

            @Override
            public boolean hasNext() {
                return nextStrategy < strategies.length;
            }

            @Override
            public KeyParts next() {
                // Follow the Iterator contract instead of throwing java.lang.Error,
                // and do not advance the counter once the strategies are used up.
                if (!hasNext()) {
                    throw new java.util.NoSuchElementException("No such nextStrategy=" + nextStrategy);
                }
                assert(statement!=null);
                Text keyText = new Text();
                // increment++ the next strategy AFTER getting the value
                switch (nextStrategy++) {
                case 0: // index o+hash(p+s)
                    assert (CQ_O_AT.equals(strategies[0]));
                    keyText = new Text(instant.getAsKeyBytes());
                    KeyParts.appendUniqueness(statement, keyText);
                    return new KeyParts(keyText, new Text(StatementSerializer.writeContext(statement)), new Text(CQ_O_AT), statement);
                case 1:// index hash(s+p)+o
                    assert (CQ_S_P_AT.equals(strategies[1]));
                    KeyParts.appendSubjectPredicate(statement, keyText);
                    KeyParts.appendInstant(instant, keyText);
                    // appendUniqueness -- Not needed since it is already unique.
                    return new KeyParts(keyText, new Text(StatementSerializer.writeContext(statement)), new Text(CQ_S_P_AT), statement);
                case 2: // index hash(p)+o
                    assert (CQ_P_AT.equals(strategies[2]));
                    KeyParts.appendPredicate(statement, keyText);
                    KeyParts.appendInstant(instant, keyText);
                    KeyParts.appendUniqueness(statement, keyText);
                    return new KeyParts(keyText, new Text(StatementSerializer.writeContext(statement)), new Text(CQ_P_AT), statement);
                case 3: // index hash(s)+o
                    assert (CQ_S_AT.equals(strategies[3]));
                    KeyParts.appendSubject(statement, keyText);
                    KeyParts.appendInstant(instant, keyText);
                    KeyParts.appendUniqueness(statement, keyText);
                    return new KeyParts(keyText, new Text(StatementSerializer.writeContext(statement)), new Text(CQ_S_AT), statement);
                default:
                    // Only reachable if a strategy is added to the array without a matching case.
                    throw new IllegalStateException("No such nextStrategy=" + (nextStrategy - 1));
                }
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException("Remove not Implemented.");
            }
        };
    }

    /**
     * @return a copy of the store key bytes; only meaningful on instances obtained from {@link #iterator()}
     */
    public byte[] getStoreKey() {
        assert !queryMode : "must be in store Mode, store keys are not initialized.";
        return storeKey.copyBytes();
    }

    /**
     * Query key is the prefix plus the datetime, but no uniqueness at the end.
     * @return the row key for range queries.
     */
    public Text getQueryKey() {
        return getQueryKey(instant);
    }

    /**
     * Query key is the prefix plus the datetime, but no uniqueness at the end.
     *
     * @param theInstant instant to append after the constraint prefix
     * @return the row key for range queries.
     */
    public Text getQueryKey(final TemporalInstant theInstant) {
        assert queryMode : "must be in query Mode, query keys are not initialized.";
        final Text keyText = new Text();
        if (constraintPrefix != null) {
            appendBytes(constraintPrefix.copyBytes(), keyText);
        }
        appendInstant(theInstant, keyText);
        return keyText;
    }

    @Override
    public String toString() {
        // Store-key instances have no instant; print "{null}" for them instead of failing.
        final byte[] instantBytes = (instant == null) ? null : instant.getAsKeyBytes();
        return "KeyParts [contraintPrefix=" + toHumanString(constraintPrefix) + ", instant=" + toHumanString(instantBytes) + ", cf=" + cf + ", cq=" + cq + "]";
    }

    /** Appends a zero byte followed by the hash of the serialized subject. */
    private static void appendSubject(final Statement statement, final Text keyText) {
        // Encode as UTF-8 like every other key part; the platform default charset
        // would make keys for non-ASCII subjects depend on the JVM's locale settings.
        final Value statementValue = new Value(StringUtils.getBytesUtf8(StatementSerializer.writeSubject(statement)));
        final byte[] hashOfValue = uniqueFromValueForKey(statementValue);
        appendBytes(HASH_PREFIX, keyText); // prefix the hash with a zero byte.
        appendBytes(hashOfValue, keyText);
    }

    /** Appends a zero byte followed by the hash of the serialized predicate. */
    private static void appendPredicate(final Statement statement, final Text keyText) {
        final Value statementValue = new Value(StringUtils.getBytesUtf8(StatementSerializer.writePredicate(statement)));
        final byte[] hashOfValue = uniqueFromValueForKey(statementValue);
        appendBytes(HASH_PREFIX, keyText); // prefix the hash with a zero byte.
        appendBytes(hashOfValue, keyText);
    }

    /** Appends the key bytes of the instant. */
    private static void appendInstant(final TemporalInstant instant, final Text keyText) {
        final byte[] bytes = instant.getAsKeyBytes();
        appendBytes(bytes, keyText);
    }

    /** Appends a zero byte followed by the hash of the serialized subject and predicate. */
    private static void appendSubjectPredicate(final Statement statement, final Text keyText) {
        final Value statementValue = new Value(StringUtils.getBytesUtf8(StatementSerializer.writeSubjectPredicate(statement)));
        final byte[] hashOfValue = uniqueFromValueForKey(statementValue);
        appendBytes(HASH_PREFIX, keyText); // prefix the hash with a zero byte.
        appendBytes(hashOfValue, keyText);
    }

    /**
     * Append any byte array to a row key.
     * @param bytes append this
     * @param keyText text to append to
     */
    private static void appendBytes(final byte[] bytes, final Text keyText) {
        keyText.append(bytes, 0, bytes.length);
    }

    /**
     * Get a collision unlikely hash string and append to the key,
     * so that if two keys have the same value, then they will be the same,
     * if two different values occur at the same time, their keys are different.
     * If the application uses a very large number of statements at the exact same time,
     * the md5 value might be upgraded to for example sha-1 to avoid collisions.
     * @param statement statement whose serialized form is hashed
     * @param keyText key to append the delimiter and hash to
     */
    public static void appendUniqueness(final Statement statement, final Text keyText) {
        keyText.append(HASH_PREFIX, 0, 1);   // delimiter
        final Value statementValue = new Value(StringUtils.getBytesUtf8(StatementSerializer.writeStatement(statement)));
        final byte[] hashOfValue = Md5Hash.md5Binary(statementValue);
        keyText.append(hashOfValue, 0, hashOfValue.length);
    }

    /**
     * Get a collision unlikely hash string to append to the key,
     * so that if two keys have the same value, then they will be the same,
     * if two different values occur at the same time, their keys are different.
     * @param value value to hash
     * @return the MD5 digest of the value's bytes
     */
    private static byte[] uniqueFromValueForKey(final Value value) {
        return Md5Hash.md5Binary(value);
    }

    /**
     * List all the index keys to find for any query. Set the strategy via the column qualifier, ex: CQ_S_P_AT.
     * Column Family (CF) is the context/named-graph.
     * @param queryInstant instant carried by every returned key
     * @param contraints subject, predicate and context restrictions of the query
     * @return one query-mode key per predicate, or a single key when there are no predicates
     */
    static public List<KeyParts> keyPartsForQuery(final TemporalInstant queryInstant, final StatementConstraints contraints) {
        final List<KeyParts> keys = new LinkedList<KeyParts>();
        final URI urlNull = new URIImpl("urn:null");
        final Resource currentContext = contraints.getContext();
        final boolean hasSubj = contraints.hasSubject();
        if (contraints.hasPredicates()) {
            for (final URI nextPredicate : contraints.getPredicates()) {
                final Text contraintPrefix  = new Text();
                final Statement statement = new ContextStatementImpl(hasSubj ? contraints.getSubject() : urlNull, nextPredicate, urlNull, contraints.getContext());
                if (hasSubj) {
                    appendSubjectPredicate(statement, contraintPrefix);
                } else {
                    appendPredicate(statement, contraintPrefix);
                }
                keys.add(new KeyParts(contraintPrefix, queryInstant, (currentContext==null)?"":currentContext.toString(), hasSubj?CQ_S_P_AT:CQ_P_AT  ));
            }
        }
        else if (contraints.hasSubject()) { // and no predicates
            final Text contraintPrefix = new Text();
            final Statement statement = new StatementImpl(contraints.getSubject(), urlNull, urlNull);
            appendSubject(statement, contraintPrefix);
            keys.add( new KeyParts(contraintPrefix, queryInstant, (currentContext==null)?"":currentContext.toString(), CQ_S_AT) );
        }
        else {
            // No constraints except possibly a context/named-graph, handled by the CF
            keys.add( new KeyParts(null, queryInstant, (currentContext==null)?"":currentContext.toString(), CQ_O_AT) );
        }
        return keys;
    }

    /**
     * Convert a non-utf8 byte[] and text and value to string and show unprintable bytes as {xx} where x is hex.
     * @param value value to render; may be null
     * @return Human readable representation.
     */
    public static String toHumanString(final Value value) {
        return toHumanString(value==null?null:value.get());
    }

    /** Same rendering for a {@link Text}; a null text renders as "{null}". */
    public static String toHumanString(final Text text) {
        return toHumanString(text==null?null:text.copyBytes());
    }

    /**
     * Renders bytes 32..0x7e as characters, wraps literal braces as {{} and {}},
     * and shows every other byte as {hex}. A null array renders as "{null}".
     */
    public static String toHumanString(final byte[] bytes) {
        if (bytes==null) {
            return "{null}";
        }
        final StringBuilder sb = new StringBuilder();
        for (final byte b : bytes) {
            if ((b > 0x7e) || (b < 32)) {
                sb.append("{");
                sb.append(Integer.toHexString( b & 0xff )); // Lop off the sign extended ones.
                sb.append("}");
            } else if (b == '{'||b == '}') { // Escape the literal braces.
                sb.append("{");
                sb.append((char)b);
                sb.append("}");
            } else {
                sb.append((char)b);
            }
        }
        return sb.toString();
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/Md5Hash.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/Md5Hash.java b/extras/indexing/src/main/java/org/apache/rya/indexing/Md5Hash.java new file mode 100644 index 0000000..0e83822 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/Md5Hash.java @@ -0,0 +1,44 @@ +package mvm.rya.indexing; + +import org.apache.accumulo.core.data.Value; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.StringUtils;
import org.apache.commons.codec.digest.DigestUtils;

/**
 * Helpers that compute MD5 digests. A raw MD5 digest is 16 bytes (32 hex
 * characters); the Base64 variants below encode those 16 bytes with the
 * URL-safe alphabet, giving a shorter printable form of 22 characters.
 */
public class Md5Hash {

    /**
     * Digests the given bytes with MD5 and encodes the digest as URL-safe Base64.
     *
     * @param data the bytes to digest
     * @return the encoded digest
     */
    public static String md5Base64(final byte[] data) {
        final byte[] digest = DigestUtils.md5(data);
        return Base64.encodeBase64URLSafeString(digest);
    }

    /**
     * Digests the UTF-8 bytes of the given string with MD5 and encodes the
     * digest as URL-safe Base64.
     *
     * @param string the text to digest
     * @return the encoded digest
     */
    public static String md5Base64(final String string) {
        final byte[] utf8 = StringUtils.getBytesUtf8(string);
        return md5Base64(utf8);
    }

    /**
     * Digests the bytes held by an Accumulo {@link Value} with MD5.
     *
     * @param value the value whose bytes are digested
     * @return the raw digest bytes
     */
    public static byte[] md5Binary(final Value value) {
        final byte[] content = value.get();
        return DigestUtils.md5(content);
    }
}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/SearchFunction.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/SearchFunction.java b/extras/indexing/src/main/java/org/apache/rya/indexing/SearchFunction.java new file mode 100644 index 0000000..6a19ee0 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/SearchFunction.java @@ -0,0 +1,45 @@ +package mvm.rya.indexing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */

import info.aduna.iteration.CloseableIteration;
import org.openrdf.model.Statement;
import org.openrdf.query.QueryEvaluationException;

/**
 * Contract for a single search operation against an index.
 */
public interface SearchFunction {

    /**
     * Looks up the given terms in the indices and yields the {@link Statement}s
     * that also satisfy the supplied {@link StatementConstraints}.
     *
     * @param searchTerms
     *            the terms to search for
     * @param contraints
     *            restrictions that every returned {@link Statement} must meet
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException
     *             declared for implementations that fail while evaluating the search
     */
    CloseableIteration<Statement, QueryEvaluationException> performSearch(String searchTerms, StatementConstraints contraints)
            throws QueryEvaluationException;

}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/SearchFunctionFactory.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/SearchFunctionFactory.java b/extras/indexing/src/main/java/org/apache/rya/indexing/SearchFunctionFactory.java new file mode 100644 index 0000000..719cc2f --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/SearchFunctionFactory.java @@ -0,0 +1,71 @@ +package mvm.rya.indexing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */

import java.util.Map;

import org.apache.log4j.Logger;
import org.openrdf.model.URI;
import org.openrdf.query.QueryEvaluationException;

import com.google.common.collect.Maps;

/**
 * Base class for factories that resolve a function {@link URI} to a
 * {@link SearchFunction}.
 * <p>
 * NOTE(review): the lookup map is private and nothing in this class adds
 * entries to it, so as written every lookup fails; confirm how subclasses are
 * expected to register their functions.
 */
public abstract class SearchFunctionFactory {

    private static final Logger logger = Logger.getLogger(SearchFunctionFactory.class);

    private final Map<URI, SearchFunction> SEARCH_FUNCTION_MAP = Maps.newHashMap();

    /**
     * Get the {@link SearchFunction} registered for a given URI.
     *
     * @param searchFunction
     *            the URI identifying the function
     * @return the registered function, or <code>null</code> when no function is
     *         known for the URI (the failure is logged as a warning)
     */
    public SearchFunction getSearchFunction(final URI searchFunction) {
        try {
            return getSearchFunctionInternal(searchFunction);
        } catch (QueryEvaluationException e) {
            // Callers receive null for unknown functions; report through the
            // class logger rather than dumping the stack trace to stderr.
            logger.warn("Could not resolve search function: " + searchFunction, e);
            return null;
        }
    }

    /**
     * Looks the URI up in the function map.
     *
     * @param searchFunction
     *            the URI identifying the function
     * @return the registered function, never <code>null</code>
     * @throws QueryEvaluationException
     *             when no function is registered for the URI
     */
    private SearchFunction getSearchFunctionInternal(final URI searchFunction) throws QueryEvaluationException {
        final SearchFunction sf = SEARCH_FUNCTION_MAP.get(searchFunction);
        if (sf != null) {
            return sf;
        }
        // A null URI must not turn the "unknown function" report into a NullPointerException.
        final String name = (searchFunction == null) ? "null" : searchFunction.stringValue();
        throw new QueryEvaluationException("Unknown Search Function: " + name);
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/StatementConstraints.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/StatementConstraints.java b/extras/indexing/src/main/java/org/apache/rya/indexing/StatementConstraints.java new file mode 100644 index 0000000..e8f1d4e --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/StatementConstraints.java @@ -0,0 +1,73 @@ +package mvm.rya.indexing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */

import java.util.Set;

import org.openrdf.model.Resource;
import org.openrdf.model.URI;

/**
 * Mutable holder for the optional restrictions placed on statements: a
 * context, a subject and a set of predicates. Each part is unset
 * (<code>null</code>) until its setter is called, and the setters return this
 * object so calls can be chained.
 */
public class StatementConstraints {
    private Resource context;
    private Resource subject;
    private Set<URI> predicates;

    /** @return the context restriction, or <code>null</code> when none is set */
    public Resource getContext() {
        return context;
    }

    /** @return true when a context restriction is set */
    public boolean hasContext() {
        return context != null;
    }

    /**
     * @param context the context restriction, or <code>null</code> for none
     * @return this object, for chaining
     */
    public StatementConstraints setContext(Resource context) {
        this.context = context;
        return this;
    }

    /** @return the subject restriction, or <code>null</code> when none is set */
    public Resource getSubject() {
        return subject;
    }

    /** @return true when a subject restriction is set */
    public boolean hasSubject() {
        return subject != null;
    }

    /**
     * @param subject the subject restriction, or <code>null</code> for none
     * @return this object, for chaining
     */
    public StatementConstraints setSubject(Resource subject) {
        this.subject = subject;
        return this;
    }

    /** @return the predicate set exactly as it was supplied, possibly <code>null</code> or empty */
    public Set<URI> getPredicates() {
        return predicates;
    }

    /** @return true when the predicate set is neither <code>null</code> nor empty */
    public boolean hasPredicates() {
        if (predicates == null) {
            return false;
        }
        return !predicates.isEmpty();
    }

    /**
     * @param predicates the predicate restrictions; the set is stored by reference, not copied
     * @return this object, for chaining
     */
    public StatementConstraints setPredicates(Set<URI> predicates) {
        this.predicates = predicates;
        return this;
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/StatementSerializer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/StatementSerializer.java b/extras/indexing/src/main/java/org/apache/rya/indexing/StatementSerializer.java new file mode 100644 index 0000000..107f69d --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/StatementSerializer.java @@ -0,0 +1,225 @@ +package mvm.rya.indexing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.Validate;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.ContextStatementImpl;
import org.openrdf.model.impl.StatementImpl;
import org.openrdf.model.impl.ValueFactoryImpl;

/**
 * A set of Utilities to serialize {@link Statement}s to/from {@link String}s.
 * <p>
 * The serialized form is <code>context SEP subject SEP predicate SEP object</code>,
 * where SEP is the NUL character and the context part is empty for statements
 * without a context. Each part is the <code>toString()</code> of the value.
 */
public class StatementSerializer {
    private static final String SEP = "\u0000";

    private static final ValueFactory VALUE_FACTORY = new ValueFactoryImpl();

    /**
     * Read a {@link Statement} from a {@link String}
     *
     * @param in
     *            the {@link String} to parse, in the form produced by {@link #writeStatement(Statement)}
     * @return a {@link Statement}
     * @throws IOException
     *             when the input does not split into exactly four parts
     */
    public static Statement readStatement(String in) throws IOException {
        String[] parts = in.split(SEP);

        if (parts.length != 4) {
            throw new IOException("Not a valid statement: " + in);
        }

        String contextString = parts[0];
        String subjectString = parts[1];
        String predicateString = parts[2];
        String objectString = parts[3];
        return readStatement(subjectString, predicateString, objectString, contextString);
    }

    /**
     * Build a {@link Statement} without a context from its serialized parts.
     *
     * @param subjectString
     *            the serialized subject
     * @param predicateString
     *            the serialized predicate
     * @param objectString
     *            the serialized object
     * @return a {@link Statement}
     */
    public static Statement readStatement(String subjectString, String predicateString, String objectString) {
        return readStatement(subjectString, predicateString, objectString, "");
    }

    /**
     * Build a {@link Statement} from its serialized parts. An object that
     * starts with a double quote is parsed as a literal; any other object is
     * treated as a resource.
     *
     * @param subjectString
     *            the serialized subject
     * @param predicateString
     *            the serialized predicate
     * @param objectString
     *            the serialized object
     * @param contextString
     *            the serialized context; <code>null</code> or empty means no context
     * @return a {@link Statement}
     */
    public static Statement readStatement(String subjectString, String predicateString, String objectString, String contextString) {
        Resource subject = createResource(subjectString);
        URI predicate = VALUE_FACTORY.createURI(predicateString);

        Value object;
        if (objectString.startsWith("\"")) {
            object = parseLiteral(objectString);
        } else {
            object = createResource(objectString);
        }

        if (contextString == null || contextString.isEmpty()) {
            return new StatementImpl(subject, predicate, object);
        }
        Resource context = VALUE_FACTORY.createURI(contextString);
        return new ContextStatementImpl(subject, predicate, object, context);
    }

    /**
     * Turn a serialized resource back into a {@link Resource}. A string that
     * starts with an underscore becomes a blank node whose id is the text after
     * the first two characters; anything else becomes a URI.
     */
    private static Resource createResource(String str) {
        if (str.startsWith("_")) {
            return VALUE_FACTORY.createBNode(str.substring(2));
        }
        return VALUE_FACTORY.createURI(str);
    }

    /**
     * Parse a serialized literal: <code>"label"</code>,
     * <code>"label"@lang</code> or <code>"label"^^&lt;datatype&gt;</code>.
     *
     * @return the literal, or <code>null</code> when the text after the closing
     *         quote is neither a language tag nor a datatype
     */
    private static Literal parseLiteral(String fullLiteralString) {
        Validate.notNull(fullLiteralString);
        Validate.isTrue(fullLiteralString.length() > 1);

        if (fullLiteralString.endsWith("\"")) {
            // plain literal: strip the surrounding quotes
            String fullLiteralWithoutQuotes = fullLiteralString.substring(1, fullLiteralString.length() - 1);
            return VALUE_FACTORY.createLiteral(fullLiteralWithoutQuotes, (String) null);
        }

        // find the closing quote
        int labelEnd = fullLiteralString.lastIndexOf("\"");
        String label = fullLiteralString.substring(1, labelEnd);
        String data = fullLiteralString.substring(labelEnd + 1);

        if (data.startsWith("@")) {
            // the data is "language"
            String lang = data.substring(1);
            return VALUE_FACTORY.createLiteral(label, lang);
        }
        if (data.startsWith("^^<")) {
            // the data is a "datatype"; drop the leading ^^< and the trailing >
            String datatype = data.substring(3, data.length() - 1);
            URI datatypeUri = VALUE_FACTORY.createURI(datatype);
            return VALUE_FACTORY.createLiteral(label, datatypeUri);
        }
        return null;
    }

    /** @return the <code>toString()</code> of the statement's subject */
    public static String writeSubject(Statement statement) {
        return statement.getSubject().toString();
    }

    /** @return the <code>toString()</code> of the statement's object */
    public static String writeObject(Statement statement) {
        return statement.getObject().toString();
    }

    /** @return the <code>toString()</code> of the statement's predicate */
    public static String writePredicate(Statement statement) {
        return statement.getPredicate().toString();
    }

    /**
     * @return the subject and predicate joined by the separator; the statement,
     *         its subject and its predicate must not be <code>null</code>
     */
    public static String writeSubjectPredicate(Statement statement) {
        Validate.notNull(statement);
        Validate.notNull(statement.getSubject());
        Validate.notNull(statement.getPredicate());
        return statement.getSubject().toString() + SEP + statement.getPredicate().toString();
    }

    /** @return the <code>toString()</code> of the statement's context, or the empty string when it has none */
    public static String writeContext(Statement statement) {
        if (statement.getContext() == null) {
            return "";
        }
        return statement.getContext().toString();
    }

    /**
     * Write a {@link Statement} to a {@link String}
     *
     * @param statement
     *            the {@link Statement} to write
     * @return a {@link String} representation of the statement
     */
    public static String writeStatement(Statement statement) {
        Resource subject = statement.getSubject();
        Resource context = statement.getContext();
        URI predicate = statement.getPredicate();
        Value object = statement.getObject();

        Validate.notNull(subject);
        Validate.notNull(predicate);
        Validate.notNull(object);

        // A missing context is written as an empty leading part.
        String contextPart = (context == null) ? "" : context.toString();
        return contextPart + SEP + subject.toString() + SEP + predicate.toString() + SEP + object.toString();
    }

    /**
     * Creates a Regular Expression to match serialized statements meeting these constraints. A <code>null</code> or empty constraint
     * implies no restriction on that part. Constraint values are matched literally: they are quoted so that characters such as
     * <code>.</code>, <code>?</code> or <code>+</code> inside a URI are not interpreted as regular expression operators, and they
     * are rendered with <code>toString()</code>, the same rendering {@link #writeStatement(Statement)} uses.
     *
     * @param contraints
     *            the context, subject and predicate constraints
     * @return a regular expression that can be used to match serialized statements. A <code>null</code> return value implies no
     *         constraints.
     */
    public static String createStatementRegex(StatementConstraints contraints) {
        Resource context = contraints.getContext();
        Resource subject = contraints.getSubject();
        Set<URI> predicates = contraints.getPredicates();
        boolean noPredicates = (predicates == null || predicates.isEmpty());
        if (context == null && subject == null && noPredicates) {
            return null;
        }

        // match on anything but a separator
        String anyReg = "[^" + SEP + "]*";

        // if context is empty, match on any context
        String contextReg = (context == null) ? anyReg : Pattern.quote(context.toString());

        // if subject is empty, match on any subject
        String subjectReg = (subject == null) ? anyReg : Pattern.quote(subject.toString());

        // if the predicates are empty, match on any predicate. Otherwise, "or" the predicates.
        String predicateReg;
        if (noPredicates) {
            predicateReg = anyReg;
        } else {
            List<String> quotedPredicates = new ArrayList<String>(predicates.size());
            for (URI predicate : predicates) {
                quotedPredicates.add(Pattern.quote(predicate.toString()));
            }
            predicateReg = "(" + StringUtils.join(quotedPredicates, "|") + ")";
        }

        return "^" + contextReg + SEP + subjectReg + SEP + predicateReg + SEP + ".*";
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalIndexer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalIndexer.java b/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalIndexer.java new file mode 100644 index 0000000..3f6858c --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalIndexer.java @@ -0,0 +1,166 @@ +package mvm.rya.indexing; + +import org.openrdf.model.Statement; +import org.openrdf.query.QueryEvaluationException; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +import info.aduna.iteration.CloseableIteration; +import mvm.rya.api.persist.index.RyaSecondaryIndexer; + +/** + * A repository to store, index, and retrieve {@link Statement}s based on time.
+ * Instants: + * Instant {before, equals, after} Instant + * Instant {before, after, inside} Interval + * Instant {hasBeginning, hasEnd} Interval + * + * OWL-Time provides the interval relations: + * <pre> + * intervalEquals, + * intervalBefore, + * intervalMeets, + * intervalOverlaps, + * intervalStarts, + * intervalDuring, + * intervalFinishes, + * + * and their reverse interval relations: + * intervalAfter, + * intervalMetBy, + * intervalOverlappedBy, + * intervalStartedBy, + * intervalContains, + * intervalFinishedBy. + * + * from Allen paper in 1983 + * + * Relation Y Symbol Inverse Y + * before Y < > X + * equal Y = = X + * meets Y m mi X + * overlaps Y o oi X + * during Y d di X + * starts Y s si X + * finishes Y f fi X + * </pre> + * + */

public interface TemporalIndexer extends RyaSecondaryIndexer {

    /* consider ParseException here */

    /*-
     * The queries.
     *
     * Instant versions:
     *   format: x {relation} y
     *   read:   given literal y, find all statements where the date object x is ( x relation y )
     *     Instant {before, equals, after} Instant
     *     Instant {before, after, inside} Interval
     *     Instant {hasBeginning, hasEnd} Interval
     *
     * Interval versions, the Allen interval relations described above:
     *     intervalEquals,
     *     intervalBefore,
     *     intervalMeets,
     *     intervalOverlaps,
     *     intervalStarts,
     *     intervalDuring,
     *     intervalFinishes
     *   and then the inverses, including after.
     */

    /**
     * Instant equals instant: statements whose date object equals the given
     * instant and that meet the {@link StatementConstraints}.
     *
     * @param queryInstant
     *            the instant to compare against
     * @param contraints
     *            the {@link StatementConstraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantEqualsInstant(
            TemporalInstant queryInstant, StatementConstraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant before instant: statements whose date object is before the given
     * instant and that meet the {@link StatementConstraints}.
     *
     * @param queryInstant
     *            the instant to compare against
     * @param contraints
     *            the {@link StatementConstraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantBeforeInstant(
            TemporalInstant queryInstant, StatementConstraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant after instant: statements whose date object is after the given
     * instant and that meet the {@link StatementConstraints}.
     *
     * @param queryInstant
     *            the instant to compare against
     * @param contraints
     *            the {@link StatementConstraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantAfterInstant(
            TemporalInstant queryInstant, StatementConstraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant before interval: statements whose date object is before the
     * given interval and that meet the {@link StatementConstraints}.
     *
     * @param givenInterval
     *            the interval to compare against
     * @param contraints
     *            the {@link StatementConstraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantBeforeInterval(
            TemporalInterval givenInterval, StatementConstraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant after interval: statements whose date object is after the given
     * interval and that meet the {@link StatementConstraints}.
     *
     * @param givenInterval
     *            the interval to compare against
     * @param contraints
     *            the {@link StatementConstraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantAfterInterval(
            TemporalInterval givenInterval, StatementConstraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant inside interval: statements whose date object is inside the
     * given interval and that meet the {@link StatementConstraints}.
     *
     * @param givenInterval
     *            the interval to compare against
     * @param contraints
     *            the {@link StatementConstraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantInsideInterval(
            TemporalInterval givenInterval, StatementConstraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant hasBeginning interval: the "hasBeginning" relation between a
     * statement's date object and the given interval, restricted by the
     * {@link StatementConstraints}.
     *
     * @param queryInterval
     *            the interval to compare against
     * @param contraints
     *            the {@link StatementConstraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantHasBeginningInterval(
            TemporalInterval queryInterval, StatementConstraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant hasEnd interval: the "hasEnd" relation between a statement's
     * date object and the given interval, restricted by the
     * {@link StatementConstraints}.
     *
     * @param queryInterval
     *            the interval to compare against
     * @param contraints
     *            the {@link StatementConstraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantHasEndInterval(
            TemporalInterval queryInterval, StatementConstraints contraints)
            throws QueryEvaluationException;

    /**
     * Interval equals: statements holding a time that is equal to the queried
     * {@link TemporalInterval} and that meet the {@link StatementConstraints}.
     *
     * @param query
     *            the queried interval
     * @param contraints
     *            the {@link StatementConstraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException
     */
    CloseableIteration<Statement, QueryEvaluationException> queryIntervalEquals(
            TemporalInterval query, StatementConstraints contraints)
            throws QueryEvaluationException;

    /**
     * Interval before: statements holding a time that is before the queried
     * {@link TemporalInterval} and that meet the {@link StatementConstraints}.
     *
     * @param query
     *            the queried interval
     * @param contraints
     *            the {@link StatementConstraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException
     */
    CloseableIteration<Statement, QueryEvaluationException> queryIntervalBefore(
            TemporalInterval query, StatementConstraints contraints)
            throws QueryEvaluationException;

    /**
     * Interval after: statements holding a time that is after the queried
     * {@link TemporalInterval} and that meet the {@link StatementConstraints}.
     *
     * @param query
     *            the queried interval
     * @param contraints
     *            the {@link StatementConstraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException
     */
    CloseableIteration<Statement, QueryEvaluationException> queryIntervalAfter(
            TemporalInterval query, StatementConstraints contraints)
            throws QueryEvaluationException;
}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalInstant.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalInstant.java b/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalInstant.java new file mode 100644 index 0000000..f4e6d95 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalInstant.java @@ -0,0 +1,83 @@ +package mvm.rya.indexing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */

import java.io.Serializable;

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

/**
 * A point in time, used as the begin and end of a {@link TemporalInterval}.
 *
 * <p>Implementations: because TemporalInterval refers only to this interface
 * for its begin and end, an implementation should offer a factory method that
 * builds intervals from its own instants, for example
 * <code>public static TemporalInterval parseInterval(String dateTimeInterval)</code>.
 *
 * <p>The following are notes and may not have been implemented.
 *
 * <pre>
 * = rfc3339
 *   https://www.ietf.org/rfc/rfc3339.txt
 *   a subset of ISO-8601
 *   YYYY-MM-DDThh:mm:ss.fffZ
 *   Limits:
 *     All dates and times are assumed to be in the "current era",
 *     somewhere between 0000AD and 9999AD.
 *   resolution: to the second, or millisecond if the optional fraction is used.
 *
 * = epoch
 *   32bit or 64bit integer specifying the number of seconds since a standard date-time (1970)
 *   32bit is good until 2038.
 *   64bit is good until after the heat death of our universe
 * </pre>
 */
public interface TemporalInstant extends Comparable<TemporalInstant>, Serializable {

    // equals, compareTo and hashCode are restated here so that implementations
    // are reminded to define all three together.

    @Override
    boolean equals(Object obj);

    @Override
    int compareTo(TemporalInstant o);

    @Override
    int hashCode();

    /**
     * @return the date as a byte array for use in a key
     */
    byte[] getAsKeyBytes();

    /**
     * @return the date as a String for use in a key
     */
    String getAsKeyString();

    /**
     * @param tz the time zone to report the date in
     * @return the date in human readable form for reporting, in the given time zone
     */
    String getAsReadable(DateTimeZone tz);

    /**
     * @return the date in human readable form for reporting; the time zone is implementation specific
     */
    String getAsReadable();

    /**
     * @return the date as a Joda {@link DateTime}
     */
    DateTime getAsDateTime();

}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalInstantRfc3339.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalInstantRfc3339.java b/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalInstantRfc3339.java new file mode 100644 index 0000000..f47bb92 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalInstantRfc3339.java @@ -0,0 +1,219 @@ +/** + * + */ +package mvm.rya.indexing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.codec.binary.StringUtils;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;

/**
 * Immutable date and time instance returning a human readable key.
 * Preserves the Time zone, but not stored in the key.
 * Converts fields (hours, etc) correctly for tz=Zulu when stored,
 * so the original timezone is not preserved when retrieved.
 *
 * Uses rfc 3339, which looks like: YYYY-MM-DDThh:mm:ssZ a subset
 * of ISO-8601 : https://www.ietf.org/rfc/rfc3339.txt
 *
 * Limits: All dates and times are assumed to be in the "current era", no BC,
 * somewhere between 0000AD and 9999AD.
 *
 * Resolution: keys are produced with {@link #FORMATTER}, which has no
 * milliseconds field. Equality, hashing and ordering are all defined on the
 * key string.
 *
 * This is really a wrapper for Joda DateTime. if you need functionality from
 * that wonderful class, simply use t.getAsDateTime().
 *
 */
public class TemporalInstantRfc3339 implements TemporalInstant {

    private static final long serialVersionUID = -7790000399142290309L;

    private final DateTime dateTime;
    /**
     * Format key like this: YYYY-MM-DDThh:mm:ssZ
     */
    public final static DateTimeFormatter FORMATTER = ISODateTimeFormat.dateTimeNoMillis();

    /**
     * Matches an interval written as [dateTime1,dateTime2]; group 1 is the
     * text before the last comma inside the brackets, group 2 the text after it.
     */
    public static final Pattern PATTERN = Pattern.compile("\\[(.*)\\,(.*)\\].*");

    /**
     * New date assumed UTC time zone.
     *
     * @param year
     * @param month
     * @param day
     * @param hour
     * @param minute
     * @param second
     */
    public TemporalInstantRfc3339(final int year, final int month, final int day, final int hour, final int minute, final int second) {
        dateTime = new DateTime(year, month, day, hour, minute, second, DateTimeZone.UTC);
    }

    /**
     * Construct with a Joda DateTime;
     * TZ is preserved, but not in the key.
     *
     * @param datetime
     *            initialize with this date time. Converted to zulu time zone for key generation.
     */
    public TemporalInstantRfc3339(final DateTime datetime) {
        dateTime = datetime;
    }

    /**
     * Get an interval setting beginning and end with this implementation of {@link TemporalInstant}.
     * beginning must be less than end.
     *
     * @param dateTimeInterval String in the form [dateTime1,dateTime2]; whitespace around either date time is ignored
     * @return the parsed interval
     * @throws IllegalArgumentException when the string is not in the bracketed form
     */
    public static TemporalInterval parseInterval(final String dateTimeInterval) {

        final Matcher matcher = PATTERN.matcher(dateTimeInterval);
        if (matcher.find()) {
            // Got a date time pair, parse into an interval.
            // Trim each part so that "[a, b]" parses the same as "[a,b]".
            return new TemporalInterval(
                    new TemporalInstantRfc3339(new DateTime(matcher.group(1).trim())),
                    new TemporalInstantRfc3339(new DateTime(matcher.group(2).trim())));
        }
        throw new IllegalArgumentException("Can't parse interval, expecting '[ISO8601dateTime1,ISO8601dateTime2]', actual: "+dateTimeInterval);
    }

    /**
     * Orders instants by comparing their key strings.
     *
     * @return a negative number if this key sorts before the other key, 0 if
     *         the keys are equal, otherwise a positive number
     */
    @Override
    public int compareTo(final TemporalInstant that) {
        return getAsKeyString().compareTo(that.getAsKeyString());
    }

    /**
     * @return the UTF-8 bytes of {@link #getAsKeyString()}
     */
    @Override
    public byte[] getAsKeyBytes() {
        return StringUtils.getBytesUtf8(getAsKeyString());
    }

    /**
     * @return this instant converted to UTC and formatted with {@link #FORMATTER}
     */
    @Override
    public String getAsKeyString() {
        return dateTime.withZone(DateTimeZone.UTC).toString(FORMATTER);
    }

    /**
     * Readable string, formated local time at {@link DateTimeZone}.
     * If the timezone is UTC (Z), it was probably a key from the database.
     * If the server and client are in different Time zone, should probably use the client timezone.
     *
     * Time at specified time zone:
     * instant.getAsReadable(DateTimeZone.forID("-05:00")));
     * instant.getAsReadable(DateTimeZone.getDefault()));
     *
     * Use original time zone set in the constructor:
     * instant.getAsDateTime().toString(TemporalInstantRfc3339.FORMATTER));
     *
     */
    @Override
    public String getAsReadable(final DateTimeZone dateTimeZone) {
        return dateTime.withZone(dateTimeZone).toString(FORMATTER);
    }

    /**
     * Use original time zone set in the constructor, or UTC if from parsing the key.
     */
    @Override
    public String getAsReadable() {
        return dateTime.toString(FORMATTER);
    }

    /**
     * default toString, same as getAsReadable().
     */
    @Override
    public String toString() {
        return getAsReadable();
    }

    /**
     * @return the wrapped Joda {@link DateTime}, exactly as given to or built by the constructor
     */
    @Override
    public DateTime getAsDateTime() {
        return dateTime;
    }

    /**
     * Minimum Date, used for infinitely past.
     */
    private static final TemporalInstant MINIMUM = new TemporalInstantRfc3339(new DateTime(Long.MIN_VALUE));
    /**
     * maximum date/time is used for infinitely in the future.
     */
    private static final TemporalInstant MAXIMUM = new TemporalInstantRfc3339(new DateTime(Long.MAX_VALUE));

    /**
     * infinite past date.
     * NOTE(review): ordering is by key string, and this value lies outside the
     * 0000-9999 range the class documents; confirm it sorts as intended.
     *
     * @return the shared instant that stands for the infinitely distant past
     */
    public static TemporalInstant getMinimumInstance() {
        return MINIMUM;
    }

    /**
     * infinite future date.
     * NOTE(review): ordering is by key string, and this value lies outside the
     * 0000-9999 range the class documents; confirm it sorts as intended.
     *
     * @return the shared instant that stands for the infinitely distant future
     */
    public static TemporalInstant getMaximumInstance() {
        return MAXIMUM;
    }

    /**
     * Hash of the key string, consistent with {@link #equals(Object)}.
     *
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        return getAsKeyString().hashCode();
    }

    /**
     * Two instants of this class are equal when their key strings are equal.
     *
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        final TemporalInstantRfc3339 other = (TemporalInstantRfc3339) obj;
        return (getAsKeyString().equals(other.getAsKeyString()));
    }
}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/44a2dcf0/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalInterval.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalInterval.java b/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalInterval.java new file mode 100644 index 0000000..b23b99c --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/TemporalInterval.java @@ -0,0 +1,181 @@ +package mvm.rya.indexing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +import java.io.UnsupportedEncodingException; + +/** + * A time with beginning and end date and time, which could be indefinitely in + * the past or future. Immutable, so it's thread safe. For use in reading and + * writing from Rya's temporal indexing scheme. + * + */ +public class TemporalInterval implements Comparable<TemporalInterval> { + + // the beginning and end. Read-only because they are final references to immutable objects. + private final TemporalInstant hasBeginning; + private final TemporalInstant hasEnd; + + /** + * Separate the beginning and end with this. + * Used because Joda time library's interval uses this. + * TODO: Move this down to the TemporalInterval implementation. + * TODO: Then add a TemporalInterval.keyConcatenate(). + */ + public static final String DELIMITER = "/"; + +// /** +// * Empty constructor -- not allowed, no defaults. +// * For an infinite span of time: do it like this: +// * new TemporalInterval(TemporalInstantImpl.getMinimum, TemporalInstantImpl.getMaximum) +// */ +// public TemporalInterval() { +// hasBeginning = null; +// hasEnd = null; +// } + + /** + * Constructor setting beginning and end with an implementation of {@link TemporalInstant}. + * beginning must be less than end. 
+ * + * @param hasBeginning + * @param hasEnd + */ + public TemporalInterval(TemporalInstant hasBeginning, TemporalInstant hasEnd) { + super(); + if (hasBeginning != null && hasEnd != null && 0 < hasBeginning.compareTo(hasEnd)) + throw new IllegalArgumentException("The Beginning instance must not compare greater than the end."); + this.hasBeginning = hasBeginning; + this.hasEnd = hasEnd; + } + + /** + * @return the hasBeginning + */ + public TemporalInstant getHasBeginning() { + return hasBeginning; + } + + /** + * @return the hasEnd + */ + public TemporalInstant getHasEnd() { + return hasEnd; + } + + /** + * True if CompareTo() says equal (0) + */ + @Override + public boolean equals(Object other) { + return other instanceof TemporalInterval + && this.compareTo((TemporalInterval) other) == 0; + }; + + /** + * Compare beginnings, if the same then compare ends, or equal if beginnings equal and endings equal. + * Nulls represent infinity. + */ + @Override + public int compareTo(TemporalInterval other) { + int compBegins = this.hasBeginning.compareTo(other.hasBeginning); + if (0 == compBegins) + return this.hasEnd.compareTo(other.hasEnd); + else + return compBegins; + + } + + /** + * Hashcode for + */ + @Override + public int hashCode() { + if (hasBeginning == null) + if (hasEnd == null) + return 0; + else + return hasEnd.hashCode(); + else + return hashboth(this.hasBeginning.hashCode(), + this.hasEnd.hashCode()); + } + + /** + * Hashcode combining two string hashcodes. + */ + protected static int hashboth(int i1, int i2) { + // return (int) (( 1L * i1 * i2) ; % (1L + Integer.MAX_VALUE)); + // let the overflow happen. It won't throw an error. + return (i1 + i2); + } + + /** + * Get the key use for rowid for the beginning of the interval. Use ascii + * for conversion to catch and prevent multi-byte chars. 
+ * + * @return + */ + public byte[] getAsKeyBeginning() { + try { + return (hasBeginning.getAsKeyString() + DELIMITER + hasEnd + .getAsKeyString()).getBytes("US-ASCII"); + } catch (UnsupportedEncodingException e) { + // this is a code error, the strings are mostly numbers. + throw new Error("while converting key string to ascii bytes", e); + } + } + + /** + * get the key used for indexing the end of the interval. Use ascii for + * conversion to catch and prevent multi-byte chars. + * + * @return + */ + public byte[] getAsKeyEnd() { + try { + return (hasEnd.getAsKeyString() + DELIMITER + hasBeginning + .getAsKeyString()).getBytes("US-ASCII"); + } catch (UnsupportedEncodingException e) { + // this is a code error, the strings are mostly numbers and ascii + // symbols. + throw new Error("while converting key string to ascii bytes", e); + } + } + + /** + * Format as a "period" in this paper. This is not a standard, really. + * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.298.8948&rep=rep1&type=pdf + * also consider using the typed literal syntax: + * "[2010-01-01,2010-01-31]"^^xs:period + * @return [begindate,enddate] for example: [2010-01-01,2010-01-31] + * + */ + public String getAsPair() { + return "["+hasBeginning.getAsReadable() + "," + hasEnd.getAsReadable() + "]"; + } + + @Override + public String toString() { + return getAsPair() ; + // return hasBeginning.getAsReadable() + DELIMITER + hasEnd.getAsReadable(); + } +}
