http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/KeyParts.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/KeyParts.java b/extras/indexing/src/main/java/mvm/rya/indexing/KeyParts.java deleted file mode 100644 index 2caf81c..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/KeyParts.java +++ /dev/null @@ -1,331 +0,0 @@
package mvm.rya.indexing;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */


import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;

import mvm.rya.indexing.accumulo.Md5Hash;
import mvm.rya.indexing.accumulo.StatementSerializer;

import org.apache.accumulo.core.data.Value;
import org.apache.commons.codec.binary.StringUtils;
import org.apache.hadoop.io.Text;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.impl.ContextStatementImpl;
import org.openrdf.model.impl.StatementImpl;
import org.openrdf.model.impl.URIImpl;

/**
 * Store and format the various temporal index keys.
 * <p>
 * Row keys produced by this class take these forms:
 * <pre>
 *   rowkey           = datetime 0x00 uniquesuffix                       (CQ "o")
 *   rowkey           = constraintPrefix datetime                        (CQ "spo")
 *   rowkey           = constraintPrefix datetime 0x00 uniquesuffix      (CQ "po", "so")
 *   constraintPrefix = 0x00 md5(subject and/or predicate)
 *   uniquesuffix     = md5(statement)
 * </pre>
 *
 * The instance is in one of two modes depending on the constructor:
 * <ul>
 * <li>storage mode -- construct with a triple statement, get an iterator of keys to store.</li>
 * <li>query mode -- construct with a statement and query constraints, get the key prefix to search.</li>
 * </ul>
 *
 * This has the flavor of an immutable object (all fields are final).
 * This is independent of the underlying database engine.
 *
 * @author David.Lotts
 */
public class KeyParts implements Iterable<KeyParts> {
    private static final String CQ_S_P_AT = "spo";
    private static final String CQ_P_AT = "po";
    private static final String CQ_S_AT = "so";
    private static final String CQ_O_AT = "o";
    public static final String CQ_BEGIN = "begin";
    public static final String CQ_END = "end";

    public static final byte[] HASH_PREFIX = new byte[] {0};
    public static final byte[] HASH_PREFIX_FOLLOWING = new byte[] {1};

    public final Text cf;
    public final Text cq;
    public final Text constraintPrefix; // subject and/or predicate
    final Text storeKey; // complete row key; only set on keys produced by iterator()
    final private TemporalInstant instant;
    final private Statement statement;
    final private boolean queryMode;

    /**
     * Query-mode constructor.
     *
     * @param constraintPrefix hashed subject and/or predicate prefix, or null for no prefix
     * @param instant the instant to search for
     * @param cf column family
     * @param cq column qualifier naming the index strategy
     */
    KeyParts(Text constraintPrefix, TemporalInstant instant, String cf, String cq) {
        this.queryMode = true; // query mode
        this.storeKey = null;
        this.statement = null;
        this.constraintPrefix = constraintPrefix;
        this.instant = instant;
        this.cf = new Text(cf);
        this.cq = new Text(cq);
    }

    /**
     * This is the value to index: the serialized statement as UTF-8 bytes.
     *
     * @return the serialized statement
     * @throws IllegalStateException if this instance carries no statement (query mode)
     */
    public Value getValue() {
        if (statement == null) {
            throw new IllegalStateException("no statement to serialize; instance is in query mode");
        }
        return new Value(StringUtils.getBytesUtf8(StatementSerializer.writeStatement(statement)));
    }

    /**
     * Store-mode constructor: iterate over this instance to obtain the keys to store.
     *
     * @param statement the statement to index
     * @param instant2 the instant extracted from the statement
     */
    public KeyParts(Statement statement, TemporalInstant instant2) {
        this.queryMode = false; // store mode
        this.storeKey = null;
        this.constraintPrefix = null;
        this.statement = statement;
        this.instant = instant2;
        this.cf = null;
        this.cq = null;
    }

    /** Builds one fully formed store key; used only by {@link #iterator()}. */
    private KeyParts(Text keyText, Text cf, Text cq, Statement statement) {
        this.queryMode = false; // store mode
        this.constraintPrefix = null;
        this.statement = statement;
        this.instant = null;
        this.storeKey = keyText;
        this.cf = cf;
        this.cq = cq;
    }

    /**
     * Produces one store key per index strategy, in the order "o", "spo", "po", "so".
     *
     * @return iterator over the keys to store; {@code remove()} is unsupported
     * @throws IllegalStateException if this instance is in query mode, or has no
     *         statement/instant to build keys from (previously this returned null or
     *         failed later with a NullPointerException)
     */
    @Override
    public Iterator<KeyParts> iterator() {
        if (queryMode) {
            throw new IllegalStateException("iterator for queryMode is not implemented");
        }
        if (statement == null || instant == null) {
            throw new IllegalStateException("store keys need both a statement and an instant");
        }
        final String[] strategies = new String[] {
                CQ_O_AT, CQ_S_P_AT, CQ_P_AT, CQ_S_AT
        }; // CQ_END?

        return new Iterator<KeyParts>() {
            private int nextStrategy = 0;

            @Override
            public boolean hasNext() {
                return nextStrategy < strategies.length;
            }

            @Override
            public KeyParts next() {
                if (!hasNext()) {
                    throw new NoSuchElementException("Next passed end? No such nextStrategy=" + nextStrategy);
                }
                final String strategy = strategies[nextStrategy++];
                final Text keyText = new Text();

                // Optional hashed constraint prefix; the "o" strategy has none.
                if (CQ_S_P_AT.equals(strategy)) {
                    appendSubjectPredicate(statement, keyText); // hash(s+p)
                } else if (CQ_P_AT.equals(strategy)) {
                    appendPredicate(statement, keyText); // hash(p)
                } else if (CQ_S_AT.equals(strategy)) {
                    appendSubject(statement, keyText); // hash(s)
                }

                appendInstant(instant, keyText);

                // The original code treats hash(s+p)+instant as already unique and adds no suffix there.
                if (!CQ_S_P_AT.equals(strategy)) {
                    appendUniqueness(statement, keyText);
                }
                return new KeyParts(keyText, new Text(StatementSerializer.writeContext(statement)),
                        new Text(strategy), statement);
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException("Remove not Implemented.");
            }
        };
    }

    /**
     * @return a copy of the row key bytes of a key produced by {@link #iterator()}
     * @throws IllegalStateException if this instance has no store key (query mode, or the
     *         store-mode instance that has not been iterated)
     */
    public byte[] getStoreKey() {
        if (storeKey == null) {
            throw new IllegalStateException("must be in store Mode, store keys are not initialized.");
        }
        return this.storeKey.copyBytes();
    }

    /**
     * Query key is the prefix plus the datetime, but no uniqueness at the end.
     *
     * @return the row key for range queries.
     */
    public Text getQueryKey() {
        return getQueryKey(this.instant);
    }

    /**
     * Query key is the prefix plus the datetime, but no uniqueness at the end.
     *
     * @param theInstant the instant to place after the constraint prefix
     * @return the row key for range queries.
     */
    public Text getQueryKey(TemporalInstant theInstant) {
        assert queryMode : "must be in query Mode, query keys are not initialized.";
        Text keyText = new Text();
        if (constraintPrefix != null) {
            appendBytes(constraintPrefix.copyBytes(), keyText);
        }
        appendInstant(theInstant, keyText);
        return keyText;
    }

    @Override
    public String toString() {
        // instant is null on keys produced by iterator(); toHumanString renders null as "{null}".
        final byte[] instantBytes = (instant == null) ? null : instant.getAsKeyBytes();
        return "KeyParts [contraintPrefix=" + toHumanString(constraintPrefix) + ", instant=" + toHumanString(instantBytes) + ", cf=" + cf + ", cq=" + cq + "]";
    }

    /**
     * Appends a zero byte followed by the MD5 of the UTF-8 bytes of {@code serialized}.
     *
     * @param serialized the serialized statement part to hash
     * @param keyText text to append to
     */
    private static void appendHashed(String serialized, Text keyText) {
        Value statementValue = new Value(StringUtils.getBytesUtf8(serialized));
        byte[] hashOfValue = uniqueFromValueForKey(statementValue);
        appendBytes(HASH_PREFIX, keyText); // prefix the hash with a zero byte.
        appendBytes(hashOfValue, keyText);
    }

    private static void appendSubject(Statement statement, Text keyText) {
        // Encoded as UTF-8 like the other prefixes. The previous platform-default
        // getBytes() made the hash of a non-ASCII subject depend on the JVM's locale.
        appendHashed(StatementSerializer.writeSubject(statement), keyText);
    }

    private static void appendPredicate(Statement statement, Text keyText) {
        appendHashed(StatementSerializer.writePredicate(statement), keyText);
    }

    private static void appendInstant(TemporalInstant instant, Text keyText) {
        byte[] bytes = instant.getAsKeyBytes();
        appendBytes(bytes, keyText);
    }

    private static void appendSubjectPredicate(Statement statement, Text keyText) {
        appendHashed(StatementSerializer.writeSubjectPredicate(statement), keyText);
    }

    /**
     * Append any byte array to a row key.
     *
     * @param bytes append this
     * @param keyText text to append to
     */
    private static void appendBytes(byte[] bytes, Text keyText) {
        keyText.append(bytes, 0, bytes.length);
    }

    /**
     * Get a collision unlikely hash string and append to the key,
     * so that if two keys have the same value, then they will be the same,
     * if two different values occur at the same time, their keys are different.
     * If the application uses a very large number of statements at the exact same time,
     * the md5 value might be upgraded to for example sha-1 to avoid collisions.
     *
     * @param statement the statement whose serialized form is hashed
     * @param keyText the key to append the zero-byte delimiter and hash to
     */
    public static void appendUniqueness(Statement statement, Text keyText) {
        keyText.append(HASH_PREFIX, 0, 1); // delimiter
        Value statementValue = new Value(StringUtils.getBytesUtf8(StatementSerializer.writeStatement(statement)));
        byte[] hashOfValue = Md5Hash.md5Binary(statementValue);
        keyText.append(hashOfValue, 0, hashOfValue.length);
    }

    /**
     * Get a collision unlikely hash to append to the key,
     * so that if two keys have the same value, then they will be the same,
     * if two different values occur at the same time, their keys are different.
     *
     * @param value the bytes to hash
     * @return the binary MD5 of {@code value}
     */
    private static byte[] uniqueFromValueForKey(Value value) {
        return Md5Hash.md5Binary(value);
    }

    /**
     * List all the index keys to find for any query. Set the strategy via the column qualifier, ex: CQ_S_P_AT.
     * Column Family (CF) is the context/named-graph.
     *
     * @param queryInstant the instant to search for
     * @param contraints optional subject, predicates and context restricting the search
     * @return one query-mode key per predicate, or a single key when there are no predicates
     */
    static public List<KeyParts> keyPartsForQuery(TemporalInstant queryInstant, StatementContraints contraints) {
        List<KeyParts> keys = new LinkedList<KeyParts>();
        URI urlNull = new URIImpl("urn:null");
        Resource currentContext = contraints.getContext();
        // The column family is the context, or empty when no context was given.
        final String contextCf = (currentContext == null) ? "" : currentContext.toString();
        boolean hasSubj = contraints.hasSubject();
        if (contraints.hasPredicates()) {
            final String strategy = hasSubj ? CQ_S_P_AT : CQ_P_AT;
            for (URI nextPredicate : contraints.getPredicates()) {
                Text contraintPrefix = new Text();
                Statement statement = new ContextStatementImpl(hasSubj ? contraints.getSubject() : urlNull, nextPredicate, urlNull, currentContext);
                if (hasSubj) {
                    appendSubjectPredicate(statement, contraintPrefix);
                } else {
                    appendPredicate(statement, contraintPrefix);
                }
                keys.add(new KeyParts(contraintPrefix, queryInstant, contextCf, strategy));
            }
        } else if (hasSubj) { // and no predicates
            Text contraintPrefix = new Text();
            Statement statement = new StatementImpl(contraints.getSubject(), urlNull, urlNull);
            appendSubject(statement, contraintPrefix);
            keys.add(new KeyParts(contraintPrefix, queryInstant, contextCf, CQ_S_AT));
        } else {
            // No constraints except possibly a context/named-graph, handled by the CF
            keys.add(new KeyParts(null, queryInstant, contextCf, CQ_O_AT));
        }
        return keys;
    }

    /**
     * Convert a non-utf8 byte[] and text and value to string and show unprintable bytes as {xx} where x is hex.
     *
     * @param value the value to render, may be null
     * @return Human readable representation.
     */
    public static String toHumanString(Value value) {
        return toHumanString(value == null ? null : value.get());
    }

    /**
     * @param text the text to render, may be null
     * @return Human readable representation, see {@link #toHumanString(byte[])}.
     */
    public static String toHumanString(Text text) {
        return toHumanString(text == null ? null : text.copyBytes());
    }

    /**
     * Renders bytes as text: printable ASCII as-is, literal braces wrapped in braces,
     * and every other byte as its unpadded hex value in braces.
     *
     * @param bytes the bytes to render, may be null
     * @return Human readable representation, or "{null}" for null input.
     */
    public static String toHumanString(byte[] bytes) {
        if (bytes == null) {
            return "{null}";
        }
        StringBuilder sb = new StringBuilder();
        for (byte b : bytes) {
            if ((b > 0x7e) || (b < 32)) {
                sb.append("{");
                sb.append(Integer.toHexString(b & 0xff)); // Lop off the sign extended ones.
                sb.append("}");
            } else if (b == '{' || b == '}') { // Escape the literal braces.
                sb.append("{");
                sb.append((char) b);
                sb.append("}");
            } else {
                sb.append((char) b);
            }
        }
        return sb.toString();
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/PrecompQueryIndexer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/PrecompQueryIndexer.java b/extras/indexing/src/main/java/mvm/rya/indexing/PrecompQueryIndexer.java deleted file mode 100644 index 1aecd98..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/PrecompQueryIndexer.java +++ /dev/null @@ -1,63 +0,0 @@
package mvm.rya.indexing;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */


import info.aduna.iteration.CloseableIteration;

import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;

import mvm.rya.indexing.external.tupleSet.AccumuloIndexSet.AccValueFactory;

import org.apache.accumulo.core.client.TableNotFoundException;
import org.openrdf.model.Value;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryEvaluationException;

/**
 * Contract for an index of precomputed query results: binding sets go in,
 * and a join over them can be queried back out.
 * NOTE(review): storage layout and join semantics are defined by the
 * implementations, which are not visible from this interface.
 */
public interface PrecompQueryIndexer extends Closeable, Flushable {

    /**
     * Stores one binding set.
     *
     * @param bs the binding set to store
     * @throws IOException if the store fails
     */
    void storeBindingSet(BindingSet bs) throws IOException;

    /**
     * Stores a collection of binding sets.
     *
     * @param bindingSets the binding sets to store
     * @throws IOException if the store fails
     * @throws IllegalArgumentException declared by the contract; the triggering
     *         condition is implementation specific
     */
    void storeBindingSets(Collection<BindingSet> bindingSets)
            throws IOException, IllegalArgumentException;

    /**
     * Queries the precomputed join.
     *
     * @param varOrder variable names, in order
     * @param localityGroup locality group to read from
     * @param bindings value factories keyed by name
     * @param valMap values keyed by name
     * @param constraints binding sets constraining the result
     * @return an iteration over the matching binding sets; the caller closes it
     * @throws QueryEvaluationException if evaluation fails
     * @throws TableNotFoundException if the backing table is missing
     */
    CloseableIteration<BindingSet, QueryEvaluationException> queryPrecompJoin(List<String> varOrder,
            String localityGroup, Map<String, AccValueFactory> bindings, Map<String, Value> valMap,
            Collection<BindingSet> constraints) throws QueryEvaluationException, TableNotFoundException;

    @Override
    void flush() throws IOException;

    @Override
    void close() throws IOException;
}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/RyaSailFactory.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/RyaSailFactory.java b/extras/indexing/src/main/java/mvm/rya/indexing/RyaSailFactory.java deleted file mode 100644 index 646aab0..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/RyaSailFactory.java +++ /dev/null @@ -1,84 +0,0 @@ -package mvm.rya.indexing; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership.
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */


import mvm.rya.accumulo.AccumuloRdfConfiguration;
import mvm.rya.accumulo.AccumuloRyaDAO;
import mvm.rya.api.RdfCloudTripleStoreConfiguration;
import mvm.rya.api.persist.RyaDAOException;
import mvm.rya.indexing.accumulo.ConfigUtils;
import mvm.rya.mongodb.MongoDBRdfConfiguration;
import mvm.rya.mongodb.MongoDBRyaDAO;
import mvm.rya.rdftriplestore.RdfCloudTripleStore;

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.Connector;
import org.apache.hadoop.conf.Configuration;
import org.openrdf.sail.Sail;

/**
 * Builds a Rya {@link Sail} from a Hadoop {@link Configuration}, backed by
 * MongoDB when {@code ConfigUtils.getUseMongo} says so and by Accumulo otherwise.
 */
public class RyaSailFactory {

    /**
     * Creates a triple store wired to the backend selected by {@code conf}.
     *
     * @param conf the configuration to read the backend choice and table prefix from
     * @return the configured store
     * @throws AccumuloException declared by the Accumulo setup path
     * @throws AccumuloSecurityException declared by the Accumulo setup path
     * @throws RyaDAOException declared by the DAO setup
     */
    public static Sail getInstance(Configuration conf) throws AccumuloException,
            AccumuloSecurityException, RyaDAOException {
        return getRyaSail(conf);
    }

    /** Creates the store, then attaches the DAO for the selected backend. */
    private static Sail getRyaSail(Configuration config) throws AccumuloException, AccumuloSecurityException, RyaDAOException {
        final RdfCloudTripleStore tripleStore = new RdfCloudTripleStore();
        if (ConfigUtils.getUseMongo(config)) {
            attachMongoDao(tripleStore, config);
        } else {
            attachAccumuloDao(tripleStore, config);
        }
        return tripleStore;
    }

    /** MongoDB path: the DAO is built from the configuration and initialised before query-plan display is switched on. */
    private static void attachMongoDao(RdfCloudTripleStore tripleStore, Configuration config) throws RyaDAOException {
        final MongoDBRdfConfiguration mongoConf = new MongoDBRdfConfiguration(config);
        mongoConf.setTablePrefix(config.get(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX));
        ConfigUtils.setIndexers(mongoConf);

        final MongoDBRyaDAO mongoDao = new MongoDBRyaDAO(mongoConf);
        mongoDao.init();

        mongoConf.setDisplayQueryPlan(true);
        tripleStore.setRyaDAO(mongoDao);
    }

    /** Accumulo path: the connector is obtained first, and the configuration is fully prepared before the DAO is initialised. */
    private static void attachAccumuloDao(RdfCloudTripleStore tripleStore, Configuration config)
            throws AccumuloException, AccumuloSecurityException, RyaDAOException {
        final Connector accumuloConnector = ConfigUtils.getConnector(config);
        final AccumuloRyaDAO accumuloDao = new AccumuloRyaDAO();
        accumuloDao.setConnector(accumuloConnector);

        final AccumuloRdfConfiguration accumuloConf = new AccumuloRdfConfiguration(config);
        // sets TablePrefixLayoutStrategy
        accumuloConf.setTablePrefix(config.get(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX));
        ConfigUtils.setIndexers(accumuloConf);
        accumuloConf.setDisplayQueryPlan(true);

        accumuloDao.setConf(accumuloConf);
        accumuloDao.init();
        tripleStore.setRyaDAO(accumuloDao);
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/SearchFunction.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/SearchFunction.java b/extras/indexing/src/main/java/mvm/rya/indexing/SearchFunction.java deleted file mode 100644 index ce94556..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/SearchFunction.java +++ /dev/null @@ -1,45 +0,0 @@ -package mvm.rya.indexing; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License.
- */


import info.aduna.iteration.CloseableIteration;
import org.openrdf.model.Statement;
import org.openrdf.query.QueryEvaluationException;

/**
 * A single search operation over the indices.
 */
public interface SearchFunction {

    /**
     * Looks up the given terms in the indices and yields the {@link Statement}s
     * that also satisfy the {@link StatementContraints}.
     *
     * @param searchTerms
     *            the search terms
     * @param contraints
     *            the constraints on the returned {@link Statement}s
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException
     *             if the search cannot be evaluated
     */
    CloseableIteration<Statement, QueryEvaluationException> performSearch(String searchTerms, StatementContraints contraints)
            throws QueryEvaluationException;

}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/SearchFunctionFactory.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/SearchFunctionFactory.java b/extras/indexing/src/main/java/mvm/rya/indexing/SearchFunctionFactory.java deleted file mode 100644 index 719cc2f..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/SearchFunctionFactory.java +++ /dev/null @@ -1,71 +0,0 @@ -package mvm.rya.indexing; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied.
See the License for the - * specific language governing permissions and limitations - * under the License. - */


import java.util.Map;

import org.apache.log4j.Logger;
import org.openrdf.model.URI;
import org.openrdf.query.QueryEvaluationException;

import com.google.common.collect.Maps;

/**
 * Base class that resolves a function URI to a registered {@link SearchFunction}.
 * NOTE(review): the lookup map is private and nothing in this class adds to it,
 * so as written every lookup fails; confirm how subclasses are meant to register
 * their functions.
 */
public abstract class SearchFunctionFactory {

    private static final Logger logger = Logger.getLogger(SearchFunctionFactory.class);

    private final Map<URI, SearchFunction> SEARCH_FUNCTION_MAP = Maps.newHashMap();


    /**
     * Get a {@link SearchFunction} for a given URI.
     *
     * @param searchFunction the URI identifying the search function
     * @return the registered function, or {@code null} when none is registered for
     *         the URI (the failure is logged, not thrown)
     */
    public SearchFunction getSearchFunction(final URI searchFunction) {
        try {
            return getSearchFunctionInternal(searchFunction);
        } catch (QueryEvaluationException e) {
            // Best-effort lookup: callers receive null, but the cause goes to the
            // log rather than to stderr via printStackTrace().
            logger.error("Unable to resolve search function: " + searchFunction, e);
            return null;
        }
    }

    /**
     * Looks the function up in the registry.
     *
     * @param searchFunction the URI identifying the search function, may be null
     * @return the registered function, never null
     * @throws QueryEvaluationException if no function is registered for the URI
     */
    private SearchFunction getSearchFunctionInternal(final URI searchFunction) throws QueryEvaluationException {
        final SearchFunction sf = SEARCH_FUNCTION_MAP.get(searchFunction);
        if (sf == null) {
            // A null URI used to fail here with a NullPointerException while building the message.
            final String name = (searchFunction == null) ? "null" : searchFunction.stringValue();
            throw new QueryEvaluationException("Unknown Search Function: " + name);
        }
        return sf;
    }


}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/StatementContraints.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/StatementContraints.java b/extras/indexing/src/main/java/mvm/rya/indexing/StatementContraints.java deleted file mode 100644 index 437c74d..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/StatementContraints.java +++ /dev/null @@ -1,73 +0,0 @@ -package mvm.rya.indexing; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements.
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */



import java.util.Set;

import org.openrdf.model.Resource;
import org.openrdf.model.URI;

/**
 * Optional restrictions on the statements a search may return: a context,
 * a subject and a set of predicates. Every part starts out unset; the setters
 * return {@code this} so calls can be chained.
 */
public class StatementContraints {
    private Resource context;
    private Resource subject;
    private Set<URI> predicates;

    /** @return true when a context has been set */
    public boolean hasContext() {
        return context != null;
    }

    /** @return the context, or null when unset */
    public Resource getContext() {
        return context;
    }

    /**
     * @param context the context to restrict to, or null for none
     * @return this instance, for chaining
     */
    public StatementContraints setContext(Resource context) {
        this.context = context;
        return this;
    }

    /** @return true when a subject has been set */
    public boolean hasSubject() {
        return subject != null;
    }

    /** @return the subject, or null when unset */
    public Resource getSubject() {
        return subject;
    }

    /**
     * @param subject the subject to restrict to, or null for none
     * @return this instance, for chaining
     */
    public StatementContraints setSubject(Resource subject) {
        this.subject = subject;
        return this;
    }

    /** @return true when the predicate set is present and has at least one element */
    public boolean hasPredicates() {
        if (predicates == null) {
            return false;
        }
        return !predicates.isEmpty();
    }

    /** @return the predicate set exactly as it was passed in, or null when unset */
    public Set<URI> getPredicates() {
        return predicates;
    }

    /**
     * @param predicates the predicates to restrict to; the set is stored by reference, not copied
     * @return this instance, for chaining
     */
    public StatementContraints setPredicates(Set<URI> predicates) {
        this.predicates = predicates;
        return this;
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/TemporalIndexer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/TemporalIndexer.java
b/extras/indexing/src/main/java/mvm/rya/indexing/TemporalIndexer.java deleted file mode 100644 index be06e25..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/TemporalIndexer.java +++ /dev/null @@ -1,183 +0,0 @@
package mvm.rya.indexing;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */


import info.aduna.iteration.CloseableIteration;

import java.io.IOException;
import java.util.Set;

import mvm.rya.api.persist.index.RyaSecondaryIndexer;

import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.query.QueryEvaluationException;

/**
 * A repository to store, index, and retrieve {@link Statement}s based on time.
 * Instants:
 *         Instant {before, equals, after} Instant
 *         Instant {before, after, inside} Interval
 *         Instant {hasBeginning, hasEnd}  Interval
 *
 * OWL-Time provides the interval relations:
 * <pre>
 *         intervalEquals,
 *         intervalBefore,
 *         intervalMeets,
 *         intervalOverlaps,
 *         intervalStarts,
 *         intervalDuring,
 *         intervalFinishes,
 *
 *     and their reverse interval relations:
 *         intervalAfter,
 *         intervalMetBy,
 *         intervalOverlappedBy,
 *         intervalStartedBy,
 *         intervalContains,
 *         intervalFinishedBy.
 *
 * from Allen paper in 1983
 *
 * Relation    Y Symbol Inverse Y
 * before      Y   <      >     X
 * equal       Y   =      =     X
 * meets       Y   m      mi    X
 * overlaps    Y   o      oi    X
 * during      Y   d      di    X
 * starts      Y   s      si    X
 * finishes    Y   f      fi    X
 * </pre>
 *
 */
public interface TemporalIndexer extends RyaSecondaryIndexer {

    /* consider ParseException here */

    /*-
     * The queries, instant versions first.
     *    format:   x {relation} y
     *    read:  Given literal y, find all statements where the date object x is ( x relation y )
     *         Instant {before, equals, after} Instant
     *         Instant {before, after, inside} Interval
     *         Instant {hasBeginning, hasEnd}  Interval
     *
     * Then the Allen interval relations, as described above:
     *         intervalEquals,
     *         intervalBefore,
     *         intervalMeets,
     *         intervalOverlaps,
     *         intervalStarts,
     *         intervalDuring,
     *         intervalFinishes
     *    and then the inverses, including after.
     */

    /**
     * Instant equals instant.
     *
     * @param queryInstant the instant to compare the indexed dates against
     * @param contraints the {@link StatementContraints} the results must meet
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException if the query cannot be evaluated
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantEqualsInstant(
            TemporalInstant queryInstant, StatementContraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant before instant.
     *
     * @param queryInstant the instant to compare the indexed dates against
     * @param contraints the {@link StatementContraints} the results must meet
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException if the query cannot be evaluated
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantBeforeInstant(
            TemporalInstant queryInstant, StatementContraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant after instant.
     *
     * @param queryInstant the instant to compare the indexed dates against
     * @param contraints the {@link StatementContraints} the results must meet
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException if the query cannot be evaluated
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantAfterInstant(
            TemporalInstant queryInstant, StatementContraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant before interval.
     *
     * @param givenInterval the interval to compare the indexed dates against
     * @param contraints the {@link StatementContraints} the results must meet
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException if the query cannot be evaluated
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantBeforeInterval(
            TemporalInterval givenInterval, StatementContraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant after interval.
     *
     * @param givenInterval the interval to compare the indexed dates against
     * @param contraints the {@link StatementContraints} the results must meet
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException if the query cannot be evaluated
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantAfterInterval(
            TemporalInterval givenInterval, StatementContraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant inside interval.
     *
     * @param givenInterval the interval to compare the indexed dates against
     * @param contraints the {@link StatementContraints} the results must meet
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException if the query cannot be evaluated
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantInsideInterval(
            TemporalInterval givenInterval, StatementContraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant hasBeginning interval.
     *
     * @param queryInterval the interval to compare the indexed dates against
     * @param contraints the {@link StatementContraints} the results must meet
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException if the query cannot be evaluated
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantHasBeginningInterval(
            TemporalInterval queryInterval, StatementContraints contraints)
            throws QueryEvaluationException;

    /**
     * Instant hasEnd interval.
     *
     * @param queryInterval the interval to compare the indexed dates against
     * @param contraints the {@link StatementContraints} the results must meet
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException if the query cannot be evaluated
     */
    CloseableIteration<Statement, QueryEvaluationException> queryInstantHasEndInterval(
            TemporalInterval queryInterval, StatementContraints contraints)
            throws QueryEvaluationException;

    /**
     * Returns statements that contain a time interval that is equal to the
     * queried {@link TemporalInterval} and meet the {@link StatementContraints}.
     *
     * @param query
     *            the queried time interval
     * @param contraints
     *            the {@link StatementContraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException if the query cannot be evaluated
     */
    CloseableIteration<Statement, QueryEvaluationException> queryIntervalEquals(
            TemporalInterval query, StatementContraints contraints)
            throws QueryEvaluationException;

    /**
     * Returns statements that contain a time interval that is before the
     * queried {@link TemporalInterval} and meet the {@link StatementContraints}.
     *
     * @param query
     *            the queried time interval
     * @param contraints
     *            the {@link StatementContraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException if the query cannot be evaluated
     */
    CloseableIteration<Statement, QueryEvaluationException> queryIntervalBefore(
            TemporalInterval query, StatementContraints contraints)
            throws QueryEvaluationException;

    /**
     * Returns statements that contain a time interval that is after the
     * queried {@link TemporalInterval} and meet the {@link StatementContraints}.
     *
     * @param query
     *            the queried time interval
     * @param contraints
     *            the {@link StatementContraints}
     * @return an iteration over the matching statements
     * @throws QueryEvaluationException if the query cannot be evaluated
     */
    CloseableIteration<Statement, QueryEvaluationException> queryIntervalAfter(
            TemporalInterval query, StatementContraints contraints)
            throws QueryEvaluationException;

    /* End of the Allen algebra queries */

    /**
     * @return the set of predicates indexed by the indexer.
     */
    Set<URI> getIndexablePredicates();

    @Override
    void flush() throws IOException;

    @Override
    void close() throws IOException;
}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/TemporalInstant.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/TemporalInstant.java b/extras/indexing/src/main/java/mvm/rya/indexing/TemporalInstant.java deleted file mode 100644 index f4e6d95..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/TemporalInstant.java +++ /dev/null @@ -1,83 +0,0 @@ -package mvm.rya.indexing; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License.
- */ - - -import java.io.Serializable; - -import org.joda.time.DateTime; -import org.joda.time.DateTimeZone; - -/** - * Time and date interface for building intervals. - * - *Implementations: - * Implementation should have a factory method for TemporalInterval since TemporalIntervals reference only this - * interface for begin & end, so it injects an implementation. - * public static TemporalInterval parseInterval(String dateTimeInterval) - * - * The following are notes and may not have been implemented. - * - * = rfc3339 - *https://www.ietf.org/rfc/rfc3339.txt - * a subset of ISO-8601 - * YYYY-MM-DDThh:mm:ss.fffZ - * Limits: - *All dates and times are assumed to be in the "current era", - somewhere between 0000AD and 9999AD. - * resolution: to the second, or millisecond if the optional fraction is used. - * - * = epoch - * 32bit or 64bit integer specifying the number of seconds since a standard date-time (1970) - * 32bit is good until 2038. - * 64bit is good until after the heat death of our universe - * - */ -public interface TemporalInstant extends Comparable<TemporalInstant>, Serializable { - @Override - public boolean equals(Object obj) ; - - @Override - public int compareTo(TemporalInstant o) ; - - @Override - public int hashCode() ; - /** - * Get the date as a byte array. - */ - public byte[] getAsKeyBytes(); - /** - * Get the date as a String. - */ - public String getAsKeyString(); - /** - * Get the date as a human readable for reporting with timeZone. - */ - public String getAsReadable(DateTimeZone tz); - /** - * Get the date as a human readable for reporting, timeZone is implementation specific. - */ - public String getAsReadable(); - /** - * Get the date as a Joda/Java v8 DateTime. 
- */ - public DateTime getAsDateTime(); - -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/TemporalInterval.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/TemporalInterval.java b/extras/indexing/src/main/java/mvm/rya/indexing/TemporalInterval.java deleted file mode 100644 index b23b99c..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/TemporalInterval.java +++ /dev/null @@ -1,181 +0,0 @@ -package mvm.rya.indexing; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -import java.io.UnsupportedEncodingException; - -/** - * A time with beginning and end date and time, which could be indefinitely in - * the past or future. Immutable, so it's thread safe. For use in reading and - * writing from Rya's temporal indexing scheme. - * - */ -public class TemporalInterval implements Comparable<TemporalInterval> { - - // the beginning and end. Read-only because they are final references to immutable objects. - private final TemporalInstant hasBeginning; - private final TemporalInstant hasEnd; - - /** - * Separate the beginning and end with this. 
- * Used because Joda time library's interval uses this. - * TODO: Move this down to the TemporalInterval implementation. - * TODO: Then add a TemporalInterval.keyConcatenate(). - */ - public static final String DELIMITER = "/"; - -// /** -// * Empty constructor -- not allowed, no defaults. -// * For an infinite span of time: do it like this: -// * new TemporalInterval(TemporalInstantImpl.getMinimum, TemporalInstantImpl.getMaximum) -// */ -// public TemporalInterval() { -// hasBeginning = null; -// hasEnd = null; -// } - - /** - * Constructor setting beginning and end with an implementation of {@link TemporalInstant}. - * beginning must be less than end. - * - * @param hasBeginning - * @param hasEnd - */ - public TemporalInterval(TemporalInstant hasBeginning, TemporalInstant hasEnd) { - super(); - if (hasBeginning != null && hasEnd != null && 0 < hasBeginning.compareTo(hasEnd)) - throw new IllegalArgumentException("The Beginning instance must not compare greater than the end."); - this.hasBeginning = hasBeginning; - this.hasEnd = hasEnd; - } - - /** - * @return the hasBeginning - */ - public TemporalInstant getHasBeginning() { - return hasBeginning; - } - - /** - * @return the hasEnd - */ - public TemporalInstant getHasEnd() { - return hasEnd; - } - - /** - * True if CompareTo() says equal (0) - */ - @Override - public boolean equals(Object other) { - return other instanceof TemporalInterval - && this.compareTo((TemporalInterval) other) == 0; - }; - - /** - * Compare beginnings, if the same then compare ends, or equal if beginnings equal and endings equal. - * Nulls represent infinity. 
- */ - @Override - public int compareTo(TemporalInterval other) { - int compBegins = this.hasBeginning.compareTo(other.hasBeginning); - if (0 == compBegins) - return this.hasEnd.compareTo(other.hasEnd); - else - return compBegins; - - } - - /** - * Hashcode for - */ - @Override - public int hashCode() { - if (hasBeginning == null) - if (hasEnd == null) - return 0; - else - return hasEnd.hashCode(); - else - return hashboth(this.hasBeginning.hashCode(), - this.hasEnd.hashCode()); - } - - /** - * Hashcode combining two string hashcodes. - */ - protected static int hashboth(int i1, int i2) { - // return (int) (( 1L * i1 * i2) ; % (1L + Integer.MAX_VALUE)); - // let the overflow happen. It won't throw an error. - return (i1 + i2); - } - - /** - * Get the key use for rowid for the beginning of the interval. Use ascii - * for conversion to catch and prevent multi-byte chars. - * - * @return - */ - public byte[] getAsKeyBeginning() { - try { - return (hasBeginning.getAsKeyString() + DELIMITER + hasEnd - .getAsKeyString()).getBytes("US-ASCII"); - } catch (UnsupportedEncodingException e) { - // this is a code error, the strings are mostly numbers. - throw new Error("while converting key string to ascii bytes", e); - } - } - - /** - * get the key used for indexing the end of the interval. Use ascii for - * conversion to catch and prevent multi-byte chars. - * - * @return - */ - public byte[] getAsKeyEnd() { - try { - return (hasEnd.getAsKeyString() + DELIMITER + hasBeginning - .getAsKeyString()).getBytes("US-ASCII"); - } catch (UnsupportedEncodingException e) { - // this is a code error, the strings are mostly numbers and ascii - // symbols. - throw new Error("while converting key string to ascii bytes", e); - } - } - - /** - * Format as a "period" in this paper. This is not a standard, really. 
- * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.298.8948&rep=rep1&type=pdf - * also consider using the typed literal syntax: - * "[2010-01-01,2010-01-31]"^^xs:period - * @return [begindate,enddate] for example: [2010-01-01,2010-01-31] - * - */ - public String getAsPair() { - return "["+hasBeginning.getAsReadable() + "," + hasEnd.getAsReadable() + "]"; - } - - @Override - public String toString() { - return getAsPair() ; - // return hasBeginning.getAsReadable() + DELIMITER + hasEnd.getAsReadable(); - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/ConfigUtils.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/ConfigUtils.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/ConfigUtils.java deleted file mode 100644 index ae16062..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/ConfigUtils.java +++ /dev/null @@ -1,424 +0,0 @@ -package mvm.rya.indexing.accumulo; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - - - -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import mvm.rya.accumulo.AccumuloRdfConfiguration; -import mvm.rya.api.RdfCloudTripleStoreConfiguration; -import mvm.rya.indexing.FilterFunctionOptimizer; -import mvm.rya.indexing.accumulo.entity.EntityCentricIndex; -import mvm.rya.indexing.accumulo.entity.EntityOptimizer; -import mvm.rya.indexing.accumulo.freetext.AccumuloFreeTextIndexer; -import mvm.rya.indexing.accumulo.freetext.LuceneTokenizer; -import mvm.rya.indexing.accumulo.freetext.Tokenizer; -import mvm.rya.indexing.accumulo.geo.GeoMesaGeoIndexer; -import mvm.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer; -import mvm.rya.indexing.external.PrecompJoinOptimizer; -import mvm.rya.indexing.mongodb.MongoGeoIndexer; - -import org.apache.accumulo.core.client.AccumuloException; -import org.apache.accumulo.core.client.AccumuloSecurityException; -import org.apache.accumulo.core.client.BatchScanner; -import org.apache.accumulo.core.client.BatchWriter; -import org.apache.accumulo.core.client.Connector; -import org.apache.accumulo.core.client.Instance; -import org.apache.accumulo.core.client.MultiTableBatchWriter; -import org.apache.accumulo.core.client.Scanner; -import org.apache.accumulo.core.client.TableExistsException; -import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.client.ZooKeeperInstance; -import org.apache.accumulo.core.client.admin.TableOperations; -import org.apache.accumulo.core.client.mock.MockInstance; -import org.apache.accumulo.core.security.Authorizations; -import org.apache.commons.lang.Validate; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.log4j.Logger; -import org.openrdf.model.URI; -import org.openrdf.model.impl.URIImpl; - -import com.google.common.collect.Lists; - -/** - * A set of configuration utils to read a Hadoop {@link Configuration} 
object and create Cloudbase/Accumulo objects. - */ -public class ConfigUtils { - private static final Logger logger = Logger.getLogger(ConfigUtils.class); - - public static final String CLOUDBASE_TBL_PREFIX = "sc.cloudbase.tableprefix"; - public static final String CLOUDBASE_AUTHS = "sc.cloudbase.authorizations"; - public static final String CLOUDBASE_INSTANCE = "sc.cloudbase.instancename"; - public static final String CLOUDBASE_ZOOKEEPERS = "sc.cloudbase.zookeepers"; - public static final String CLOUDBASE_USER = "sc.cloudbase.username"; - public static final String CLOUDBASE_PASSWORD = "sc.cloudbase.password"; - - public static final String CLOUDBASE_WRITER_MAX_WRITE_THREADS = "sc.cloudbase.writer.maxwritethreads"; - public static final String CLOUDBASE_WRITER_MAX_LATENCY = "sc.cloudbase.writer.maxlatency"; - public static final String CLOUDBASE_WRITER_MAX_MEMORY = "sc.cloudbase.writer.maxmemory"; - - public static final String FREE_TEXT_QUERY_TERM_LIMIT = "sc.freetext.querytermlimit"; - - public static final String FREE_TEXT_DOC_TABLENAME = "sc.freetext.doctable"; - public static final String FREE_TEXT_TERM_TABLENAME = "sc.freetext.termtable"; - public static final String GEO_TABLENAME = "sc.geo.table"; - public static final String GEO_NUM_PARTITIONS = "sc.geo.numPartitions"; - public static final String TEMPORAL_TABLENAME = "sc.temporal.index"; - public static final String ENTITY_TABLENAME = "sc.entity.index"; - - public static final String USE_GEO = "sc.use_geo"; - public static final String USE_FREETEXT = "sc.use_freetext"; - public static final String USE_TEMPORAL = "sc.use_temporal"; - public static final String USE_ENTITY = "sc.use_entity"; - public static final String USE_PCJ = "sc.use_pcj"; - public static final String USE_OPTIMAL_PCJ = "sc.use.optimal.pcj"; - - public static final String USE_INDEXING_SAIL = "sc.use.indexing.sail"; - public static final String USE_EXTERNAL_SAIL = "sc.use.external.sail"; - - public static final String USE_MOCK_INSTANCE = 
".useMockInstance"; - - public static final String NUM_PARTITIONS = "sc.cloudbase.numPartitions"; - - private static final int WRITER_MAX_WRITE_THREADS = 1; - private static final long WRITER_MAX_LATNECY = Long.MAX_VALUE; - private static final long WRITER_MAX_MEMORY = 10000L; - - public static final String DISPLAY_QUERY_PLAN = "query.printqueryplan"; - - public static final String FREETEXT_PREDICATES_LIST = "sc.freetext.predicates"; - public static final String FREETEXT_DOC_NUM_PARTITIONS = "sc.freetext.numPartitions.text"; - public static final String FREETEXT_TERM_NUM_PARTITIONS = "sc.freetext.numPartitions.term"; - - public static final String TOKENIZER_CLASS = "sc.freetext.tokenizer.class"; - - public static final String GEO_PREDICATES_LIST = "sc.geo.predicates"; - - public static final String TEMPORAL_PREDICATES_LIST = "sc.temporal.predicates"; - - public static final String USE_MONGO = "sc.useMongo"; - - public static boolean isDisplayQueryPlan(Configuration conf){ - return conf.getBoolean(DISPLAY_QUERY_PLAN, false); - } - - /** - * get a value from the configuration file and throw an exception if the value does not exist. 
- * - * @param conf - * @param key - * @return - */ - private static String getStringCheckSet(Configuration conf, String key) { - String value = conf.get(key); - Validate.notNull(value, key + " not set"); - return value; - } - - /** - * @param conf - * @param tablename - * @return if the table was created - * @throws AccumuloException - * @throws AccumuloSecurityException - * @throws TableExistsException - */ - public static boolean createTableIfNotExists(Configuration conf, String tablename) throws AccumuloException, AccumuloSecurityException, - TableExistsException { - TableOperations tops = getConnector(conf).tableOperations(); - if (!tops.exists(tablename)) { - logger.info("Creating table: " + tablename); - tops.create(tablename); - return true; - } - return false; - } - - private static String getIndexTableName(Configuration conf, String indexTableNameConf, String altSuffix){ - String value = conf.get(indexTableNameConf); - if (value == null){ - String defaultTableName = conf.get(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX); - Validate.notNull(defaultTableName, indexTableNameConf + " not set and " + RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX + " not set. 
Cannot generate table name."); - value = conf.get(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX) + altSuffix; - } - return value; - } - - public static String getFreeTextDocTablename(Configuration conf) { - return getIndexTableName(conf, FREE_TEXT_DOC_TABLENAME, "freetext"); - } - - public static String getFreeTextTermTablename(Configuration conf) { - return getIndexTableName(conf, FREE_TEXT_TERM_TABLENAME, "freetext_term"); - } - - public static int getFreeTextTermLimit(Configuration conf) { - return conf.getInt(FREE_TEXT_QUERY_TERM_LIMIT, 100); - } - - public static String getGeoTablename(Configuration conf) { - return getIndexTableName(conf, GEO_TABLENAME, "geo"); - } - - public static String getTemporalTableName(Configuration conf) { - return getIndexTableName(conf, TEMPORAL_TABLENAME, "temporal"); - } - - - public static String getEntityTableName(Configuration conf) { - return getIndexTableName(conf, ENTITY_TABLENAME, "entity"); - } - - - public static Set<URI> getFreeTextPredicates(Configuration conf) { - return getPredicates(conf, FREETEXT_PREDICATES_LIST); - } - - public static Set<URI> getGeoPredicates(Configuration conf) { - return getPredicates(conf, GEO_PREDICATES_LIST); - } - /** - * Used for indexing statements about date & time instances and intervals. - * @param conf - * @return Set of predicate URI's whose objects should be date time literals. - */ - public static Set<URI> getTemporalPredicates(Configuration conf) { - return getPredicates(conf, TEMPORAL_PREDICATES_LIST); - } - - private static Set<URI> getPredicates(Configuration conf, String confName) { - String[] validPredicateStrings = conf.getStrings(confName, new String[] {}); - Set<URI> predicates = new HashSet<URI>(); - for (String prediateString : validPredicateStrings) { - predicates.add(new URIImpl(prediateString)); - } - return predicates; - } - - public static Tokenizer getFreeTextTokenizer(Configuration conf) { - Class<? 
extends Tokenizer> c = conf.getClass(TOKENIZER_CLASS, LuceneTokenizer.class, Tokenizer.class); - return ReflectionUtils.newInstance(c, conf); - } - - public static BatchWriter createDefaultBatchWriter(String tablename, Configuration conf) throws TableNotFoundException, - AccumuloException, AccumuloSecurityException { - Long DEFAULT_MAX_MEMORY = getWriterMaxMemory(conf); - Long DEFAULT_MAX_LATENCY = getWriterMaxLatency(conf); - Integer DEFAULT_MAX_WRITE_THREADS = getWriterMaxWriteThreads(conf); - Connector connector = ConfigUtils.getConnector(conf); - return connector.createBatchWriter(tablename, DEFAULT_MAX_MEMORY, DEFAULT_MAX_LATENCY, DEFAULT_MAX_WRITE_THREADS); - } - - public static MultiTableBatchWriter createMultitableBatchWriter(Configuration conf) throws AccumuloException, AccumuloSecurityException { - Long DEFAULT_MAX_MEMORY = getWriterMaxMemory(conf); - Long DEFAULT_MAX_LATENCY = getWriterMaxLatency(conf); - Integer DEFAULT_MAX_WRITE_THREADS = getWriterMaxWriteThreads(conf); - Connector connector = ConfigUtils.getConnector(conf); - return connector.createMultiTableBatchWriter(DEFAULT_MAX_MEMORY, DEFAULT_MAX_LATENCY, DEFAULT_MAX_WRITE_THREADS); - } - - public static Scanner createScanner(String tablename, Configuration conf) throws AccumuloException, AccumuloSecurityException, - TableNotFoundException { - Connector connector = ConfigUtils.getConnector(conf); - Authorizations auths = ConfigUtils.getAuthorizations(conf); - return connector.createScanner(tablename, auths); - - } - - public static BatchScanner createBatchScanner(String tablename, Configuration conf) throws AccumuloException, AccumuloSecurityException, - TableNotFoundException { - Connector connector = ConfigUtils.getConnector(conf); - Authorizations auths = ConfigUtils.getAuthorizations(conf); - Integer numThreads = null; - if (conf instanceof RdfCloudTripleStoreConfiguration) - numThreads = ((RdfCloudTripleStoreConfiguration) conf).getNumThreads(); - else - numThreads = 
conf.getInt(RdfCloudTripleStoreConfiguration.CONF_NUM_THREADS, 2); - return connector.createBatchScanner(tablename, auths, numThreads); - } - - public static int getWriterMaxWriteThreads(Configuration conf) { - return conf.getInt(CLOUDBASE_WRITER_MAX_WRITE_THREADS, WRITER_MAX_WRITE_THREADS); - } - - public static long getWriterMaxLatency(Configuration conf) { - return conf.getLong(CLOUDBASE_WRITER_MAX_LATENCY, WRITER_MAX_LATNECY); - } - - public static long getWriterMaxMemory(Configuration conf) { - return conf.getLong(CLOUDBASE_WRITER_MAX_MEMORY, WRITER_MAX_MEMORY); - } - - public static String getUsername(JobContext job) { - return getUsername(job.getConfiguration()); - } - - public static String getUsername(Configuration conf) { - return conf.get(CLOUDBASE_USER); - } - - public static Authorizations getAuthorizations(JobContext job) { - return getAuthorizations(job.getConfiguration()); - } - - public static Authorizations getAuthorizations(Configuration conf) { - String authString = conf.get(CLOUDBASE_AUTHS, ""); - if (authString.isEmpty()) { - return new Authorizations(); - } - return new Authorizations(authString.split(",")); - } - - public static Instance getInstance(JobContext job) { - return getInstance(job.getConfiguration()); - } - - public static Instance getInstance(Configuration conf) { - if (useMockInstance(conf)) { - return new MockInstance(conf.get(CLOUDBASE_INSTANCE)); - } - return new ZooKeeperInstance(conf.get(CLOUDBASE_INSTANCE), conf.get(CLOUDBASE_ZOOKEEPERS)); - } - - public static String getPassword(JobContext job) { - return getPassword(job.getConfiguration()); - } - - public static String getPassword(Configuration conf) { - return conf.get(CLOUDBASE_PASSWORD, ""); - } - - public static Connector getConnector(JobContext job) throws AccumuloException, AccumuloSecurityException { - return getConnector(job.getConfiguration()); - } - - public static Connector getConnector(Configuration conf) throws AccumuloException, AccumuloSecurityException { 
- Instance instance = ConfigUtils.getInstance(conf); - - return instance.getConnector(getUsername(conf), getPassword(conf)); - } - - public static boolean useMockInstance(Configuration conf) { - return conf.getBoolean(USE_MOCK_INSTANCE, false); - } - - private static int getNumPartitions(Configuration conf) { - return conf.getInt(NUM_PARTITIONS, 25); - } - - public static int getFreeTextDocNumPartitions(Configuration conf) { - return conf.getInt(FREETEXT_DOC_NUM_PARTITIONS, getNumPartitions(conf)); - } - - public static int getFreeTextTermNumPartitions(Configuration conf) { - return conf.getInt(FREETEXT_TERM_NUM_PARTITIONS, getNumPartitions(conf)); - } - - public static int getGeoNumPartitions(Configuration conf) { - return conf.getInt(GEO_NUM_PARTITIONS, getNumPartitions(conf)); - } - - public static boolean getUseGeo(Configuration conf) { - return conf.getBoolean(USE_GEO, false); - } - - public static boolean getUseFreeText(Configuration conf) { - return conf.getBoolean(USE_FREETEXT, false); - } - - public static boolean getUseTemporal(Configuration conf) { - return conf.getBoolean(USE_TEMPORAL, false); - } - - public static boolean getUseEntity(Configuration conf) { - return conf.getBoolean(USE_ENTITY, false); - } - - public static boolean getUsePCJ(Configuration conf) { - return conf.getBoolean(USE_PCJ, false); - } - - public static boolean getUseOptimalPCJ(Configuration conf) { - return conf.getBoolean(USE_OPTIMAL_PCJ, false); - } - - public static boolean getUseMongo(Configuration conf) { - return conf.getBoolean(USE_MONGO, false); - } - - - public static void setIndexers(RdfCloudTripleStoreConfiguration conf) { - - List<String> indexList = Lists.newArrayList(); - List<String> optimizers = Lists.newArrayList(); - - boolean useFilterIndex = false; - - if (ConfigUtils.getUseMongo(conf)) { - if (getUseGeo(conf)) { - indexList.add(MongoGeoIndexer.class.getName()); - useFilterIndex = true; - } - } else { - - if (getUsePCJ(conf) || getUseOptimalPCJ(conf)) { - 
conf.setPcjOptimizer(PrecompJoinOptimizer.class); - } - - if (getUseGeo(conf)) { - indexList.add(GeoMesaGeoIndexer.class.getName()); - useFilterIndex = true; - } - - if (getUseFreeText(conf)) { - indexList.add(AccumuloFreeTextIndexer.class.getName()); - useFilterIndex = true; - } - - if (getUseTemporal(conf)) { - indexList.add(AccumuloTemporalIndexer.class.getName()); - useFilterIndex = true; - } - - } - - if (useFilterIndex) { - optimizers.add(FilterFunctionOptimizer.class.getName()); - } - - if (getUseEntity(conf)) { - indexList.add(EntityCentricIndex.class.getName()); - optimizers.add(EntityOptimizer.class.getName()); - - } - - conf.setStrings(AccumuloRdfConfiguration.CONF_ADDITIONAL_INDEXERS, indexList.toArray(new String[]{})); - conf.setStrings(AccumuloRdfConfiguration.CONF_OPTIMIZERS, optimizers.toArray(new String[]{})); - - } - - - -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/Md5Hash.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/Md5Hash.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/Md5Hash.java deleted file mode 100644 index 8fa3008..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/Md5Hash.java +++ /dev/null @@ -1,45 +0,0 @@ -package mvm.rya.indexing.accumulo; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import org.apache.accumulo.core.data.Value; -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.codec.binary.StringUtils; -import org.apache.commons.codec.digest.DigestUtils; - -/** - * Utility methods for generating hashes. Note that MD5 is 16 bytes, or 32 Hex chars. To make it smaller (but still printable), this class - * Base64 encodes those 16 bytes into 22 chars. - */ -public class Md5Hash { - public static String md5Base64(byte[] data) { - return Base64.encodeBase64URLSafeString(DigestUtils.md5(data)); - } - - public static String md5Base64(String string) { - return md5Base64(StringUtils.getBytesUtf8(string)); - } - - public static byte[] md5Binary(Value value) { - return DigestUtils.md5(value.get()); - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/StatementSerializer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/StatementSerializer.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/StatementSerializer.java deleted file mode 100644 index f5d6d0e..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/StatementSerializer.java +++ /dev/null @@ -1,227 +0,0 @@ -package mvm.rya.indexing.accumulo; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import java.io.IOException; -import java.util.Set; - -import mvm.rya.indexing.StatementContraints; - -import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang.Validate; -import org.openrdf.model.Literal; -import org.openrdf.model.Resource; -import org.openrdf.model.Statement; -import org.openrdf.model.URI; -import org.openrdf.model.Value; -import org.openrdf.model.ValueFactory; -import org.openrdf.model.impl.ContextStatementImpl; -import org.openrdf.model.impl.StatementImpl; -import org.openrdf.model.impl.ValueFactoryImpl; - -/** - * A set of Utilities to serialize {@link Statement}s to/from {@link String}s. 
- */ -public class StatementSerializer { - private static String SEP = "\u0000"; - - private static ValueFactory VALUE_FACTORY = new ValueFactoryImpl(); - - /** - * Read a {@link Statement} from a {@link String} - * - * @param in - * the {@link String} to parse - * @return a {@link Statement} - */ - public static Statement readStatement(String in) throws IOException { - String[] parts = in.split(SEP); - - if (parts.length != 4) { - throw new IOException("Not a valid statement: " + in); - } - - String contextString = parts[0]; - String subjectString = parts[1]; - String predicateString = parts[2]; - String objectString = parts[3]; - return readStatement(subjectString, predicateString, objectString, contextString); - } - - public static Statement readStatement(String subjectString, String predicateString, String objectString) { - return readStatement(subjectString, predicateString, objectString, ""); - } - - public static Statement readStatement(String subjectString, String predicateString, String objectString, String contextString) { - Resource subject = createResource(subjectString); - URI predicate = VALUE_FACTORY.createURI(predicateString); - - boolean isObjectLiteral = objectString.startsWith("\""); - - Value object = null; - if (isObjectLiteral) { - object = parseLiteral(objectString); - } else { - object = createResource(objectString); - } - - if (contextString == null || contextString.isEmpty()) { - return new StatementImpl(subject, predicate, object); - } else { - Resource context = VALUE_FACTORY.createURI(contextString); - return new ContextStatementImpl(subject, predicate, object, context); - } - } - - private static Resource createResource(String str) { - if (str.startsWith("_")) { - return VALUE_FACTORY.createBNode(str.substring(2)); - } - return VALUE_FACTORY.createURI(str); - - } - - private static Literal parseLiteral(String fullLiteralString) { - Validate.notNull(fullLiteralString); - Validate.isTrue(fullLiteralString.length() > 1); - - if 
(fullLiteralString.endsWith("\"")) { - String fullLiteralWithoutQuotes = fullLiteralString.substring(1, fullLiteralString.length() - 1); - return VALUE_FACTORY.createLiteral(fullLiteralWithoutQuotes, (String) null); - } else { - - // find the closing quote - int labelEnd = fullLiteralString.lastIndexOf("\""); - - String label = fullLiteralString.substring(1, labelEnd); - - String data = fullLiteralString.substring(labelEnd + 1); - - if (data.startsWith("@")) { - // the data is "language" - String lang = data.substring(1); - return VALUE_FACTORY.createLiteral(label, lang); - } else if (data.startsWith("^^<")) { - // the data is a "datatype" - String datatype = data.substring(3, data.length() - 1); - URI datatypeUri = VALUE_FACTORY.createURI(datatype); - return VALUE_FACTORY.createLiteral(label, datatypeUri); - } - } - return null; - - } - - public static String writeSubject(Statement statement) { - return statement.getSubject().toString(); - } - - public static String writeObject(Statement statement) { - return statement.getObject().toString(); - } - - public static String writePredicate(Statement statement) { - return statement.getPredicate().toString(); - } - - public static String writeSubjectPredicate(Statement statement) { - Validate.notNull(statement); - Validate.notNull(statement.getSubject()); - Validate.notNull(statement.getPredicate()); - return statement.getSubject().toString() + SEP + statement.getPredicate().toString(); - } - - public static String writeContext(Statement statement) { - if (statement.getContext() == null) { - return ""; - } - return statement.getContext().toString(); - } - - /** - * Write a {@link Statement} to a {@link String} - * - * @param statement - * the {@link Statement} to write - * @return a {@link String} representation of the statement - */ - public static String writeStatement(Statement statement) { - Resource subject = statement.getSubject(); - Resource context = statement.getContext(); - URI predicate = 
statement.getPredicate(); - Value object = statement.getObject(); - - Validate.notNull(subject); - Validate.notNull(predicate); - Validate.notNull(object); - - String s = ""; - if (context == null) { - s = SEP + subject.toString() + SEP + predicate.toString() + SEP + object.toString(); - } else { - s = context.toString() + SEP + subject.toString() + SEP + predicate.toString() + SEP + object.toString(); - } - return s; - } - - /** - * Creates a Regular Expression to match serialized statements meeting these constraints. A <code>null</code> or empty parameters imply - * no constraint. A <code>null</code> return value implies no constraints. - * - * @param context - * context constraint - * @param subject - * subject constraint - * @param predicates - * list of predicate constraints - * @return a regular expression that can be used to match serialized statements. A <code>null</code> return value implies no - * constraints. - */ - public static String createStatementRegex(StatementContraints contraints) { - Resource context = contraints.getContext(); - Resource subject = contraints.getSubject(); - Set<URI> predicates = contraints.getPredicates(); - if (context == null && subject == null && (predicates == null || predicates.isEmpty())) { - return null; - } - - // match on anything but a separator - String anyReg = "[^" + SEP + "]*"; - - // if context is empty, match on any context - String contextReg = (context == null) ? anyReg : context.stringValue(); - - // if subject is empty, match on any subject - String subjectReg = (subject == null) ? anyReg : subject.stringValue(); - - // if the predicates are empty, match on any predicate. Otherwise, "or" the predicates. - String predicateReg = ""; - if (predicates == null || predicates.isEmpty()) { - predicateReg = anyReg; - } else { - predicateReg = "(" + StringUtils.join(predicates, "|") + ")"; - } - - return "^" + contextReg + SEP + subjectReg + SEP + predicateReg + SEP + ".*"; - } - -}
