http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/AccumuloFreeTextIndexer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/AccumuloFreeTextIndexer.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/AccumuloFreeTextIndexer.java deleted file mode 100644 index f529569..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/AccumuloFreeTextIndexer.java +++ /dev/null @@ -1,611 +0,0 @@ -package mvm.rya.indexing.accumulo.freetext; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import static mvm.rya.indexing.accumulo.freetext.query.ASTNodeUtils.getNodeIterator; -import info.aduna.iteration.CloseableIteration; - -import java.io.IOException; -import java.nio.charset.CharacterCodingException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; - -import mvm.rya.accumulo.experimental.AbstractAccumuloIndexer; -import mvm.rya.api.domain.RyaStatement; -import mvm.rya.api.resolver.RyaToRdfConversions; -import mvm.rya.indexing.FreeTextIndexer; -import mvm.rya.indexing.StatementContraints; -import mvm.rya.indexing.accumulo.ConfigUtils; -import mvm.rya.indexing.accumulo.Md5Hash; -import mvm.rya.indexing.accumulo.StatementSerializer; -import mvm.rya.indexing.accumulo.freetext.iterators.BooleanTreeIterator; -import mvm.rya.indexing.accumulo.freetext.query.ASTExpression; -import mvm.rya.indexing.accumulo.freetext.query.ASTNodeUtils; -import mvm.rya.indexing.accumulo.freetext.query.ASTSimpleNode; -import mvm.rya.indexing.accumulo.freetext.query.ASTTerm; -import mvm.rya.indexing.accumulo.freetext.query.ParseException; -import mvm.rya.indexing.accumulo.freetext.query.QueryParser; -import mvm.rya.indexing.accumulo.freetext.query.QueryParserTreeConstants; -import mvm.rya.indexing.accumulo.freetext.query.SimpleNode; -import mvm.rya.indexing.accumulo.freetext.query.TokenMgrError; - -import org.apache.accumulo.core.client.AccumuloException; -import org.apache.accumulo.core.client.AccumuloSecurityException; -import org.apache.accumulo.core.client.BatchWriter; -import org.apache.accumulo.core.client.IteratorSetting; -import org.apache.accumulo.core.client.MultiTableBatchWriter; -import org.apache.accumulo.core.client.MutationsRejectedException; -import org.apache.accumulo.core.client.Scanner; -import org.apache.accumulo.core.client.TableExistsException; -import org.apache.accumulo.core.client.TableNotFoundException; -import 
org.apache.accumulo.core.client.admin.TableOperations; -import org.apache.accumulo.core.data.Key; -import org.apache.accumulo.core.data.Mutation; -import org.apache.accumulo.core.data.Range; -import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.file.keyfunctor.ColumnFamilyFunctor; -import org.apache.accumulo.core.iterators.user.IntersectingIterator; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang.Validate; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.Text; -import org.apache.log4j.Logger; -import org.openrdf.model.Literal; -import org.openrdf.model.Statement; -import org.openrdf.model.URI; -import org.openrdf.query.QueryEvaluationException; - -import com.google.common.base.Charsets; - -/** - * The {@link AccumuloFreeTextIndexer} stores and queries "free text" data from statements into tables in Accumulo. Specifically, this class - * stores data into two different Accumulo tables: the <b>document table</b> (default name: triplestore_text) and the <b>terms - * table</b> (default name: triplestore_terms). - * <p> - * The document table stores the document (i.e. a triple statement), document properties, and the terms within the document. This is the - * main table used for processing a text search, using document-partitioned indexing. See {@link IntersectingIterator}. - * <p> - * For each document, the document table will store the following information: - * <P> - * - * <pre> - * Row (partition) | Column Family | Column Qualifier | Value - * ================+================+==================+========== - * shardID | d\x00 | documentHash | Document - * shardID | s\x00Subject | documentHash | (empty) - * shardID | p\x00Predicate | documentHash | (empty) - * shardID | o\x00Object | documentHash | (empty) - * shardID | c\x00Context | documentHash | (empty) - * shardID | t\x00token | documentHash | (empty) - * </pre> - * <p> - * Note: documentHash is an MD5 hash (Base64 encoded) of the Document's Content - * <p> - * The terms table is used for expanding wildcard search terms. For each token in the document table, the terms table will store the following - * information: - * - * <pre> - * Row (partition) | CF/CQ/Value - * ==================+============= - * l\x00token | (empty) - * r\x00Reversetoken | (empty) - * </pre> - * <p> - * There are two prefixes in the table, "token list" (keys with an "l" prefix) and "reverse token list" (keys with an "r" prefix). This table - * uses the "token list" to expand foo* into terms like food, foot, and football. This table uses the "reverse token list" to expand *ar - * into car, bar, and far. - * <p> - * Example: Given these three statements as inputs: - * - * <pre> - * <uri:paul> rdfs:label "paul smith"@en <uri:graph1> - * <uri:steve> rdfs:label "steven anthony miller"@en <uri:graph1> - * <uri:steve> rdfs:label "steve miller"@en <uri:graph1> - * </pre> - * <p> - * Here's what the tables would look like: (Note: the hashes aren't real, the rows are not sorted, and the partition ids will vary.)
- * <p> - * triplestore_text - * - * <pre> - * Row (partition) | Column Family | Column Qualifier | Value - * ================+=================================+==================+========== - * 000000 | d\x00 | 08b3d233a | uri:graph1\x00uri:paul\x00rdfs:label\x00"paul smith"@en - * 000000 | s\x00uri:paul | 08b3d233a | (empty) - * 000000 | p\x00rdfs:label | 08b3d233a | (empty) - * 000000 | o\x00"paul smith"@en | 08b3d233a | (empty) - * 000000 | c\x00uri:graph1 | 08b3d233a | (empty) - * 000000 | t\x00paul | 08b3d233a | (empty) - * 000000 | t\x00smith | 08b3d233a | (empty) - * - * 000000 | d\x00 | 3a575534b | uri:graph1\x00uri:steve\x00rdfs:label\x00"steven anthony miller"@en - * 000000 | s\x00uri:steve | 3a575534b | (empty) - * 000000 | p\x00rdfs:label | 3a575534b | (empty) - * 000000 | o\x00"steven anthony miller"@en | 3a575534b | (empty) - * 000000 | c\x00uri:graph1 | 3a575534b | (empty) - * 000000 | t\x00steven | 3a575534b | (empty) - * 000000 | t\x00anthony | 3a575534b | (empty) - * 000000 | t\x00miller | 3a575534b | (empty) - * - * 000001 | d\x00 | 7bf670d06 | uri:graph1\x00uri:steve\x00rdfs:label\x00"steve miller"@en - * 000001 | s\x00uri:steve | 7bf670d06 | (empty) - * 000001 | p\x00rdfs:label | 7bf670d06 | (empty) - * 000001 | o\x00"steve miller"@en | 7bf670d06 | (empty) - * 000001 | c\x00uri:graph1 | 7bf670d06 | (empty) - * 000001 | t\x00steve | 7bf670d06 | (empty) - * 000001 | t\x00miller | 7bf670d06 | (empty) - * </pre> - * <p> - * triplestore_terms - * <p> - * - * <pre> - * Row (partition) | CF/CQ/Value - * ==================+============= - * l\x00paul | (empty) - * l\x00smith | (empty) - * l\x00steven | (empty) - * l\x00anthony | (empty) - * l\x00miller | (empty) - * l\x00steve | (empty) - * r\x00luap | (empty) - * r\x00htims | (empty) - * r\x00nevets | (empty) - * r\x00ynohtna | (empty) - * r\x00rellim | (empty) - * r\x00evets | (empty) - * - * </pre> - */ -public class AccumuloFreeTextIndexer extends AbstractAccumuloIndexer implements FreeTextIndexer { - private static final Logger logger = Logger.getLogger(AccumuloFreeTextIndexer.class); - - private static final byte[] EMPTY_BYTES = new byte[] {}; - private static final Text EMPTY_TEXT = new Text(EMPTY_BYTES); - private static final Value EMPTY_VALUE = new Value(EMPTY_BYTES); - - private Tokenizer tokenizer; - - private BatchWriter docTableBw; - private BatchWriter termTableBw; - private MultiTableBatchWriter mtbw; - - private int queryTermLimit; - - private int docTableNumPartitions; - - private Set<URI> validPredicates; - - private Configuration conf; - - private boolean isInit = false; - - - private void init() throws AccumuloException, AccumuloSecurityException, TableNotFoundException, - TableExistsException { - String doctable = ConfigUtils.getFreeTextDocTablename(conf); - String termtable = ConfigUtils.getFreeTextTermTablename(conf); - - docTableNumPartitions = ConfigUtils.getFreeTextDocNumPartitions(conf); - int termTableNumPartitions = ConfigUtils.getFreeTextTermNumPartitions(conf); - - TableOperations tableOps = ConfigUtils.getConnector(conf).tableOperations(); - - // Create term table partitions - boolean createdTermTable = ConfigUtils.createTableIfNotExists(conf, termtable); - if (createdTermTable && !ConfigUtils.useMockInstance(conf) && termTableNumPartitions > 0) { - TreeSet<Text> splits = new TreeSet<Text>(); - - // split on the "Term List" and "Reverse Term list" boundary - splits.add(new Text(ColumnPrefixes.getRevTermListColFam(""))); - - // Symmetrically split the "Term List" and "Reverse Term list" - int
numSubpartitions = ((termTableNumPartitions - 1) / 2); - if (numSubpartitions > 0) { - int step = (26 / numSubpartitions); - for (int i = 0; i < numSubpartitions; i++) { - String nextChar = String.valueOf((char) ('a' + (step * i))); - splits.add(new Text(ColumnPrefixes.getTermListColFam(nextChar))); - splits.add(new Text(ColumnPrefixes.getRevTermListColFam(nextChar))); - } - } - tableOps.addSplits(termtable, splits); - } - - // Create document (text) table partitions - boolean createdDocTable = ConfigUtils.createTableIfNotExists(conf, doctable); - if (createdDocTable && !ConfigUtils.useMockInstance(conf)) { - TreeSet<Text> splits = new TreeSet<Text>(); - for (int i = 0; i < docTableNumPartitions; i++) { - splits.add(genPartition(i, docTableNumPartitions)); - } - tableOps.addSplits(doctable, splits); - - // Add a tablet level Bloom filter for the Column Family. - // This will allow us to quickly determine if a term is contained in a tablet. - tableOps.setProperty(doctable, "table.bloom.key.functor", ColumnFamilyFunctor.class.getCanonicalName()); - tableOps.setProperty(doctable, "table.bloom.enabled", Boolean.TRUE.toString()); - } - - mtbw = ConfigUtils.createMultitableBatchWriter(conf); - - docTableBw = mtbw.getBatchWriter(doctable); - termTableBw = mtbw.getBatchWriter(termtable); - - tokenizer = ConfigUtils.getFreeTextTokenizer(conf); - validPredicates = ConfigUtils.getFreeTextPredicates(conf); - - queryTermLimit = ConfigUtils.getFreeTextTermLimit(conf); - } - - - //initialization occurs in setConf because index is created using reflection - @Override - public void setConf(Configuration conf) { - this.conf = conf; - if (!isInit) { - try { - init(); - isInit = true; - } catch (AccumuloException e) { - logger.warn("Unable to initialize index. Throwing Runtime Exception. ", e); - throw new RuntimeException(e); - } catch (AccumuloSecurityException e) { - logger.warn("Unable to initialize index. Throwing Runtime Exception. ", e); - throw new RuntimeException(e); - } catch (TableNotFoundException e) { - logger.warn("Unable to initialize index. Throwing Runtime Exception. ", e); - throw new RuntimeException(e); - } catch (TableExistsException e) { - logger.warn("Unable to initialize index. Throwing Runtime Exception. ", e); - throw new RuntimeException(e); - } - } - } - - @Override - public Configuration getConf() { - return this.conf; - } - - - private void storeStatement(Statement statement) throws IOException { - // if the predicate list is empty, accept all predicates. 
- // Otherwise, make sure the predicate is on the "valid" list - boolean isValidPredicate = validPredicates.isEmpty() || validPredicates.contains(statement.getPredicate()); - - if (isValidPredicate && (statement.getObject() instanceof Literal)) { - - // Get the tokens - String text = statement.getObject().stringValue().toLowerCase(); - SortedSet<String> tokens = tokenizer.tokenize(text); - - if (!tokens.isEmpty()) { - // Get Document Data - String docContent = StatementSerializer.writeStatement(statement); - - String docId = Md5Hash.md5Base64(docContent); - - // Set up the partition - Text partition = genPartition(docContent.hashCode(), docTableNumPartitions); - - Mutation docTableMut = new Mutation(partition); - List<Mutation> termTableMutations = new ArrayList<Mutation>(); - - Text docIdText = new Text(docId); - - // Store the Document Data - docTableMut.put(ColumnPrefixes.DOCS_CF_PREFIX, docIdText, new Value(docContent.getBytes(Charsets.UTF_8))); - - // index the statement parts - docTableMut.put(ColumnPrefixes.getSubjColFam(statement), docIdText, EMPTY_VALUE); - docTableMut.put(ColumnPrefixes.getPredColFam(statement), docIdText, EMPTY_VALUE); - docTableMut.put(ColumnPrefixes.getObjColFam(statement), docIdText, EMPTY_VALUE); - docTableMut.put(ColumnPrefixes.getContextColFam(statement), docIdText, EMPTY_VALUE); - - // index the statement terms - for (String token : tokens) { - // tie the token to the document - docTableMut.put(ColumnPrefixes.getTermColFam(token), docIdText, EMPTY_VALUE); - - // store the term in the term table (useful for wildcard searches) - termTableMutations.add(createEmptyPutMutation(ColumnPrefixes.getTermListColFam(token))); - termTableMutations.add(createEmptyPutMutation(ColumnPrefixes.getRevTermListColFam(token))); - } - - // write the mutations - try { - docTableBw.addMutation(docTableMut); - termTableBw.addMutations(termTableMutations); - } catch (MutationsRejectedException e) { - logger.error("error adding mutation", e); - throw new IOException(e); - } - - } - - } - } - - @Override - public void storeStatement(RyaStatement statement) throws IOException { - storeStatement(RyaToRdfConversions.convertStatement(statement)); - } - - private static Mutation createEmptyPutMutation(Text row) { - Mutation m = new Mutation(row); - m.put(EMPTY_TEXT, EMPTY_TEXT, EMPTY_VALUE); - return m; - } - - private static Text genPartition(int partition, int numPartitions) { - int length = Integer.toString(numPartitions).length(); - return new Text(String.format("%0" + length + "d", Math.abs(partition % numPartitions))); - } - - @Override - public Set<URI> getIndexablePredicates() { - return validPredicates; - } - - /** {@inheritDoc} */ - @Override - public void flush() throws IOException { - try { - mtbw.flush(); - } catch (MutationsRejectedException e) { - logger.error("error flushing the batch writer", e); - throw new IOException(e); - } - } - - /** {@inheritDoc} */ - @Override - public void close() throws IOException { - try { - mtbw.close(); - } catch (MutationsRejectedException e) { - logger.error("error closing the batch writer", e); - throw new IOException(e); - } - } - - private Set<String> unrollWildcard(String string, boolean reverse) throws IOException { - Scanner termTableScan = getScanner(ConfigUtils.getFreeTextTermTablename(conf)); - - Set<String> unrolledTerms = new HashSet<String>(); - - Text queryTerm; - if (reverse) { - String t = StringUtils.removeStart(string, "*").toLowerCase(); - queryTerm = ColumnPrefixes.getRevTermListColFam(t); - } else { - String t = 
StringUtils.removeEnd(string, "*").toLowerCase(); - queryTerm = ColumnPrefixes.getTermListColFam(t); - } - - // perform query and read results - termTableScan.setRange(Range.prefix(queryTerm)); - - for (Entry<Key, Value> e : termTableScan) { - String term = ColumnPrefixes.removePrefix(e.getKey().getRow()).toString(); - if (reverse) { - unrolledTerms.add(StringUtils.reverse(term)); - } else { - unrolledTerms.add(term); - } - } - - if (unrolledTerms.isEmpty()) { - // put in a placeholder term that will never be in the index. - unrolledTerms.add("\1\1\1"); - } - - return unrolledTerms; - } - - private void unrollWildcards(SimpleNode node) throws IOException { - if (node instanceof ASTExpression || node instanceof ASTSimpleNode) { - for (SimpleNode n : getNodeIterator(node)) { - unrollWildcards(n); - } - } else if (node instanceof ASTTerm) { - ASTTerm term = (ASTTerm) node; - boolean isWildTerm = term.getType().equals(ASTTerm.WILDTERM); - boolean isPreWildTerm = term.getType().equals(ASTTerm.PREFIXTERM); - if (isWildTerm || isPreWildTerm) { - Set<String> unrolledTerms = unrollWildcard(term.getTerm(), isPreWildTerm); - - // create a new expression - ASTExpression newExpression = new ASTExpression(QueryParserTreeConstants.JJTEXPRESSION); - newExpression.setType(ASTExpression.OR); - newExpression.setNotFlag(term.isNotFlag()); - - for (String unrolledTerm : unrolledTerms) { - ASTTerm t = new ASTTerm(QueryParserTreeConstants.JJTTERM); - t.setNotFlag(false); - t.setTerm(unrolledTerm); - t.setType(ASTTerm.TERM); - ASTNodeUtils.pushChild(newExpression, t); - } - - // replace "term" node with "expression" node in "term" node parent - SimpleNode parent = (SimpleNode) term.jjtGetParent(); - int index = ASTNodeUtils.getChildIndex(parent, term); - - Validate.isTrue(index >= 0, "child not found in parent"); - - parent.jjtAddChild(newExpression, index); - } - - } else { - throw new IllegalArgumentException("Node is of unknown type: " + node.getClass().getName()); - } - } - - private Scanner getScanner(String tablename) throws IOException { - try { - return ConfigUtils.createScanner(tablename, conf); - } catch (AccumuloException e) { - logger.error("Error connecting to " + tablename); - throw new IOException(e); - } catch (AccumuloSecurityException e) { - logger.error("Error connecting to " + tablename); - throw new IOException(e); - } catch (TableNotFoundException e) { - logger.error("Error connecting to " + tablename); - throw new IOException(e); - } - } - - /** {@inheritDoc} */ - @Override - public CloseableIteration<Statement, QueryEvaluationException> queryText(String query, StatementContraints contraints) - throws IOException { - Scanner docTableScan = getScanner(ConfigUtils.getFreeTextDocTablename(conf)); - - // test the query to see if it parses correctly.
- SimpleNode root = parseQuery(query); - - // unroll any wildcard nodes before it goes to the server - unrollWildcards(root); - - String unrolledQuery = ASTNodeUtils.serializeExpression(root); - - // Add S P O C constraints to query - StringBuilder constrainedQuery = new StringBuilder("(" + unrolledQuery + ")"); - - if (contraints.hasSubject()) { - constrainedQuery.append(" AND "); - constrainedQuery.append(ColumnPrefixes.getSubjColFam(contraints.getSubject().toString()).toString()); - } - if (contraints.hasContext()) { - constrainedQuery.append(" AND "); - constrainedQuery.append(ColumnPrefixes.getContextColFam(contraints.getContext().toString()).toString()); - } - if (contraints.hasPredicates()) { - constrainedQuery.append(" AND ("); - List<String> predicates = new ArrayList<String>(); - for (URI u : contraints.getPredicates()) { - predicates.add(ColumnPrefixes.getPredColFam(u.stringValue()).toString()); - } - constrainedQuery.append(StringUtils.join(predicates, " OR ")); - constrainedQuery.append(")"); - } - - // Verify that the query is a reasonable size - root = parseQuery(constrainedQuery.toString()); - int termCount = ASTNodeUtils.termCount(root); - - if (termCount > queryTermLimit) { - throw new IOException("Query contains too many terms. Term limit: " + queryTermLimit + ". Term Count: " + termCount); - } - - // perform query - docTableScan.clearScanIterators(); - docTableScan.clearColumns(); - - int iteratorPriority = 20; - String iteratorName = "booleanTree"; - IteratorSetting ii = new IteratorSetting(iteratorPriority, iteratorName, BooleanTreeIterator.class); - BooleanTreeIterator.setQuery(ii, constrainedQuery.toString()); - docTableScan.addScanIterator(ii); - docTableScan.setRange(new Range()); - - return getIteratorWrapper(docTableScan); - } - - private static CloseableIteration<Statement, QueryEvaluationException> getIteratorWrapper(final Scanner s) { - - final Iterator<Entry<Key, Value>> i = s.iterator(); - - return new CloseableIteration<Statement, QueryEvaluationException>() { - @Override - public boolean hasNext() { - return i.hasNext(); - } - - @Override - public Statement next() throws QueryEvaluationException { - Entry<Key, Value> entry = i.next(); - Value v = entry.getValue(); - try { - String dataString = Text.decode(v.get(), 0, v.getSize()); - Statement s = StatementSerializer.readStatement(dataString); - return s; - } catch (CharacterCodingException e) { - logger.error("Error decoding value", e); - throw new QueryEvaluationException(e); - } catch (IOException e) { - logger.error("Error deserializing statement", e); - throw new QueryEvaluationException(e); - } - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Remove not implemented"); - } - - @Override - public void close() throws QueryEvaluationException { - s.close(); - } - }; - } - - /** - * Simple adapter that parses the query using {@link QueryParser}. Note: any checked exceptions thrown by {@link QueryParser} are - * re-thrown as {@link IOException}s. - * - * @param query - * @return - * @throws IOException - */ - private static SimpleNode parseQuery(String query) throws IOException { - SimpleNode root = null; - try { - root = QueryParser.parse(query); - } catch (ParseException e) { - logger.error("Parser Exception on Client Side. Query: " + query, e); - throw new IOException(e); - } catch (TokenMgrError e) { - logger.error("Token Manager Exception on Client Side. 
Query: " + query, e); - throw new IOException(e); - } - return root; - } - - - @Override - public String getTableName() { - return ConfigUtils.getFreeTextDocTablename(conf); - } - - -}
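The Javadoc above explains how wildcard terms are expanded against the terms table: a prefix query such as foo* scans rows under the "l\x00" (token list) prefix, while a suffix query such as *ar reverses the stem and scans rows under the "r\x00" (reverse token list) prefix, un-reversing each hit — which is exactly what unrollWildcard() does with Range.prefix. Below is a minimal, self-contained sketch of that lookup, with an in-memory TreeSet standing in for the Accumulo scan over the terms table; the class and method names here are illustrative only, not part of Rya.

    import java.util.SortedSet;
    import java.util.TreeSet;

    /** Illustrative stand-in for the terms-table wildcard expansion (not part of Rya). */
    public class WildcardExpansionSketch {
        // "l\0" and "r\0" column prefixes, as documented in the indexer's Javadoc.
        private static final String TERM_LIST = "l\0";
        private static final String REV_TERM_LIST = "r\0";

        // Sorted rows, emulating the terms table.
        private final TreeSet<String> termTable = new TreeSet<String>();

        void index(String token) {
            termTable.add(TERM_LIST + token);
            termTable.add(REV_TERM_LIST + new StringBuilder(token).reverse().toString());
        }

        /** Expands "foo*" via the token list, or "*ar" via the reverse token list. */
        SortedSet<String> expand(String pattern) {
            boolean suffixQuery = pattern.startsWith("*");
            String stem = pattern.replace("*", "").toLowerCase();
            String rowPrefix = suffixQuery
                    ? REV_TERM_LIST + new StringBuilder(stem).reverse().toString()
                    : TERM_LIST + stem;
            SortedSet<String> hits = new TreeSet<String>();
            // Equivalent of Range.prefix(queryTerm): walk all rows starting with rowPrefix.
            for (String row : termTable.tailSet(rowPrefix)) {
                if (!row.startsWith(rowPrefix)) {
                    break;
                }
                String term = row.substring(2); // strip the two-byte column prefix
                hits.add(suffixQuery ? new StringBuilder(term).reverse().toString() : term);
            }
            return hits;
        }

        public static void main(String[] args) {
            WildcardExpansionSketch sketch = new WildcardExpansionSketch();
            for (String t : new String[] { "paul", "smith", "steve", "steven", "miller", "anthony" }) {
                sketch.index(t);
            }
            System.out.println(sketch.expand("ste*")); // [steve, steven]
            System.out.println(sketch.expand("*er"));  // [miller]
        }
    }

In the real indexer, a lookup that matches nothing falls back to the placeholder term ("\1\1\1") that can never appear in the index, so the downstream intersecting iterator still receives a well-formed query.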
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/ColumnPrefixes.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/ColumnPrefixes.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/ColumnPrefixes.java deleted file mode 100644 index 31666c9..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/ColumnPrefixes.java +++ /dev/null @@ -1,120 +0,0 @@ -package mvm.rya.indexing.accumulo.freetext; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import java.nio.ByteBuffer; -import java.nio.charset.CharacterCodingException; - -import mvm.rya.indexing.accumulo.StatementSerializer; - -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.io.Text; -import org.openrdf.model.Statement; - -/** - * Row ID: shardId - * <p> - * CF: CF Prefix + Term - */ -public class ColumnPrefixes { - public static final Text DOCS_CF_PREFIX = new Text("d\0"); - public static final Text TERM_CF_PREFIX = new Text("t\0"); - public static final Text TERM_LIST_CF_PREFIX = new Text("l\0"); - public static final Text REVERSE_TERM_LIST_CF_PREFIX = new Text("r\0"); - - public static final Text SUBJECT_CF_PREFIX = new Text("s\0"); - public static final Text PREDICATE_CF_PREFIX = new Text("p\0"); - public static final Text OBJECT_CF_PREFIX = new Text("o\0"); - public static final Text CONTEXT_CF_PREFIX = new Text("c\0"); - - private static Text concat(Text prefix, String str) { - Text temp = new Text(prefix); - - try { - ByteBuffer buffer = Text.encode(str, false); - temp.append(buffer.array(), 0, buffer.limit()); - } catch (CharacterCodingException cce) { - throw new IllegalArgumentException(cce); - } - - return temp; - } - - public static Text getTermColFam(String term) { - return concat(TERM_CF_PREFIX, term); - } - - public static Text getTermListColFam(String term) { - return concat(TERM_LIST_CF_PREFIX, term); - } - - public static Text getRevTermListColFam(String term) { - return concat(REVERSE_TERM_LIST_CF_PREFIX, StringUtils.reverse(term)); - } - - public static Text getDocColFam(String term) { - return concat(DOCS_CF_PREFIX, term); - } - - public static Text getSubjColFam(String term) { - return concat(SUBJECT_CF_PREFIX, term); - } - - public static Text getSubjColFam(Statement statement) { - String subj = StatementSerializer.writeSubject(statement); - return getSubjColFam(subj); - } - - public static Text getPredColFam(String term) { - return concat(PREDICATE_CF_PREFIX, term); - } - - public static Text getPredColFam(Statement statement) { - String pred = StatementSerializer.writePredicate(statement); - return 
getPredColFam(pred); - } - - public static Text getObjColFam(String term) { - return concat(OBJECT_CF_PREFIX, term); - } - - public static Text getObjColFam(Statement statement) { - String obj = StatementSerializer.writeObject(statement); - return getObjColFam(obj); - } - - public static Text getContextColFam(String term) { - return concat(CONTEXT_CF_PREFIX, term); - } - - public static Text getContextColFam(Statement statement) { - String cont = StatementSerializer.writeContext(statement); - return getContextColFam(cont); - } - - public static Text removePrefix(Text termWithPrefix) { - Text temp = new Text(); - temp.set(termWithPrefix.getBytes(), 2, termWithPrefix.getLength() - 2); - return temp; - } - -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/FreeTextTupleSet.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/FreeTextTupleSet.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/FreeTextTupleSet.java deleted file mode 100644 index 471870b..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/FreeTextTupleSet.java +++ /dev/null @@ -1,160 +0,0 @@ -package mvm.rya.indexing.accumulo.freetext; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - - -import info.aduna.iteration.CloseableIteration; - -import java.io.IOException; -import java.util.Set; - -import mvm.rya.indexing.FreeTextIndexer; -import mvm.rya.indexing.IndexingExpr; -import mvm.rya.indexing.IteratorFactory; -import mvm.rya.indexing.SearchFunction; -import mvm.rya.indexing.StatementContraints; -import mvm.rya.indexing.external.tupleSet.ExternalTupleSet; - -import org.apache.hadoop.conf.Configuration; -import org.openrdf.model.Statement; -import org.openrdf.model.URI; -import org.openrdf.query.BindingSet; -import org.openrdf.query.QueryEvaluationException; -import org.openrdf.query.algebra.QueryModelVisitor; - -import com.google.common.base.Joiner; - - -//Indexing Node for freetext expressions to be inserted into execution plan -//to delegate freetext portion of query to free text index -public class FreeTextTupleSet extends ExternalTupleSet { - - private Configuration conf; - private FreeTextIndexer freeTextIndexer; - private IndexingExpr filterInfo; - - - public FreeTextTupleSet(IndexingExpr filterInfo, FreeTextIndexer freeTextIndexer) { - this.filterInfo = filterInfo; - this.freeTextIndexer = freeTextIndexer; - this.conf = freeTextIndexer.getConf(); - } - - /** - * {@inheritDoc} - */ - @Override - public Set<String> getBindingNames() { - return filterInfo.getBindingNames(); - } - - /** - * {@inheritDoc} - * <p> - * Note that we need a deep copy for everything that (during optimizations) - * can be altered via {@link #visitChildren(QueryModelVisitor)} - */ - public FreeTextTupleSet clone() { - return new FreeTextTupleSet(filterInfo, freeTextIndexer); - } - - @Override - public double cardinality() { - return 0.0; // No idea how to estimate the cardinality here. - } - - - - - @Override - public String getSignature() { - - return "(FreeTextTuple Projection) " + "variables: " + Joiner.on(", ").join(this.getBindingNames()).replaceAll("\\s+", " "); - } - - - - @Override - public boolean equals(Object other) { - if (other == this) { - return true; - } - if (!(other instanceof FreeTextTupleSet)) { - return false; - } - - FreeTextTupleSet arg = (FreeTextTupleSet) other; - return this.filterInfo.equals(arg.filterInfo); - } - - - @Override - public int hashCode() { - int result = 17; - result = 31*result + filterInfo.hashCode(); - - return result; - } - - - - /** - * Returns an iterator over the result set of the contained {@link IndexingExpr}. - * <p> - * Should be thread-safe (concurrent invocation of this - * method can be expected with some query evaluators).
- */ - @Override - public CloseableIteration<BindingSet, QueryEvaluationException> evaluate(BindingSet bindings) - throws QueryEvaluationException { - - - URI funcURI = filterInfo.getFunction(); - - SearchFunction searchFunction = new SearchFunction() { - - @Override - public CloseableIteration<Statement, QueryEvaluationException> performSearch(String queryText, - StatementContraints contraints) throws QueryEvaluationException { - try { - CloseableIteration<Statement, QueryEvaluationException> statements = freeTextIndexer.queryText( - queryText, contraints); - return statements; - } catch (IOException e) { - throw new QueryEvaluationException(e); - } - } - - @Override - public String toString() { - return "TEXT"; - }; - }; - - if (filterInfo.getArguments().length > 1) { - throw new IllegalArgumentException("Index functions do not support more than one argument."); - } - - String queryText = filterInfo.getArguments()[0].stringValue(); - - return IteratorFactory.getIterator(filterInfo.getSpConstraint(), bindings, queryText, searchFunction); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/LuceneTokenizer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/LuceneTokenizer.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/LuceneTokenizer.java deleted file mode 100644 index abda04a..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/LuceneTokenizer.java +++ /dev/null @@ -1,57 +0,0 @@ -package mvm.rya.indexing.accumulo.freetext; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import java.io.IOException; -import java.io.StringReader; -import java.util.SortedSet; -import java.util.TreeSet; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.util.Version; - -/** - * A {@link Tokenizer} that delegates to Lucene functions - */ -public class LuceneTokenizer implements Tokenizer { - private static final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36); - - @Override - public SortedSet<String> tokenize(String string) { - SortedSet<String> set = new TreeSet<String>(); - try { - TokenStream stream = analyzer.tokenStream(null, new StringReader(string)); - stream.reset(); - while (stream.incrementToken()) { - set.add(stream.getAttribute(CharTermAttribute.class).toString()); - } - } catch (IOException e) { - // not thrown b/c we're using a string reader...
- throw new RuntimeException(e); - } - - return set; - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/SimpleTokenizer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/SimpleTokenizer.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/SimpleTokenizer.java deleted file mode 100644 index e98e676..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/SimpleTokenizer.java +++ /dev/null @@ -1,43 +0,0 @@ -package mvm.rya.indexing.accumulo.freetext; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import java.util.SortedSet; -import java.util.TreeSet; - -/** - * A {@link Tokenizer} that splits on whitespace. - */ -public class SimpleTokenizer implements Tokenizer { - - @Override - public SortedSet<String> tokenize(String string) { - SortedSet<String> set = new TreeSet<String>(); - for (String token : string.split("\\s+")) { - String t = token.trim().toLowerCase(); - if (!t.isEmpty()) { - set.add(t); - } - } - return set; - } -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/Tokenizer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/Tokenizer.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/Tokenizer.java deleted file mode 100644 index 24b40cd..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/Tokenizer.java +++ /dev/null @@ -1,31 +0,0 @@ -package mvm.rya.indexing.accumulo.freetext; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import java.util.SortedSet; - -/** - * A utility that splits a string into tokens.
- */ -public interface Tokenizer { - public SortedSet<String> tokenize(String string); -} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/iterators/AndingIterator.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/iterators/AndingIterator.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/iterators/AndingIterator.java deleted file mode 100644 index 355fe14..0000000 --- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/iterators/AndingIterator.java +++ /dev/null @@ -1,563 +0,0 @@ -package mvm.rya.indexing.accumulo.freetext.iterators; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; - -import org.apache.accumulo.core.client.IteratorSetting; -import org.apache.accumulo.core.data.ArrayByteSequence; -import org.apache.accumulo.core.data.ByteSequence; -import org.apache.accumulo.core.data.Key; -import org.apache.accumulo.core.data.PartialKey; -import org.apache.accumulo.core.data.Range; -import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.iterators.IteratorEnvironment; -import org.apache.accumulo.core.iterators.SortedKeyValueIterator; -import org.apache.accumulo.core.iterators.user.IntersectingIterator; -import org.apache.accumulo.core.util.TextUtil; -import org.apache.commons.codec.binary.Base64; -import org.apache.hadoop.io.Text; -import org.apache.log4j.Logger; - -/** - * Adapted from {@link IntersectingIterator} with very slight modifications. Specifically, the comparator on the TermSource internal class was - * modified to handle exhausted iterators and multiple rows per tablet server. - */ -public class AndingIterator implements SortedKeyValueIterator<Key, Value> { - - protected Text nullText = new Text(); - - protected Text getPartition(Key key) { - return key.getRow(); - } - - protected Text getTerm(Key key) { - return key.getColumnFamily(); - } - - protected Text getDocID(Key key) { - return key.getColumnQualifier(); - } - - protected Key buildKey(Text partition, Text term) { - return new Key(partition, (term == null) ? nullText : term); - } - - protected Key buildKey(Text partition, Text term, Text docID) { - return new Key(partition, (term == null) ? 
nullText : term, docID); - } - - protected Key buildFollowingPartitionKey(Key key) { - return key.followingKey(PartialKey.ROW); - } - - protected static final Logger log = Logger.getLogger(AndingIterator.class); - - protected static class TermSource { - public SortedKeyValueIterator<Key, Value> iter; - public Text term; - public Collection<ByteSequence> seekColfams; - public boolean notFlag; - - public TermSource(TermSource other) { - this.iter = other.iter; - this.term = other.term; - this.notFlag = other.notFlag; - this.seekColfams = other.seekColfams; - } - - public TermSource(SortedKeyValueIterator<Key, Value> iter, Text term) { - this(iter, term, false); - } - - public TermSource(SortedKeyValueIterator<Key, Value> iter, Text term, boolean notFlag) { - this.iter = iter; - this.term = term; - this.notFlag = notFlag; - // The desired column families for this source is the term itself - - // handle the case where the term is null. - if (term == null) { - this.seekColfams = Collections.<ByteSequence> emptyList(); - } else { - this.seekColfams = Collections.<ByteSequence> singletonList(new ArrayByteSequence(term.getBytes(), 0, term.getLength())); - } - } - - public String getTermString() { - return (this.term == null) ? new String("Iterator") : this.term.toString(); - } - } - - TermSource[] sources; - int sourcesCount = 0; - - Range overallRange; - - // query-time settings - protected Text currentPartition = null; - protected Text currentDocID = new Text(emptyByteArray); - static final byte[] emptyByteArray = new byte[0]; - - protected Key topKey = null; - protected Value value = new Value(emptyByteArray); - - public AndingIterator() { - } - - @Override - public SortedKeyValueIterator<Key, Value> deepCopy(IteratorEnvironment env) { - return new AndingIterator(this, env); - } - - private AndingIterator(AndingIterator other, IteratorEnvironment env) { - if (other.sources != null) { - sourcesCount = other.sourcesCount; - sources = new TermSource[sourcesCount]; - for (int i = 0; i < sourcesCount; i++) { - sources[i] = new TermSource(other.sources[i].iter.deepCopy(env), other.sources[i].term); - } - } - } - - @Override - public Key getTopKey() { - return topKey; - } - - @Override - public Value getTopValue() { - // we don't really care about values - return value; - } - - @Override - public boolean hasTop() { - return currentPartition != null; - } - - // precondition: currentRow is not null - private boolean seekOneSource(int sourceID) throws IOException { - // find the next key in the appropriate column family that is at or beyond the cursor (currentRow, currentCQ) - // advance the cursor if this source goes beyond it - // return whether we advanced the cursor - - // within this loop progress must be made in one of the following forms: - // - currentRow or currentCQ must be increased - // - the given source must advance its iterator - // this loop will end when any of the following criteria are met - // - the iterator for the given source is pointing to the key (currentRow, columnFamilies[sourceID], currentCQ) - // - the given source is out of data and currentRow is set to null - // - the given source has advanced beyond the endRow and currentRow is set to null - boolean advancedCursor = false; - - if (sources[sourceID].notFlag) { - while (true) { - if (sources[sourceID].iter.hasTop() == false) { - // an empty column that you are negating is a valid condition - break; - } - // check if we're past the end key - int endCompare = -1; - // we should compare the row to the end of the range - if 
(overallRange.getEndKey() != null) { - endCompare = overallRange.getEndKey().getRow().compareTo(sources[sourceID].iter.getTopKey().getRow()); - if ((!overallRange.isEndKeyInclusive() && endCompare <= 0) || endCompare < 0) { - // an empty column that you are negating is a valid condition - break; - } - } - int partitionCompare = currentPartition.compareTo(getPartition(sources[sourceID].iter.getTopKey())); - // check if this source is already at or beyond currentRow - // if not, then seek to at least the current row - - if (partitionCompare > 0) { - // seek to at least the currentRow - Key seekKey = buildKey(currentPartition, sources[sourceID].term); - sources[sourceID].iter.seek(new Range(seekKey, true, null, false), sources[sourceID].seekColfams, true); - continue; - } - // check if this source has gone beyond currentRow - // if so, this is a valid condition for negation - if (partitionCompare < 0) { - break; - } - // we have verified that the current source is positioned in currentRow - // now we must make sure we're in the right columnFamily in the current row - // Note: Iterators are auto-magically set to the correct columnFamily - if (sources[sourceID].term != null) { - int termCompare = sources[sourceID].term.compareTo(getTerm(sources[sourceID].iter.getTopKey())); - // check if this source is already on the right columnFamily - // if not, then seek forwards to the right columnFamily - if (termCompare > 0) { - Key seekKey = buildKey(currentPartition, sources[sourceID].term, currentDocID); - sources[sourceID].iter.seek(new Range(seekKey, true, null, false), sources[sourceID].seekColfams, true); - continue; - } - // check if this source is beyond the right columnFamily - // if so, then this is a valid condition for negating - if (termCompare < 0) { - break; - } - } - - // we have verified that we are in currentRow and the correct column family - // make sure we are at or beyond columnQualifier - Text docID = getDocID(sources[sourceID].iter.getTopKey()); - int docIDCompare = currentDocID.compareTo(docID); - // If we are past the target, this is a valid result - if (docIDCompare < 0) { - break; - } - // if this source is not yet at the currentCQ then advance in this source - if (docIDCompare > 0) { - // seek forwards - Key seekKey = buildKey(currentPartition, sources[sourceID].term, currentDocID); - sources[sourceID].iter.seek(new Range(seekKey, true, null, false), sources[sourceID].seekColfams, true); - continue; - } - // if we are equal to the target, this is an invalid result. - // Force the entire process to go to the next row. - // We are advancing column 0 because we forced that column to not contain a ! 
- // when we did the init() - if (docIDCompare == 0) { - sources[0].iter.next(); - advancedCursor = true; - break; - } - } - } else { - while (true) { - if (sources[sourceID].iter.hasTop() == false) { - currentPartition = null; - // setting currentRow to null counts as advancing the cursor - return true; - } - // check if we're past the end key - int endCompare = -1; - // we should compare the row to the end of the range - - if (overallRange.getEndKey() != null) { - endCompare = overallRange.getEndKey().getRow().compareTo(sources[sourceID].iter.getTopKey().getRow()); - if ((!overallRange.isEndKeyInclusive() && endCompare <= 0) || endCompare < 0) { - currentPartition = null; - // setting currentRow to null counts as advancing the cursor - return true; - } - } - int partitionCompare = currentPartition.compareTo(getPartition(sources[sourceID].iter.getTopKey())); - // check if this source is already at or beyond currentRow - // if not, then seek to at least the current row - if (partitionCompare > 0) { - // seek to at least the currentRow - Key seekKey = buildKey(currentPartition, sources[sourceID].term); - sources[sourceID].iter.seek(new Range(seekKey, true, null, false), sources[sourceID].seekColfams, true); - continue; - } - // check if this source has gone beyond currentRow - // if so, advance currentRow - if (partitionCompare < 0) { - currentPartition.set(getPartition(sources[sourceID].iter.getTopKey())); - currentDocID.set(emptyByteArray); - advancedCursor = true; - continue; - } - // we have verified that the current source is positioned in currentRow - // now we must make sure we're in the right columnFamily in the current row - // Note: Iterators are auto-magically set to the correct columnFamily - - if (sources[sourceID].term != null) { - int termCompare = sources[sourceID].term.compareTo(getTerm(sources[sourceID].iter.getTopKey())); - // check if this source is already on the right columnFamily - // if not, then seek forwards to the right columnFamily - if (termCompare > 0) { - Key seekKey = buildKey(currentPartition, sources[sourceID].term, currentDocID); - sources[sourceID].iter.seek(new Range(seekKey, true, null, false), sources[sourceID].seekColfams, true); - continue; - } - // check if this source is beyond the right columnFamily - // if so, then seek to the next row - if (termCompare < 0) { - // we're out of entries in the current row, so seek to the next one - // byte[] currentRowBytes = currentRow.getBytes(); - // byte[] nextRow = new byte[currentRowBytes.length + 1]; - // System.arraycopy(currentRowBytes, 0, nextRow, 0, currentRowBytes.length); - // nextRow[currentRowBytes.length] = (byte)0; - // // we should reuse text objects here - // sources[sourceID].seek(new Key(new Text(nextRow),columnFamilies[sourceID])); - if (endCompare == 0) { - // we're done - currentPartition = null; - // setting currentRow to null counts as advancing the cursor - return true; - } - Key seekKey = buildFollowingPartitionKey(sources[sourceID].iter.getTopKey()); - sources[sourceID].iter.seek(new Range(seekKey, true, null, false), sources[sourceID].seekColfams, true); - continue; - } - } - // we have verified that we are in currentRow and the correct column family - // make sure we are at or beyond columnQualifier - Text docID = getDocID(sources[sourceID].iter.getTopKey()); - int docIDCompare = currentDocID.compareTo(docID); - // if this source has advanced beyond the current column qualifier then advance currentCQ and return true - if (docIDCompare < 0) { - currentDocID.set(docID); - 
advancedCursor = true; - break; - } - // if this source is not yet at the currentCQ then seek in this source - if (docIDCompare > 0) { - // seek forwards - Key seekKey = buildKey(currentPartition, sources[sourceID].term, currentDocID); - sources[sourceID].iter.seek(new Range(seekKey, true, null, false), sources[sourceID].seekColfams, true); - continue; - } - // this source is at the current row, in its column family, and at currentCQ - break; - } - } - return advancedCursor; - } - - @Override - public void next() throws IOException { - if (currentPartition == null) { - return; - } - // precondition: the current row is set up and the sources all have the same column qualifier - // while we don't have a match, seek in the source with the smallest column qualifier - sources[0].iter.next(); - advanceToIntersection(); - } - - protected void advanceToIntersection() throws IOException { - boolean cursorChanged = true; - while (cursorChanged) { - // seek all of the sources to at least the highest seen column qualifier in the current row - cursorChanged = false; - for (int i = 0; i < sourcesCount; i++) { - if (currentPartition == null) { - topKey = null; - return; - } - if (seekOneSource(i)) { - cursorChanged = true; - break; - } - } - } - topKey = buildKey(currentPartition, nullText, currentDocID); - } - - public static String stringTopKey(SortedKeyValueIterator<Key, Value> iter) { - if (iter.hasTop()) - return iter.getTopKey().toString(); - return ""; - } - - private static final String columnFamiliesOptionName = "columnFamilies"; - private static final String notFlagOptionName = "notFlag"; - - /** - * @param columns - * @return encoded columns - * @deprecated since 1.4. To be made protected. Do not interact with flags string directly, just use - * {@link #setColumnFamilies(IteratorSetting, Text[], boolean[])}. - */ - public static String encodeColumns(Text[] columns) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < columns.length; i++) { - sb.append(new String(Base64.encodeBase64(TextUtil.getBytes(columns[i])))); - sb.append('\n'); - } - return sb.toString(); - } - - /** - * @param flags - * @return encoded flags - * @deprecated since 1.4. To be made protected. Do not interact with flags string directly, just use - * {@link #setColumnFamilies(IteratorSetting, Text[], boolean[])}. - */ - public static String encodeBooleans(boolean[] flags) { - byte[] bytes = new byte[flags.length]; - for (int i = 0; i < flags.length; i++) { - if (flags[i]) - bytes[i] = 1; - else - bytes[i] = 0; - } - return new String(Base64.encodeBase64(bytes)); - } - - protected static Text[] decodeColumns(String columns) { - String[] columnStrings = columns.split("\n"); - Text[] columnTexts = new Text[columnStrings.length]; - for (int i = 0; i < columnStrings.length; i++) { - columnTexts[i] = new Text(Base64.decodeBase64(columnStrings[i].getBytes())); - } - return columnTexts; - } - - /** - * to be made protected - * - * @param flags - * @return decoded flags - * @deprecated since 1.4. To be made protected. Do not interact with flags string directly, just use - * {@link #setColumnFamilies(IteratorSetting, Text[], boolean[])}. 
- */ - public static boolean[] decodeBooleans(String flags) { - // return null if there were no flags - if (flags == null) - return null; - - byte[] bytes = Base64.decodeBase64(flags.getBytes()); - boolean[] bFlags = new boolean[bytes.length]; - for (int i = 0; i < bytes.length; i++) { - if (bytes[i] == 1) - bFlags[i] = true; - else - bFlags[i] = false; - } - return bFlags; - } - - @Override - public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException { - Text[] terms = decodeColumns(options.get(columnFamiliesOptionName)); - boolean[] notFlag = decodeBooleans(options.get(notFlagOptionName)); - - if (terms.length < 2) { - throw new IllegalArgumentException("IntersectionIterator requires two or more column families"); - } - - // Scan the not flags. - // There must be at least one term that isn't negated - // And we are going to re-order such that the first term is not a ! term - if (notFlag == null) { - notFlag = new boolean[terms.length]; - for (int i = 0; i < terms.length; i++) - notFlag[i] = false; - } - if (notFlag[0]) { - for (int i = 1; i < notFlag.length; i++) { - if (notFlag[i] == false) { - Text swapFamily = new Text(terms[0]); - terms[0].set(terms[i]); - terms[i].set(swapFamily); - notFlag[0] = false; - notFlag[i] = true; - break; - } - } - if (notFlag[0]) { - throw new IllegalArgumentException("IntersectionIterator requires at least one column family that is not negated"); - } - } - - sources = new TermSource[terms.length]; - sources[0] = new TermSource(source, terms[0]); - for (int i = 1; i < terms.length; i++) { - sources[i] = new TermSource(source.deepCopy(env), terms[i], notFlag[i]); - } - sourcesCount = terms.length; - } - - @Override - public void seek(Range range, Collection<ByteSequence> seekColumnFamilies, boolean inclusive) throws IOException { - overallRange = new Range(range); - currentPartition = new Text(); - currentDocID.set(emptyByteArray); - - // seek each of the sources to the right column family within the row given by key - for (int i = 0; i < sourcesCount; i++) { - Key sourceKey; - if (range.getStartKey() != null) { - if (range.getStartKey().getColumnQualifier() != null) { - sourceKey = buildKey(getPartition(range.getStartKey()), sources[i].term, range.getStartKey().getColumnQualifier()); - } else { - sourceKey = buildKey(getPartition(range.getStartKey()), sources[i].term); - } - // Seek only to the term for this source as a column family - sources[i].iter.seek(new Range(sourceKey, true, null, false), sources[i].seekColfams, true); - } else { - // Seek only to the term for this source as a column family - sources[i].iter.seek(range, sources[i].seekColfams, true); - } - } - advanceToIntersection(); - } - - public void addSource(SortedKeyValueIterator<Key, Value> source, IteratorEnvironment env, Text term, boolean notFlag) { - // Check if we have space for the added Source - if (sources == null) { - sources = new TermSource[1]; - } else { - // allocate space for node, and copy current tree. - // TODO: Should we change this to an ArrayList so that we can just add() ? - TermSource[] localSources = new TermSource[sources.length + 1]; - int currSource = 0; - for (TermSource myTerm : sources) { - // TODO: Do I need to call new here? or can I just re-use the term? 
-                localSources[currSource] = new TermSource(myTerm);
-                currSource++;
-            }
-            sources = localSources;
-        }
-        sources[sourcesCount] = new TermSource(source.deepCopy(env), term, notFlag);
-        sourcesCount++;
-    }
-
-    /**
-     * Encode the columns to be used when iterating.
-     *
-     * @param cfg
-     * @param columns
-     */
-    public static void setColumnFamilies(IteratorSetting cfg, Text[] columns) {
-        if (columns.length < 2)
-            throw new IllegalArgumentException("Must supply at least two terms to intersect");
-        cfg.addOption(AndingIterator.columnFamiliesOptionName, AndingIterator.encodeColumns(columns));
-    }
-
-    /**
-     * Encode columns and NOT flags indicating which columns should be negated (docIDs will be excluded if matching negated columns, instead
-     * of included).
-     *
-     * @param cfg
-     * @param columns
-     * @param notFlags
-     */
-    public static void setColumnFamilies(IteratorSetting cfg, Text[] columns, boolean[] notFlags) {
-        if (columns.length < 2)
-            throw new IllegalArgumentException("Must supply at least two terms to intersect");
-        if (columns.length != notFlags.length)
-            throw new IllegalArgumentException("columns and notFlags arrays must be the same length");
-        setColumnFamilies(cfg, columns);
-        cfg.addOption(AndingIterator.notFlagOptionName, AndingIterator.encodeBooleans(notFlags));
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/iterators/BooleanTreeIterator.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/iterators/BooleanTreeIterator.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/iterators/BooleanTreeIterator.java
deleted file mode 100644
index a69b78a..0000000
--- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/iterators/BooleanTreeIterator.java
+++ /dev/null
@@ -1,322 +0,0 @@
-package mvm.rya.indexing.accumulo.freetext.iterators;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-
-import static mvm.rya.indexing.accumulo.freetext.query.ASTNodeUtils.allChildrenAreNot;
-import static mvm.rya.indexing.accumulo.freetext.query.ASTNodeUtils.findFirstNonNotChild;
-import static mvm.rya.indexing.accumulo.freetext.query.ASTNodeUtils.getNodeIterator;
-import static mvm.rya.indexing.accumulo.freetext.query.ASTNodeUtils.isNotFlag;
-import static mvm.rya.indexing.accumulo.freetext.query.ASTNodeUtils.pushChild;
-import static mvm.rya.indexing.accumulo.freetext.query.ASTNodeUtils.swapChildren;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.NoSuchElementException;
-
-import mvm.rya.indexing.accumulo.freetext.ColumnPrefixes;
-import mvm.rya.indexing.accumulo.freetext.query.ASTExpression;
-import mvm.rya.indexing.accumulo.freetext.query.ASTTerm;
-import mvm.rya.indexing.accumulo.freetext.query.ParseException;
-import mvm.rya.indexing.accumulo.freetext.query.QueryParser;
-import mvm.rya.indexing.accumulo.freetext.query.QueryParserTreeConstants;
-import mvm.rya.indexing.accumulo.freetext.query.SimpleNode;
-import mvm.rya.indexing.accumulo.freetext.query.TokenMgrError;
-
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.data.ByteSequence;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.IteratorEnvironment;
-import org.apache.accumulo.core.iterators.OptionDescriber;
-import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
-import org.apache.accumulo.core.iterators.system.MultiIterator;
-import org.apache.commons.lang.Validate;
-import org.apache.hadoop.io.Text;
-import org.apache.log4j.Logger;
-
-public class BooleanTreeIterator implements SortedKeyValueIterator<Key, Value>, OptionDescriber {
-    private static final Logger logger = Logger.getLogger(BooleanTreeIterator.class);
-
-    private static final String queryOptionName = "query";
-
-    private SortedKeyValueIterator<Key, Value> iter;
-    private SortedKeyValueIterator<Key, Value> docSource;
-
-    @Override
-    public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
-
-        // pull out the query
-        String query = options.get(queryOptionName);
-
-        // create the parse tree
-        SimpleNode root;
-        try {
-            root = QueryParser.parse(query);
-        } catch (ParseException e) {
-            // log and wrap in IOException
-            logger.error("ParseException encountered while parsing: " + query, e);
-            throw new IOException(e);
-        } catch (TokenMgrError e) {
-            // log and wrap in IOException
-            logger.error("TokenMgrError encountered while parsing: " + query, e);
-            throw new IOException(e);
-        }
-
-        docSource = source.deepCopy(env);
-        iter = createIterator((SimpleNode) root.jjtGetChild(0), source, env);
-    }
-
-    private SortedKeyValueIterator<Key, Value> createIterator(SimpleNode root, SortedKeyValueIterator<Key, Value> source,
-            IteratorEnvironment env) {
-        // if the root is only a single term, wrap it in an expression node
-        if (root instanceof ASTTerm) {
-            ASTExpression expression = new ASTExpression(QueryParserTreeConstants.JJTEXPRESSION);
-            expression.setNotFlag(false);
-            expression.setType(ASTExpression.AND);
-
-            pushChild(expression, root);
-            root.jjtSetParent(expression);
-
-            root = expression;
-        }
-
-        // Pre-process the tree to compensate for iterator-specific issues with certain topologies
-        preProcessTree(root);
-
-        // Build an iterator tree
-        return createIteratorRecursive(root, source, env);
-    }
-
-    private SortedKeyValueIterator<Key, Value> createIteratorRecursive(SimpleNode node, SortedKeyValueIterator<Key, Value> source,
-            IteratorEnvironment env) {
-
-        Validate.isTrue(node instanceof ASTExpression, "node must be of type ASTExpression. Node is instance of "
-                + node.getClass().getName());
-
-        ASTExpression expression = (ASTExpression) node;
-
-        if (expression.getType().equals(ASTExpression.AND)) {
-            return getAndIterator(node, source, env);
-        }
-
-        if (expression.getType().equals(ASTExpression.OR)) {
-            return getOrIterator(node, source, env);
-        }
-
-        throw new IllegalArgumentException("Expression is of unknown type: " + expression.getType());
-
-    }
-
-    private MultiIterator getOrIterator(SimpleNode node, SortedKeyValueIterator<Key, Value> source, IteratorEnvironment env) {
-        List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<SortedKeyValueIterator<Key, Value>>();
-
-        for (SimpleNode n : getNodeIterator(node)) {
-            if (n instanceof ASTExpression) {
-                iters.add(createIteratorRecursive(n, source, env));
-            } else if (n instanceof ASTTerm) {
-                iters.add(getSimpleAndingIterator((ASTTerm) n, source, env));
-            } else {
-                throw new IllegalArgumentException("Node is of unknown type: " + n.getClass().getName());
-            }
-        }
-
-        return new MultiIterator(iters, new Range());
-    }
-
-    private AndingIterator getAndIterator(SimpleNode node, SortedKeyValueIterator<Key, Value> source, IteratorEnvironment env) {
-
-        AndingIterator anding = new AndingIterator();
-
-        for (SimpleNode n : getNodeIterator(node)) {
-            boolean isNotFlag = isNotFlag(n);
-            if (n instanceof ASTExpression) {
-                anding.addSource(createIteratorRecursive(n, source, env), env, null, isNotFlag);
-            } else if (n instanceof ASTTerm) {
-                ASTTerm term = ((ASTTerm) n);
-                anding.addSource(source, env, getTermColFam(term), isNotFlag);
-            } else {
-                throw new IllegalArgumentException("Node is of unknown type: " + n.getClass().getName());
-            }
-        }
-
-        return anding;
-    }
-
-    private static Text getTermColFam(ASTTerm termnode) {
-        String term = termnode.getTerm();
-        if (term == null) {
-            // if the term is null, then we want all of the documents
-            return ColumnPrefixes.DOCS_CF_PREFIX;
-        }
-        if (term.contains("\0")) {
-            // if the term contains a null char, then it's already formatted as a CF
-            return new Text(term);
-        }
-
-        // otherwise, point to the term CF
-        return ColumnPrefixes.getTermColFam(term.toLowerCase());
-    }
-
-    private AndingIterator getSimpleAndingIterator(ASTTerm node, SortedKeyValueIterator<Key, Value> source, IteratorEnvironment env) {
-        Validate.isTrue(!node.isNotFlag(), "Simple Anding node must not have \"not\" flag set");
-
-        AndingIterator anding = new AndingIterator();
-        anding.addSource(source, env, getTermColFam(node), false);
-        return anding;
-    }
-
-    /**
-     * Handle "lonely nots" (i.e. expressions with only nots), "or" statements containing nots, and make sure that the first term in an
-     * "and" statement is not a not. This is due to implementation-specific limitations of the iterators.
-     * <p>
-     * For example:
-     * <ul>
-     * <li>lonely nots: (!a & !b) -> [all] & !a & !b</li>
-     * <li>"or" nots: (!a | b) -> ( ([all] & !a) | b)</li>
-     * <li>reorder "and" nots: (!a & b) -> ( b & !a )</li>
-     * </ul>
-     **/
-    public static void preProcessTree(SimpleNode s) {
-        for (SimpleNode child : getNodeIterator(s)) {
-            preProcessTree(child);
-        }
-
-        if (s instanceof ASTExpression) {
-            ASTExpression expression = (ASTExpression) s;
-
-            if (expression.getType().equals(ASTExpression.AND)) {
-                if (allChildrenAreNot(expression)) {
-                    // lonely nots: (!a & !b) -> [all] & !a & !b
-                    ASTTerm allDocsTerm = createAllDocTermNode();
-                    pushChild(expression, allDocsTerm);
-                } else if (isNotFlag(expression.jjtGetChild(0))) {
-                    // reorder "and" nots: (!a & b) -> ( b & !a )
-                    int firstNonNotChild = findFirstNonNotChild(expression);
-                    swapChildren(expression, 0, firstNonNotChild);
-                }
-            }
-
-            if (expression.getType().equals(ASTExpression.OR)) {
-                for (int i = 0; i < expression.jjtGetNumChildren(); i++) {
-                    SimpleNode child = (SimpleNode) expression.jjtGetChild(i);
-                    if (isNotFlag(child)) {
-                        // "or" nots: (!a | b) -> ( ([all] & !a) | b)
-                        // create the new expression
-                        ASTExpression newExpression = new ASTExpression(QueryParserTreeConstants.JJTEXPRESSION);
-                        newExpression.setNotFlag(false);
-                        newExpression.setType(ASTExpression.AND);
-                        pushChild(newExpression, child);
-                        pushChild(newExpression, createAllDocTermNode());
-
-                        // tie the new expression to the old one
-                        newExpression.jjtSetParent(expression);
-                        expression.jjtAddChild(newExpression, i);
-                    }
-                }
-            }
-        }
-
-    }
-
-    public static ASTTerm createAllDocTermNode() {
-        ASTTerm t = new ASTTerm(QueryParserTreeConstants.JJTTERM);
-        t.setNotFlag(false);
-        t.setType(ASTTerm.TERM);
-        // note: a "null" signifies "all docs" should be returned.
-        t.setTerm(null);
-        return t;
-    }
-
-    @Override
-    public boolean hasTop() {
-        return iter.hasTop();
-    }
-
-    @Override
-    public void next() throws IOException {
-        iter.next();
-        if (iter.hasTop()) {
-            seekDocSource(iter.getTopKey());
-        }
-    }
-
-    @Override
-    public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
-        iter.seek(range, columnFamilies, inclusive);
-        if (iter.hasTop()) {
-            seekDocSource(iter.getTopKey());
-        }
-    }
-
-    private void seekDocSource(Key key) throws IOException {
-        Key docKey = new Key(key.getRow(), ColumnPrefixes.DOCS_CF_PREFIX, key.getColumnQualifier());
-        docSource.seek(new Range(docKey, true, null, false), Collections.<ByteSequence> emptyList(), false);
-    }
-
-    @Override
-    public Key getTopKey() {
-        // from intersecting iterator:
-        // RowID: shardID
-        // CF: (empty)
-        // CQ: docID
-        return iter.getTopKey();
-    }
-
-    @Override
-    public Value getTopValue() {
-        if (!iter.hasTop()) {
-            throw new NoSuchElementException();
-        }
-
-        return docSource.getTopValue();
-    }
-
-    @Override
-    public SortedKeyValueIterator<Key, Value> deepCopy(IteratorEnvironment env) {
-        throw new UnsupportedOperationException();
-    }
-
-    public static void setQuery(IteratorSetting cfg, String query) {
-        cfg.addOption(BooleanTreeIterator.queryOptionName, query);
-    }
-
-    @Override
-    public IteratorOptions describeOptions() {
-        return new IteratorOptions("FreeTextBooleanTree", "Perform a FreeText query on a properly formatted table",
-                Collections.singletonMap(queryOptionName, "the free text query"),
-                null);
-    }
-
-    @Override
-    public boolean validateOptions(Map<String, String> options) {
-        String q = options.get(queryOptionName);
-        if (q == null || q.isEmpty())
-            throw new IllegalArgumentException(queryOptionName + " must not be empty");
-        return true;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/query/ASTExpression.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/query/ASTExpression.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/query/ASTExpression.java
deleted file mode 100644
index 95783e5..0000000
--- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/query/ASTExpression.java
+++ /dev/null
@@ -1,63 +0,0 @@
-package mvm.rya.indexing.accumulo.freetext.query;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-
-/**
- * This is a slightly modified version of the ASTExpression file created by JavaCC. This version adds more state to the standard generated
- * file, including a "type" and a "notFlag".
- */
-public class ASTExpression extends SimpleNode {
-    public static final String AND = "AND";
-    public static final String OR = "OR";
-
-    private String type = "";
-    private boolean notFlag = false;
-
-    public ASTExpression(int id) {
-        super(id);
-    }
-
-    public ASTExpression(QueryParser p, int id) {
-        super(p, id);
-    }
-
-    public void setType(String type) {
-        this.type = type;
-    }
-
-    public String getType() {
-        return type;
-    }
-
-    public boolean isNotFlag() {
-        return notFlag;
-    }
-
-    public void setNotFlag(boolean notFlag) {
-        this.notFlag = notFlag;
-    }
-
-    @Override
-    public String toString() {
-        return super.toString() + " [type: " + type + ", notFlag: " + notFlag + "]";
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5a03ef61/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/query/ASTNodeUtils.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/query/ASTNodeUtils.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/query/ASTNodeUtils.java
deleted file mode 100644
index 27edaac..0000000
--- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/freetext/query/ASTNodeUtils.java
+++ /dev/null
@@ -1,210 +0,0 @@
-package mvm.rya.indexing.accumulo.freetext.query;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.NoSuchElementException;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.lang.Validate;
-
-public class ASTNodeUtils {
-
-    /**
-     * Serialize a node (and its children) to a parsable string.
-     *
-     * @param s
-     * @return
-     */
-    public static String serializeExpression(Node s) {
-        if (s instanceof ASTTerm) {
-            ASTTerm a = (ASTTerm) s;
-            return (a.isNotFlag() ? "!" : "") + " " + a.getTerm();
-        }
-
-        String prefix = "";
-        String suffix = "";
-        String join = " ";
-        if (s instanceof ASTExpression) {
-            ASTExpression a = (ASTExpression) s;
-            prefix = (a.isNotFlag() ? "!" : "") + "(";
-            suffix = ")";
-            join = " " + a.getType() + " ";
-        }
-
-        List<String> children = new ArrayList<String>();
-        for (int i = 0; i < s.jjtGetNumChildren(); i++) {
-            children.add(serializeExpression(s.jjtGetChild(i)));
-        }
-        return prefix + StringUtils.join(children, join) + suffix;
-
-    }
-
-    /**
-     * Count the number of terms in this query tree.
-     *
-     * @param node
-     * @return
-     */
-    public static int termCount(Node node) {
-        // note: ASTTerm must be checked before SimpleNode; ASTTerm extends SimpleNode, so the
-        // SimpleNode branch would otherwise shadow it and every term would count as zero
-        if (node instanceof ASTTerm) {
-            return 1;
-        } else if (node instanceof SimpleNode) {
-            int count = 0;
-            for (SimpleNode n : getNodeIterator((SimpleNode) node)) {
-                count += termCount(n);
-            }
-            return count;
-        } else {
-            throw new IllegalArgumentException("Node is of unknown type: " + node.getClass().getName());
-        }
-    }
-
-    /**
-     * Add the child as the parent's first child.
-     *
-     * @param parent
-     * @param child
-     */
-    public static void pushChild(SimpleNode parent, SimpleNode child) {
-        // note: this implementation is tightly coupled to the SimpleNode jjt implementation
-        int parentSize = parent.jjtGetNumChildren();
-
-        // expand the parent node
-        parent.jjtAddChild(null, parentSize);
-
-        // get the current head child
-        Node currentHeadChild = parent.jjtGetChild(0);
-
-        // set the parameter as the parent's first child
-        parent.jjtAddChild(child, 0);
-
-        // add the former head child to the end of the list
-        if (currentHeadChild != null) {
-            parent.jjtAddChild(currentHeadChild, parentSize);
-        }
-
-        // tie the child to the parent
-        child.jjtSetParent(parent);
-
-    }
-
-    /**
-     * Get the index of the child, or -1 if the child is not found.
-     *
-     * @param parent
-     * @param child
-     */
-    public static int getChildIndex(SimpleNode parent, SimpleNode child) {
-        int parentSize = parent.jjtGetNumChildren();
-
-        for (int i = 0; i < parentSize; i++) {
-            if (child.equals(parent.jjtGetChild(i))) {
-                return i;
-            }
-        }
-
-        return -1;
-    }
-
-    /**
-     * Return true if all of the node's children have the not flag enabled.
-     *
-     * @param node
-     * @return
-     */
-    public static boolean allChildrenAreNot(ASTExpression node) {
-        for (SimpleNode child : getNodeIterator(node)) {
-            if (!isNotFlag(child)) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    /**
-     * Return the node's not-flag value. The node must be of type {@link ASTTerm} or {@link ASTExpression}.
-     *
-     * @param node
-     * @return
-     */
-    public static boolean isNotFlag(Node node) {
-        if (node instanceof ASTExpression) {
-            return ((ASTExpression) node).isNotFlag();
-        } else if (node instanceof ASTTerm) {
-            return ((ASTTerm) node).isNotFlag();
-        } else {
-            throw new IllegalArgumentException("Node is of unknown type: " + node.getClass().getName());
-        }
-    }
-
-    public static Iterable<SimpleNode> getNodeIterator(final SimpleNode n) {
-        return new Iterable<SimpleNode>() {
-
-            @Override
-            public Iterator<SimpleNode> iterator() {
-                return new Iterator<SimpleNode>() {
-                    int pointer = 0;
-
-                    @Override
-                    public boolean hasNext() {
-                        return pointer < n.jjtGetNumChildren();
-                    }
-
-                    @Override
-                    public SimpleNode next() {
-                        Node rtn = n.jjtGetChild(pointer);
-                        pointer++;
-                        return (SimpleNode) rtn;
-                    }
-
-                    @Override
-                    public void remove() {
-                        throw new UnsupportedOperationException();
-                    }
-                };
-            }
-        };
-    }
-
-    public static void swapChildren(ASTExpression parent, int childOneIndex, int childTwoIndex) {
-        Validate.isTrue(childOneIndex > -1 && childOneIndex < parent.jjtGetNumChildren());
-        Validate.isTrue(childTwoIndex > -1 && childTwoIndex < parent.jjtGetNumChildren());
-
-        Node childOne = parent.jjtGetChild(childOneIndex);
-        Node childTwo = parent.jjtGetChild(childTwoIndex);
-        parent.jjtAddChild(childOne, childTwoIndex);
-        parent.jjtAddChild(childTwo, childOneIndex);
-    }
-
-    public static int findFirstNonNotChild(ASTExpression expression) {
-        for (int i = 0; i < expression.jjtGetNumChildren(); i++) {
-            if (!isNotFlag(expression.jjtGetChild(i))) {
-                return i;
-            }
-        }
-        return -1;
-    }
-
-}
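
For readers tracing how the deleted classes fit together, here is a minimal client-side sketch of running a free-text query through the BooleanTreeIterator. Only BooleanTreeIterator.setQuery(...) comes from the deleted source above; the Connector, table name, iterator priority/name, and query string are illustrative assumptions, not part of the removed API.

import java.util.Map.Entry;

import mvm.rya.indexing.accumulo.freetext.iterators.BooleanTreeIterator;

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;

public class FreeTextScanSketch {
    public static void scan(Connector connector, String documentTable) throws Exception {
        Scanner scanner = connector.createScanner(documentTable, new Authorizations());
        // priority 20 and the name "booleanTree" are arbitrary choices for this sketch
        IteratorSetting cfg = new IteratorSetting(20, "booleanTree", BooleanTreeIterator.class);
        // query syntax per the preProcessTree Javadoc examples above, e.g. "!a & b"
        BooleanTreeIterator.setQuery(cfg, "paul & smith");
        scanner.addScanIterator(cfg);
        for (Entry<Key, Value> entry : scanner) {
            // per getTopKey() above: row = shardID, CQ = docID; the value is the matching document
            System.out.println(entry.getKey() + " -> " + entry.getValue());
        }
    }
}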

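The AndingIterator's option encoding is simple enough to sanity-check in isolation: each column family becomes one Base64 token, newline-joined, and decoding splits and reverses that. The standalone re-implementation below (class and method names are mine, using plain commons-codec Base64 rather than the iterator's helpers) demonstrates that the encoding round-trips.

import java.util.Arrays;

import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.io.Text;

public class ColumnOptionCodecSketch {
    // one Base64 token per column, newline-terminated, mirroring AndingIterator.encodeColumns
    static String encodeColumns(Text[] columns) {
        StringBuilder sb = new StringBuilder();
        for (Text column : columns) {
            // Text's backing array may be longer than the value, so copy only the valid bytes
            byte[] bytes = Arrays.copyOf(column.getBytes(), column.getLength());
            sb.append(new String(Base64.encodeBase64(bytes))).append('\n');
        }
        return sb.toString();
    }

    // split on newlines and Base64-decode each token, mirroring AndingIterator.decodeColumns
    static Text[] decodeColumns(String encoded) {
        String[] tokens = encoded.split("\n");
        Text[] columns = new Text[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
            columns[i] = new Text(Base64.decodeBase64(tokens[i].getBytes()));
        }
        return columns;
    }

    public static void main(String[] args) {
        Text[] terms = { new Text("food"), new Text("foot") };
        Text[] roundTripped = decodeColumns(encodeColumns(terms));
        System.out.println(Arrays.equals(terms, roundTripped)); // prints: true
    }
}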