[GitHub] lucene-solr pull request #345: LUCENE-8229: Add Weight.matches() method

jpountz Fri, 06 Apr 2018 05:40:39 -0700

Github user jpountz commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/345#discussion_r179742139
  
    --- Diff: lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java ---
    @@ -0,0 +1,160 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.lucene.search;
    +
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.List;
    +
    +import org.apache.lucene.index.LeafReaderContext;
    +import org.apache.lucene.index.PostingsEnum;
    +import org.apache.lucene.index.Term;
    +import org.apache.lucene.index.Terms;
    +import org.apache.lucene.index.TermsEnum;
    +import org.apache.lucene.util.BytesRef;
    +import org.apache.lucene.util.BytesRefIterator;
    +import org.apache.lucene.util.PriorityQueue;
    +
    +/**
    + * A {@link MatchesIterator} that combines matches from a set of sub-iterators
    + *
    + * Matches are sorted by their start positions, and then by their end positions, so that
    + * prefixes sort first.  Matches may overlap, or be duplicated if they appear in more
    + * than one of the sub-iterators.
    + */
    +public final class DisjunctionMatchesIterator implements MatchesIterator {
    +
    +  /**
    +   * Create a {@link DisjunctionMatchesIterator} over a list of terms
    +   *
    +   * Only terms that have at least one match in the given document will be included
    +   */
    +  public static DisjunctionMatchesIterator fromTerms(LeafReaderContext context, int doc, String field, List<Term> terms) throws IOException {
    +    return fromTermsEnum(context, doc, field, asBytesRefIterator(terms));
    +  }
    +
    +  private static BytesRefIterator asBytesRefIterator(List<Term> terms) {
    +    return new BytesRefIterator() {
    +      int i = 0;
    +      @Override
    +      public BytesRef next() {
    +        if (i >= terms.size())
    +          return null;
    +        return terms.get(i++).bytes();
    +      }
    +    };
    +  }
    +
    +  /**
    +   * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
    +   *
    +   * Only terms that have at least one match in the given document will be included
    +   */
    +  public static DisjunctionMatchesIterator fromTermsEnum(LeafReaderContext context, int doc, String field, BytesRefIterator terms) throws IOException {
    +    List<MatchesIterator> mis = new ArrayList<>();
    +    Terms t = context.reader().terms(field);
    +    if (t == null)
    +      return null;
    +    TermsEnum te = t.iterator();
    +    PostingsEnum reuse = null;
    +    for (BytesRef term = terms.next(); term != null; term = terms.next()) {
    +      if (te.seekExact(term)) {
    +        PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
    +        if (pe.advance(doc) == doc) {
    +          // TODO do we want to use the copied term here, or instead create a label that associates all of the TMIs with a single term?
    +          mis.add(new TermMatchesIterator(BytesRef.deepCopyOf(term), pe));
    +          reuse = null;
    +        }
    +        else {
    +          reuse = pe;
    +        }
    +      }
    +    }
    +    if (mis.size() == 0)
    +      return null;
    +    return new DisjunctionMatchesIterator(mis);
    --- End diff --
    
    Should we specialize the size == 1 case as well?



---

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org
For additional commands, e-mail: dev-h...@lucene.apache.org

[GitHub] lucene-solr pull request #345: LUCENE-8229: Add Weight.matches() method

Reply via email to