Hi,
I've been trying to use the new/experimental positions code from LUCENE_2878 in solr. I've extended oashc.QueryComponent as a chance to learn the code a bit. I've been able to access (scorer) frequencies, but not interval positions. I'm not sure of the best method for sharing my test and component, so feel free to point me in a better direction. For now I'll include the component and test case below.

NOTES:

 * I picked the schema field 'test_posofftv' in an attempt to use a
   field stored with vectors, positions, offsets...
 * I'm not clear on the difference between
     o searcher.createNormalizedWeight(query) and
     o query.createWeight()
     o query.createWeight() seems to need a rewritten query, so perhaps
       searcher... is the right approach?
 * It looks like for simple queries (Boolean, Term, ...) createWeight
   resolves to MatchOnlyTermScorers, which throw an
   UnsupportedOperationException.
     o Most likely I am missing something obvious in setup/implementation.
     o The phrase query gets me a functioning IntervalIterator, but
       null Intervals
 * I am naively setting parameters in the weight.scorer(), particularly
   scoreDocsInOrder and topScorer
 * I'm a bit lost on Scorer.advance(n), IntervalIterator.next(), and
   IntervalIterator.scorerAdvanced(n)



// ----------------- Start Component code ---------------------------
package org.apache.solr.handler.component;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.positions.Interval;
import org.apache.lucene.search.positions.IntervalIterator;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.SolrIndexSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.List;


/**
 * Experimental search component used to explore the positions (interval) API.
 *
 * <p>The parent {@link QueryComponent} performs the actual search; afterwards this component
 * walks every leaf (segment) of the searcher's top-level reader context and, for the user query,
 * tries to obtain the first {@link Interval} from two differently-built weights:
 * one from {@code searcher.createNormalizedWeight(query)} and one from
 * {@code rewrittenQuery.createWeight(searcher)}. Results are only logged.
 */
public class PositionFooComponent extends QueryComponent {
    public static Logger log = LoggerFactory.getLogger(PositionFooComponent.class);

    /**
     * Lets the parent do the actual work, then probes each index segment for interval positions
     * of the user query and logs what it finds.
     *
     * @param rb the response builder for the current request
     * @throws IOException if the parent component or any Lucene call fails
     */
    @Override
    public void process(ResponseBuilder rb) throws IOException {
        super.process(rb);
        // Sanity check that we are actually being called.
        log.info("PositionFooComponent process: {}", rb);

        SolrQueryRequest req = rb.req;
        SolrIndexSearcher searcher = req.getSearcher();

        // Per-segment contexts are taken from the searcher's top-level reader context.
        IndexReaderContext topReaderContext = searcher.getTopReaderContext();
        List<AtomicReaderContext> leaves = topReaderContext.leaves();

        Query userQuery = rb.getQuery();
        log.debug("RB.queries: {}", userQuery);

        // query.createWeight() is called on the rewritten query; createNormalizedWeight() is
        // handed the original query. NOTE(review): presumably createNormalizedWeight rewrites
        // internally -- confirm against the IndexSearcher documentation.
        Query rewrite = searcher.rewrite(userQuery);

        // Neither weight depends on the segment, so both are built once, outside the loop.
        Weight normalizedWeight = searcher.createNormalizedWeight(userQuery);
        Weight rewrittenWeight = rewrite.createWeight(searcher);

        for (AtomicReaderContext arc : leaves) {
            log.info("\nContext docbase:{} -- ord:{}", arc.docBase, arc.ord);

            logFirstInterval("createNormalizedWeight", normalizedWeight, arc);
            logFirstInterval("createWeight", rewrittenWeight, arc);
        }
    }

    /**
     * Builds a scorer for one segment, advances it to the first document and logs the start of
     * the first interval reported for that document (or why none could be read).
     *
     * @param label  short tag identifying which weight is being probed, used in log output
     * @param weight the weight to create the scorer from
     * @param arc    the segment to score
     * @throws IOException if reading the index fails
     */
    private static void logFirstInterval(String label, Weight weight, AtomicReaderContext arc)
            throws IOException {
        AtomicReader reader = arc.reader();
        Scorer scorer = weight.scorer(arc, true, true, true, true, true, reader.getLiveDocs());
        if (scorer == null) {
            // NOTE(review): Weight.scorer is documented to return null when nothing in the
            // segment can match -- confirm this still holds on the positions branch.
            log.info("{}: no scorer for this segment", label);
            return;
        }

        int doc = scorer.advance(0);
        if (doc == Scorer.NO_MORE_DOCS) {
            log.info("{}: no matching document in this segment", label);
            return;
        }

        // The positions iterator is taken from the same scorer that was just advanced
        // (the original second probe mistakenly reused the first scorer here).
        IntervalIterator positions = scorer.positions();
        // NOTE(review): the original had a commented-out positions.scorerAdvanced(doc) call;
        // the iterator may need to be told about the scorer's current document before next()
        // yields intervals -- confirm against the LUCENE-2878 API.
        Interval interval = positions.next();
        if (interval == null) {
            log.info("{}: doc {} returned a null interval", label, doc);
            return;
        }
        log.info("{}: doc {} position begin: {}", new Object[] {label, doc, interval.begin});
    }
}


// ---------------------- Start test case ----------------------------
package org.apache.solr.handler.component;

import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;


/**
 * Smoke test that indexes four small documents and sends one phrase query through the
 * "foo" request handler so that {@code PositionFooComponent} gets exercised.
 */
public class PositionFooComponentTest extends SolrTestCaseJ4 {
    public static final String FIELD = "test_posofftv";
    // Alternative field to try:
    //    public static final String FIELD = "subject";

    /** Starts the test core and indexes the fixture documents (id, field text). */
    @BeforeClass
    public static void beforeClass() throws Exception {
        initCore("solrconfig-foo.xml", "schema.xml");

        final String[][] fixtures = {
            {"1", "one two three four"},
            {"2", "two three four"},
            {"3", "three four"},
            {"4", "four"},
        };
        for (String[] fixture : fixtures) {
            assertU(adoc("id", fixture[0], FIELD, fixture[1]));
        }
        assertU(commit());
    }

    /** Runs a sloppy phrase query for "two three" and expects two hits. */
    public void testTwoThree() {
        final String phraseQuery = "(\"two three\"~3)";
        final String fieldList = "id, " + FIELD + ", score";

        SolrQueryRequest sqr = req(
            CommonParams.DF, FIELD,
            CommonParams.Q, phraseQuery,
            CommonParams.FL, fieldList,
            CommonParams.QT, "foo");

        assertQ("Error in testing?", sqr, "//*[@numFound='2']");
    }
}


// ---------------- Start solrconfig-foo.xml --------------------------
<?xml version="1.0" encoding="UTF-8" ?>
<!-- Minimal Solr configuration loaded by PositionFooComponentTest via initCore("solrconfig-foo.xml", ...). -->
<config>
  <luceneMatchVersion>LUCENE_50</luceneMatchVersion>
  <!-- Index data directory; taken from the solr.data.dir property, empty when that property is unset. -->
  <dataDir>${solr.data.dir:}</dataDir>

<!-- Directory factory override, currently disabled. -->
<!--<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>-->

<!-- Registers PositionFooComponent under the name "fooComponent". -->
<searchComponent name="fooComponent" class="org.apache.solr.handler.component.PositionFooComponent">
    <lst name="defaults">
      <str name="fl">test_posofftv</str>
    </lst>
  </searchComponent>

<!-- Request handler "foo"; the test passes "foo" as CommonParams.QT to reach it. -->
<requestHandler name="foo" class="org.apache.solr.handler.component.SearchHandler">
      <lst name="defaults">
          <!--<str name="defType">sdr</str>-->
          <str name="fl">id,test_posofftv</str>
      </lst>
      <!-- Component list for this handler: only fooComponent is named here. -->
      <!-- NOTE(review): presumably this replaces the default component chain; confirm against SearchHandler docs. -->
      <arr name="components">
          <str>fooComponent</str>
      </arr>
  </requestHandler>


</config>


The assertQ should be ignored, it is just a quick way to get the query to run (I am overdue for digging into the test framework and learning to use it properly). At the moment, the Intervals in the component come back null. I assume this is because I am misusing the new code. If anyone could straighten me out on how to 'properly' use the positions code in Solr I would be most grateful.

Thanks,

Sean


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to