[GitHub] lucene-solr pull request #416: WIP: SOLR-12519

moshebla Tue, 28 Aug 2018 04:06:01 -0700

Github user moshebla commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/416#discussion_r213270075
  
    --- Diff: 
solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformer.java 
---
    @@ -0,0 +1,263 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.solr.response.transform;
    +
    +import java.io.IOException;
    +import java.lang.invoke.MethodHandles;
    +import java.util.ArrayList;
    +import java.util.Collection;
    +import java.util.HashMap;
    +import java.util.List;
    +import java.util.Map;
    +
    +import com.google.common.collect.ArrayListMultimap;
    +import com.google.common.collect.Multimap;
    +import org.apache.lucene.index.DocValues;
    +import org.apache.lucene.index.LeafReaderContext;
    +import org.apache.lucene.index.ReaderUtil;
    +import org.apache.lucene.index.SortedDocValues;
    +import org.apache.lucene.search.join.BitSetProducer;
    +import org.apache.lucene.util.BitSet;
    +import org.apache.solr.common.SolrDocument;
    +import org.apache.solr.search.DocSet;
    +import org.apache.solr.search.SolrDocumentFetcher;
    +import org.apache.solr.search.SolrIndexSearcher;
    +import org.apache.solr.search.SolrReturnFields;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import static 
org.apache.solr.response.transform.ChildDocTransformerFactory.NUM_SEP_CHAR;
    +import static 
org.apache.solr.response.transform.ChildDocTransformerFactory.PATH_SEP_CHAR;
    +import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME;
    +
    +class ChildDocTransformer extends DocTransformer {
    +  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    +
    +  private static final String ANON_CHILD_KEY = "_childDocuments_";
    +
    +  private final String name;
    +  private final BitSetProducer parentsFilter;
    +  private final DocSet childDocSet;
    +  private final int limit;
    +  private final boolean isNestedSchema;
    +
    +  private final SolrReturnFields childReturnFields = new 
SolrReturnFields();
    +
    +  ChildDocTransformer(String name, BitSetProducer parentsFilter,
    +                      DocSet childDocSet, boolean isNestedSchema, int 
limit) {
    +    this.name = name;
    +    this.parentsFilter = parentsFilter;
    +    this.childDocSet = childDocSet;
    +    this.limit = limit;
    +    this.isNestedSchema = isNestedSchema;
    +  }
    +
    +  @Override
    +  public String getName()  {
    +    return name;
    +  }
    +
    +  @Override
    +  public void transform(SolrDocument rootDoc, int rootDocId) {
    +    // note: this algorithm works if both if we have have _nest_path_  and 
also if we don't!
    +
    +    try {
    +
    +      // lookup what the *previous* rootDocId is, and figure which segment 
this is
    +      final SolrIndexSearcher searcher = context.getSearcher();
    +      final List<LeafReaderContext> leaves = 
searcher.getIndexReader().leaves();
    +      final int seg = ReaderUtil.subIndex(rootDocId, leaves);
    +      final LeafReaderContext leafReaderContext = leaves.get(seg);
    +      final int segBaseId = leafReaderContext.docBase;
    +      final int segRootId = rootDocId - segBaseId;
    +      final BitSet segParentsBitSet = 
parentsFilter.getBitSet(leafReaderContext);
    +
    +      final int segPrevRootId = segRootId==0? -1: 
segParentsBitSet.prevSetBit(segRootId - 1); // can return -1 and that's okay
    +
    +      if(segPrevRootId == (segRootId - 1)) {
    +        // doc has no children, return fast
    +        return;
    +      }
    +
    +      // we'll need this soon...
    +      final SortedDocValues segPathDocValues = 
DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME);
    +      // passing a different SortedDocValues obj since the child documents 
which come after are of smaller docIDs,
    +      // and the iterator can not be reversed.
    +      final String transformedDocPath = getPathByDocId(segRootId, 
DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME));
    +
    +      // the key in the Map is the document's ancestors key(one above the 
parent), while the key in the intermediate
    +      // MultiMap is the direct child document's key(of the parent 
document)
    +      Map<String, Multimap<String, SolrDocument>> 
pendingParentPathsToChildren = new HashMap<>();
    +
    +      SolrDocumentFetcher docFetcher = searcher.getDocFetcher();
    +      final int lastChildId = segBaseId + segPrevRootId + 1;
    +      // Loop each child ID up to the parent (exclusive).
    +      for (int docId = calcDocIdToIterateFrom(lastChildId, rootDocId); 
docId < rootDocId; ++docId) {
    +
    +        // get the path.  (note will default to ANON_CHILD_KEY if schema 
is not nested or empty string if blank)
    +        String fullDocPath = getPathByDocId(docId - segBaseId, 
segPathDocValues);
    +
    +        if(isNestedSchema && !fullDocPath.contains(transformedDocPath)) {
    --- End diff --
    
    Perhaps a better way to do this would be to build a new Filter for every 
transformed doc, e.g. `_nest_path_:transformedDocPath`?
    I am not quite sure what performance overhead such a technique would impose.
    WDYT?



---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] lucene-solr pull request #416: WIP: SOLR-12519

Reply via email to