Github user moshebla commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/416#discussion_r213270075 --- Diff: solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformer.java --- @@ -0,0 +1,263 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.response.transform; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.Multimap; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.ReaderUtil; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.search.join.BitSetProducer; +import org.apache.lucene.util.BitSet; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.search.DocSet; +import org.apache.solr.search.SolrDocumentFetcher; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SolrReturnFields; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.solr.response.transform.ChildDocTransformerFactory.NUM_SEP_CHAR; +import static org.apache.solr.response.transform.ChildDocTransformerFactory.PATH_SEP_CHAR; +import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME; + +class ChildDocTransformer extends DocTransformer { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private static final String ANON_CHILD_KEY = "_childDocuments_"; + + private final String name; + private final BitSetProducer parentsFilter; + private final DocSet childDocSet; + private final int limit; + private final boolean isNestedSchema; + + private final SolrReturnFields childReturnFields = new SolrReturnFields(); + + ChildDocTransformer(String name, BitSetProducer parentsFilter, + DocSet childDocSet, boolean isNestedSchema, int limit) { + this.name = name; + this.parentsFilter = parentsFilter; + this.childDocSet = childDocSet; + this.limit = limit; + this.isNestedSchema = isNestedSchema; + } + + @Override + public String getName() { + 
return name; + } + + @Override + public void transform(SolrDocument rootDoc, int rootDocId) { + // note: this algorithm works if both if we have have _nest_path_ and also if we don't! + + try { + + // lookup what the *previous* rootDocId is, and figure which segment this is + final SolrIndexSearcher searcher = context.getSearcher(); + final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves(); + final int seg = ReaderUtil.subIndex(rootDocId, leaves); + final LeafReaderContext leafReaderContext = leaves.get(seg); + final int segBaseId = leafReaderContext.docBase; + final int segRootId = rootDocId - segBaseId; + final BitSet segParentsBitSet = parentsFilter.getBitSet(leafReaderContext); + + final int segPrevRootId = segRootId==0? -1: segParentsBitSet.prevSetBit(segRootId - 1); // can return -1 and that's okay + + if(segPrevRootId == (segRootId - 1)) { + // doc has no children, return fast + return; + } + + // we'll need this soon... + final SortedDocValues segPathDocValues = DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME); + // passing a different SortedDocValues obj since the child documents which come after are of smaller docIDs, + // and the iterator can not be reversed. + final String transformedDocPath = getPathByDocId(segRootId, DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME)); + + // the key in the Map is the document's ancestors key(one above the parent), while the key in the intermediate + // MultiMap is the direct child document's key(of the parent document) + Map<String, Multimap<String, SolrDocument>> pendingParentPathsToChildren = new HashMap<>(); + + SolrDocumentFetcher docFetcher = searcher.getDocFetcher(); + final int lastChildId = segBaseId + segPrevRootId + 1; + // Loop each child ID up to the parent (exclusive). + for (int docId = calcDocIdToIterateFrom(lastChildId, rootDocId); docId < rootDocId; ++docId) { + + // get the path. 
(note will default to ANON_CHILD_KEY if schema is not nested or empty string if blank) + String fullDocPath = getPathByDocId(docId - segBaseId, segPathDocValues); + + if(isNestedSchema && !fullDocPath.contains(transformedDocPath)) { --- End diff -- Perhaps a better way to do this would be to build a new Filter for every transformed doc, e.g. `_nest_path_:transformedDocPath`? I am not quite sure what performance overhead such a technique would impose. WDYT?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org