Hi,
I have some problems working with BinaryDocValues. The code below works
well with a few thousand documents, but with more than 65000
documents it does not return the correct BinaryDocValues after the docId
(with docBase rebasing) reaches a certain id. From this point on, it
cycles, returning the BinaryDocValues of the first docs. I'm working with
Lucene/Solr 4.3.
I tested this code by indexing 100000 documents, each with a
"binary_ids_campaigns" value equal to its docId. After approximately docId
65500, it returns the BinaryDocValues corresponding to the first doc ids. I
have followed the API instructions on how to rebase the docId, but I guess
I'm missing something. If someone could point me in the right direction, I
would really appreciate it.
Best regards,
Xavier
/**
 * Runs the request's query through a {@link CpcCollector} and stores the
 * results in this object's fields.
 *
 * <p>After the search, {@code maxOrganicScore} holds the highest score seen
 * and {@code maxCpc} the highest CPC reported by the collector. When the core
 * name contains {@code "ppc"}, a {@link FilteredQuery} restricting results to
 * the documents gathered by the collector is appended to the request's
 * filters.
 *
 * @param rb       response builder supplying the query and the filters
 * @param core     core whose newest searcher is used for the search
 * @param observer campaign observer handed to the collector
 * @throws SolrException (SERVER_ERROR) wrapping any failure raised while
 *         loading the doc values or running the search
 */
public void computeVals(ResponseBuilder rb, SolrCore core, final CampaignObserver observer) {
    RefCounted<SolrIndexSearcher> searchHolder = null;
    try {
        searchHolder = core.getNewestSearcher(false);
        // Resolve the searcher once and derive the reader from it so both
        // are guaranteed to come from the same reference.
        SolrIndexSearcher searcher = searchHolder.get();
        AtomicReader reader = searcher.getAtomicReader();

        idsCampaigns = reader.getBinaryDocValues("binary_ids_campaigns");
        if (idsCampaigns == null) {
            // Fail with an explicit message here instead of an anonymous
            // NullPointerException on the first collected document.
            throw new IllegalStateException(
                    "No binary doc values available for field binary_ids_campaigns");
        }

        final float[] topscore = new float[]{Float.NEGATIVE_INFINITY};
        CpcCollector delegate = new CpcCollector(reader, topscore,
                observer, maxCpc, idsCampaigns, maxDocCpc);

        DocSet filter = null;
        // Only filter in ppc, not for search; in search only sorting is applied.
        SolrIndexSearcher.ProcessedFilter pf =
                searcher.getProcessedFilter(filter, rb.getFilters());

        // Apply the request's filters when there are any.
        if (pf != null && pf.filter != null) {
            searcher.search(rb.getQuery(), pf.filter, delegate);
        } else {
            searcher.search(rb.getQuery(), delegate);
        }

        float[] collectedTopscore = delegate.getTopscore();
        maxOrganicScore = collectedTopscore[0];
        maxCpc = delegate.getMaxCpc();

        if (core.getName().indexOf("ppc") > -1) {
            filter = delegate.getDocSet();
            List<Query> filters = rb.getFilters();
            if (filters == null) {
                filters = new ArrayList<Query>();
            }
            filters.add(new FilteredQuery(rb.getQuery(),
                    filter.getTopFilter()));
            rb.setFilters(filters);
        }
    } catch (Exception e) {
        // The try block covers the doc-values lookup and the whole search,
        // so the message describes the operation rather than a FieldCache load.
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                "Error computing campaign values from the binary_ids_campaigns doc values", e);
    } finally {
        // Always release the searcher reference taken above.
        if (searchHolder != null) {
            searchHolder.decref();
        }
    }
}
----------------- Collector code -----------------------
public class CpcCollector extends Collector {
private static Logger log = LoggerFactory.getLogger(CpcCollector.class);
private SortedIntDocSet docSet = null;
Scorer scorer;
private final float[] topscore;
private CampaignObserver observer;
private float maxCpc;
private com.carrotsearch.hppc.IntArrayList idDocs;
private BinaryDocValues values;
private com.carrotsearch.hppc.IntFloatOpenHashMap maxDocCpc;//Maximum
cpc per document
private int docBase = 0;
/**
*
* @param reader
* @param topscore
* @param observer
* @param ids
* @param maxCpc
* @param values
*/
public CpcCollector(IndexReader reader, final float[] topscore,
CampaignObserver observer, float maxCpc, BinaryDocValues values,
com.carrotsearch.hppc.IntFloatOpenHashMap maxDocCpc) {
this.topscore = topscore;
this.observer = observer;
this.maxCpc = maxCpc;
idDocs = new com.carrotsearch.hppc.IntArrayList();
this.maxDocCpc = maxDocCpc;
this.values = values;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
@Override
public void collect(int doc) throws IOException {
float score = scorer.score();
if (score > getTopscore()[0]) {
topscore[0] = score;
}
BytesRef term = new BytesRef();
values.get(doc + docBase, term);
int size = (int) term.bytes[term.offset] * 4 + 1;
byte[] docValues = new byte[size];
ByteBuffer.wrap(term.bytes, term.offset, size).get(docValues, 0, size);
int[] campIds = observer.parseBinaryIdsOldSkoolWayArray(docValues);
if (campIds != null) {
float cpc = observer.getMaxActiveCpc(campIds);
getMaxDocCpc().put(doc + docBase, cpc);
if (cpc > 0) {
if (cpc > getMaxCpc()) {
maxCpc = cpc;
}
//active campaign
idDocs.add(doc + docBase);
}
}
}
@Override
public boolean acceptsDocsOutOfOrder() {
return true;//podria ser tru
}
@Override
public void setNextReader(AtomicReaderContext context) throws
IOException {
this.docBase = context.docBase;
}
/**
* @return the topscore
*/
public float[] getTopscore() {
return topscore;
}
/**
* @return the maxCpc
*/
public float getMaxCpc() {
return maxCpc;
}
/**
* @return the docSet
*/
public SortedIntDocSet getDocSet() {
docSet = new SortedIntDocSet(idDocs.toArray());
return docSet;
}
/**
* @return the maxDocCpc
*/
public com.carrotsearch.hppc.IntFloatOpenHashMap getMaxDocCpc() {
return maxDocCpc;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]