On Aug 13, 2013, at 12:55 PM, Michael McCandless wrote:
> I'm less familiar with the older highlighters but likely it's possible
> to get the absolute offsets from them as well.
Using the vector highlighter, I've achieved that by extending
ScoreOrderFragmentsBuilder and copying the code of its makeFragment method,
adding a line that records each term's offsets, something like this:
final List offsets = new ArrayList<>();
ScoreOrderFragmentsBuilder fragmentsBuilder = new
ScoreOrderFragmentsBuilder(new String[]{highlightBlockStart}, new
String[]{highlightBlockEnd}) {
@Override
protected String makeFragment(StringBuilder buffer, int[] index,
Field[] values, FieldFragList.WeightedFragInfo fragInfo, String[] preTags,
String[] postTags, Encoder encoder) {
// todo You might not want to keep the StringBuilder if only
accessing offsets...
StringBuilder fragment = new StringBuilder();
final int s = fragInfo.getStartOffset();
int[] modifiedStartOffset = {s};
String src = getFragmentSourceMSO(buffer, index, values, s,
fragInfo.getEndOffset(), modifiedStartOffset);
int srcIndex = 0;
for (FieldFragList.WeightedFragInfo.SubInfo subInfo :
fragInfo.getSubInfos()) {
for (FieldPhraseList.WeightedPhraseInfo.Toffs to :
subInfo.getTermsOffsets()) {
offsets.add(new int[]{to.getStartOffset(),
to.getEndOffset()});
fragment
.append(encoder.encodeText(src.substring(srcIndex,
to.getStartOffset() - modifiedStartOffset[0])))
.append(getPreTag(preTags, subInfo.getSeqnum()))
.append(encoder.encodeText(src.substring(to.getStartOffset() -
modifiedStartOffset[0], to.getEndOffset() - modifiedStartOffset[0])))
.append(getPostTag(postTags, subInfo.getSeqnum()));
srcIndex = to.getEndOffset() - modifiedStartOffset[0];
}
}
fragment.append(encoder.encodeText(src.substring(srcIndex)));
return fragment.toString();
}
};
// Wire the offset-recording fragmentsBuilder into the highlighter.
// NOTE(review): the two boolean flags are presumably phraseHighlight and
// fieldMatch -- confirm against the FastVectorHighlighter constructor docs.
FastVectorHighlighter fastVectorHighlighter = new
FastVectorHighlighter(true, true, fragListBuilder, fragmentsBuilder);
// The returned text is still available, but the point of this call here is
// that (presumably) it drives makeFragment, which fills `offsets`.
String fragment = fastVectorHighlighter.getBestFragment(fieldQuery,
finalReader, 0, fieldName, fragmentLength);
// Each entry is a two-element array: offset[0] = start, offset[1] = end,
// as stored by the offsets.add(...) call in makeFragment.
for (int[] offset : offsets) {
...
}
kalle