bruno-roustant commented on a change in pull request #980: LUCENE-8920: Reduce the memory used by direct addressing of arcs
URL: https://github.com/apache/lucene-solr/pull/980#discussion_r344284382
########## File path: lucene/core/src/java/org/apache/lucene/util/fst/FST.java ########## @@ -726,6 +759,57 @@ private void writeArrayPacked(Builder<T> builder, Builder.UnCompiledNode<T> node } } + private void writeArrayDirectAddressing(Builder<T> builder, Builder.UnCompiledNode<T> nodeIn, long fixedArrayStart, int maxBytesPerArc, int labelRange) { + int numPresenceBytes = getNumPresenceBytes(labelRange); + // expand the arcs in place, backwards + long srcPos = builder.bytes.getPosition(); + long destPos = fixedArrayStart + numPresenceBytes + nodeIn.numArcs * maxBytesPerArc; + // if destPos == srcPos it means all the arcs were the same length, and the array of them is *already* direct + assert destPos >= srcPos; + if (destPos > srcPos) { + builder.bytes.skipBytes((int) (destPos - srcPos)); + assert builder.bytes.getPosition() == destPos; + for (int arcIdx = nodeIn.numArcs - 1; arcIdx >= 0; arcIdx--) { + destPos -= maxBytesPerArc; + int arcLen = builder.reusedBytesPerArc[arcIdx]; + srcPos -= arcLen; + if (srcPos != destPos) { + assert destPos > srcPos: "destPos=" + destPos + " srcPos=" + srcPos + " arcIdx=" + arcIdx + " maxBytesPerArc=" + maxBytesPerArc + " reusedBytesPerArc[arcIdx]=" + builder.reusedBytesPerArc[arcIdx] + " nodeIn.numArcs=" + nodeIn.numArcs; + builder.bytes.copyBytes(srcPos, destPos, arcLen); + } + } + } + assert destPos - numPresenceBytes == fixedArrayStart; + writePresenceBits(builder, nodeIn, labelRange, fixedArrayStart); + } + + private void writePresenceBits(Builder<T> builder, Builder.UnCompiledNode<T> nodeIn, int labelRange, long dest) { + long bytePos = dest; + byte presenceBits = 1; // The first arc is always present. 
+ int presenceIndex = 0; + int previousLabel = nodeIn.arcs[0].label; + for (int arcIdx = 1; arcIdx < nodeIn.numArcs; arcIdx++) { + int label = nodeIn.arcs[arcIdx].label; + presenceIndex += label - previousLabel; + while (presenceIndex >= 8) {

Review comment: Ok

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at: us...@infra.apache.org

With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org