dweiss commented on a change in pull request #657: LUCENE-8781: FST direct arc 
addressing
URL: https://github.com/apache/lucene-solr/pull/657#discussion_r279208143
 
 

 ##########
 File path: lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java
 ##########
 @@ -281,10 +335,81 @@ protected void doSeekFloor() throws IOException {
       //System.out.println("  cycle upto=" + upto + " arc.label=" + arc.label 
+ " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + 
arc.isLast() + " bba=" + arc.bytesPerArc);
 
       if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) {
-        // Arcs are fixed array -- use binary search to find
-        // the target.
-
+        // arcs are in an array
+        
         final FST.BytesReader in = fst.getBytesReader();
+
+        if (arc.arcIdx == Integer.MIN_VALUE) {
+          // The array is addressed directly by label and may contain holes.
+          in.setPosition(arc.posArcsStart);
+          in.skipBytes(1);
+          int firstLabel = fst.readLabel(in);
+          int targetOffset = targetLabel - firstLabel;
+          if (targetOffset < 0) {
+            //System.out.println(" before first"); Very first arc is after our 
target TODO: if each
+            // arc could somehow read the arc just before, we can save this 
re-scan.  The ceil case
+            // doesn't need this because it reads the next arc instead:
+            while(true) {
+              // First, walk backwards until we find a first arc
+              // that's before our target label:
+              fst.readFirstTargetArc(getArc(upto-1), arc, fstReader);
+              if (arc.label < targetLabel) {
+                // Then, scan forwards to the arc just before
+                // the targetLabel:
+                while(!arc.isLast() && fst.readNextArcLabel(arc, in) < 
targetLabel) {
+                  fst.readNextArc(arc, fstReader);
+                }
+                pushLast();
+                return;
+              }
+              upto--;
+              if (upto == 0) {
+                return;
+              }
+              targetLabel = getTargetLabel();
+              arc = getArc(upto);
+            }
+          } else {
+            if (targetOffset >= arc.numArcs) {
+              arc.nextArc = arc.posArcsStart - arc.bytesPerArc * (arc.numArcs 
- 1);
+              fst.readNextRealArc(arc, in);
+              assert arc.isLast();
+              assert arc.label < targetLabel: "arc.label=" + arc.label + " vs 
targetLabel=" + targetLabel;
+              pushLast();
+              return;
+            }
+            arc.nextArc = arc.posArcsStart - arc.bytesPerArc * targetOffset;
+            fst.readNextRealArc(arc, in);
+            if (arc.label == targetLabel) {
+              // found -- copy pasta from below
 
 Review comment:
   pasta is good, but paste is better.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to