romseygeek commented on a change in pull request #1037: LUCENE-9062: QueryVisitor.consumeTermsMatching
URL: https://github.com/apache/lucene-solr/pull/1037#discussion_r350744040
 
 

 ##########
 File path: lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java
 ##########
 @@ -47,44 +47,39 @@ public boolean run(byte[] s, int offset, int length) {
     return accept[p];
   }
 
-  public CharArrayMatcher asCharArrayMatcher(String label) {
-    return new CharArrayMatcher() {
-      @Override
-      public boolean run(char[] chars, int offset, int length) {
-        int state = 0;
-        final int maxIdx = offset + length;
-        for (int i = offset; i < maxIdx; i++) {
-          final int code = chars[i];
-          int b;
-          // UTF16 to UTF8   (inlined logic from UnicodeUtil.UTF16toUTF8 )
-          if (code < 0x80) {
-            state = step(state, code);
-            if (state == -1) return false;
-          } else if (code < 0x800) {
-            b = (0xC0 | (code >> 6));
-            state = step(state, b);
-            if (state == -1) return false;
-            b = (0x80 | (code & 0x3F));
-            state = step(state, b);
+  /**
+   * Returns a representation of the automaton that matches char[] instead of byte[]
+   */
+  public CharArrayMatcher asCharArrayMatcher() {
+    return (chars, offset, length) -> {
+      int state = 0;
+      final int maxIdx = offset + length;
+      for (int i = offset; i < maxIdx; i++) {
+        final int code = chars[i];
+        int b;
+        // UTF16 to UTF8   (inlined logic from UnicodeUtil.UTF16toUTF8 )
+        if (code < 0x80) {
+          state = step(state, code);
+          if (state == -1) return false;
+        } else if (code < 0x800) {
+          b = (0xC0 | (code >> 6));
+          state = step(state, b);
+          if (state == -1) return false;
+          b = (0x80 | (code & 0x3F));
+          state = step(state, b);
+          if (state == -1) return false;
+        } else {
+          // more complex
+          byte[] utf8Bytes = new byte[4 * (maxIdx - i)];
 
 Review comment:
   ++ let's do that in a followup

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to