Github user paul-rogers commented on a diff in the pull request:

    https://github.com/apache/drill/pull/1001#discussion_r145577894
  
    --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternContainsMatcher.java ---
    @@ -17,36 +17,133 @@
      */
     package org.apache.drill.exec.expr.fn.impl;
     
    -public class SqlPatternContainsMatcher implements SqlPatternMatcher {
    +public final class SqlPatternContainsMatcher implements SqlPatternMatcher {
       final String patternString;
       CharSequence charSequenceWrapper;
       final int patternLength;
     
       public SqlPatternContainsMatcher(String patternString, CharSequence 
charSequenceWrapper) {
    -    this.patternString = patternString;
    +    this.patternString       = patternString;
         this.charSequenceWrapper = charSequenceWrapper;
    -    patternLength = patternString.length();
    +    patternLength            = patternString.length();
       }
     
       @Override
    -  public int match() {
    -    final int txtLength = charSequenceWrapper.length();
    -    int patternIndex = 0;
    -    int txtIndex = 0;
    +  public final int match() {
    +    // The idea is to write loops with simple condition checks to allow 
the Java Hotspot vectorize
    +    // the generate code.
    +    if (patternLength == 1) {
    +      return match_1();
    +    } else if (patternLength == 2) {
    +      return match_2();
    +    } else if (patternLength == 3) {
    +      return match_3();
    +    } else {
    +      return match_N();
    +    }
    +  }
    +
    +  private final int match_1() {
    +    final CharSequence sequenceWrapper = charSequenceWrapper;
    +    final int lengthToProcess          = sequenceWrapper.length();
    +    final char first_patt_char         = patternString.charAt(0);
    +
    +    // simplePattern string has meta characters i.e % and _ and escape 
characters removed.
    +    // so, we can just directly compare.
    +    for (int idx = 0; idx < lengthToProcess; idx++) {
    +      char input_char = sequenceWrapper.charAt(idx);
    +
    +      if (first_patt_char != input_char) {
    +        continue;
    +      }
    +      return 1;
    +    }
    +    return 0;
    +  }
    +
    +  private final int match_2() {
    +    final CharSequence sequenceWrapper = charSequenceWrapper;
    +    final int lengthToProcess          = sequenceWrapper.length() - 1;
    +    final char first_patt_char         = patternString.charAt(0);
    +
    +    // simplePattern string has meta characters i.e % and _ and escape 
characters removed.
    +    // so, we can just directly compare.
    +    for (int idx = 0; idx < lengthToProcess; idx++) {
    +      char input_char = sequenceWrapper.charAt(idx);
    +
    +      if (first_patt_char != input_char) {
    +        continue;
    +      } else {
    +        char ch2_1 = sequenceWrapper.charAt(idx+1);
    +        char ch2_2 = patternString.charAt(1);
    +
    +        if (ch2_1 == ch2_2) {
    +          return 1;
    +        }
    +      }
    +    }
    +    return 0;
    +  }
    +
    +  private final int match_3() {
    +    final CharSequence sequenceWrapper = charSequenceWrapper;
    +    final int lengthToProcess          = sequenceWrapper.length() -2;
    +    final char first_patt_char         = patternString.charAt(0);
     
         // simplePattern string has meta characters i.e % and _ and escape 
characters removed.
         // so, we can just directly compare.
    -    while (patternIndex < patternLength && txtIndex < txtLength) {
    -      if (patternString.charAt(patternIndex) != 
charSequenceWrapper.charAt(txtIndex)) {
    -        // Go back if there is no match
    -        txtIndex = txtIndex - patternIndex;
    -        patternIndex = 0;
    +    for (int idx = 0; idx < lengthToProcess; idx++) {
    +      char input_char = sequenceWrapper.charAt(idx);
    +
    +      if (first_patt_char != input_char) {
    +        continue;
           } else {
    -        patternIndex++;
    +        char ch2_1 = sequenceWrapper.charAt(idx+1);
    +        char ch2_2 = patternString.charAt(1);
    +        char ch3_1 = sequenceWrapper.charAt(idx+2);
    +        char ch3_2 = patternString.charAt(2);
    +
    +        if (ch2_1 == ch2_2 && ch3_1 == ch3_2) {
    +          return 1;
    +        }
           }
    -      txtIndex++;
    +    }
    +    return 0;
    +  }
    +
    +  private final int match_N() {
    +
    +    if (patternLength == 0) {
    --- End diff --
    
    Can't this be optimized away in the pattern parser stage? We should not be calling this function if we have a zero-length pattern.


---

Reply via email to