Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/1001#discussion_r145577894 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternContainsMatcher.java --- @@ -17,36 +17,133 @@ */ package org.apache.drill.exec.expr.fn.impl; -public class SqlPatternContainsMatcher implements SqlPatternMatcher { +public final class SqlPatternContainsMatcher implements SqlPatternMatcher { final String patternString; CharSequence charSequenceWrapper; final int patternLength; public SqlPatternContainsMatcher(String patternString, CharSequence charSequenceWrapper) { - this.patternString = patternString; + this.patternString = patternString; this.charSequenceWrapper = charSequenceWrapper; - patternLength = patternString.length(); + patternLength = patternString.length(); } @Override - public int match() { - final int txtLength = charSequenceWrapper.length(); - int patternIndex = 0; - int txtIndex = 0; + public final int match() { + // The idea is to write loops with simple condition checks to allow the Java Hotspot vectorize + // the generate code. + if (patternLength == 1) { + return match_1(); + } else if (patternLength == 2) { + return match_2(); + } else if (patternLength == 3) { + return match_3(); + } else { + return match_N(); + } + } + + private final int match_1() { + final CharSequence sequenceWrapper = charSequenceWrapper; + final int lengthToProcess = sequenceWrapper.length(); + final char first_patt_char = patternString.charAt(0); + + // simplePattern string has meta characters i.e % and _ and escape characters removed. + // so, we can just directly compare. + for (int idx = 0; idx < lengthToProcess; idx++) { + char input_char = sequenceWrapper.charAt(idx); + + if (first_patt_char != input_char) { + continue; + } + return 1; + } + return 0; + } + + private final int match_2() { + final CharSequence sequenceWrapper = charSequenceWrapper; + final int lengthToProcess = sequenceWrapper.length() - 1; + final char first_patt_char = patternString.charAt(0); + + // simplePattern string has meta characters i.e % and _ and escape characters removed. + // so, we can just directly compare. + for (int idx = 0; idx < lengthToProcess; idx++) { + char input_char = sequenceWrapper.charAt(idx); + + if (first_patt_char != input_char) { + continue; + } else { + char ch2_1 = sequenceWrapper.charAt(idx+1); + char ch2_2 = patternString.charAt(1); + + if (ch2_1 == ch2_2) { + return 1; + } + } + } + return 0; + } + + private final int match_3() { + final CharSequence sequenceWrapper = charSequenceWrapper; + final int lengthToProcess = sequenceWrapper.length() -2; + final char first_patt_char = patternString.charAt(0); // simplePattern string has meta characters i.e % and _ and escape characters removed. // so, we can just directly compare. - while (patternIndex < patternLength && txtIndex < txtLength) { - if (patternString.charAt(patternIndex) != charSequenceWrapper.charAt(txtIndex)) { - // Go back if there is no match - txtIndex = txtIndex - patternIndex; - patternIndex = 0; + for (int idx = 0; idx < lengthToProcess; idx++) { + char input_char = sequenceWrapper.charAt(idx); + + if (first_patt_char != input_char) { + continue; } else { - patternIndex++; + char ch2_1 = sequenceWrapper.charAt(idx+1); + char ch2_2 = patternString.charAt(1); + char ch3_1 = sequenceWrapper.charAt(idx+2); + char ch3_2 = patternString.charAt(2); + + if (ch2_1 == ch2_2 && ch3_1 == ch3_2) { + return 1; + } } - txtIndex++; + } + return 0; + } + + private final int match_N() { + + if (patternLength == 0) { --- End diff -- Can't this be optimized away in the pattern parser stage? We should not be calling this function if we had a zero-length pattern.
---