Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/1001#discussion_r145576718 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternContainsMatcher.java --- @@ -17,36 +17,133 @@ */ package org.apache.drill.exec.expr.fn.impl; -public class SqlPatternContainsMatcher implements SqlPatternMatcher { +public final class SqlPatternContainsMatcher implements SqlPatternMatcher { final String patternString; CharSequence charSequenceWrapper; final int patternLength; public SqlPatternContainsMatcher(String patternString, CharSequence charSequenceWrapper) { - this.patternString = patternString; + this.patternString = patternString; this.charSequenceWrapper = charSequenceWrapper; - patternLength = patternString.length(); + patternLength = patternString.length(); } @Override - public int match() { - final int txtLength = charSequenceWrapper.length(); - int patternIndex = 0; - int txtIndex = 0; + public final int match() { + // The idea is to write loops with simple condition checks to allow the Java Hotspot vectorize + // the generate code. + if (patternLength == 1) { + return match_1(); + } else if (patternLength == 2) { + return match_2(); + } else if (patternLength == 3) { + return match_3(); + } else { + return match_N(); + } + } + + private final int match_1() { --- End diff -- See note about UTF-8. If we don't care about the match position (that is, we don't need `strpos()`, and all we care about is whether it matches or not), then we can do the work on the undecoded UTF-8 bytes, saving a large amount of complexity.
---