[ https://issues.apache.org/jira/browse/DRILL-8402?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17692805#comment-17692805 ]
ASF GitHub Bot commented on DRILL-8402: --------------------------------------- vvysotskyi commented on code in PR #2762: URL: https://github.com/apache/drill/pull/2762#discussion_r1116028724 ########## exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java: ########## @@ -293,6 +293,109 @@ public void eval() { } } + /* + * This function returns the capturing groups from a regex. + */ + @FunctionTemplate(name = "regexp_extract", scope = FunctionScope.SIMPLE, + outputWidthCalculatorType = OutputWidthCalculatorType.CUSTOM_FIXED_WIDTH_DEFAULT) + public static class RegexpExtract implements DrillSimpleFunc { + + @Param VarCharHolder input; + @Param(constant=true) VarCharHolder pattern; + @Inject + DrillBuf buffer; + @Workspace + java.util.regex.Matcher matcher; + @Workspace + org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; + @Output + ComplexWriter out; + + @Override + public void setup() { + matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)).matcher(""); + charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); + matcher.reset(charSequenceWrapper); + } + + @Override + public void eval() { + charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); + + // Reusing same charSequenceWrapper, no need to pass it in. + matcher.reset(); + boolean result = matcher.find(); + + // Start the list here. If there are no matches, we return an empty list. 
+ org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter listWriter = out.rootAsList(); + listWriter.startList(); + + if (result) { + org.apache.drill.exec.vector.complex.writer.VarCharWriter varCharWriter = listWriter.varChar(); + + for(int i = 1; i <= matcher.groupCount(); i++) { + final byte[] strBytes = matcher.group(i).getBytes(com.google.common.base.Charsets.UTF_8); Review Comment: `matcher.group(i)` creates and returns a new string > Add REGEXP_EXTRACT Function > --------------------------- > > Key: DRILL-8402 > URL: https://issues.apache.org/jira/browse/DRILL-8402 > Project: Apache Drill > Issue Type: Improvement > Components: Functions - Drill > Affects Versions: 1.21.0 > Reporter: Charles Givre > Assignee: Charles Givre > Priority: Major > Fix For: 1.21.1 > > > This PR adds two UDFs to Drill: > regexp_extract(<text>, <pattern>) which returns an array of strings which > were captured by capturing groups in the regex. > regexp_extract(<text>, <pattern>, <index>) returns the text captured by a > specific capturing group. -- This message was sent by Atlassian Jira (v8.20.10#820010)