[ https://issues.apache.org/jira/browse/SPARK-12451?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
William Dee updated SPARK-12451: -------------------------------- Description: When using the regexp functions in Spark SQL, patterns containing '*/' create runtime errors in the auto-generated code. This is due to the fact that the code generator creates a multiline comment containing, amongst other things, the pattern. Here is an excerpt from my stack trace to illustrate: (Helpfully, the stack trace includes all of the auto-generated code) {code} Caused by: org.codehaus.commons.compiler.CompileException: Line 232, Column 54: Unexpected token "," in primary at org.codehaus.janino.Parser.compileException(Parser.java:3125) at org.codehaus.janino.Parser.parsePrimary(Parser.java:2512) at org.codehaus.janino.Parser.parseUnaryExpression(Parser.java:2252) at org.codehaus.janino.Parser.parseMultiplicativeExpression(Parser.java:2211) at org.codehaus.janino.Parser.parseAdditiveExpression(Parser.java:2190) at org.codehaus.janino.Parser.parseShiftExpression(Parser.java:2169) at org.codehaus.janino.Parser.parseRelationalExpression(Parser.java:2072) at org.codehaus.janino.Parser.parseEqualityExpression(Parser.java:2046) at org.codehaus.janino.Parser.parseAndExpression(Parser.java:2025) at org.codehaus.janino.Parser.parseExclusiveOrExpression(Parser.java:2004) at org.codehaus.janino.Parser.parseInclusiveOrExpression(Parser.java:1983) at org.codehaus.janino.Parser.parseConditionalAndExpression(Parser.java:1962) at org.codehaus.janino.Parser.parseConditionalOrExpression(Parser.java:1941) at org.codehaus.janino.Parser.parseConditionalExpression(Parser.java:1922) at org.codehaus.janino.Parser.parseAssignmentExpression(Parser.java:1901) at org.codehaus.janino.Parser.parseExpression(Parser.java:1886) at org.codehaus.janino.Parser.parseBlockStatement(Parser.java:1149) at org.codehaus.janino.Parser.parseBlockStatements(Parser.java:1085) at org.codehaus.janino.Parser.parseMethodDeclarationRest(Parser.java:938) at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:620) at 
org.codehaus.janino.Parser.parseClassBody(Parser.java:515) at org.codehaus.janino.Parser.parseClassDeclarationRest(Parser.java:481) at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:577) at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:229) at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:192) at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:84) at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:77) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:387) ... line 232 ... /* regexp_replace(input[46, StringType],^.*/,) */ /* input[46, StringType] */ boolean isNull31 = i.isNullAt(46); UTF8String primitive32 = isNull31 ? null : (i.getUTF8String(46)); boolean isNull24 = true; UTF8String primitive25 = null; if (!isNull31) { /* ^.*/ */ /* expression: ^.*/ */ Object obj35 = expressions[4].eval(i); boolean isNull33 = obj35 == null; UTF8String primitive34 = null; if (!isNull33) { primitive34 = (UTF8String) obj35; } ... {code} Note the multiple multiline comments; these obviously break when the regex pattern contains the end-of-comment token '*/'. was: When using the regexp functions in Spark SQL, patterns containing '*/' create runtime errors in the auto generated code. This is due to the fact that the code generator creates a multiline comment containing, amongst other things, the pattern. 
Here is an excerpt from my stacktrace to illustrate: {code} Caused by: org.codehaus.commons.compiler.CompileException: Line 232, Column 54: Unexpected token "," in primary at org.codehaus.janino.Parser.compileException(Parser.java:3125) at org.codehaus.janino.Parser.parsePrimary(Parser.java:2512) at org.codehaus.janino.Parser.parseUnaryExpression(Parser.java:2252) at org.codehaus.janino.Parser.parseMultiplicativeExpression(Parser.java:2211) at org.codehaus.janino.Parser.parseAdditiveExpression(Parser.java:2190) at org.codehaus.janino.Parser.parseShiftExpression(Parser.java:2169) at org.codehaus.janino.Parser.parseRelationalExpression(Parser.java:2072) at org.codehaus.janino.Parser.parseEqualityExpression(Parser.java:2046) at org.codehaus.janino.Parser.parseAndExpression(Parser.java:2025) at org.codehaus.janino.Parser.parseExclusiveOrExpression(Parser.java:2004) at org.codehaus.janino.Parser.parseInclusiveOrExpression(Parser.java:1983) at org.codehaus.janino.Parser.parseConditionalAndExpression(Parser.java:1962) at org.codehaus.janino.Parser.parseConditionalOrExpression(Parser.java:1941) at org.codehaus.janino.Parser.parseConditionalExpression(Parser.java:1922) at org.codehaus.janino.Parser.parseAssignmentExpression(Parser.java:1901) at org.codehaus.janino.Parser.parseExpression(Parser.java:1886) at org.codehaus.janino.Parser.parseBlockStatement(Parser.java:1149) at org.codehaus.janino.Parser.parseBlockStatements(Parser.java:1085) at org.codehaus.janino.Parser.parseMethodDeclarationRest(Parser.java:938) at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:620) at org.codehaus.janino.Parser.parseClassBody(Parser.java:515) at org.codehaus.janino.Parser.parseClassDeclarationRest(Parser.java:481) at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:577) at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:229) at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:192) at 
org.codehaus.commons.compiler.Cookable.cook(Cookable.java:84) at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:77) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:387) ... line 232 ... /* regexp_replace(input[46, StringType],^.*/,) */ /* input[46, StringType] */ boolean isNull31 = i.isNullAt(46); UTF8String primitive32 = isNull31 ? null : (i.getUTF8String(46)); boolean isNull24 = true; UTF8String primitive25 = null; if (!isNull31) { /* ^.*/ */ /* expression: ^.*/ */ Object obj35 = expressions[4].eval(i); boolean isNull33 = obj35 == null; UTF8String primitive34 = null; if (!isNull33) { primitive34 = (UTF8String) obj35; } ... {code} Note the multiple multiline comments, these obviously break when the regex pattern contains the end-of-comment token '*/' > Regexp functions don't support patterns containing '*/' > ------------------------------------------------------- > > Key: SPARK-12451 > URL: https://issues.apache.org/jira/browse/SPARK-12451 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 1.5.2 > Reporter: William Dee > > When using the regexp functions in Spark SQL, patterns containing '*/' create > runtime errors in the auto generated code. This is due to the fact that the > code generator creates a multiline comment containing, amongst other things, > the pattern. 
> Here is an excerpt from my stacktrace to illustrate: (Helpfully, the stack > trace includes all of the auto-generated code) > {code} > Caused by: org.codehaus.commons.compiler.CompileException: Line 232, Column > 54: Unexpected token "," in primary > at org.codehaus.janino.Parser.compileException(Parser.java:3125) > at org.codehaus.janino.Parser.parsePrimary(Parser.java:2512) > at org.codehaus.janino.Parser.parseUnaryExpression(Parser.java:2252) > at > org.codehaus.janino.Parser.parseMultiplicativeExpression(Parser.java:2211) > at org.codehaus.janino.Parser.parseAdditiveExpression(Parser.java:2190) > at org.codehaus.janino.Parser.parseShiftExpression(Parser.java:2169) > at > org.codehaus.janino.Parser.parseRelationalExpression(Parser.java:2072) > at org.codehaus.janino.Parser.parseEqualityExpression(Parser.java:2046) > at org.codehaus.janino.Parser.parseAndExpression(Parser.java:2025) > at > org.codehaus.janino.Parser.parseExclusiveOrExpression(Parser.java:2004) > at > org.codehaus.janino.Parser.parseInclusiveOrExpression(Parser.java:1983) > at > org.codehaus.janino.Parser.parseConditionalAndExpression(Parser.java:1962) > at > org.codehaus.janino.Parser.parseConditionalOrExpression(Parser.java:1941) > at > org.codehaus.janino.Parser.parseConditionalExpression(Parser.java:1922) > at > org.codehaus.janino.Parser.parseAssignmentExpression(Parser.java:1901) > at org.codehaus.janino.Parser.parseExpression(Parser.java:1886) > at org.codehaus.janino.Parser.parseBlockStatement(Parser.java:1149) > at org.codehaus.janino.Parser.parseBlockStatements(Parser.java:1085) > at > org.codehaus.janino.Parser.parseMethodDeclarationRest(Parser.java:938) > at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:620) > at org.codehaus.janino.Parser.parseClassBody(Parser.java:515) > at org.codehaus.janino.Parser.parseClassDeclarationRest(Parser.java:481) > at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:577) > at > 
org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:229) > at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:192) > at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:84) > at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:77) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:387) > ... line 232 ... > /* regexp_replace(input[46, StringType],^.*/,) */ > > /* input[46, StringType] */ > > boolean isNull31 = i.isNullAt(46); > UTF8String primitive32 = isNull31 ? null : (i.getUTF8String(46)); > > boolean isNull24 = true; > UTF8String primitive25 = null; > if (!isNull31) { > /* ^.*/ */ > > /* expression: ^.*/ */ > Object obj35 = expressions[4].eval(i); > boolean isNull33 = obj35 == null; > UTF8String primitive34 = null; > if (!isNull33) { > primitive34 = (UTF8String) obj35; > } > ... > {code} > Note the multiple multiline comments, these obviously break when the regex > pattern contains the end-of-comment token '*/' -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org