viirya commented on a change in pull request #24939: [SPARK-18569][ML][R] 
Support RFormula arithmetic, I() and spark functions
URL: https://github.com/apache/spark/pull/24939#discussion_r365040912
 
 

 ##########
 File path: 
mllib/src/main/scala/org/apache/spark/ml/feature/RFormulaParser.scala
 ##########
 @@ -269,40 +310,81 @@ private[ml] object RFormulaParser extends RegexParsers {
   }
 
   private val intercept: Parser[Term] =
-    "([01])".r ^^ { case a => Intercept(a == "1") }
+    skipSpace("([01])".r) ^^ { case a => Intercept(a == "1") }
 
   private val columnRef: Parser[ColumnRef] =
-    "([a-zA-Z]|\\.[a-zA-Z_])[a-zA-Z0-9._]*".r ^^ { case a => ColumnRef(a) }
+    skipSpace("([a-zA-Z]|\\.[a-zA-Z_])[a-zA-Z0-9._]*".r) ^^ { case a => 
ColumnRef(a) }
 
-  private val empty: Parser[ColumnRef] = "" ^^ { case a => ColumnRef("") }
+  private val empty: Parser[ColumnRef] = skipSpace("".r) ^^ { case a => 
ColumnRef("") }
 
-  private val label: Parser[ColumnRef] = columnRef | empty
+  private val label: Parser[Label] = evalExpr | columnRef | empty
 
-  private val dot: Parser[Term] = "\\.".r ^^ { case _ => Dot }
+  private val dot: Parser[Term] = skipSpace("\\.".r) ^^ { case _ => Dot }
 
-  private val parens: Parser[Term] = "(" ~> expr <~ ")"
+  private val parens: Parser[Term] = skipSpace("\\(".r) ~> expr <~ 
skipSpace("\\)".r)
 
-  private val term: Parser[Term] = parens | intercept | columnRef | dot
+  private val term: Parser[Term] = evalExpr | parens | intercept | columnRef | 
dot
 
-  private val pow: Parser[Term] = term ~ "^" ~ "^[1-9]\\d*".r ^^ {
+  private val pow: Parser[Term] = term ~ "^" ~ skipSpace("^[1-9]\\d*".r) ^^ {
     case base ~ "^" ~ degree => power(base, degree.toInt)
   } | term
 
-  private val interaction: Parser[Term] = pow * (":" ^^^ { interact _ })
+  private val interaction: Parser[Term] = pow * (skipSpace("\\:".r) ^^^ { 
interact _ })
 
-  private val factor = interaction * ("*" ^^^ { cross _ })
+  private val factor = interaction * (skipSpace("\\*".r) ^^^ { cross _ })
 
-  private val sum = factor * ("+" ^^^ { add _ } |
-    "-" ^^^ { subtract _ })
+  private val sum = factor * (skipSpace("\\+".r) ^^^ { add _ } |
+    skipSpace("\\-".r) ^^^ { subtract _ })
 
   private val expr = (sum | term)
 
-  private val formula: Parser[ParsedRFormula] =
-    (label ~ "~" ~ expr) ^^ { case r ~ "~" ~ t => ParsedRFormula(r, 
t.asTerms.terms) }
+  private val formula: Parser[ParsedRFormula] = (label ~ skipSpace("\\~".r) ~ 
expr) ^^ {
+    case r ~ "~" ~ t => ParsedRFormula(r, t.asTerms.terms) }
 
   def parse(value: String): ParsedRFormula = parseAll(formula, value) match {
     case Success(result, _) => result
     case failure: NoSuccess => throw new IllegalArgumentException(
       "Could not parse formula: " + value)
   }
 }
+
+/**
+ * Parser for evaluated expressions in a formula. An evaluated expression is
+ * any alphanumeric identifiers followed by (), e.g. `func123(a+b, func())`,
+ * or anything inside `I()`. A valid expression is any string-parentheses 
product,
+ * such that if there are any parentheses ('(' or ')') they're all balanced.
+ */
+private[ml] trait EvalExprParser extends RegexParsers {
+
+  /* Any char except `(` or `)`. */
+  private def char: Parser[Char] = """[^\(\)]""".r ^^ { _.head }
+
+  /* Characters followed by any number of numeric characters. */
+  private def functionIdentifier: Parser[String] = """[a-zA-Z_]\w*""".r
+
+  private val space = "[ \\n]*".r
+
+  private def string: Parser[String] = rep(char) ^^ {_.mkString}
+
+  private def nonEmptyString: Parser[String] = rep1(char) ^^ {_.mkString}
+
+  private def stringParenProduct: Parser[String] = ((string) ~ paren ~ 
(string)) ^^ {
+    case l ~ m ~ r => l + m  + r }
 
 Review comment:
   Nit: there is an extra space after `m` — `l + m  + r` should be `l + m + r`.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org

Reply via email to