Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21501#discussion_r194346431 --- Diff: mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala --- @@ -65,6 +65,56 @@ class StopWordsRemoverSuite extends MLTest with DefaultReadWriteTest { testStopWordsRemover(remover, dataSet) } + test("StopWordsRemover with localed input (case insensitive)") { + val stopWords = Array("milk", "cookie") + val remover = new StopWordsRemover() + .setInputCol("raw") + .setOutputCol("filtered") + .setStopWords(stopWords) + .setLocale("tr") // Turkish alphabet: has no Q, W, X but has dotted and dotless 'I's. --- End diff -- Let's explicitly call `.setCaseSensitive(false)` here.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org