Hi Janardhan,

You need the classifier "models" attribute on the second stanford-corenlp entry to indicate that you want the models JAR, as shown below. Right now your second entry is just a duplicate of the first, so you are pulling in the same stanford-corenlp code JAR twice and never getting the models.
libraryDependencies ++= {
  val sparkVersion = "2.0.0"
  Seq(
    "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
    "org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
    "org.apache.spark" %% "spark-streaming" % sparkVersion % "provided",
    "org.apache.spark" %% "spark-mllib" % sparkVersion % "provided",
    "edu.stanford.nlp" % "stanford-corenlp" % "3.6.0",
    "com.google.protobuf" % "protobuf-java" % "2.6.1",
    "edu.stanford.nlp" % "stanford-corenlp" % "3.6.0" classifier "models",
    "org.scalatest" %% "scalatest" % "2.2.6" % "test"
  )
}

-sujit

On Sun, Sep 18, 2016 at 5:12 PM, janardhan shetty <janardhan...@gmail.com> wrote:

> Hi Sujit,
>
> Tried that option but same error:
>
> java version "1.8.0_51"
>
> libraryDependencies ++= {
>   val sparkVersion = "2.0.0"
>   Seq(
>     "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
>     "org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
>     "org.apache.spark" %% "spark-streaming" % sparkVersion % "provided",
>     "org.apache.spark" %% "spark-mllib" % sparkVersion % "provided",
>     "edu.stanford.nlp" % "stanford-corenlp" % "3.6.0",
>     "com.google.protobuf" % "protobuf-java" % "2.6.1",
>     "edu.stanford.nlp" % "stanford-corenlp" % "3.6.0",
>     "org.scalatest" %% "scalatest" % "2.2.6" % "test"
>   )
> }
>
> Error:
>
> Exception in thread "main" java.lang.NoClassDefFoundError: edu/stanford/nlp/pipeline/StanfordCoreNLP
>     at transformers.ml.Lemmatizer$$anonfun$createTransformFunc$1.apply(Lemmatizer.scala:37)
>     at transformers.ml.Lemmatizer$$anonfun$createTransformFunc$1.apply(Lemmatizer.scala:33)
>     at org.apache.spark.sql.catalyst.expressions.ScalaUDF$$anonfun$2.apply(ScalaUDF.scala:88)
>     at org.apache.spark.sql.catalyst.expressions.ScalaUDF$$anonfun$2.apply(ScalaUDF.scala:87)
>     at org.apache.spark.sql.catalyst.expressions.ScalaUDF.eval(ScalaUDF.scala:1060)
>     at org.apache.spark.sql.catalyst.expressions.Alias.eval(namedExpressions.scala:142)
>     at org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:45)
>     at org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:29)
>     at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>     at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>     at scala.collection.immutable.List.foreach(List.scala:381)
>     at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
>
> On Sun, Sep 18, 2016 at 2:21 PM, Sujit Pal <sujitatgt...@gmail.com> wrote:
>
>> Hi Janardhan,
>>
>> Maybe try removing the string "test" from this line in your build.sbt?
>> IIRC, the "test" scope restricts the models JAR to the test classpath.
>>
>> "edu.stanford.nlp" % "stanford-corenlp" % "3.6.0" % "test" classifier "models",
>>
>> -sujit
>>
>> On Sun, Sep 18, 2016 at 11:01 AM, janardhan shetty <janardhan...@gmail.com> wrote:
>>
>>> Hi,
>>>
>>> I am trying to use lemmatization as a transformer and added the below to the build.sbt:
>>>
>>> "edu.stanford.nlp" % "stanford-corenlp" % "3.6.0",
>>> "com.google.protobuf" % "protobuf-java" % "2.6.1",
>>> "edu.stanford.nlp" % "stanford-corenlp" % "3.6.0" % "test" classifier "models",
>>> "org.scalatest" %% "scalatest" % "2.2.6" % "test"
>>>
>>> Error:
>>> *Exception in thread "main" java.lang.NoClassDefFoundError: edu/stanford/nlp/pipeline/StanfordCoreNLP*
>>>
>>> I have tried other versions of this Spark package.
>>>
>>> Any help is appreciated.
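P.S. A quick way to verify the dependency fix outside of Spark is a tiny main class that builds a CoreNLP pipeline directly. This is just a sketch (the object name and annotator list are placeholders, not from your code): the StanfordCoreNLP constructor loads its models eagerly, so it fails immediately with NoClassDefFoundError if the code JAR is missing, or with an error about being unable to load the tagger model if the "models" classifier JAR is missing.

    import java.util.Properties
    import edu.stanford.nlp.pipeline.StanfordCoreNLP

    // Smoke test: succeeds only when both the stanford-corenlp code JAR
    // and its "models" classifier JAR are on the runtime classpath.
    object CoreNLPSmokeTest {
      def main(args: Array[String]): Unit = {
        val props = new Properties()
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma")
        val pipeline = new StanfordCoreNLP(props)  // loads POS/lemma models eagerly
        println("CoreNLP pipeline initialized OK")
      }
    }

Once this runs cleanly via sbt run, the same dependencies should resolve inside your Spark UDF as well, assuming the CoreNLP JARs are also shipped to the executors.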