[
https://issues.apache.org/jira/browse/MAHOUT-2093?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17048853#comment-17048853
]
Stefan Goldener edited comment on MAHOUT-2093 at 3/2/20 8:44 AM:
-----------------------------------------------------------------
What is really interesting... why do all tests run successfully (when building without
-DskipTests)? The tests apparently do not reflect the true runtime environment.
Here is a Dockerfile to build from the main branch:
{code:yaml}
FROM openjdk:8-alpine
ENV spark_uid=185
ENV SCALA_MAJOR=2.11
ENV SCALA_MAJOR_MINOR=2.11.12
ENV HADOOP_MAJOR=2.7
ENV SPARK_MAJOR_MINOR=2.4.5
ENV MAHOUT_MAJOR_MINOR=14.1
ENV MAHOUT_VERSION=mahout-${MAHOUT_MAJOR_MINOR}
ENV MAHOUT_BASE=/opt/mahout
ENV MAHOUT_HOME=${MAHOUT_BASE}/${MAHOUT_VERSION}
ENV SPARK_VERSION=spark-${SPARK_MAJOR_MINOR}
ENV SPARK_BASE=/opt/spark
ENV SPARK_HOME=${SPARK_BASE}/${SPARK_VERSION}
ENV MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g"
ENV SPARK_SRC_URL="https://archive.apache.org/dist/spark/${SPARK_VERSION}/${SPARK_VERSION}.tgz"
ENV ZINC_PORT=3030
### build spark
RUN set -ex && \
apk upgrade --no-cache && \
ln -s /lib /lib64 && \
apk add --no-cache bash python py-pip tini libc6-compat linux-pam \
    krb5 krb5-libs nss curl openssl git maven && \
pip install setuptools && \
mkdir -p ${MAHOUT_HOME} && \
mkdir -p ${SPARK_BASE} && \
curl -LfsS ${SPARK_SRC_URL} -o ${SPARK_HOME}.tgz && \
tar -xzvf ${SPARK_HOME}.tgz -C ${SPARK_BASE}/ && \
rm ${SPARK_HOME}.tgz && \
export PATH=$PATH:$MAHOUT_HOME/bin:$MAHOUT_HOME/lib:$SPARK_HOME/bin:$JAVA_HOME/bin && \
bash ${SPARK_HOME}/dev/change-scala-version.sh ${SCALA_MAJOR} && \
bash ${SPARK_HOME}/dev/make-distribution.sh --name ${DATE}-${REVISION} \
    --pip --tgz -DzincPort=${ZINC_PORT} \
    -Phadoop-${HADOOP_MAJOR} -Pkubernetes -Pkinesis-asl -Phive \
    -Phive-thriftserver -Pscala-${SCALA_MAJOR}
### build mahout
RUN git clone https://github.com/apache/mahout.git ${MAHOUT_HOME} && \
cd ${MAHOUT_HOME} && \
sed -i '257d' ./bin/mahout && \
mvn -Dspark.version=${SPARK_MAJOR_MINOR} \
    -Dscala.version=${SCALA_MAJOR_MINOR} -Dscala.compat.version=${SCALA_MAJOR} \
    -DskipTests -Dmaven.javadoc.skip=true clean package
{code}
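The image builds and runs the same way as in the original description:
{code:bash}
# Build the image and open a shell inside it.
docker build . -t mahout-test
docker run -it mahout-test /bin/bash
{code}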
Please note the *sed -i '257d' ./bin/mahout* step: it deletes a line in the main
branch's bin/mahout script that otherwise causes an error.
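Since a hard-coded line number drifts whenever the script changes upstream, a safer variant (a minimal sketch; the content of line 257 is not reproduced here) is to print the line before removing it:
{code:bash}
# Show line 257 so the deletion can be sanity-checked against the
# current checkout before the line is actually removed.
sed -n '257p' ./bin/mahout
sed -i '257d' ./bin/mahout
{code}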
In addition, scopt/OptionParser now throws an error:
{code:bash}
bash-4.4# ./bin/mahout spark-itemsimilarity
Adding lib/ to CLASSPATH
:/opt/mahout/mahout-14.1/lib/mahout-core_2.11-14.1-SNAPSHOT.jar:/opt/mahout/mahout-14.1/lib/mahout-hdfs_2.11-14.1-SNAPSHOT.jar:/opt/mahout/mahout-14.1/lib/mahout-spark-cli-drivers_2.11-14.1-SNAPSHOT.jar:/opt/mahout/mahout-14.1/lib/mahout-spark_2.11-14.1-SNAPSHOT-dependency-reduced.jar:/opt/mahout/mahout-14.1/lib/mahout-spark_2.11-14.1-SNAPSHOT.jar:/opt/spark/spark-2.4.5/jars/*::/opt/mahout/mahout-14.1/bin/mahout-spark-class.sh
Error: A JNI error has occurred, please check your installation and try again
Exception in thread "main" java.lang.NoClassDefFoundError: scopt/OptionParser
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:468)
at java.net.URLClassLoader.access$100(URLClassLoader.java:74)
at java.net.URLClassLoader$1.run(URLClassLoader.java:369)
at java.net.URLClassLoader$1.run(URLClassLoader.java:363)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:362)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:349)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.getDeclaredMethods0(Native Method)
at java.lang.Class.privateGetDeclaredMethods(Class.java:2701)
at java.lang.Class.privateGetMethodRecursive(Class.java:3048)
at java.lang.Class.getMethod0(Class.java:3018)
at java.lang.Class.getMethod(Class.java:1784)
at sun.launcher.LauncherHelper.validateMainClass(LauncherHelper.java:544)
at sun.launcher.LauncherHelper.checkAndLoadMain(LauncherHelper.java:526)
Caused by: java.lang.ClassNotFoundException: scopt.OptionParser
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:349)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 19 more
{code}
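The NoClassDefFoundError suggests that none of the jars on the assembled classpath bundles scopt. A quick check, assuming the layout produced by the build above ({{jar}} ships with the JDK, so nothing extra needs to be installed):
{code:bash}
# Scan every jar on the Mahout and Spark classpath for scopt.OptionParser.
for j in ${MAHOUT_HOME}/lib/*.jar ${SPARK_HOME}/jars/*.jar; do
  jar tf "$j" 2>/dev/null | grep -q 'scopt/OptionParser.class' && echo "scopt found in $j"
done
{code}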
> Mahout Source Broken
> --------------------
>
> Key: MAHOUT-2093
> URL: https://issues.apache.org/jira/browse/MAHOUT-2093
> Project: Mahout
> Issue Type: Bug
> Components: Algorithms, Collaborative Filtering, Documentation
> Affects Versions: 0.14.0, 0.13.2
> Reporter: Stefan Goldener
> Priority: Blocker
>
> Seems like newer versions of Mahout have problems with the Spark bindings, e.g.
> *mahout spark-itemsimilarity* and *mahout spark-rowsimilarity* do not work due to
> class-not-found exceptions.
> {code:java}
> Error: Could not find or load main class org.apache.mahout.drivers.RowSimilarityDriver
> {code}
> {code:java}
> Error: Could not find or load main class org.apache.mahout.drivers.ItemSimilarityDriver
> {code}
> whereas *mahout spark-shell* works flawlessly.
> Here is a short Dockerfile to show the issue:
> {code:yaml}
> FROM openjdk:8-alpine
> ENV spark_uid=185
> ENV SCALA_MAJOR=2.11
> ENV SCALA_MAJOR_MINOR=2.11.12
> ENV HADOOP_MAJOR=2.7
> ENV SPARK_MAJOR_MINOR=2.4.5
> ENV MAHOUT_MAJOR_MINOR=0.14.0
> ENV MAHOUT_VERSION=mahout-${MAHOUT_MAJOR_MINOR}
> ENV MAHOUT_BASE=/opt/mahout
> ENV MAHOUT_HOME=${MAHOUT_BASE}/${MAHOUT_VERSION}
> ENV SPARK_VERSION=spark-${SPARK_MAJOR_MINOR}
> ENV SPARK_BASE=/opt/spark
> ENV SPARK_HOME=${SPARK_BASE}/${SPARK_VERSION}
> ENV MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g"
> ENV SPARK_SRC_URL="https://archive.apache.org/dist/spark/${SPARK_VERSION}/${SPARK_VERSION}.tgz"
> ENV MAHOUT_SRC_URL="https://archive.apache.org/dist/mahout/${MAHOUT_MAJOR_MINOR}/mahout-${MAHOUT_MAJOR_MINOR}-source-release.zip"
> ENV ZINC_PORT=3030
> ### build spark
> RUN set -ex && \
> apk upgrade --no-cache && \
> ln -s /lib /lib64 && \
> apk add --no-cache bash python py-pip tini libc6-compat linux-pam \
>     krb5 krb5-libs nss curl openssl git maven && \
> pip install setuptools && \
> mkdir -p ${MAHOUT_HOME} && \
> mkdir -p ${SPARK_BASE} && \
> curl -LfsS ${SPARK_SRC_URL} -o ${SPARK_HOME}.tgz && \
> tar -xzvf ${SPARK_HOME}.tgz -C ${SPARK_BASE}/ && \
> rm ${SPARK_HOME}.tgz && \
> export PATH=$PATH:$MAHOUT_HOME/bin:$MAHOUT_HOME/lib:$SPARK_HOME/bin:$JAVA_HOME/bin && \
> bash ${SPARK_HOME}/dev/change-scala-version.sh ${SCALA_MAJOR} && \
> bash ${SPARK_HOME}/dev/make-distribution.sh --name ${DATE}-${REVISION} \
>     --pip --tgz -DzincPort=${ZINC_PORT} \
>     -Phadoop-${HADOOP_MAJOR} -Pkubernetes -Pkinesis-asl -Phive \
>     -Phive-thriftserver -Pscala-${SCALA_MAJOR}
>
> ### build mahout
> RUN curl -LfsS $MAHOUT_SRC_URL -o ${MAHOUT_BASE}.zip && \
> unzip ${MAHOUT_BASE}.zip -d ${MAHOUT_BASE} && \
> rm ${MAHOUT_BASE}.zip && \
> cd ${MAHOUT_HOME} && \
> mvn -Dspark.version=${SPARK_MAJOR_MINOR} \
>     -Dscala.version=${SCALA_MAJOR_MINOR} -Dscala.compat.version=${SCALA_MAJOR} \
>     -DskipTests -Dmaven.javadoc.skip=true clean package
> {code}
> docker build . -t mahout-test
> docker run -it mahout-test /bin/bash
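> Once inside the container, a quick way to check whether the driver classes were
> packaged at all (a hedged sketch: the CLI-drivers jar name is assumed from the
> 14.1 build output in the comment above and may differ for 0.14.0):
> {code:bash}
> # List any *Driver classes inside the built CLI-drivers jar.
> find ${MAHOUT_HOME} -name 'mahout-spark-cli-drivers*.jar' \
>   -exec sh -c 'echo "== $1"; jar tf "$1" | grep "drivers/.*Driver.class"' _ {} \;
> {code}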
--
This message was sent by Atlassian Jira
(v8.3.4#803005)