Repository: mahout Updated Branches: refs/heads/branch-0.14.0 4f18f402a -> 020a18752
MAHOUT-2032 Delete files from lib on mvn clean Project: http://git-wip-us.apache.org/repos/asf/mahout/repo Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/020a1875 Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/020a1875 Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/020a1875 Branch: refs/heads/branch-0.14.0 Commit: 020a187523266dad14da2dc36ca87d7cc186de82 Parents: 4f18f40 Author: Trevor a.k.a @rawkintrevo <[email protected]> Authored: Mon Jun 4 11:34:19 2018 -0500 Committer: Trevor a.k.a @rawkintrevo <[email protected]> Committed: Mon Jun 4 11:34:19 2018 -0500 ---------------------------------------------------------------------- community/community-engines/flink-batch/pom.xml | 9 +- community/community-engines/h2o/pom.xml | 10 +- community/mahout-mr/pom.xml | 23 +- community/spark-cli-drivers/pom.xml | 141 ++++++++++- .../apache/mahout/drivers/MahoutDriver.scala | 44 ++++ .../mahout/drivers/MahoutOptionParser.scala | 220 ++++++++++++++++++ core/pom.xml | 39 +++- .../math/solver/EigenDecompositionTest.java | 120 ++++++++++ .../org/apache/mahout/math/solver/LSMRTest.java | 105 +++++++++ .../solver/TestConjugateGradientSolver.java | 231 +++++++++++++++++++ engine/hdfs/pom.xml | 61 +++-- engine/spark/pom.xml | 20 +- experimental/pom.xml | 1 - experimental/viennacl-omp/pom.xml | 6 +- experimental/viennacl/pom.xml | 6 +- pom.xml | 17 ++ refactor-readme.md | 18 +- 17 files changed, 1011 insertions(+), 60 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/community/community-engines/flink-batch/pom.xml ---------------------------------------------------------------------- diff --git a/community/community-engines/flink-batch/pom.xml b/community/community-engines/flink-batch/pom.xml index 3e89c4c..990fd3a 100644 --- a/community/community-engines/flink-batch/pom.xml +++ b/community/community-engines/flink-batch/pom.xml @@ -56,7 +56,7 @@ <phase>package</phase> <configuration> <tasks> - <copy file="target/mahout-flink-batch_${scala.compat.version}-${version}.jar" tofile="../mahout-flink-batch_${scala.compat.version}-${version}.jar"/> + <copy file="target/mahout-flink-batch_${scala.compat.version}-${version}.jar" tofile="../../../lib/mahout-flink-batch_${scala.compat.version}-${version}.jar"/> </tasks> </configuration> <goals> @@ -102,7 +102,6 @@ </plugin> <!--this is what scalatest recommends to do to enable scala tests --> - <!-- disable surefire --> <plugin> <groupId>org.apache.maven.plugins</groupId> @@ -141,7 +140,7 @@ <configuration> <filesets> <fileset> - <directory>../</directory> + <directory>../../../lib</directory> <includes> <include>mahout-flink*.jar</include> </includes> @@ -194,7 +193,7 @@ <dependency> <groupId>org.apache.mahout</groupId> <!--<artifactId>mahout-math-scala_${scala.compat.version}</artifactId>--> - <artifactId>core</artifactId> + <artifactId>core_${scala.compat.version}</artifactId> <version>${project.version}</version> </dependency> @@ -226,7 +225,7 @@ <dependency> <groupId>org.apache.mahout</groupId> <!--<artifactId>mahout-math-scala_${scala.compat.version}</artifactId>--> - <artifactId>core</artifactId> + <artifactId>core_${scala.compat.version}</artifactId> <version>${project.version}</version> <classifier>tests</classifier> <scope>test</scope> http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/community/community-engines/h2o/pom.xml ---------------------------------------------------------------------- diff --git 
a/community/community-engines/h2o/pom.xml b/community/community-engines/h2o/pom.xml index 2a3cd05..8049f07 100644 --- a/community/community-engines/h2o/pom.xml +++ b/community/community-engines/h2o/pom.xml @@ -48,7 +48,7 @@ <groupId>org.apache.rat</groupId> <artifactId>apache-rat-plugin</artifactId> </plugin> - + <!-- copy jars to lib/ --> <plugin> <artifactId>maven-antrun-plugin</artifactId> @@ -59,7 +59,7 @@ <phase>package</phase> <configuration> <tasks> - <copy file="target/mahout-h2o_${scala.compat.version}-${version}.jar" tofile="../mahout-h2o_${scala.compat.version}-${version}.jar" /> + <copy file="target/mahout-h2o_${scala.compat.version}-${version}.jar" tofile="../../../lib/mahout-h2o_${scala.compat.version}-${version}.jar" /> </tasks> </configuration> <goals> @@ -153,7 +153,7 @@ <configuration> <filesets> <fileset> - <directory>../</directory> + <directory>../../../lib</directory> <includes> <include>mahout-h2o*.jar</include> </includes> @@ -185,7 +185,7 @@ <dependency> <groupId>org.apache.mahout</groupId> <!--<artifactId>mahout-math-scala_${scala.compat.version}</artifactId>--> - <artifactId>core</artifactId> + <artifactId>core_${scala.compat.version}</artifactId> <version>${project.version}</version> <classifier>tests</classifier> <scope>test</scope> @@ -194,7 +194,7 @@ <dependency> <groupId>org.apache.mahout</groupId> <!--<artifactId>mahout-math</artifactId>--> - <artifactId>core</artifactId> + <artifactId>core_${scala.compat.version}</artifactId> <version>${project.version}</version> </dependency> http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/community/mahout-mr/pom.xml ---------------------------------------------------------------------- diff --git a/community/mahout-mr/pom.xml b/community/mahout-mr/pom.xml index cf03000..a25e9d5 100644 --- a/community/mahout-mr/pom.xml +++ b/community/mahout-mr/pom.xml @@ -57,7 +57,7 @@ <artifactId>apache-rat-plugin</artifactId> </plugin> - <!-- copy jars to top directory, which is MAHOUT_HOME --> + <!-- copy jars to lib/ --> <plugin> <artifactId>maven-antrun-plugin</artifactId> <version>1.4</version> @@ -67,7 +67,7 @@ <phase>package</phase> <configuration> <tasks> - <copy file="target/mahout-mr-${version}.jar" tofile="../mahout-mr-${version}.jar" /> + <copy file="target/mahout-mr-${version}.jar" tofile="../../lib/mahout-mr-${version}.jar" /> </tasks> </configuration> <goals> @@ -76,6 +76,23 @@ </execution> </executions> </plugin> + + <!-- delete files on mvn clean --> + <plugin> + <artifactId>maven-clean-plugin</artifactId> + <version>3.0.0</version> + <configuration> + <filesets> + <fileset> + <directory>../../lib/</directory> + <includes> + <include>mahout-mr-*.jar</include> + </includes> + <followSymlinks>false</followSymlinks> + </fileset> + </filesets> + </configuration> + </plugin> <!-- create test jar so other modules can reuse the core test utility classes. 
--> <plugin> <groupId>org.apache.maven.plugins</groupId> @@ -137,7 +154,7 @@ <configuration> <filesets> <fileset> - <directory>../</directory> + <directory>../../lib</directory> <includes> <include>mahout-mr*.jar</include> </includes> http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/community/spark-cli-drivers/pom.xml ---------------------------------------------------------------------- diff --git a/community/spark-cli-drivers/pom.xml b/community/spark-cli-drivers/pom.xml index 636bc63..6e5c870 100644 --- a/community/spark-cli-drivers/pom.xml +++ b/community/spark-cli-drivers/pom.xml @@ -27,11 +27,48 @@ <relativePath>../pom.xml</relativePath> </parent> - <artifactId>spark-cli-drivers</artifactId> + <artifactId>spark-cli-drivers_${scala.compat.version}</artifactId> <name>- Mahout Spark CLI Drivers</name> <description>These drivers will allow you to compile a handy Apache Spark App which utilizes Apache Mahout. </description> + <dependencies> + + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_${scala.compat.version}</artifactId> + </dependency> + + <dependency> + <groupId>com.github.scopt</groupId> + <artifactId>scopt_${scala.compat.version}</artifactId> + <version>3.3.0</version> + </dependency> + + <!-- our stuff --> + <dependency> + <groupId>org.apache.mahout</groupId> + <artifactId>spark_${scala.compat.version}</artifactId> + <version>${project.version}</version> + </dependency> + + <dependency> + <groupId>org.apache.mahout</groupId> + <artifactId>core_${scala.compat.version}</artifactId> + <version>${project.version}</version> + <classifier>tests</classifier> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>org.apache.mahout</groupId> + <artifactId>spark_${scala.compat.version}</artifactId> + <version>${project.version}</version> + <classifier>tests</classifier> + </dependency> + </dependencies> + + <build> <plugins> <!-- ensure licenses --> @@ -39,8 +76,110 @@ <groupId>org.apache.rat</groupId> <artifactId>apache-rat-plugin</artifactId> </plugin> + + + <!-- copy jars to lib/ --> + <plugin> + <artifactId>maven-antrun-plugin</artifactId> + <version>1.4</version> + <executions> + <execution> + <id>copy</id> + <phase>package</phase> + <configuration> + <tasks> + <copy file="target/spark-cli-drivers_${scala.compat.version}-${project.version}.jar" tofile="../../lib/spark-cli-drivers_${scala.compat.version}-${project.version}.jar" /> + </tasks> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + </executions> + </plugin> + + <!-- delete files on mvn clean --> + <plugin> + <artifactId>maven-clean-plugin</artifactId> + <version>3.0.0</version> + <configuration> + <filesets> + <fileset> + <directory>../../lib/</directory> + <includes> + <include>spark-cli-drivers_*.jar</include> + </includes> + <followSymlinks>false</followSymlinks> + </fileset> + </filesets> + </configuration> + </plugin> + + <!-- scala stuff --> + <plugin> + <groupId>net.alchim31.maven</groupId> + <artifactId>scala-maven-plugin</artifactId> + <executions> + <execution> + <id>add-scala-sources</id> + <phase>initialize</phase> + <goals> + <goal>add-source</goal> + </goals> + </execution> + <execution> + <id>scala-compile</id> + <phase>process-resources</phase> + <goals> + <goal>compile</goal> + </goals> + </execution> + <execution> + <id>scala-test-compile</id> + <phase>process-test-resources</phase> + <goals> + <goal>testCompile</goal> + </goals> + </execution> + </executions> + </plugin> + + <!--this is what scalatest recommends to do to enable scala tests --> + <!-- 
disable surefire --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <configuration> + <systemPropertyVariables> + <mahout.home>${project.build.directory}</mahout.home> + </systemPropertyVariables> + <skipTests>true</skipTests> + </configuration> + </plugin> + + <!-- enable scalatest --> + <plugin> + <groupId>org.scalatest</groupId> + <artifactId>scalatest-maven-plugin</artifactId> + <executions> + <execution> + <id>test</id> + <goals> + <goal>test</goal> + </goals> + </execution> + </executions> + <configuration> + <systemProperties> + <mahout.home>${project.build.directory}</mahout.home> + </systemProperties> + <argLine>-Xmx4g</argLine> + </configuration> + </plugin> + </plugins> </build> + <packaging>jar</packaging> </project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/community/spark-cli-drivers/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala ---------------------------------------------------------------------- diff --git a/community/spark-cli-drivers/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala b/community/spark-cli-drivers/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala new file mode 100644 index 0000000..32515f1 --- /dev/null +++ b/community/spark-cli-drivers/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.drivers + +import org.apache.mahout.math.drm.DistributedContext + +/** Extended by a platform specific version of this class to create a Mahout CLI driver. 
*/ +abstract class MahoutDriver { + + implicit protected var mc: DistributedContext = _ + implicit protected var parser: MahoutOptionParser = _ + + var _useExistingContext: Boolean = false // used in the test suite to reuse one context per suite + + /** Must be overridden to set up the DistributedContext mc */ + protected def start() : Unit + + /** Override (optionally) for special cleanup */ + protected def stop(): Unit = { + if (!_useExistingContext) mc.close + } + + /** This is where you do the work: call start() first, then call stop() before exiting */ + protected def process(): Unit + + /** Parse command line and call process */ + def main(args: Array[String]): Unit + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/community/spark-cli-drivers/src/main/scala/org/apache/mahout/drivers/MahoutOptionParser.scala ---------------------------------------------------------------------- diff --git a/community/spark-cli-drivers/src/main/scala/org/apache/mahout/drivers/MahoutOptionParser.scala b/community/spark-cli-drivers/src/main/scala/org/apache/mahout/drivers/MahoutOptionParser.scala new file mode 100644 index 0000000..d3723a2 --- /dev/null +++ b/community/spark-cli-drivers/src/main/scala/org/apache/mahout/drivers/MahoutOptionParser.scala @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.mahout.drivers + +import scopt.OptionParser + +import scala.collection.immutable + +/** + * Defines oft-repeated options and their parsing. Provides the option groups and parsing helper methods to + * keep both standardized. + * @param programName Name displayed in help message, the name by which the driver is invoked. + * @note options are engine neutral by convention. See the engine-specific extending class + * to add Spark or other engine options. + */ +class MahoutOptionParser(programName: String) extends OptionParser[Map[String, Any]](programName: String) { + + // build options from some standard CLI param groups + // Note: always put the driver-specific options last so they can override any previous options! + var opts = Map.empty[String, Any] + + override def showUsageOnError = true + + def parseIOOptions(numInputs: Int = 1) = { + opts = opts ++ MahoutOptionParser.FileIOOptions + note("Input, output options") + opt[String]('i', "input") required() action { (x, options) => + options + ("input" -> x) + } text ("Input path, may be a filename, directory name, or comma delimited list of HDFS supported URIs" + + " (required)") + + if (numInputs == 2) { + opt[String]("input2") abbr ("i2") action { (x, options) => + options + ("input2" -> x) + } text ("Secondary input path for cross-similarity calculation, same restrictions as \"--input\" " + + "(optional). 
Default: empty.") + } + + opt[String]('o', "output") required() action { (x, options) => + if (x.endsWith("/")) { + options + ("output" -> x) + } else { + options + ("output" -> (x + "/")) + } + } text ("Path for output directory, any HDFS supported URI (required)") + + } + + def parseGenericOptions() = { + opts = opts ++ MahoutOptionParser.GenericOptions + opt[Int]("randomSeed") abbr ("rs") action { (x, options) => + options + ("randomSeed" -> x) + } validate { x => + if (x > 0) success else failure("Option --randomSeed must be > 0") + } + + //output both input IndexedDatasets + opt[Unit]("writeAllDatasets") hidden() action { (_, options) => + options + ("writeAllDatasets" -> true) + }//Hidden option, though a user might want this. + } + + def parseElementInputSchemaOptions() = { + //Input text file schema--not driver specific but input data specific, elements input, + // not rows of IndexedDatasets + opts = opts ++ MahoutOptionParser.TextDelimitedElementsOptions + note("\nInput text file schema options:") + opt[String]("inDelim") abbr ("id") text ("Input delimiter character (optional). Default: \"[ ,\\t]\"") action { + (x, options) => + options + ("inDelim" -> x) + } + + opt[String]("filter1") abbr ("f1") action { (x, options) => + options + ("filter1" -> x) + } text ("String (or regex) whose presence indicates a datum for the primary item set (optional). " + + "Default: no filter, all data is used") + + opt[String]("filter2") abbr ("f2") action { (x, options) => + options + ("filter2" -> x) + } text ("String (or regex) whose presence indicates a datum for the secondary item set (optional). " + + "If not present no secondary dataset is collected") + + opt[Int]("rowIDColumn") abbr ("rc") action { (x, options) => + options + ("rowIDColumn" -> x) + } text ("Column number (0 based Int) containing the row ID string (optional). Default: 0") validate { + x => + if (x >= 0) success else failure("Option --rowIDColNum must be >= 0") + } + + opt[Int]("itemIDColumn") abbr ("ic") action { (x, options) => + options + ("itemIDColumn" -> x) + } text ("Column number (0 based Int) containing the item ID string (optional). Default: 1") validate { + x => + if (x >= 0) success else failure("Option --itemIDColNum must be >= 0") + } + + opt[Int]("filterColumn") abbr ("fc") action { (x, options) => + options + ("filterColumn" -> x) + } text ("Column number (0 based Int) containing the filter string (optional). 
Default: -1 for no " + "filter") validate { x => + if (x >= -1) success else failure("Option --filterColumn must be >= -1") + } + + note("\nUsing all defaults the input is expected to be of the form: \"userID<tab>itemID\" or" + + " \"userID<tab>itemID<tab>any-text...\" and all rows will be used") + + //check for column consistency + checkConfig { options: Map[String, Any] => + if (options("filterColumn").asInstanceOf[Int] == options("itemIDColumn").asInstanceOf[Int] + || options("filterColumn").asInstanceOf[Int] == options("rowIDColumn").asInstanceOf[Int] + || options("rowIDColumn").asInstanceOf[Int] == options("itemIDColumn").asInstanceOf[Int]) + failure("The row, item, and filter positions must be unique.") else success + } + + //check for filter consistency + checkConfig { options: Map[String, Any] => + if (options("filter1").asInstanceOf[String] != null.asInstanceOf[String] + && options("filter2").asInstanceOf[String] != null.asInstanceOf[String] + && options("filter1").asInstanceOf[String] == options("filter2").asInstanceOf[String]) + failure ("If using filters they must be unique.") else success + } + + } + + def parseFileDiscoveryOptions() = { + //File finding strategy--not driver specific + opts = opts ++ MahoutOptionParser.FileDiscoveryOptions + note("\nFile discovery options:") + opt[Unit]('r', "recursive") action { (_, options) => + options + ("recursive" -> true) + } text ("Search the -i path recursively for files that match --filenamePattern (optional). Default: false") + + opt[String]("filenamePattern") abbr ("fp") action { (x, options) => + options + ("filenamePattern" -> x) + } text ("Regex to match in determining input files (optional). Default: filename in the --input option " + + "or \"^part-.*\" if --input is a directory") + + } + + def parseIndexedDatasetFormatOptions(notice: String = "\nOutput text file schema options:") = { + opts = opts ++ MahoutOptionParser.TextDelimitedIndexedDatasetOptions + note(notice) + opt[String]("rowKeyDelim") abbr ("rd") action { (x, options) => + options + ("rowKeyDelim" -> x) + } text ("Separates the rowID key from the vector values list (optional). Default: \"\\t\"") + + opt[String]("columnIdStrengthDelim") abbr ("cd") action { (x, options) => + options + ("columnIdStrengthDelim" -> x) + } text ("Separates column IDs from their values in the vector values list (optional). Default: \":\"") + + opt[String]("elementDelim") abbr ("td") action { (x, options) => + options + ("elementDelim" -> x) + } text ("Separates vector element values in the values list (optional). Default: \" \"") + + opt[Unit]("omitStrength") abbr ("os") action { (_, options) => + options + ("omitStrength" -> true) + } text ("Do not write the strength to the output files (optional). Default: false.") + note("This option is used to output indexable data for creating a search engine recommender.") + + note("\nDefault delimiters will produce output of the form: " + + "\"itemID1<tab>itemID2:value2<space>itemID10:value10...\"") + } + +} + +/** + * Companion object defines default option groups for reference in any driver that needs them. 
+ * @note not all options are platform neutral so other platforms can add default options here if desired + */ +object MahoutOptionParser { + + // set up the various default option groups + final val GenericOptions = immutable.HashMap[String, Any]( + "randomSeed" -> System.currentTimeMillis().toInt, + "writeAllDatasets" -> false) + + final val SparkOptions = immutable.HashMap[String, Any]( + "master" -> "local", + "sparkExecutorMem" -> "", + "appName" -> "Generic Spark App, Change this.") + + final val FileIOOptions = immutable.HashMap[String, Any]( + "input" -> null.asInstanceOf[String], + "input2" -> null.asInstanceOf[String], + "output" -> null.asInstanceOf[String]) + + final val FileDiscoveryOptions = immutable.HashMap[String, Any]( + "recursive" -> false, + "filenamePattern" -> "^part-.*") + + final val TextDelimitedElementsOptions = immutable.HashMap[String, Any]( + "rowIDColumn" -> 0, + "itemIDColumn" -> 1, + "filterColumn" -> -1, + "filter1" -> null.asInstanceOf[String], + "filter2" -> null.asInstanceOf[String], + "inDelim" -> "[,\t ]") + + final val TextDelimitedIndexedDatasetOptions = immutable.HashMap[String, Any]( + "rowKeyDelim" -> "\t", + "columnIdStrengthDelim" -> ":", + "elementDelim" -> " ", + "omitStrength" -> false) +} + + http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/core/pom.xml ---------------------------------------------------------------------- diff --git a/core/pom.xml b/core/pom.xml index 6c9dd43..35862f2 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -27,7 +27,7 @@ <relativePath>../pom.xml</relativePath> </parent> - <artifactId>core</artifactId> + <artifactId>core_${scala.compat.version}</artifactId> <name>Mahout Core</name> <description>High performance scientific and technical computing data structures and methods, mostly based on CERN's Colt Java API @@ -193,6 +193,43 @@ <groupId>org.apache.rat</groupId> <artifactId>apache-rat-plugin</artifactId> </plugin> + + <!-- copy jars to lib/ --> + <plugin> + <artifactId>maven-antrun-plugin</artifactId> + <version>1.4</version> + <executions> + <execution> + <id>copy</id> + <phase>package</phase> + <configuration> + <tasks> + <copy file="target/core_${scala.compat.version}-${project.version}.jar" tofile="../lib/core_${scala.compat.version}-${project.version}.jar" /> + </tasks> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + </executions> + </plugin> + + <!-- delete files on mvn clean --> + <plugin> + <artifactId>maven-clean-plugin</artifactId> + <version>3.0.0</version> + <configuration> + <filesets> + <fileset> + <directory>../lib/</directory> + <includes> + <include>core_*.jar</include> + </includes> + <followSymlinks>false</followSymlinks> + </fileset> + </filesets> + </configuration> + </plugin> </plugins> </build> </project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/core/src/test/java/org/apache/mahout/math/solver/EigenDecompositionTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/mahout/math/solver/EigenDecompositionTest.java b/core/src/test/java/org/apache/mahout/math/solver/EigenDecompositionTest.java new file mode 100644 index 0000000..690b633 --- /dev/null +++ b/core/src/test/java/org/apache/mahout/math/solver/EigenDecompositionTest.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.solver; + +import org.apache.mahout.common.RandomUtils; +import org.apache.mahout.math.DenseMatrix; +import org.apache.mahout.math.Matrix; +import org.apache.mahout.math.MatrixSlice; +import org.apache.mahout.math.Vector; +import org.apache.mahout.math.function.DoubleFunction; +import org.apache.mahout.math.function.Functions; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Random; + +public class EigenDecompositionTest { + @Test + public void testDegenerateMatrix() { + double[][] m = { + new double[]{0.641284, 0.767303, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000}, + new double[]{0.767303, 3.050159, 2.561342, 0.000000, 0.000000, 0.000000, 0.000000}, + new double[]{0.000000, 2.561342, 5.000609, 0.810507, 0.000000, 0.000000, 0.000000}, + new double[]{0.000000, 0.000000, 0.810507, 0.550477, 0.142853, 0.000000, 0.000000}, + new double[]{0.000000, 0.000000, 0.000000, 0.142853, 0.254566, 0.000000, 0.000000}, + new double[]{0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.256073, 0.000000}, + new double[]{0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000}}; + Matrix x = new DenseMatrix(m); + EigenDecomposition eig = new EigenDecomposition(x, true); + Matrix d = eig.getD(); + Matrix v = eig.getV(); + check("EigenvalueDecomposition (evil)...", x.times(v), v.times(d)); + } + + @Test + public void testDeficientRank() { + Matrix a = new DenseMatrix(10, 3).assign(new DoubleFunction() { + private final Random gen = RandomUtils.getRandom(); + @Override + public double apply(double arg1) { + return gen.nextGaussian(); + } + }); + + a = a.transpose().times(a); + + EigenDecomposition eig = new EigenDecomposition(a); + Matrix d = eig.getD(); + Matrix v = eig.getV(); + check("EigenvalueDecomposition (rank deficient)...", a.times(v), v.times(d)); + + Assert.assertEquals(0, eig.getImagEigenvalues().norm(1), 1.0e-10); + Assert.assertEquals(3, eig.getRealEigenvalues().norm(0), 1.0e-10); + } + + @Test + public void testEigen() { + double[] evals = + {0.0, 1.0, 0.0, 0.0, + 1.0, 0.0, 2.0e-7, 0.0, + 0.0, -2.0e-7, 0.0, 1.0, + 0.0, 0.0, 1.0, 0.0}; + int i = 0; + Matrix a = new DenseMatrix(4, 4); + for (MatrixSlice row : a) { + for (Vector.Element element : row.vector().all()) { + element.set(evals[i++]); + } + } + EigenDecomposition eig = new EigenDecomposition(a); + Matrix d = eig.getD(); + Matrix v = eig.getV(); + check("EigenvalueDecomposition (nonsymmetric)...", a.times(v), v.times(d)); + } + + @Test + public void testSequential() { + int validld = 3; + Matrix A = new DenseMatrix(validld, validld); + double[] columnwise = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; + int i = 0; + for (MatrixSlice row : A) { + for (Vector.Element element : row.vector().all()) { + element.set(columnwise[i++]); + } + } + + EigenDecomposition 
Eig = new EigenDecomposition(A); + Matrix D = Eig.getD(); + Matrix V = Eig.getV(); + check("EigenvalueDecomposition (nonsymmetric)...", A.times(V), V.times(D)); + + A = A.transpose().times(A); + Eig = new EigenDecomposition(A); + D = Eig.getD(); + V = Eig.getV(); + check("EigenvalueDecomposition (symmetric)...", A.times(V), V.times(D)); + + } + + private static void check(String msg, Matrix a, Matrix b) { + Assert.assertEquals(msg, 0, a.minus(b).aggregate(Functions.PLUS, Functions.ABS), 1.0e-10); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/core/src/test/java/org/apache/mahout/math/solver/LSMRTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/mahout/math/solver/LSMRTest.java b/core/src/test/java/org/apache/mahout/math/solver/LSMRTest.java new file mode 100644 index 0000000..82b309f --- /dev/null +++ b/core/src/test/java/org/apache/mahout/math/solver/LSMRTest.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.solver; + +import org.apache.mahout.math.DenseMatrix; +import org.apache.mahout.math.DenseVector; +import org.apache.mahout.math.MahoutTestCase; +import org.apache.mahout.math.Matrix; +import org.apache.mahout.math.SingularValueDecomposition; +import org.apache.mahout.math.Vector; +import org.apache.mahout.math.function.Functions; +import org.junit.Test; + +public final class LSMRTest extends MahoutTestCase { + @Test + public void basics() { + Matrix m = hilbert(5); + + // make sure it is the hilbert matrix we know and love + assertEquals(1, m.get(0, 0), 0); + assertEquals(0.5, m.get(0, 1), 0); + assertEquals(1 / 6.0, m.get(2, 3), 1.0e-9); + + Vector x = new DenseVector(new double[]{5, -120, 630, -1120, 630}); + + Vector b = new DenseVector(5); + b.assign(1); + + assertEquals(0, m.times(x).minus(b).norm(2), 1.0e-9); + + LSMR r = new LSMR(); + Vector x1 = r.solve(m, b); + + // the ideal solution is [5 -120 630 -1120 630] but the 5x5 hilbert matrix + // has a condition number of almost 500,000 and the normal equation condition + // number is that squared. This means that we don't get the exact answer with + // a fast iterative solution. + // Thus, we have to check the residuals rather than testing that the answer matched + // the ideal. + assertEquals(0, m.times(x1).minus(b).norm(2), 1.0e-2); + assertEquals(0, m.transpose().times(m).times(x1).minus(m.transpose().times(b)).norm(2), 1.0e-7); + + // and we need to check that the error estimates are pretty good. 
+ assertEquals(m.times(x1).minus(b).norm(2), r.getResidualNorm(), 1.0e-5); + assertEquals(m.transpose().times(m).times(x1).minus(m.transpose().times(b)).norm(2), r.getNormalEquationResidual(), 1.0e-9); + } + + @Test + public void random() { + Matrix m = new DenseMatrix(200, 30).assign(Functions.random()); + + Vector b = new DenseVector(200).assign(1); + + LSMR r = new LSMR(); + Vector x1 = r.solve(m, b); + +// assertEquals(0, m.times(x1).minus(b).norm(2), 1.0e-2); + double norm = new SingularValueDecomposition(m).getS().viewDiagonal().norm(2); + double actual = m.transpose().times(m).times(x1).minus(m.transpose().times(b)).norm(2); + System.out.printf("%.4f\n", actual / norm * 1.0e6); + assertEquals(0, actual, norm * 1.0e-5); + + // and we need to check that the error estimates are pretty good. + assertEquals(m.times(x1).minus(b).norm(2), r.getResidualNorm(), 1.0e-5); + assertEquals(actual, r.getNormalEquationResidual(), 1.0e-9); + } + + private static Matrix hilbert(int n) { + Matrix r = new DenseMatrix(n, n); + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + r.set(i, j, 1.0 / (i + j + 1)); + } + } + return r; + } + + /* + private Matrix overDetermined(int n) { + Random rand = RandomUtils.getRandom(); + Matrix r = new DenseMatrix(2 * n, n); + for (int i = 0; i < 2 * n; i++) { + for (int j = 0; j < n; j++) { + r.set(i, j, rand.nextGaussian()); + } + } + return r; + } + */ +} http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/core/src/test/java/org/apache/mahout/math/solver/TestConjugateGradientSolver.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/mahout/math/solver/TestConjugateGradientSolver.java b/core/src/test/java/org/apache/mahout/math/solver/TestConjugateGradientSolver.java new file mode 100644 index 0000000..536878a --- /dev/null +++ b/core/src/test/java/org/apache/mahout/math/solver/TestConjugateGradientSolver.java @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.math.solver; + +import org.apache.mahout.math.DenseMatrix; +import org.apache.mahout.math.DenseVector; +import org.apache.mahout.math.MahoutTestCase; +import org.apache.mahout.math.Matrix; +import org.apache.mahout.math.Vector; +import org.junit.Test; + +public class TestConjugateGradientSolver extends MahoutTestCase { + + @Test + public void testConjugateGradientSolver() { + Matrix a = getA(); + Vector b = getB(); + + ConjugateGradientSolver solver = new ConjugateGradientSolver(); + Vector x = solver.solve(a, b); + + assertEquals(0.0, Math.sqrt(a.times(x).getDistanceSquared(b)), EPSILON); + assertEquals(0.0, solver.getResidualNorm(), ConjugateGradientSolver.DEFAULT_MAX_ERROR); + assertEquals(10, solver.getIterations()); + } + + @Test + public void testConditionedConjugateGradientSolver() { + Matrix a = getIllConditionedMatrix(); + Vector b = getB(); + Preconditioner conditioner = new JacobiConditioner(a); + ConjugateGradientSolver solver = new ConjugateGradientSolver(); + + Vector x = solver.solve(a, b, null, 100, ConjugateGradientSolver.DEFAULT_MAX_ERROR); + + double distance = Math.sqrt(a.times(x).getDistanceSquared(b)); + assertEquals(0.0, distance, EPSILON); + assertEquals(0.0, solver.getResidualNorm(), ConjugateGradientSolver.DEFAULT_MAX_ERROR); + assertEquals(16, solver.getIterations()); + + Vector x2 = solver.solve(a, b, conditioner, 100, ConjugateGradientSolver.DEFAULT_MAX_ERROR); + + // the Jacobi preconditioner isn't very good, but it does result in one less iteration to converge + distance = Math.sqrt(a.times(x2).getDistanceSquared(b)); + assertEquals(0.0, distance, EPSILON); + assertEquals(0.0, solver.getResidualNorm(), ConjugateGradientSolver.DEFAULT_MAX_ERROR); + assertEquals(15, solver.getIterations()); + } + + @Test + public void testEarlyStop() { + Matrix a = getA(); + Vector b = getB(); + ConjugateGradientSolver solver = new ConjugateGradientSolver(); + + // specifying a looser max error will result in few iterations but less accurate results + Vector x = solver.solve(a, b, null, 10, 0.1); + double distance = Math.sqrt(a.times(x).getDistanceSquared(b)); + assertTrue(distance > EPSILON); + assertEquals(0.0, distance, 0.1); // should be equal to within the error specified + assertEquals(7, solver.getIterations()); // should have taken fewer iterations + + // can get a similar effect by bounding the number of iterations + x = solver.solve(a, b, null, 7, ConjugateGradientSolver.DEFAULT_MAX_ERROR); + distance = Math.sqrt(a.times(x).getDistanceSquared(b)); + assertTrue(distance > EPSILON); + assertEquals(0.0, distance, 0.1); + assertEquals(7, solver.getIterations()); + } + + private static Matrix getA() { + return reshape(new double[] { + 11.7155649822793997, -0.7125253363083646, 4.6473613961860183, 1.6020939468348456, -4.6789817799137134, + -0.8140416763434970, -4.5995617505618345, -1.1749070042775340, -1.6747995811678336, 3.1922255171058342, + -0.7125253363083646, 12.3400579683994867, -2.6498099427000645, 0.5264507222630669, 0.3783428369189767, + -2.1170186159188811, 2.3695134252190528, 3.8182131490333013, 6.5285942298270347, 2.8564814419366353, + 4.6473613961860183, -2.6498099427000645, 16.1317933921668484, -0.0409475448061225, 1.4805687075608227, + -2.9958076484628950, -2.5288893025027264, -0.9614557539842487, -2.2974738351519077, -1.5516184284572598, + 1.6020939468348456, 0.5264507222630669, -0.0409475448061225, 4.1946802122694482, -2.5210038046912198, + 0.6634899962909317, 0.4036187419205338, -0.2829211393003727, -0.2283091172980954, 
1.1253516563552464, + -4.6789817799137134, 0.3783428369189767, 1.4805687075608227, -2.5210038046912198, 19.4307361862733430, + -2.5200132222091787, 2.3748511971444510, 11.6426598443305522, -0.1508136510863874, 4.3471343888063512, + -0.8140416763434970, -2.1170186159188811, -2.9958076484628950, 0.6634899962909317, -2.5200132222091787, + 7.6712334419700747, -3.8687773629502851, -3.0453418711591529, -0.1155580876143619, -2.4025459467422121, + -4.5995617505618345, 2.3695134252190528, -2.5288893025027264, 0.4036187419205338, 2.3748511971444510, + -3.8687773629502851, 10.4681666057470082, 1.6527180866171229, 2.9341795819365384, -2.1708176372763099, + -1.1749070042775340, 3.8182131490333013, -0.9614557539842487, -0.2829211393003727, 11.6426598443305522, + -3.0453418711591529, 1.6527180866171229, 16.0050616934176233, 1.1689747208793086, 1.6665090945954870, + -1.6747995811678336, 6.5285942298270347, -2.2974738351519077, -0.2283091172980954, -0.1508136510863874, + -0.1155580876143619, 2.9341795819365384, 1.1689747208793086, 6.4794329751637481, -1.9197339981871877, + 3.1922255171058342, 2.8564814419366353, -1.5516184284572598, 1.1253516563552464, 4.3471343888063512, + -2.4025459467422121, -2.1708176372763099, 1.6665090945954870, -1.9197339981871877, 18.9149021356344598 + }, 10, 10); + } + + private static Vector getB() { + return new DenseVector(new double[] { + -0.552252, 0.038430, 0.058392, -1.234496, 1.240369, 0.373649, 0.505113, 0.503723, 1.215340, -0.391908 + }); + } + + private static Matrix getIllConditionedMatrix() { + return reshape(new double[] { + 0.00695278043678842, 0.09911830022078683, 0.01309584636255063, 0.00652917453032394, 0.04337631487735064, + 0.14232165273321387, 0.05808722912361313, -0.06591965049732287, 0.06055771542862332, 0.00577423310349649, + 0.09911830022078683, 1.50071402418061428, 0.14988743575884242, 0.07195514527480981, 0.63747362341752722, + 1.30711819020414688, 0.82151609385115953, -0.72616125524587938, 1.03490136002022948, 0.12800239664439328, + 0.01309584636255063, 0.14988743575884242, 0.04068462583124965, 0.02147022047006482, 0.07388113580146650, + 0.58070223915076002, 0.11280336266257514, -0.21690068430020618, 0.04065087561300068, -0.00876895259593769, + 0.00652917453032394, 0.07195514527480981, 0.02147022047006482, 0.01140105250542524, 0.03624164348693958, + 0.31291554581393255, 0.05648457235205666, -0.11507583016077780, 0.01475756130709823, -0.00584453679519805, + 0.04337631487735064, 0.63747362341752722, 0.07388113580146649, 0.03624164348693959, 0.27491543200760571, + 0.73410543168748121, 0.36120630002843257, -0.36583546331208316, 0.41472509341940017, 0.04581458758255480, + 0.14232165273321387, 1.30711819020414666, 0.58070223915076002, 0.31291554581393255, 0.73410543168748121, + 9.02536073121807014, 1.25426385582883104, -3.16186335125594642, -0.19740140818905436, -0.26613760880058035, + 0.05808722912361314, 0.82151609385115953, 0.11280336266257514, 0.05648457235205667, 0.36120630002843257, + 1.25426385582883126, 0.48661058451606820, -0.57030511336562195, 0.49151280464818098, 0.04428280690189127, + -0.06591965049732286, -0.72616125524587938, -0.21690068430020618, -0.11507583016077781, -0.36583546331208316, + -3.16186335125594642, -0.57030511336562195, 1.16270815038078945, -0.14837898963724327, 0.05917203395002889, + 0.06055771542862331, 1.03490136002022926, 0.04065087561300068, 0.01475756130709823, 0.41472509341940023, + -0.19740140818905436, 0.49151280464818103, -0.14837898963724327, 0.86693820682049716, 0.14089688752570340, + 0.00577423310349649, 
0.12800239664439328, -0.00876895259593769, -0.00584453679519805, 0.04581458758255480, + -0.26613760880058035, 0.04428280690189126, 0.05917203395002889, 0.14089688752570340, 0.02901858439788401 + }, 10, 10); + } + + /* + private static Matrix getAsymmetricMatrix() { + return reshape(new double[] { + 0.1586493402398226, -0.8668244036239467, 0.4335233711065471, -1.1025223577469705, 1.1344100191664601, + -0.1399944083742454, 0.8879750333144295, -1.2139664527957903, 0.7154591081557057, -0.6320890356949669, + -2.4546945723009581, 0.6354748667295935, -0.1931993736354496, -0.1210449542073575, -1.0668745874463414, + 0.6539061600017384, 2.4045520271091063,-0.3387572116155693, 0.1575188740437142, 1.1791073500243496, + -0.6418745429181755, 0.6836410530720005, -1.2447493564334062, -1.8840081252627843, 0.5663864914859502, + 0.0819203791124956, 0.2004407540793239, 0.7350145066687849, 1.6525377683305262, -0.3156915229969668, + -0.1866701463141060, -0.3929673444397022, -0.4440946700501859, 0.1366803303987421, -0.2138101381625466, + 0.5399874351478779, -1.0088091882703056, 0.0978023083150833, 1.8795777615527958, 0.3782417618354363, + -0.4564752186043173, 0.4014814252832269, 1.9691150950571501, 0.2424686682362568, 1.0965758964799504, + 0.2751725463132324, -0.6652756564294597, -0.6256564536463288, 1.0332457212107204, -0.0330851504958215, + -1.0402096493279287, -0.6850389655533707, -1.8896839974451625, 1.1533231017445102, -0.5387306882127710, + 0.0181850207098213, -0.2416652193929706, -0.9868171673047287, -1.5872573189377035, -0.8492253650362955, + 1.1949977792951225, 0.7901168665120927, 0.9832676055718492, -0.0752834029327588, 1.0555006468941126, + 0.6842531633106009, 0.2589700378872499, 0.3565253337268334, 0.1869608474650344, -0.1696524825242293, + 0.6919898638809949, -1.4937187919435133, 1.0039151841775080, -0.2580993333173019, 0.1243386429912411, + 1.3945380460721688, 0.3078165489952902, 1.1248734111054359, 0.5613308856003306, -0.9013329415656699, + -0.9197179846787753, 0.1167372728291174, -0.7807620712716467, 0.2210918047063067, -0.4813869727362010, + 0.3870067788770671, 1.1974416632199159, 2.4676804711420330, 1.8492990765211168, -1.3089887830472471, + -0.7587845769668021, -1.0354138253278353, -0.3907902473275445, -2.1292895670916168, -0.7544686049709807, + -0.3431317172534703, 1.4959721683724390, 0.6004852467523584, 1.2140230344223786, 0.1279148299232956 + }, 20, 5); + } + + private static Vector getSmallB() { + return new DenseVector(new double[] { + 0.114065955249272, + 0.953981568944476, + -2.611106316607759, + 0.652190962446307, + 1.298055218126384, + }); + } + + private static Matrix getLowrankSymmetricMatrix() { + Matrix m = new DenseMatrix(5,5); + Vector u = new DenseVector(new double[] { + -0.0364638798936962, + 1.0219291133418171, + -0.5649933120375343, + -1.0050553315595800, + -0.5264178580727512 + }); + Vector v = new DenseVector(new double[] { + -1.345847117891187, + 0.553386426498032, + 1.912020072696648, + -0.820959934779948, + 1.223358044171859 + }); + + return m.plus(u.cross(u)).plus(v.cross(v)); + } + + private static Matrix getLowrankAsymmetricMatrix() { + Matrix m = new DenseMatrix(20,5); + Vector u = new DenseVector(new double[] { + -0.0364638798936962, + 1.0219291133418171, + -0.5649933120375343, + -1.0050553315595800, + -0.5264178580727512 + }); + Vector v = new DenseVector(new double[] { + -1.345847117891187, + 0.553386426498032, + 1.912020072696648, + -0.820959934779948, + 1.223358044171859 + }); + + m.assignRow(0, u); + m.assignRow(0, v); + + return m; + } + */ + + private static 
Matrix reshape(double[] values, int rows, int columns) { + Matrix m = new DenseMatrix(rows, columns); + int i = 0; + for (double v : values) { + m.set(i % rows, i / rows, v); + i++; + } + return m; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/engine/hdfs/pom.xml ---------------------------------------------------------------------- diff --git a/engine/hdfs/pom.xml b/engine/hdfs/pom.xml index b09de9b..8660856 100644 --- a/engine/hdfs/pom.xml +++ b/engine/hdfs/pom.xml @@ -42,6 +42,42 @@ <artifactId>apache-rat-plugin</artifactId> </plugin> + <!-- copy jars to lib/ --> + <plugin> + <artifactId>maven-antrun-plugin</artifactId> + <version>1.4</version> + <executions> + <execution> + <id>copy</id> + <phase>package</phase> + <configuration> + <tasks> + <copy file="target/mahout-hdfs-${project.version}.jar" tofile="../../lib/mahout-hdfs-${project.version}.jar" /> + </tasks> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + </executions> + </plugin> + + <!-- delete files on mvn clean --> + <plugin> + <artifactId>maven-clean-plugin</artifactId> + <version>3.0.0</version> + <configuration> + <filesets> + <fileset> + <directory>../../lib/</directory> + <includes> + <include>mahout-hdfs-*.jar</include> + </includes> + <followSymlinks>false</followSymlinks> + </fileset> + </filesets> + </configuration> + </plugin> </plugins> </build> <!--<build>--> @@ -56,26 +92,7 @@ <!--</includes>--> <!--</resource>--> <!--</resources>--> - <!--<plugins>--> - <!--<!– copy jars to top directory, which is MAHOUT_HOME –>--> - <!--<plugin>--> - <!--<artifactId>maven-antrun-plugin</artifactId>--> - <!--<version>1.4</version>--> - <!--<executions>--> - <!--<execution>--> - <!--<id>copy</id>--> - <!--<phase>package</phase>--> - <!--<configuration>--> - <!--<tasks>--> - <!--<copy file="target/mahout-hdfs-${version}.jar" tofile="../mahout-hdfs-${version}.jar" />--> - <!--</tasks>--> - <!--</configuration>--> - <!--<goals>--> - <!--<goal>run</goal>--> - <!--</goals>--> - <!--</execution>--> - <!--</executions>--> - <!--</plugin>--> + <!--<!– create test jar so other modules can reuse the core test utility classes. 
–>--> <!--<plugin>--> <!--<groupId>org.apache.maven.plugins</groupId>--> @@ -134,13 +151,13 @@ <!-- our modules --> <dependency> <groupId>${project.groupId}</groupId> - <artifactId>core</artifactId> + <artifactId>core_${scala.compat.version}</artifactId> <version>${project.version}</version> </dependency> <dependency> <groupId>${project.groupId}</groupId> - <artifactId>core</artifactId> + <artifactId>core_${scala.compat.version}</artifactId> <version>${project.version}</version> <type>test-jar</type> <scope>test</scope> http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/engine/spark/pom.xml ---------------------------------------------------------------------- diff --git a/engine/spark/pom.xml b/engine/spark/pom.xml index 36d02f2..9aeef7e 100644 --- a/engine/spark/pom.xml +++ b/engine/spark/pom.xml @@ -141,8 +141,8 @@ <phase>package</phase> <configuration> <tasks> - <copy file="target/spark_${scala.compat.version}-${version}-spark_${spark.compat.version}.jar" tofile="../spark_${scala.compat.version}-${version}-spark_${spark.compat.version}.jar" /> - <copy file="target/spark_${scala.compat.version}-${version}-dependency-reduced.jar" tofile="../spark_${scala.compat.version}-${version}-dependency-reduced.jar" /> + <copy file="target/spark_${scala.compat.version}-${version}-spark_${spark.compat.version}.jar" tofile="../../lib/spark_${scala.compat.version}-${version}-spark_${spark.compat.version}.jar" /> + <copy file="target/spark_${scala.compat.version}-${version}-dependency-reduced.jar" tofile="../../lib/spark_${scala.compat.version}-${version}-dependency-reduced.jar" /> </tasks> </configuration> <goals> @@ -177,16 +177,16 @@ </execution> </executions> </plugin> - <!-- remove jars from top directory on clean --> + <!-- delete files on mvn clean --> <plugin> <artifactId>maven-clean-plugin</artifactId> <version>3.0.0</version> <configuration> <filesets> <fileset> - <directory>../</directory> + <directory>../../lib/</directory> <includes> - <include>spark*.jar</include> + <include>spark_*.jar</include> </includes> <followSymlinks>false</followSymlinks> </fileset> @@ -214,14 +214,14 @@ <!-- mahout stuff --> <dependency> <groupId>org.apache.mahout</groupId> - <artifactId>core</artifactId> - <version>0.13.1-SNAPSHOT</version> + <artifactId>core_${scala.compat.version}</artifactId> + <version>${project.version}</version> </dependency> <dependency> <groupId>org.apache.mahout</groupId> <artifactId>mahout-hdfs</artifactId> - <version>0.13.1-SNAPSHOT</version> + <version>${project.version}</version> </dependency> @@ -233,8 +233,8 @@ <dependency> <groupId>org.apache.mahout</groupId> - <artifactId>core</artifactId> - <version>0.13.1-SNAPSHOT</version> + <artifactId>core_${scala.compat.version}</artifactId> + <version>${project.version}</version> <classifier>tests</classifier> <scope>test</scope> </dependency> http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/experimental/pom.xml ---------------------------------------------------------------------- diff --git a/experimental/pom.xml b/experimental/pom.xml index fbe26f2..590a27f 100644 --- a/experimental/pom.xml +++ b/experimental/pom.xml @@ -35,7 +35,6 @@ <packaging>pom</packaging> <properties> - <scala.compat.version>2.10</scala.compat.version> </properties> <modules> <module>viennacl</module> http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/experimental/viennacl-omp/pom.xml ---------------------------------------------------------------------- diff --git a/experimental/viennacl-omp/pom.xml b/experimental/viennacl-omp/pom.xml 
index ba5b46a..24d7b3f 100644 --- a/experimental/viennacl-omp/pom.xml +++ b/experimental/viennacl-omp/pom.xml @@ -198,7 +198,7 @@ <phase>package</phase> <configuration> <tasks> - <copy file="target/mahout-native-viennacl-omp_${scala.compat.version}-${version}.jar" tofile="../mahout-native-viennacl-omp_${scala.compat.version}-${version}.jar" /> + <copy file="target/mahout-native-viennacl-omp_${scala.compat.version}-${version}.jar" tofile="../../lib/mahout-native-viennacl-omp_${scala.compat.version}-${version}.jar" /> </tasks> </configuration> <goals> @@ -214,7 +214,7 @@ <configuration> <filesets> <fileset> - <directory>../</directory> + <directory>../../lib</directory> <includes> <include>mahout-native-viennacl-omp*.jar</include> </includes> @@ -233,7 +233,7 @@ <dependency> <groupId>${project.groupId}</groupId> - <artifactId>core</artifactId> + <artifactId>core_${scala.compat.version}</artifactId> <version>${project.version}</version> </dependency> http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/experimental/viennacl/pom.xml ---------------------------------------------------------------------- diff --git a/experimental/viennacl/pom.xml b/experimental/viennacl/pom.xml index 007ccb3..91e4d80 100644 --- a/experimental/viennacl/pom.xml +++ b/experimental/viennacl/pom.xml @@ -199,7 +199,7 @@ <phase>package</phase> <configuration> <tasks> - <copy file="target/mahout-native-viennacl_${scala.compat.version}-${version}.jar" tofile="../mahout-native-viennacl_${scala.compat.version}-${version}.jar" /> + <copy file="target/mahout-native-viennacl_${scala.compat.version}-${version}.jar" tofile="../../lib/mahout-native-viennacl_${scala.compat.version}-${version}.jar" /> </tasks> </configuration> <goals> @@ -215,7 +215,7 @@ <configuration> <filesets> <fileset> - <directory>../</directory> + <directory>../../lib</directory> <includes> <include>mahout-native-viennacl_*.jar</include> </includes> @@ -232,7 +232,7 @@ <dependency> <groupId>${project.groupId}</groupId> - <artifactId>core</artifactId> + <artifactId>core_${scala.compat.version}</artifactId> <version>${project.version}</version> </dependency> http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index e2d7a33..e9ab797 100644 --- a/pom.xml +++ b/pom.xml @@ -651,6 +651,23 @@ <excludeSubProjects>false</excludeSubProjects> </configuration> </plugin> + + <!-- delete jars from lib/ on mvn clean --> + <plugin> + <artifactId>maven-clean-plugin</artifactId> + <version>3.0.0</version> + <configuration> + <filesets> + <fileset> + <directory>lib/</directory> + <includes> + <include>*.jar</include> + </includes> + <followSymlinks>false</followSymlinks> + </fileset> + </filesets> + </configuration> + </plugin> </plugins> </pluginManagement> http://git-wip-us.apache.org/repos/asf/mahout/blob/020a1875/refactor-readme.md ---------------------------------------------------------------------- diff --git a/refactor-readme.md b/refactor-readme.md index 251416b..857a65a 100644 --- a/refactor-readme.md +++ b/refactor-readme.md @@ -29,12 +29,13 @@ mahout -[x] get `org.apache.mahout.math.drm` squared away -[ ] see `core/pom.xml` --[ ] move tests for `org.apache.mahout.math.solver`s +-[x] move tests for `org.apache.mahout.math.solver`s -[x] move `org.apache.mahout.math.stats` esp OnlineSolver (but prob all of them) -[x] move above's tests over. 
-[x] IO Tests move over -[x] Add drivers to Community (to avoid future `scopt` hold ups) --[ ] update all poms to dump files in `lib/` +-[x] update all poms to dump files in `lib/` +-[ ] add plugin to delete everything from `lib/` on clean. -[ ] Move MR to community engines. (Failing on OpenIntHash, etc.) -[ ] Figure out where missing classes are ^^ OpenIntIntHash, etc. -[x] failing tests on Naivebayes (needs 'Online Summarizer') @@ -43,7 +44,7 @@ mahout -[ ] Update `.travis.yml` for new module structures -[ ] Add profiles back in. -[x] Fix POMs (correct hierarchy / inheritance) (for now is done...) --[ ] Fix POMs (add required plugins for release) +-[ ] Fix POMs (add required plugins for release, see below) -[ ] Clean up, delete directories no longer in use. -[ ] Create profile to create spark-fat-jar. (or if you get pushback, make no fat jars) -[ ] Move Kryo to top pom (spark, core and flink at least) @@ -51,16 +52,21 @@ mahout -[ ] Update Website -[ ] - Description of modules -[ ] - Available profiles and what they do --[ ] Update bin/mahout --[ ] Add licenes to files +-[ ] Update bin/mahout (probably moving most of it to mr-classic) +-[x] Add licenses to files +-[ ] Last thing: delete this file. + ### Plugins to add in -[ ] Release -[x] Ratcheck -[ ] Checkstyle --[ ] Maven-enforcer +-[ ] Maven-enforcer (Java 1.8 bump for spark 2.3+ compatibility) -[ ] Maven Surefire -[ ] JavaDoc/Scala Doc Plugin -[ ] profile for fat jars (spark/flink/h2o) -[ ] profile to turn on flink / h2o / other non-essentials (then disable them in standard build) + +### Current profiles +`mahout-mr` - builds the MapReduce stuff. \ No newline at end of file
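----------------------------------------------------------------------
For context, a minimal sketch of how a platform-specific CLI driver would wire together the new MahoutDriver and MahoutOptionParser classes added in this commit. ExampleDriver, its program name, and the body of process() are hypothetical illustrations, not part of the commit:

package org.apache.mahout.drivers

// Hypothetical driver: a sketch only, not shipped in this commit.
object ExampleDriver extends MahoutDriver {

  override def main(args: Array[String]): Unit = {
    parser = new MahoutOptionParser(programName = "example-driver") {
      head("example-driver", "0.14.0")
      parseIOOptions()      // registers --input/--output and merges the FileIOOptions defaults
      parseGenericOptions() // registers --randomSeed and --writeAllDatasets
    }
    // scopt folds each parsed flag into the Map[String, Any] seeded from parser.opts
    parser.parse(args, parser.opts) match {
      case Some(options) =>
        parser.opts = options
        process()
      case None => // bad arguments; scopt has already printed the usage text
    }
  }

  // Engine-specific setup: a real driver would create the DistributedContext mc here.
  override protected def start(): Unit = {
    // mc = ... (e.g. a Spark-backed DistributedContext)
  }

  override protected def process(): Unit = {
    start()
    // read parser.opts("input"), compute, write to parser.opts("output")
    stop()
  }
}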
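The shared option groups can also be exercised on their own; in this sketch the program name "demo" and the sample arguments are made up:

package org.apache.mahout.drivers

// Standalone check of MahoutOptionParser defaults and parsing (a sketch).
object ParserDemo {
  def main(args: Array[String]): Unit = {
    val p = new MahoutOptionParser("demo")
    p.parseIOOptions()      // --input/--output
    p.parseGenericOptions() // --randomSeed/--writeAllDatasets

    p.parse(Seq("-i", "hdfs://data/in", "-o", "/tmp/out", "--randomSeed", "42"), p.opts) match {
      case Some(options) =>
        println(options("input"))      // hdfs://data/in
        println(options("output"))     // /tmp/out/ (the output action appends a trailing slash)
        println(options("randomSeed")) // 42
      case None =>
        sys.exit(1) // scopt has already printed the usage text
    }
  }
}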
