Author: koji Date: Fri Jul 25 02:46:19 2014 New Revision: 1613340 URL: http://svn.apache.org/r1613340 Log: finalize support of Spark
Modified: labs/alike/trunk/build.xml labs/alike/trunk/demo/README.txt labs/alike/trunk/src/java/org/apache/alike/ClusterDumpReader.java labs/alike/trunk/src/main/scala/org/apache/alike/KMeansClusteringExecutor.scala Modified: labs/alike/trunk/build.xml URL: http://svn.apache.org/viewvc/labs/alike/trunk/build.xml?rev=1613340&r1=1613339&r2=1613340&view=diff ============================================================================== --- labs/alike/trunk/build.xml (original) +++ labs/alike/trunk/build.xml Fri Jul 25 02:46:19 2014 @@ -92,9 +92,6 @@ <mkdir dir="${job.dir}"/> <copy todir="${job.dir}"> <fileset dir="${cls.dir}"/> - <fileset dir="${demo.dir}"> - <include name="conf.xml"/> - </fileset> </copy> <unjar dest="${job.dir}"> <fileset dir="${lib.dir}"> Modified: labs/alike/trunk/demo/README.txt URL: http://svn.apache.org/viewvc/labs/alike/trunk/demo/README.txt?rev=1613340&r1=1613339&r2=1613340&view=diff ============================================================================== --- labs/alike/trunk/demo/README.txt (original) +++ labs/alike/trunk/demo/README.txt Fri Jul 25 02:46:19 2014 @@ -100,6 +100,7 @@ # # (2) If you prefer Spark (um, it takes 4 and half hours on my MacBook) + # IMPORTANT: You need to install Spark 1.0.1 beforehand # $ spark-submit --jars ../apache-alike-0.2.job --class org.apache.alike.KMeansClusteringExecutor ../target/scala-2.10/alike-spark_2.10-0.2.jar Modified: labs/alike/trunk/src/java/org/apache/alike/ClusterDumpReader.java URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/ClusterDumpReader.java?rev=1613340&r1=1613339&r2=1613340&view=diff ============================================================================== --- labs/alike/trunk/src/java/org/apache/alike/ClusterDumpReader.java (original) +++ labs/alike/trunk/src/java/org/apache/alike/ClusterDumpReader.java Fri Jul 25 02:46:19 2014 @@ -26,6 +26,7 @@ import org.apache.commons.io.IOUtils; final class ClusterDumpReader { private final int k, d; + 
private Format format; /** * @@ -35,6 +36,7 @@ final class ClusterDumpReader { public ClusterDumpReader(int k, int d){ this.k = k; this.d = d; + format = null; } public double[][] getCentroids(String ifile) throws IOException { @@ -49,11 +51,24 @@ final class ClusterDumpReader { String line = null; int i = 0; while((line = br.readLine()) != null){ - int sp = line.indexOf("c=[") + "c=[".length(); - int ep = line.indexOf("] r=["); + + if(format == null){ + // the format hasn't been checked yet + format = checkFormat(line); + } + + int sp, ep; + if(format == Format.MAHOUT){ + sp = line.indexOf("c=[") + "c=[".length(); + ep = line.indexOf("] r=["); + } + else{ + sp = line.indexOf("[") + "[".length(); + ep = line.indexOf("]"); + } //System.out.printf("\"%s\"\n", line.substring(sp, ep)); String[] strValues = line.substring(sp, ep).trim().split(",\\s*"); - if(strValues.length < d){ + if(strValues.length < d){ // this can be occurred in Mahout // may be sparse vector representation is used... for(String sv : strValues){ int col = sv.indexOf(':'); @@ -66,6 +81,7 @@ final class ClusterDumpReader { centroids[i][j] = Double.parseDouble(strValues[j]); } } + i++; } } @@ -76,4 +92,12 @@ final class ClusterDumpReader { return centroids; } + + private Format checkFormat(String line){ + return line.indexOf("c=[") > 0 ? 
Format.MAHOUT : Format.SPARK; + } + + static enum Format { + MAHOUT, SPARK + } } Modified: labs/alike/trunk/src/main/scala/org/apache/alike/KMeansClusteringExecutor.scala URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/main/scala/org/apache/alike/KMeansClusteringExecutor.scala?rev=1613340&r1=1613339&r2=1613340&view=diff ============================================================================== --- labs/alike/trunk/src/main/scala/org/apache/alike/KMeansClusteringExecutor.scala (original) +++ labs/alike/trunk/src/main/scala/org/apache/alike/KMeansClusteringExecutor.scala Fri Jul 25 02:46:19 2014 @@ -25,8 +25,7 @@ class MyExec extends Executor { object KMeansClusteringExecutor { def main(args: Array[String]){ - val is = this.getClass.getClassLoader.getResourceAsStream("conf.xml") - val config = new AlikeConfig(is) + val config = new AlikeConfig("conf.xml") val inDir = config.getDescNormalFSDir val numClusters = config.getNumOfClusters val maxIter = config.getClusterMaxIter --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org For additional commands, e-mail: commits-help@labs.apache.org