Author: koji
Date: Fri Jul 25 02:46:19 2014
New Revision: 1613340

URL: http://svn.apache.org/r1613340
Log:
finalize support of Spark

Modified:
    labs/alike/trunk/build.xml
    labs/alike/trunk/demo/README.txt
    labs/alike/trunk/src/java/org/apache/alike/ClusterDumpReader.java
    labs/alike/trunk/src/main/scala/org/apache/alike/KMeansClusteringExecutor.scala

Modified: labs/alike/trunk/build.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/build.xml?rev=1613340&r1=1613339&r2=1613340&view=diff
==============================================================================
--- labs/alike/trunk/build.xml (original)
+++ labs/alike/trunk/build.xml Fri Jul 25 02:46:19 2014
@@ -92,9 +92,6 @@
         <mkdir dir="${job.dir}"/>
         <copy todir="${job.dir}">
             <fileset dir="${cls.dir}"/>
-            <fileset dir="${demo.dir}">
-              <include name="conf.xml"/>
-            </fileset>
        </copy>
         <unjar dest="${job.dir}">
             <fileset dir="${lib.dir}">

Modified: labs/alike/trunk/demo/README.txt
URL: http://svn.apache.org/viewvc/labs/alike/trunk/demo/README.txt?rev=1613340&r1=1613339&r2=1613340&view=diff
==============================================================================
--- labs/alike/trunk/demo/README.txt (original)
+++ labs/alike/trunk/demo/README.txt Fri Jul 25 02:46:19 2014
@@ -100,6 +100,7 @@
 
    #
    # (2) If you prefer Spark (um, it takes 4 and half hours on my MacBook)
+   # IMPORTANT: You need to install Spark 1.0.1 beforehand
    #
    $ spark-submit --jars ../apache-alike-0.2.job --class org.apache.alike.KMeansClusteringExecutor ../target/scala-2.10/alike-spark_2.10-0.2.jar
 

Modified: labs/alike/trunk/src/java/org/apache/alike/ClusterDumpReader.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/ClusterDumpReader.java?rev=1613340&r1=1613339&r2=1613340&view=diff
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/ClusterDumpReader.java (original)
+++ labs/alike/trunk/src/java/org/apache/alike/ClusterDumpReader.java Fri Jul 25 02:46:19 2014
@@ -26,6 +26,7 @@ import org.apache.commons.io.IOUtils;
 final class ClusterDumpReader {
 
   private final int k, d;
+  private Format format;
 
   /**
    * 
@@ -35,6 +36,7 @@ final class ClusterDumpReader {
   public ClusterDumpReader(int k, int d){
     this.k = k;
     this.d = d;
+    format = null;
   }
   
   public double[][] getCentroids(String ifile) throws IOException {
@@ -49,11 +51,24 @@ final class ClusterDumpReader {
       String line = null;
       int i = 0;
       while((line = br.readLine()) != null){
-        int sp = line.indexOf("c=[") + "c=[".length();
-        int ep = line.indexOf("] r=[");
+
+        if(format == null){
+          // the format hasn't been checked yet
+          format = checkFormat(line);
+        }
+
+        int sp, ep;
+        if(format == Format.MAHOUT){
+          sp = line.indexOf("c=[") + "c=[".length();
+          ep = line.indexOf("] r=[");
+        }
+        else{
+          sp = line.indexOf("[") + "[".length();
+          ep = line.indexOf("]");
+        }
         //System.out.printf("\"%s\"\n", line.substring(sp, ep));
         String[] strValues = line.substring(sp, ep).trim().split(",\\s*");
-        if(strValues.length < d){
+        if(strValues.length < d){   // this can be occurred in Mahout
           // may be sparse vector representation is used...
           for(String sv : strValues){
             int col = sv.indexOf(':');
@@ -66,6 +81,7 @@ final class ClusterDumpReader {
             centroids[i][j] = Double.parseDouble(strValues[j]);
           }
         }
+
         i++;
       }
     }
@@ -76,4 +92,12 @@ final class ClusterDumpReader {
     
     return centroids;
   }
+  
+  private Format checkFormat(String line){
+    return line.indexOf("c=[") > 0 ? Format.MAHOUT : Format.SPARK;
+  }
+  
+  static enum Format {
+    MAHOUT, SPARK
+  }
 }

Modified: labs/alike/trunk/src/main/scala/org/apache/alike/KMeansClusteringExecutor.scala
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/main/scala/org/apache/alike/KMeansClusteringExecutor.scala?rev=1613340&r1=1613339&r2=1613340&view=diff
==============================================================================
--- labs/alike/trunk/src/main/scala/org/apache/alike/KMeansClusteringExecutor.scala (original)
+++ labs/alike/trunk/src/main/scala/org/apache/alike/KMeansClusteringExecutor.scala Fri Jul 25 02:46:19 2014
@@ -25,8 +25,7 @@ class MyExec extends Executor {
 
 object KMeansClusteringExecutor {
   def main(args: Array[String]){
-    val is = this.getClass.getClassLoader.getResourceAsStream("conf.xml")
-    val config = new AlikeConfig(is)
+    val config = new AlikeConfig("conf.xml")
     val inDir = config.getDescNormalFSDir
     val numClusters = config.getNumOfClusters
     val maxIter = config.getClusterMaxIter



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@labs.apache.org
For additional commands, e-mail: commits-h...@labs.apache.org

Reply via email to