Author: koji
Date: Fri Dec 14 03:58:18 2012
New Revision: 1421658

URL: http://svn.apache.org/viewvc?rev=1421658&view=rev
Log:
Add PrepareInitClusters, which uses RandomSeedGenerator to create the initial 
clusters.

Added:
    labs/alike/trunk/src/java/org/apache/alike/PrepareInitClusters.java
Modified:
    labs/alike/trunk/build.xml
    labs/alike/trunk/demo/   (props changed)
    labs/alike/trunk/ivy.xml
    labs/alike/trunk/src/java/org/apache/alike/Clustering.java
    labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java

Modified: labs/alike/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/labs/alike/trunk/build.xml?rev=1421658&r1=1421657&r2=1421658&view=diff
==============================================================================
--- labs/alike/trunk/build.xml (original)
+++ labs/alike/trunk/build.xml Fri Dec 14 03:58:18 2012
@@ -128,10 +128,21 @@
     <!-- ================================================================== -->
     <!-- = LAUNCH TOOLS                                                   = -->
     <!-- ================================================================== -->
+    <mkdir dir="demo/input-vectors"/>
+
     <target name="run-piv" depends="alike-compile" description="run 
PrepareInputVectors">
         <java classname="org.apache.alike.PrepareInputVectors" fork="true">
             <jvmarg line="-Dfile.encoding=UTF-8"/>
-            <arg line="demo/desc demo/input-vectors"/>
+            <arg line="demo/desc demo/input-vectors/data"/>
+            <classpath refid="common.path.lib"/>
+            <classpath path="${cls.dir}"/>
+        </java>
+    </target>
+
+    <target name="run-pic" depends="alike-compile" description="run 
PrepareInitClusters">
+        <java classname="org.apache.alike.PrepareInitClusters" fork="true">
+            <jvmarg line="-Dfile.encoding=UTF-8"/>
+            <arg line="demo/input-vectors demo/init-clusters 20"/>
             <classpath refid="common.path.lib"/>
             <classpath path="${cls.dir}"/>
         </java>
@@ -140,7 +151,6 @@
     <target name="run-clustering" depends="alike-compile" description="run 
Clustering">
         <java classname="org.apache.alike.Clustering" fork="true">
             <jvmarg line="-Dfile.encoding=UTF-8"/>
-            <arg line="conf.properties"/>
             <classpath refid="common.path.lib"/>
             <classpath path="${cls.dir}"/>
         </java>
@@ -160,6 +170,11 @@
         </delete>
     </target>
 
+    <target name="clean-demo" description="clean intermediate files in demo 
directory">
+        <delete dir="demo/input-vectors"/>
+        <delete dir="demo/init-clusters"/>
+    </target>
+
     <target name="clean-lib" description="clean libs downloaded by ivy">
         <delete dir="${lib.dir}"/>
     </target>

Propchange: labs/alike/trunk/demo/
------------------------------------------------------------------------------
--- svn:ignore (original)
+++ svn:ignore Fri Dec 14 03:58:18 2012
@@ -2,4 +2,4 @@
 101_ObjectCategories.tar.gz
 apache-solr-*
 input-vectors
-.input-vectors.crc
+init-clusters

Modified: labs/alike/trunk/ivy.xml
URL: 
http://svn.apache.org/viewvc/labs/alike/trunk/ivy.xml?rev=1421658&r1=1421657&r2=1421658&view=diff
==============================================================================
--- labs/alike/trunk/ivy.xml (original)
+++ labs/alike/trunk/ivy.xml Fri Dec 14 03:58:18 2012
@@ -25,7 +25,11 @@
         status="integration">
        </info>
   <dependencies>
+<!--
     <dependency org="org.apache.hadoop" name="hadoop-core" rev="1.0.4"/>
+-->
+    <dependency org="commons-io" name="commons-io" rev="2.4"/>
+    <dependency org="org.apache.hadoop" name="hadoop-core" rev="0.20.204.0"/>
     <dependency org="org.apache.mahout" name="mahout-core" rev="0.7"/>
     <dependency org="org.slf4j" name="slf4j-jcl" rev="1.6.1"/>
   </dependencies>

Modified: labs/alike/trunk/src/java/org/apache/alike/Clustering.java
URL: 
http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/Clustering.java?rev=1421658&r1=1421657&r2=1421658&view=diff
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/Clustering.java (original)
+++ labs/alike/trunk/src/java/org/apache/alike/Clustering.java Fri Dec 14 
03:58:18 2012
@@ -44,7 +44,8 @@ public class Clustering {
   static final int K = 500;
   static final int MAX_ITE = 500;
   // TODO: make parameterization
-  static final String SRC_DIR = 
"/Users/koji/Project/rondhuit/JAIST/SUB-THEME/out";
+  //static final String SRC_DIR = 
"/Users/koji/Project/rondhuit/JAIST/SUB-THEME/out";
+  static final String SRC_DIR = 
"/Users/koji/Project/labs/alike/COMMIT-NEW/trunk/demo/desc";
   static final String D_IN_POINTS = "testdata/points";
   static final String F_IN_INPUT_VECTORS = D_IN_POINTS + "/input-vectors";
   static final String D_IN_CLUSTERS = "testdata/clusters";
@@ -72,12 +73,16 @@ public class Clustering {
   static void prepareInputVectors() throws IOException {
     new File(D_IN_POINTS).mkdirs();
     
+    /*
     File srcDirFile = new File(SRC_DIR);
     File[] txtFiles = srcDirFile.listFiles(new FileFilter() {
       public boolean accept(File pathname) {
         return pathname.getName().endsWith(".txt");
       }
     });
+    */
+    
+    List<File> txtFiles = getDescTxtFiles();
 
     Path path = new Path(F_IN_INPUT_VECTORS);
     SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
@@ -96,6 +101,26 @@ public class Clustering {
     writer.close();
   }
   
+  static List<File> getDescTxtFiles(){
+    String[] subDirs = {"Faces", "car_side", "chair", "cougar_face",  
"dollar_bill", "elephant"};
+    List<File> results = new ArrayList<File>();
+    
+    for(String subDir : subDirs){
+      File srcDirFile = new File(SRC_DIR + "/" + subDir);
+      File[] txtFiles = srcDirFile.listFiles(new FileFilter() {
+        public boolean accept(File pathname) {
+          return pathname.getName().endsWith(".txt");
+        }
+      });
+      
+      for(File txtFile : txtFiles){
+        results.add(txtFile);
+      }
+    }
+    
+    return results;
+  }
+  
   static List<NamedVector> getNamedVectorsFromFile(File txtFile) throws 
IOException {
     List<NamedVector> nvList = new ArrayList<NamedVector>();
     BufferedReader br = new BufferedReader(new FileReader(txtFile));

Added: labs/alike/trunk/src/java/org/apache/alike/PrepareInitClusters.java
URL: 
http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/PrepareInitClusters.java?rev=1421658&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/PrepareInitClusters.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/PrepareInitClusters.java Fri Dec 
14 03:58:18 2012
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
+import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
+
+public class PrepareInitClusters {
+
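+  /**
+   * @param args input vectors path, output initial clusters path, and k
+   * @throws IOException if RandomSeedGenerator.buildRandom fails with an I/O error
+   */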
+  public static void main(String[] args) throws IOException {
+    if(args.length != 3){
+      printUsage(1);
+    }
+    
+    Configuration conf = new Configuration();
+    Path inPath = new Path(args[0]);
+    // TODO: disabling checksum verification (below) avoids a checksum error
+    // when PrepareInitClusters runs; the cause of the error is still unknown.
+    //inPath.getFileSystem(conf).setVerifyChecksum(false);
+    Path outPath = new Path(args[1]);
+    int k = 0;
+    try{
+      k = Integer.parseInt(args[2]);
+    }
+    catch(NumberFormatException e){
+      printUsage(1);
+    }
+    
+    RandomSeedGenerator.buildRandom(conf, inPath, outPath, k, new 
EuclideanDistanceMeasure());
+  }
+
+  static void printUsage(int exit){
+    System.err.printf("Usage: $ java %s <input_vectors_path> 
<output_init_clusters_path> <k>\n",
+        PrepareInitClusters.class.getName());
+    System.err.println("\t<input_vectors_path> input file path of visual 
descriptors");
+    System.err.println("\t<output_init_clusters_path> output file path for 
initial clusters");
+    System.err.println("\t<k> number of clusters");
+
+    if(exit >= 0){
+      System.exit(exit);
+    }
+  }
+
+}

Modified: labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java
URL: 
http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java?rev=1421658&r1=1421657&r2=1421658&view=diff
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java 
(original)
+++ labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java Fri Dec 
14 03:58:18 2012
@@ -25,6 +25,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.alike.FileUtil.Executor;
+import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -54,7 +55,10 @@ public class PrepareInputVectors {
       printUsage(1);
     }
     
-    FileUtil.executeRecursively(new VisualDescriptors2MahoutExecutor(args[1]), 
args[0]);
+    VisualDescriptors2MahoutExecutor executor = new 
VisualDescriptors2MahoutExecutor(args[1]);
+    FileUtil.executeRecursively(executor, args[0]);
+    executor.closeWriter();
+    
   }
 
   static void printUsage(int exit){
@@ -103,6 +107,10 @@ public class PrepareInputVectors {
         throw new RuntimeException(e);
       }
     }
+    
+    public void closeWriter(){
+      IOUtils.closeQuietly(writer);
+    }
 
     List<NamedVector> getNamedVectorsFromFile(File txtFile) throws IOException 
{
       List<NamedVector> nvList = new ArrayList<NamedVector>();



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to