Author: koji
Date: Fri Dec 14 03:58:18 2012
New Revision: 1421658
URL: http://svn.apache.org/viewvc?rev=1421658&view=rev
Log:
add PrepareInitClusters which uses RandomSeedGenerator to create initial
clusters
Added:
labs/alike/trunk/src/java/org/apache/alike/PrepareInitClusters.java
Modified:
labs/alike/trunk/build.xml
labs/alike/trunk/demo/ (props changed)
labs/alike/trunk/ivy.xml
labs/alike/trunk/src/java/org/apache/alike/Clustering.java
labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java
Modified: labs/alike/trunk/build.xml
URL:
http://svn.apache.org/viewvc/labs/alike/trunk/build.xml?rev=1421658&r1=1421657&r2=1421658&view=diff
==============================================================================
--- labs/alike/trunk/build.xml (original)
+++ labs/alike/trunk/build.xml Fri Dec 14 03:58:18 2012
@@ -128,10 +128,21 @@
<!-- ================================================================== -->
<!-- = LAUNCH TOOLS = -->
<!-- ================================================================== -->
+ <mkdir dir="demo/input-vectors"/>
+
<target name="run-piv" depends="alike-compile" description="run
PrepareInputVectors">
<java classname="org.apache.alike.PrepareInputVectors" fork="true">
<jvmarg line="-Dfile.encoding=UTF-8"/>
- <arg line="demo/desc demo/input-vectors"/>
+ <arg line="demo/desc demo/input-vectors/data"/>
+ <classpath refid="common.path.lib"/>
+ <classpath path="${cls.dir}"/>
+ </java>
+ </target>
+
+ <target name="run-pic" depends="alike-compile" description="run
PrepareInitClusters">
+ <java classname="org.apache.alike.PrepareInitClusters" fork="true">
+ <jvmarg line="-Dfile.encoding=UTF-8"/>
+ <arg line="demo/input-vectors demo/init-clusters 20"/>
<classpath refid="common.path.lib"/>
<classpath path="${cls.dir}"/>
</java>
@@ -140,7 +151,6 @@
<target name="run-clustering" depends="alike-compile" description="run
Clustering">
<java classname="org.apache.alike.Clustering" fork="true">
<jvmarg line="-Dfile.encoding=UTF-8"/>
- <arg line="conf.properties"/>
<classpath refid="common.path.lib"/>
<classpath path="${cls.dir}"/>
</java>
@@ -160,6 +170,11 @@
</delete>
</target>
+ <!-- Removes the directories generated by the demo tools:
+      demo/input-vectors (where run-piv writes its output) and
+      demo/init-clusters (the output path passed to run-pic). -->
+ <target name="clean-demo" description="clean intermediate files in demo
directory">
+ <delete dir="demo/input-vectors"/>
+ <delete dir="demo/init-clusters"/>
+ </target>
+
<target name="clean-lib" description="clean libs downloaded by ivy">
<delete dir="${lib.dir}"/>
</target>
Propchange: labs/alike/trunk/demo/
------------------------------------------------------------------------------
--- svn:ignore (original)
+++ svn:ignore Fri Dec 14 03:58:18 2012
@@ -2,4 +2,4 @@
101_ObjectCategories.tar.gz
apache-solr-*
input-vectors
-.input-vectors.crc
+init-clusters
Modified: labs/alike/trunk/ivy.xml
URL:
http://svn.apache.org/viewvc/labs/alike/trunk/ivy.xml?rev=1421658&r1=1421657&r2=1421658&view=diff
==============================================================================
--- labs/alike/trunk/ivy.xml (original)
+++ labs/alike/trunk/ivy.xml Fri Dec 14 03:58:18 2012
@@ -25,7 +25,11 @@
status="integration">
</info>
<dependencies>
+<!--
<dependency org="org.apache.hadoop" name="hadoop-core" rev="1.0.4"/>
+-->
+ <dependency org="commons-io" name="commons-io" rev="2.4"/>
+ <dependency org="org.apache.hadoop" name="hadoop-core" rev="0.20.204.0"/>
<dependency org="org.apache.mahout" name="mahout-core" rev="0.7"/>
<dependency org="org.slf4j" name="slf4j-jcl" rev="1.6.1"/>
</dependencies>
Modified: labs/alike/trunk/src/java/org/apache/alike/Clustering.java
URL:
http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/Clustering.java?rev=1421658&r1=1421657&r2=1421658&view=diff
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/Clustering.java (original)
+++ labs/alike/trunk/src/java/org/apache/alike/Clustering.java Fri Dec 14
03:58:18 2012
@@ -44,7 +44,8 @@ public class Clustering {
static final int K = 500;
static final int MAX_ITE = 500;
// TODO: make parameterization
- static final String SRC_DIR =
"/Users/koji/Project/rondhuit/JAIST/SUB-THEME/out";
+ //static final String SRC_DIR =
"/Users/koji/Project/rondhuit/JAIST/SUB-THEME/out";
+ static final String SRC_DIR =
"/Users/koji/Project/labs/alike/COMMIT-NEW/trunk/demo/desc";
static final String D_IN_POINTS = "testdata/points";
static final String F_IN_INPUT_VECTORS = D_IN_POINTS + "/input-vectors";
static final String D_IN_CLUSTERS = "testdata/clusters";
@@ -72,12 +73,16 @@ public class Clustering {
static void prepareInputVectors() throws IOException {
new File(D_IN_POINTS).mkdirs();
+ /*
File srcDirFile = new File(SRC_DIR);
File[] txtFiles = srcDirFile.listFiles(new FileFilter() {
public boolean accept(File pathname) {
return pathname.getName().endsWith(".txt");
}
});
+ */
+
+ List<File> txtFiles = getDescTxtFiles();
Path path = new Path(F_IN_INPUT_VECTORS);
SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
@@ -96,6 +101,26 @@ public class Clustering {
writer.close();
}
+/**
+ * Collects the {@code .txt} files found directly inside a fixed set of
+ * sub-directories of {@code SRC_DIR}.
+ *
+ * @return the matching files, appended sub-directory by sub-directory in
+ *         the order the sub-directories are listed below
+ * @throws IllegalStateException if one of the sub-directories does not
+ *         exist, is not a directory, or cannot be read
+ */
+static List<File> getDescTxtFiles(){
+  String[] subDirs = {"Faces", "car_side", "chair", "cougar_face", "dollar_bill", "elephant"};
+  List<File> results = new ArrayList<File>();
+
+  for(String subDir : subDirs){
+    File srcDirFile = new File(SRC_DIR + "/" + subDir);
+    File[] txtFiles = srcDirFile.listFiles(new FileFilter() {
+      public boolean accept(File pathname) {
+        return pathname.getName().endsWith(".txt");
+      }
+    });
+
+    // listFiles() returns null (not an empty array) when the path is not a
+    // directory or an I/O error occurs; report which directory is at fault
+    // instead of failing later with a bare NullPointerException.
+    if(txtFiles == null){
+      throw new IllegalStateException("cannot list directory: " + srcDirFile);
+    }
+
+    for(File txtFile : txtFiles){
+      results.add(txtFile);
+    }
+  }
+
+  return results;
+}
+
static List<NamedVector> getNamedVectorsFromFile(File txtFile) throws
IOException {
List<NamedVector> nvList = new ArrayList<NamedVector>();
BufferedReader br = new BufferedReader(new FileReader(txtFile));
Added: labs/alike/trunk/src/java/org/apache/alike/PrepareInitClusters.java
URL:
http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/PrepareInitClusters.java?rev=1421658&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/PrepareInitClusters.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/PrepareInitClusters.java Fri Dec
14 03:58:18 2012
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
+import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
+
+/**
+ * Command-line tool that builds the initial clusters by calling Mahout's
+ * {@code RandomSeedGenerator.buildRandom} with a
+ * {@code EuclideanDistanceMeasure}.
+ */
+public class PrepareInitClusters {
+
+  /**
+   * Validates the command line and generates the initial clusters.
+   *
+   * @param args {@code <input_vectors_path> <output_init_clusters_path> <k>};
+   *             {@code k} must be a positive integer
+   * @throws IOException propagated from {@code RandomSeedGenerator.buildRandom}
+   */
+  public static void main(String[] args) throws IOException {
+    if(args.length != 3){
+      printUsage(1);
+      return; // not reached (printUsage(1) exits); keeps the flow explicit
+    }
+
+    int k;
+    try{
+      k = Integer.parseInt(args[2]);
+    }
+    catch(NumberFormatException e){
+      printUsage(1);
+      return;
+    }
+    // A zero or negative cluster count is a usage error; reject it here
+    // rather than handing it to the seed generator.
+    if(k <= 0){
+      System.err.println("<k> must be a positive integer: " + args[2]);
+      printUsage(1);
+      return;
+    }
+
+    Configuration conf = new Configuration();
+    Path inPath = new Path(args[0]);
+    // TODO: running this tool can fail with a checksum error (cause not yet
+    // understood). Disabling checksum verification as below is a possible
+    // workaround; it is kept commented out.
+    //inPath.getFileSystem(conf).setVerifyChecksum(false);
+    Path outPath = new Path(args[1]);
+
+    RandomSeedGenerator.buildRandom(conf, inPath, outPath, k, new EuclideanDistanceMeasure());
+  }
+
+  /**
+   * Prints the usage message to standard error.
+   *
+   * @param exit process exit status; when negative the method returns
+   *             instead of terminating the JVM
+   */
+  static void printUsage(int exit){
+    System.err.printf("Usage: $ java %s <input_vectors_path> <output_init_clusters_path> <k>\n",
+        PrepareInitClusters.class.getName());
+    System.err.println("\t<input_vectors_path> input file path of visual descriptors");
+    System.err.println("\t<output_init_clusters_path> output file path for initial clusters");
+    System.err.println("\t<k> number of clusters");
+
+    if(exit >= 0){
+      System.exit(exit);
+    }
+  }
+
+}
Modified: labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java
URL:
http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java?rev=1421658&r1=1421657&r2=1421658&view=diff
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java
(original)
+++ labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java Fri Dec
14 03:58:18 2012
@@ -25,6 +25,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.alike.FileUtil.Executor;
+import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -54,7 +55,10 @@ public class PrepareInputVectors {
printUsage(1);
}
- FileUtil.executeRecursively(new VisualDescriptors2MahoutExecutor(args[1]),
args[0]);
+ VisualDescriptors2MahoutExecutor executor = new
VisualDescriptors2MahoutExecutor(args[1]);
+ FileUtil.executeRecursively(executor, args[0]);
+ executor.closeWriter();
+
}
static void printUsage(int exit){
@@ -103,6 +107,10 @@ public class PrepareInputVectors {
throw new RuntimeException(e);
}
}
+
+ /**
+  * Closes the writer once all input files have been processed.
+  *
+  * <p>A failure while closing is rethrown rather than ignored, because an
+  * error at this point may mean the output was not completely written.
+  * A {@code null} writer is ignored.</p>
+  *
+  * @throws RuntimeException wrapping the {@link IOException} raised by
+  *         {@code close()}
+  */
+ public void closeWriter(){
+   if(writer == null){
+     return;
+   }
+   try{
+     writer.close();
+   }
+   catch(IOException e){
+     throw new RuntimeException(e);
+   }
+ }
List<NamedVector> getNamedVectorsFromFile(File txtFile) throws IOException
{
List<NamedVector> nvList = new ArrayList<NamedVector>();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]