Author: koji
Date: Wed Dec 12 10:47:40 2012
New Revision: 1420615
URL: http://svn.apache.org/viewvc?rev=1420615&view=rev
Log:
add PrepareInputVectors program. It is used to convert visual descriptor files
(human readable) to Mahout format (VectorWritable)
Added:
labs/alike/trunk/src/java/org/apache/alike/FileUtil.java
labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java
labs/alike/trunk/src/test/org/
labs/alike/trunk/src/test/org/apache/
labs/alike/trunk/src/test/org/apache/alike/
labs/alike/trunk/src/test/org/apache/alike/FileUtilTest.java
Modified:
labs/alike/trunk/ (props changed)
labs/alike/trunk/build.xml
labs/alike/trunk/demo/ (props changed)
labs/alike/trunk/demo/README.txt
Propchange: labs/alike/trunk/
------------------------------------------------------------------------------
--- svn:ignore (original)
+++ svn:ignore Wed Dec 12 10:47:40 2012
@@ -4,3 +4,5 @@
target
classes
lib
+test-result
+test-classes
Modified: labs/alike/trunk/build.xml
URL:
http://svn.apache.org/viewvc/labs/alike/trunk/build.xml?rev=1420615&r1=1420614&r2=1420615&view=diff
==============================================================================
--- labs/alike/trunk/build.xml (original)
+++ labs/alike/trunk/build.xml Wed Dec 12 10:47:40 2012
@@ -131,6 +131,15 @@
<!-- ================================================================== -->
<!-- = LAUNCH TOOLS = -->
<!-- ================================================================== -->
+ <!-- Launches org.apache.alike.PrepareInputVectors in a forked JVM with
+      file.encoding set to UTF-8, passing demo/desc (parent directory of the
+      visual descriptors) and demo/input-vectors (output file) as arguments. -->
+ <target name="run-piv" depends="alike-compile" description="run
PrepareInputVectors">
+ <java classname="org.apache.alike.PrepareInputVectors" fork="true">
+ <jvmarg line="-Dfile.encoding=UTF-8"/>
+ <arg line="demo/desc demo/input-vectors"/>
+ <classpath refid="common.path.lib"/>
+ <classpath path="${cls.dir}"/>
+ </java>
+ </target>
+
<target name="run-clustering" depends="alike-compile" description="run
Clustering">
<java classname="org.apache.alike.Clustering" fork="true">
<jvmarg line="-Dfile.encoding=UTF-8"/>
Propchange: labs/alike/trunk/demo/
------------------------------------------------------------------------------
--- svn:ignore (original)
+++ svn:ignore Wed Dec 12 10:47:40 2012
@@ -1,3 +1,5 @@
101_ObjectCategories
101_ObjectCategories.tar.gz
apache-solr-*
+input-vectors
+.input-vectors.crc
Modified: labs/alike/trunk/demo/README.txt
URL:
http://svn.apache.org/viewvc/labs/alike/trunk/demo/README.txt?rev=1420615&r1=1420614&r2=1420615&view=diff
==============================================================================
--- labs/alike/trunk/demo/README.txt (original)
+++ labs/alike/trunk/demo/README.txt Wed Dec 12 10:47:40 2012
@@ -37,6 +37,10 @@
6. run clustering and quantizing vectors program
+ # go to parent directory and run PrepareInputVectors via ant
+ $ cd ..
+ $ ant run-piv
+
TODO
7. run clusterdump to compute cluster centroids
Added: labs/alike/trunk/src/java/org/apache/alike/FileUtil.java
URL:
http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/FileUtil.java?rev=1420615&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/FileUtil.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/FileUtil.java Wed Dec 12
10:47:40 2012
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike;
+
+import java.io.File;
+
+public class FileUtil {
+
+  /**
+   * Deletes the specified directory or file recursively.
+   * Nothing happens if the path does not exist.
+   *
+   * @param parent the directory or file to be removed.
+   */
+  public static void deleteRecursively(String parent){
+    executeRecursively(new DeleteExecutor(), parent);
+  }
+
+  /**
+   * Applies the specified {@link Executor} to the specified directory or file recursively.
+   *
+   * @param executor an {@link Executor} to be executed.
+   * @param parent the directory or file to which the executor is applied.
+   */
+  public static void executeRecursively(Executor executor, String parent){
+    executeRecursively(executor, parent, null);
+  }
+
+  /**
+   * Applies the specified {@link Executor} to the specified directory or file recursively.
+   * The entries of a directory are visited before the directory itself, so an
+   * executor that removes what it visits always sees an already emptied directory.
+   * A path that does not exist is silently ignored.
+   *
+   * @param executor an {@link Executor} to be executed.
+   * @param parent the directory to be traversed recursively.
+   * @param file the entry under parent to start from, or null to start from parent itself.
+   */
+  public static void executeRecursively(Executor executor, String parent, String file){
+    File theFile = file == null ? new File(parent) : new File(parent, file);
+    if(!theFile.exists()) return;
+
+    if(theFile.isFile()){
+      if(executor.isExecutable(theFile)){
+        executor.execute(theFile);
+      }
+      return;
+    }
+
+    if(!theFile.isAbsolute()){
+      theFile = theFile.getAbsoluteFile();
+    }
+
+    // File#list() returns null when the path is not a directory or when an
+    // I/O error occurs; treat that as "no children" instead of failing with
+    // a NullPointerException.
+    String[] fileList = theFile.list();
+    if(fileList != null){
+      for(String aFile : fileList){
+        executeRecursively(executor, theFile.getAbsolutePath(), aFile);
+      }
+    }
+
+    // children first, then the directory itself
+    if(executor.isExecutable(theFile)){
+      executor.execute(theFile);
+    }
+  }
+
+  /**
+   * An abstract class which is called back by
+   * {@link FileUtil#executeRecursively(Executor, String, String)} for every
+   * file and directory it visits.
+   */
+  public static abstract class Executor {
+
+    /**
+     * The sub class must implement this method to process theFile argument.
+     *
+     * @param theFile the file or directory currently being visited.
+     */
+    public abstract void execute(File theFile);
+
+    /**
+     * The sub class can override this method to tell whether theFile should be
+     * processed by this {@link Executor}. The default implementation returns true.
+     * Returning false for a directory does not stop the traversal of its entries.
+     *
+     * @param theFile the file or directory currently being visited.
+     * @return true if you want {@link Executor} to process theFile, otherwise false.
+     */
+    public boolean isExecutable(File theFile){
+      return true;
+    }
+  }
+
+  /**
+   * An {@link Executor} implementation to delete a directory recursively.
+   * Deletion is best effort: the result of {@link File#delete()} is not checked.
+   */
+  static class DeleteExecutor extends Executor {
+    @Override
+    public void execute(File theFile) {
+      theFile.delete();
+    }
+  }
+}
Added: labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java
URL:
http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java?rev=1420615&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java Wed Dec
12 10:47:40 2012
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.alike.FileUtil.Executor;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.mahout.math.NamedVector;
+import org.apache.mahout.math.RandomAccessSparseVector;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
+
+/**
+ * This program reads visual descriptors that were created by OpenCV,
+ * and writes them to Mahout format ({@link VectorWritable}) file.
+ *
+ */
+public class PrepareInputVectors {
+
+  /**
+   * The main program that takes two arguments.
+   *
+   * @param args the first argument is the parent directory of visual descriptors.
+   *   the second argument is output file path.
+   * @throws IOException if the output file cannot be created or closed.
+   */
+  public static void main(String[] args) throws IOException {
+    if(args.length != 2){
+      printUsage(1);
+    }
+
+    VisualDescriptors2MahoutExecutor executor = new VisualDescriptors2MahoutExecutor(args[1]);
+    try{
+      FileUtil.executeRecursively(executor, args[0]);
+    }
+    finally{
+      // the writer has to be closed, otherwise the output file may be left incomplete
+      executor.close();
+    }
+  }
+
+  /**
+   * Prints the command line usage to the standard error.
+   *
+   * @param exit the exit status to terminate the JVM with; a negative value
+   *   means that the method returns without terminating the JVM.
+   */
+  static void printUsage(int exit){
+    System.err.printf("Usage: $ java %s <parent_dir_path> <output_file_path>\n",
+        PrepareInputVectors.class.getName());
+    System.err.println("\t<parent_dir_path> parent directory path of visual descriptors");
+    System.err.println("\t<output_file_path> output file path for Mahout");
+
+    if(exit >= 0){
+      System.exit(exit);
+    }
+  }
+
+  /**
+   * An {@link Executor} that reads every visited *.txt file as a visual
+   * descriptor file and appends its vectors to a single {@link SequenceFile}.
+   * The key of each record is a sequential number starting from zero, and the
+   * value is a {@link VectorWritable}. Call {@link #close()} when the
+   * traversal is finished.
+   */
+  static class VisualDescriptors2MahoutExecutor extends Executor {
+
+    private long recNum;
+    private final Configuration conf;
+    private final FileSystem fs;
+    private final SequenceFile.Writer writer;
+    private final VectorWritable vw;
+
+    /**
+     * @param resultPath the path of the sequence file to be written.
+     * @throws IOException if the sequence file cannot be opened for writing.
+     */
+    public VisualDescriptors2MahoutExecutor(String resultPath) throws IOException {
+      recNum = 0;
+
+      conf = new Configuration();
+      fs = FileSystem.get(conf);
+
+      Path path = new Path(resultPath);
+      writer = new SequenceFile.Writer(fs, conf,
+          path, LongWritable.class, VectorWritable.class);
+      vw = new VectorWritable();
+    }
+
+    /**
+     * Only files whose name ends with ".txt" are processed. Directories
+     * usually do not match, but their entries are still traversed.
+     */
+    @Override
+    public boolean isExecutable(File theFile){
+      return theFile.getName().endsWith(".txt");
+    }
+
+    /**
+     * Appends all vectors of theFile to the sequence file.
+     *
+     * @param theFile a visual descriptor file.
+     * @throws RuntimeException wrapping the {@link IOException} if reading
+     *   theFile or writing the sequence file fails.
+     */
+    @Override
+    public void execute(File theFile) {
+      try {
+        List<NamedVector> nvList = getNamedVectorsFromFile(theFile);
+        for (Vector vector : nvList) {
+          vw.set(vector);
+          writer.append(new LongWritable(recNum++), vw);
+        }
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    /**
+     * Closes the underlying sequence file writer.
+     *
+     * @throws IOException if the writer cannot be closed.
+     */
+    public void close() throws IOException {
+      writer.close();
+    }
+
+    /**
+     * Reads a visual descriptor file. The first line is used as the name
+     * prefix of the vectors (presumably the image file name), the second line
+     * is skipped, and every following line is parsed as whitespace separated
+     * double values that make one vector named "&lt;first line&gt;_&lt;index&gt;".
+     *
+     * @param txtFile a visual descriptor file.
+     * @return the vectors in the order they appear in txtFile.
+     * @throws IOException if txtFile cannot be read.
+     * @throws NumberFormatException if a value cannot be parsed as a double.
+     */
+    List<NamedVector> getNamedVectorsFromFile(File txtFile) throws IOException {
+      List<NamedVector> nvList = new ArrayList<NamedVector>();
+      // FileReader uses the platform default charset; the run-piv ant target
+      // sets -Dfile.encoding=UTF-8 for that reason.
+      BufferedReader br = new BufferedReader(new FileReader(txtFile));
+      try{
+        String imgFile = br.readLine();
+        br.readLine(); // skip number of lines count
+        StringBuilder sb = new StringBuilder();
+        int num = 0;
+        String line;
+        while((line = br.readLine()) != null){
+          String[] strValues = line.trim().split("\\s+");
+          double[] values = new double[strValues.length];
+          for(int i = 0; i < strValues.length; i++){
+            values[i] = Double.parseDouble(strValues[i]);
+          }
+          sb.setLength(0);
+          sb.append(imgFile).append('_').append(num);
+          NamedVector nv = new NamedVector(new RandomAccessSparseVector(values.length), sb.toString());
+          nv.assign(values);
+          nvList.add(nv);
+
+          num++;
+        }
+      }
+      finally{
+        // release the file handle even if a line cannot be parsed
+        br.close();
+      }
+
+      return nvList;
+    }
+  }
+}
Added: labs/alike/trunk/src/test/org/apache/alike/FileUtilTest.java
URL:
http://svn.apache.org/viewvc/labs/alike/trunk/src/test/org/apache/alike/FileUtilTest.java?rev=1420615&view=auto
==============================================================================
--- labs/alike/trunk/src/test/org/apache/alike/FileUtilTest.java (added)
+++ labs/alike/trunk/src/test/org/apache/alike/FileUtilTest.java Wed Dec 12
10:47:40 2012
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+
+import org.junit.Test;
+
+public class FileUtilTest {
+
+  /**
+   * Creates the nested directories a/b/c under the working directory and
+   * verifies that {@link FileUtil#deleteRecursively(String)} removes the whole
+   * tree, including the top directory.
+   */
+  @Test
+  public void testDeleteRecursively() throws Exception {
+    File top = new File("a");
+    File dirs = new File(top, "b/c");
+    // refuse to run against a pre-existing "a" so that nothing unrelated gets deleted
+    assertFalse(top.exists());
+    assertTrue(dirs.mkdirs());
+    assertTrue(dirs.exists());
+    FileUtil.deleteRecursively("a");
+    assertFalse(dirs.exists());
+    // the parents must be gone as well, not only the leaf
+    assertFalse(top.exists());
+  }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]