Author: koji
Date: Sat Dec 29 10:11:28 2012
New Revision: 1426735
URL: http://svn.apache.org/viewvc?rev=1426735&view=rev
Log:
add optimal cluster finder
Added:
labs/alike/trunk/src/java/org/apache/alike/FindOptimalCluster.java
Added: labs/alike/trunk/src/java/org/apache/alike/FindOptimalCluster.java
URL:
http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/FindOptimalCluster.java?rev=1426735&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/FindOptimalCluster.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/FindOptimalCluster.java Sat Dec 29 10:11:28 2012
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike;
+
+import java.io.IOException;
+
+/**
+ * This program finds the best suited cluster from multiple cluster dump files.
+ *
+ */
+public class FindOptimalCluster {
+
+ static int K;
+ static int D;
+
+ /**
+ * The main program that takes the path to alikeconfig.xml as the first
argument and
+ * one or more arguments for cluster dump files path.
+ *
+ * @param args file path to alikeconfig.xml and one or more path to cluster
dump files
+ * @throws IOException
+ *
+ */
+ public static void main(String[] args) throws IOException {
+ if(args.length < 2){
+ printUsage(1);
+ }
+
+ AlikeConfig config = new AlikeConfig(args[0]);
+
+ K = config.getNumOfClusters();
+ D = config.getNumOfDimensions();
+ ClusterDumpReader clusterDumpReader = new ClusterDumpReader(K, D);
+
+ double minError = Double.MAX_VALUE;
+ String optimalClusterDumpFile = null;
+ for(int i = 1; i < args.length; i++){
+ String dumpFile = args[i];
+
+ // read cluster centroids
+ double[][] centroids = clusterDumpReader.getCentroids(dumpFile);
+
+ MeanErrorExecutor executor =
+ new MeanErrorExecutor(K, centroids, config.getDistanceCalculator());
+ FileUtil.executeRecursively(executor, dumpFile);
+ double me = executor.getMeanError();
+ System.out.printf("%s : %f\n", dumpFile, me);
+ if(me < minError){
+ me = minError;
+ optimalClusterDumpFile = dumpFile;
+ }
+ }
+
+ System.out.printf("\n%s is the best suited cluster w/ mean error is %f\n",
optimalClusterDumpFile, minError);
+ }
+
+ static void printUsage(int exit){
+ System.err.printf("Usage: $ java %s <path-to-alikeconfig.xml>
<path-to-cluster-dump-file-1>" +
+ " [dump-file-2 ...]\n",
+ QuantizeVectors.class.getName());
+ System.err.println("\t<path-to-alikeconfig.xml> the file path to
alikeconfig.xml");
+ System.err.println("\t<path-to-cluster-dump-file> the file path to cluster
dump file");
+
+ if(exit >= 0){
+ System.exit(exit);
+ }
+ }
+
+ static class MeanErrorExecutor extends VisualDescriptorsExecutorBase {
+
+ private double sum;
+ private int count;
+
+ public MeanErrorExecutor(int k, double[][] centroids, DistanceCalculator
distanceCalculator) {
+ super(k, centroids, distanceCalculator);
+ sum = 0;
+ count = 0;
+ }
+
+ @Override
+ protected void minFoundProcess(String key, int pos, double minDistance){
+ sum += minDistance;
+ count++;
+ }
+
+ public double getMeanError(){
+ return sum / count;
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]