Author: edwardyoon
Date: Thu Nov 22 04:13:05 2012
New Revision: 1412417
URL: http://svn.apache.org/viewvc?rev=1412417&view=rev
Log:
Reformatting code
Modified:
hama/trunk/examples/src/main/java/org/apache/hama/examples/ExampleDriver.java
hama/trunk/examples/src/main/java/org/apache/hama/examples/GradientDescentExample.java
hama/trunk/ml/src/main/java/org/apache/hama/ml/distance/DistanceMeasurer.java
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/CostFunction.java
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/GradientDescentBSP.java
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/HypothesisFunction.java
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LinearRegressionModel.java
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/RegressionModel.java
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/VectorDoubleFileInputFormat.java
hama/trunk/ml/src/main/java/org/apache/hama/ml/writable/VectorWritable.java
Modified:
hama/trunk/examples/src/main/java/org/apache/hama/examples/ExampleDriver.java
URL:
http://svn.apache.org/viewvc/hama/trunk/examples/src/main/java/org/apache/hama/examples/ExampleDriver.java?rev=1412417&r1=1412416&r2=1412417&view=diff
==============================================================================
---
hama/trunk/examples/src/main/java/org/apache/hama/examples/ExampleDriver.java
(original)
+++
hama/trunk/examples/src/main/java/org/apache/hama/examples/ExampleDriver.java
Thu Nov 22 04:13:05 2012
@@ -34,10 +34,8 @@ public class ExampleDriver {
pgd.addClass("bench", RandBench.class, "Random Benchmark");
pgd.addClass("pagerank", PageRank.class, "PageRank");
pgd.addClass("inlnkcount", InlinkCount.class, "InlinkCount");
- pgd.addClass("bipartite", BipartiteMatching.class,
- "Bipartite Matching");
- pgd.addClass("kmeans", Kmeans.class,
- "K-Means Clustering");
+ pgd.addClass("bipartite", BipartiteMatching.class, "Bipartite Matching");
+ pgd.addClass("kmeans", Kmeans.class, "K-Means Clustering");
pgd.driver(args);
} catch (Throwable e) {
e.printStackTrace();
Modified:
hama/trunk/examples/src/main/java/org/apache/hama/examples/GradientDescentExample.java
URL:
http://svn.apache.org/viewvc/hama/trunk/examples/src/main/java/org/apache/hama/examples/GradientDescentExample.java?rev=1412417&r1=1412416&r2=1412417&view=diff
==============================================================================
---
hama/trunk/examples/src/main/java/org/apache/hama/examples/GradientDescentExample.java
(original)
+++
hama/trunk/examples/src/main/java/org/apache/hama/examples/GradientDescentExample.java
Thu Nov 22 04:13:05 2012
@@ -42,15 +42,16 @@ public class GradientDescentExample {
private static final Path TMP_OUTPUT = new Path("/tmp/gd");
public static void main(String[] args) throws InterruptedException,
- IOException, ClassNotFoundException {
+ IOException, ClassNotFoundException {
// BSP job configuration
HamaConfiguration conf = new HamaConfiguration();
conf.setFloat(GradientDescentBSP.ALPHA, 0.002f);
conf.setFloat(GradientDescentBSP.COST_THRESHOLD, 0.5f);
conf.setInt(GradientDescentBSP.ITERATIONS_THRESHOLD, 300);
conf.setInt(GradientDescentBSP.INITIAL_THETA_VALUES, 10);
- if (args.length > 1 && args[1]!=null && args[1].equals("logistic")) {
- conf.setClass(GradientDescentBSP.REGRESSION_MODEL_CLASS, LogisticRegressionModel.class, RegressionModel.class);
+ if (args.length > 1 && args[1] != null && args[1].equals("logistic")) {
+ conf.setClass(GradientDescentBSP.REGRESSION_MODEL_CLASS,
+ LogisticRegressionModel.class, RegressionModel.class);
}
BSPJob bsp = new BSPJob(conf, GradientDescentExample.class);
@@ -70,7 +71,7 @@ public class GradientDescentExample {
if (bsp.waitForCompletion(true)) {
printOutput(conf);
System.out.println("Job Finished in "
- + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
+ + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}
}
Modified:
hama/trunk/ml/src/main/java/org/apache/hama/ml/distance/DistanceMeasurer.java
URL:
http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/distance/DistanceMeasurer.java?rev=1412417&r1=1412416&r2=1412417&view=diff
==============================================================================
---
hama/trunk/ml/src/main/java/org/apache/hama/ml/distance/DistanceMeasurer.java
(original)
+++
hama/trunk/ml/src/main/java/org/apache/hama/ml/distance/DistanceMeasurer.java
Thu Nov 22 04:13:05 2012
@@ -20,22 +20,23 @@ package org.apache.hama.ml.distance;
import org.apache.hama.ml.math.DoubleVector;
/**
- * a {@link DistanceMeasurer} is responsible for calculating the distance between
- * {@link DoubleVector}s or Arrays of {@code double}s
+ * a {@link DistanceMeasurer} is responsible for calculating the distance
+ * between {@link DoubleVector}s or Arrays of {@code double}s
*/
public interface DistanceMeasurer {
/**
* Calculates the distance between two arrays of {@code double}s
+ *
* @param set1 an array of {@code double}
* @param set2 an array of {@code double}
* @return a {@code double} representing the distance
*/
public double measureDistance(double[] set1, double[] set2);
-
/**
* Calculates the distance between two {@link DoubleVector}ss
+ *
* @param vec1 a {@link DoubleVector}
* @param vec2 a {@link DoubleVector}
* @return a {@code double} representing the distance
Modified:
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/CostFunction.java
URL:
http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/CostFunction.java?rev=1412417&r1=1412416&r2=1412417&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/CostFunction.java
(original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/CostFunction.java
Thu Nov 22 04:13:05 2012
@@ -27,14 +27,15 @@ public interface CostFunction {
/**
* Calculates the cost function for a given item (input x, output y), a model
* defined by the hypothesis parametrized by the vector theta
- *
- * @param x the input vector
- * @param y the learned output for x
- * @param m the number of existing items
- * @param theta the parameters vector theta
+ *
+ * @param x the input vector
+ * @param y the learned output for x
+ * @param m the number of existing items
+ * @param theta the parameters vector theta
* @param hypothesis the hypothesis function to model the problem
* @return the calculated cost for input x and output y
*/
- public double calculateCostForItem(DoubleVector x, double y, int m, DoubleVector theta, HypothesisFunction hypothesis);
+ public double calculateCostForItem(DoubleVector x, double y, int m,
+ DoubleVector theta, HypothesisFunction hypothesis);
}
Modified:
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/GradientDescentBSP.java
URL:
http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/GradientDescentBSP.java?rev=1412417&r1=1412416&r2=1412417&view=diff
==============================================================================
---
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/GradientDescentBSP.java
(original)
+++
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/GradientDescentBSP.java
Thu Nov 22 04:13:05 2012
@@ -32,11 +32,16 @@ import java.io.IOException;
import java.util.Arrays;
/**
- * A gradient descent (see <code>http://en.wikipedia.org/wiki/Gradient_descent</code>) BSP based implementation.
+ * A gradient descent (see
+ * <code>http://en.wikipedia.org/wiki/Gradient_descent</code>) BSP based
+ * implementation.
*/
-public class GradientDescentBSP extends BSP<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable, VectorWritable> {
+public class GradientDescentBSP
+ extends
+ BSP<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable, VectorWritable> {
- private static final Logger log = LoggerFactory.getLogger(GradientDescentBSP.class);
+ private static final Logger log = LoggerFactory
+ .getLogger(GradientDescentBSP.class);
public static final String INITIAL_THETA_VALUES = "gd.initial.theta";
public static final String ALPHA = "gd.alpha";
public static final String COST_THRESHOLD = "gd.cost.threshold";
@@ -52,22 +57,30 @@ public class GradientDescentBSP extends
private int iterationsThreshold;
private int m;
+ @SuppressWarnings("unchecked")
@Override
- public void setup(BSPPeer<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable, VectorWritable> peer) throws IOException, SyncException, InterruptedException {
+ public void setup(
+ BSPPeer<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable,
VectorWritable> peer)
+ throws IOException, SyncException, InterruptedException {
master = peer.getPeerIndex() == peer.getNumPeers() / 2;
cost = Double.MAX_VALUE;
costThreshold = peer.getConfiguration().getFloat(COST_THRESHOLD, 0.1f);
- iterationsThreshold = peer.getConfiguration().getInt(ITERATIONS_THRESHOLD,
10000);
+ iterationsThreshold = peer.getConfiguration().getInt(ITERATIONS_THRESHOLD,
+ 10000);
alpha = peer.getConfiguration().getFloat(ALPHA, 0.003f);
try {
- regressionModel = ((Class<? extends RegressionModel>) peer.getConfiguration().getClass(REGRESSION_MODEL_CLASS, LinearRegressionModel.class)).newInstance();
+ regressionModel = ((Class<? extends RegressionModel>) peer
+ .getConfiguration().getClass(REGRESSION_MODEL_CLASS,
+ LinearRegressionModel.class)).newInstance();
} catch (Exception e) {
- throw new IOException(e);
+ throw new IOException(e);
}
}
@Override
- public void bsp(BSPPeer<VectorWritable, DoubleWritable, VectorWritable,
DoubleWritable, VectorWritable> peer) throws IOException, SyncException,
InterruptedException {
+ public void bsp(
+ BSPPeer<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable,
VectorWritable> peer)
+ throws IOException, SyncException, InterruptedException {
// 0a superstep: get initial theta
getInitialTheta(peer);
@@ -77,7 +90,7 @@ public class GradientDescentBSP extends
// increment counter
itemCount++;
}
- broadcastVector(peer, new double[]{itemCount});
+ broadcastVector(peer, new double[] { itemCount });
peer.sync();
// aggregate number of items
@@ -92,14 +105,15 @@ public class GradientDescentBSP extends
double localCost = calculateLocalCost(peer);
// cost is sent and aggregated by each
- broadcastVector(peer, new double[]{localCost});
+ broadcastVector(peer, new double[] { localCost });
peer.sync();
// second superstep : aggregate cost calculation
double totalCost = aggregateTotalCost(peer, localCost);
// cost check
- if (checkCost(peer, iterations, totalCost)) break;
+ if (checkCost(peer, iterations, totalCost))
+ break;
peer.sync();
peer.reopenInput();
@@ -119,8 +133,9 @@ public class GradientDescentBSP extends
updateTheta(newTheta);
if (log.isDebugEnabled()) {
- log.debug(new StringBuilder(peer.getPeerName()).append(": new theta for cost ").
- append(cost).append(" is ").append(theta.toString()).toString());
+ log.debug(new StringBuilder(peer.getPeerName())
+ .append(": new theta for cost ").append(cost).append(" is ")
+ .append(theta.toString()).toString());
}
// master writes down the output
if (master) {
@@ -132,9 +147,11 @@ public class GradientDescentBSP extends
iterations++;
}
-}
+ }
- private double aggregateTotalCost(BSPPeer<VectorWritable, DoubleWritable,
VectorWritable, DoubleWritable, VectorWritable> peer, double localCost) throws
IOException {
+ private double aggregateTotalCost(
+ BSPPeer<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable,
VectorWritable> peer,
+ double localCost) throws IOException {
double totalCost = localCost;
VectorWritable costResult;
while ((costResult = peer.getCurrentMessage()) != null) {
@@ -143,12 +160,14 @@ public class GradientDescentBSP extends
return totalCost;
}
- private double[] aggregatePartialDerivatives(BSPPeer<VectorWritable,
DoubleWritable, VectorWritable, DoubleWritable, VectorWritable> peer, double[]
thetaDelta) throws IOException {
+ private double[] aggregatePartialDerivatives(
+ BSPPeer<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable,
VectorWritable> peer,
+ double[] thetaDelta) throws IOException {
VectorWritable thetaDeltaSlice;
double[] newTheta = Arrays.copyOf(thetaDelta, thetaDelta.length);
while ((thetaDeltaSlice = peer.getCurrentMessage()) != null) {
for (int j = 0; j < theta.getLength(); j++) {
- newTheta[j] += thetaDeltaSlice.getVector().get(j);
+ newTheta[j] += thetaDeltaSlice.getVector().get(j);
}
}
return newTheta;
@@ -162,7 +181,9 @@ public class GradientDescentBSP extends
theta = new DenseDoubleVector(newTheta);
}
- private void aggregateItemsNumber(BSPPeer<VectorWritable, DoubleWritable,
VectorWritable, DoubleWritable, VectorWritable> peer, int itemCount) throws
IOException {
+ private void aggregateItemsNumber(
+ BSPPeer<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable,
VectorWritable> peer,
+ int itemCount) throws IOException {
VectorWritable itemsResult;
while ((itemsResult = peer.getCurrentMessage()) != null) {
itemCount += itemsResult.getVector().get(0);
@@ -171,23 +192,30 @@ public class GradientDescentBSP extends
m = itemCount;
}
- private boolean checkCost(BSPPeer<VectorWritable, DoubleWritable,
VectorWritable, DoubleWritable, VectorWritable> peer, int iterations, double
totalCost) {
- if (iterations > 0 && cost < totalCost ) {
- throw new RuntimeException(new StringBuilder("gradient descent failed to converge with alpha ").
- append(alpha).toString());
- } else if (totalCost == 0 || totalCost < costThreshold || iterations >= iterationsThreshold) {
+ private boolean checkCost(
+ BSPPeer<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable,
VectorWritable> peer,
+ int iterations, double totalCost) {
+ if (iterations > 0 && cost < totalCost) {
+ throw new RuntimeException(new StringBuilder(
+ "gradient descent failed to converge with alpha ").append(alpha)
+ .toString());
+ } else if (totalCost == 0 || totalCost < costThreshold
+ || iterations >= iterationsThreshold) {
cost = totalCost;
return true;
} else {
cost = totalCost;
if (log.isDebugEnabled()) {
- log.debug(new StringBuilder(peer.getPeerName()).append(": current cost is ").append(cost).toString());
+ log.debug(new StringBuilder(peer.getPeerName())
+ .append(": current cost is ").append(cost).toString());
}
return false;
}
-}
+ }
- private double calculateLocalCost(BSPPeer<VectorWritable, DoubleWritable,
VectorWritable, DoubleWritable, VectorWritable> peer) throws IOException {
+ private double calculateLocalCost(
+ BSPPeer<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable,
VectorWritable> peer)
+ throws IOException {
double localCost = 0d;
// read an item
@@ -202,9 +230,11 @@ public class GradientDescentBSP extends
localCost += costForX;
}
return localCost;
-}
+ }
- private void broadcastVector(BSPPeer<VectorWritable, DoubleWritable,
VectorWritable, DoubleWritable, VectorWritable> peer, double[] vector) throws
IOException {
+ private void broadcastVector(
+ BSPPeer<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable,
VectorWritable> peer,
+ double[] vector) throws IOException {
for (String peerName : peer.getAllPeerNames()) {
if (!peerName.equals(peer.getPeerName())) { // avoid sending to oneself
peer.send(peerName, new VectorWritable(new DenseDoubleVector(vector)));
@@ -212,7 +242,9 @@ public class GradientDescentBSP extends
}
}
- private double[] calculatePartialDerivatives(BSPPeer<VectorWritable,
DoubleWritable, VectorWritable, DoubleWritable, VectorWritable> peer) throws
IOException {
+ private double[] calculatePartialDerivatives(
+ BSPPeer<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable,
VectorWritable> peer)
+ throws IOException {
KeyValuePair<VectorWritable, DoubleWritable> kvp;
double[] thetaDelta = new double[theta.getLength()];
while ((kvp = peer.readNext()) != null) {
@@ -227,45 +259,55 @@ public class GradientDescentBSP extends
}
@Override
- public void cleanup(BSPPeer<VectorWritable, DoubleWritable, VectorWritable,
DoubleWritable, VectorWritable> peer) throws IOException {
+ public void cleanup(
+ BSPPeer<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable,
VectorWritable> peer)
+ throws IOException {
// master writes down the final output
if (master) {
peer.write(new VectorWritable(theta), new DoubleWritable(cost));
if (log.isInfoEnabled()) {
- log.info(new StringBuilder(peer.getPeerName()).append(":computation finished with cost ").
- append(cost).append(" for theta ").append(theta).toString());
+ log.info(new StringBuilder(peer.getPeerName())
+ .append(":computation finished with cost ").append(cost)
+ .append(" for theta ").append(theta).toString());
}
}
}
- public void getInitialTheta(BSPPeer<VectorWritable, DoubleWritable,
VectorWritable, DoubleWritable, VectorWritable> peer) throws IOException,
SyncException, InterruptedException {
+ public void getInitialTheta(
+ BSPPeer<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable,
VectorWritable> peer)
+ throws IOException, SyncException, InterruptedException {
if (theta == null) {
if (master) {
int size = getXSize(peer);
- theta = new DenseDoubleVector(size,
peer.getConfiguration().getInt(INITIAL_THETA_VALUES, 1));
- broadcastVector(peer,theta.toArray());
+ theta = new DenseDoubleVector(size, peer.getConfiguration().getInt(
+ INITIAL_THETA_VALUES, 1));
+ broadcastVector(peer, theta.toArray());
if (log.isDebugEnabled()) {
- log.debug(new StringBuilder(peer.getPeerName()).append(": sending theta").toString());
+ log.debug(new StringBuilder(peer.getPeerName()).append(
+ ": sending theta").toString());
}
peer.sync();
- } else {
- if (log.isDebugEnabled()) {
- log.debug(new StringBuilder(peer.getPeerName()).append(": getting theta").toString());
- }
- peer.sync();
- VectorWritable vectorWritable = peer.getCurrentMessage();
- theta = vectorWritable.getVector();
+ } else {
+ if (log.isDebugEnabled()) {
+ log.debug(new StringBuilder(peer.getPeerName()).append(
+ ": getting theta").toString());
}
+ peer.sync();
+ VectorWritable vectorWritable = peer.getCurrentMessage();
+ theta = vectorWritable.getVector();
+ }
}
}
- private int getXSize(BSPPeer<VectorWritable, DoubleWritable, VectorWritable,
DoubleWritable, VectorWritable> peer) throws IOException {
+ private int getXSize(
+ BSPPeer<VectorWritable, DoubleWritable, VectorWritable, DoubleWritable,
VectorWritable> peer)
+ throws IOException {
VectorWritable key = new VectorWritable();
DoubleWritable value = new DoubleWritable();
peer.readNext(key, value);
peer.reopenInput(); // reset input to start
if (key.getVector() == null) {
- throw new IOException("cannot read input vector size");
+ throw new IOException("cannot read input vector size");
}
return key.getVector().getDimension();
}
Modified:
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/HypothesisFunction.java
URL:
http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/HypothesisFunction.java?rev=1412417&r1=1412416&r2=1412417&view=diff
==============================================================================
---
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/HypothesisFunction.java
(original)
+++
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/HypothesisFunction.java
Thu Nov 22 04:13:05 2012
@@ -25,10 +25,11 @@ import org.apache.hama.ml.math.DoubleVec
public interface HypothesisFunction {
/**
- * Applies the applyHypothesis given a set of parameters theta to a given
input x
- *
+ * Applies the applyHypothesis given a set of parameters theta to a given
+ * input x
+ *
* @param theta the parameters vector
- * @param x the input
+ * @param x the input
* @return a <code>double</code> number
*/
public double applyHypothesis(DoubleVector theta, DoubleVector x);
Modified:
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LinearRegressionModel.java
URL:
http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LinearRegressionModel.java?rev=1412417&r1=1412416&r2=1412417&view=diff
==============================================================================
---
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LinearRegressionModel.java
(original)
+++
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LinearRegressionModel.java
Thu Nov 22 04:13:05 2012
@@ -29,7 +29,8 @@ public class LinearRegressionModel imple
public LinearRegressionModel() {
costFunction = new CostFunction() {
@Override
- public double calculateCostForItem(DoubleVector x, double y, int m,
DoubleVector theta, HypothesisFunction hypothesis) {
+ public double calculateCostForItem(DoubleVector x, double y, int m,
+ DoubleVector theta, HypothesisFunction hypothesis) {
return y * Math.pow(applyHypothesis(theta, x) - y, 2) / (2 * m);
}
};
@@ -41,7 +42,8 @@ public class LinearRegressionModel imple
}
@Override
- public double calculateCostForItem(DoubleVector x, double y, int m,
DoubleVector theta) {
+ public double calculateCostForItem(DoubleVector x, double y, int m,
+ DoubleVector theta) {
return costFunction.calculateCostForItem(x, y, m, theta, this);
}
}
Modified:
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java
URL:
http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java?rev=1412417&r1=1412416&r2=1412417&view=diff
==============================================================================
---
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java
(original)
+++
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegressionModel.java
Thu Nov 22 04:13:05 2012
@@ -32,44 +32,54 @@ public class LogisticRegressionModel imp
public LogisticRegressionModel() {
costFunction = new CostFunction() {
@Override
- public double calculateCostForItem(DoubleVector x, double y, int m,
DoubleVector theta, HypothesisFunction hypothesis) {
- return (-1d * y * ln(applyHypothesisWithPrecision(theta, x)).doubleValue() + (1d - y) * ln(applyHypothesisWithPrecision(theta, x).subtract(BigDecimal.valueOf(1))).doubleValue()) / m;
+ public double calculateCostForItem(DoubleVector x, double y, int m,
+ DoubleVector theta, HypothesisFunction hypothesis) {
+ return (-1d * y
+ * ln(applyHypothesisWithPrecision(theta, x)).doubleValue() + (1d - y)
+ * ln(
+ applyHypothesisWithPrecision(theta, x).subtract(
+ BigDecimal.valueOf(1))).doubleValue())
+ / m;
}
};
}
+
@Override
public double applyHypothesis(DoubleVector theta, DoubleVector x) {
- return applyHypothesisWithPrecision(theta, x).doubleValue();
+ return applyHypothesisWithPrecision(theta, x).doubleValue();
}
- private BigDecimal applyHypothesisWithPrecision(DoubleVector theta,
DoubleVector x) {
- return BigDecimal.valueOf(1).divide(BigDecimal.valueOf(1d).add(BigDecimal.valueOf(Math.exp(-1d * theta.dot(x)))),
- MathContext.DECIMAL128);
+ private BigDecimal applyHypothesisWithPrecision(DoubleVector theta,
+ DoubleVector x) {
+ return BigDecimal.valueOf(1).divide(
+ BigDecimal.valueOf(1d).add(
+ BigDecimal.valueOf(Math.exp(-1d * theta.dot(x)))),
+ MathContext.DECIMAL128);
}
private BigDecimal ln(BigDecimal x) {
- if (x.equals(BigDecimal.ONE)) {
- return BigDecimal.ZERO;
- }
- x = x.subtract(BigDecimal.ONE);
- int iterations = 1000;
- BigDecimal ret = new BigDecimal(iterations + 1);
- for (long i = iterations; i >= 0; i--) {
- BigDecimal N = new BigDecimal(i / 2 + 1).pow(2);
- N = N.multiply(x, MathContext.DECIMAL128);
- ret = N.divide(ret, MathContext.DECIMAL128);
-
- N = new BigDecimal(i + 1);
- ret = ret.add(N, MathContext.DECIMAL128);
-
- }
- ret = x.divide(ret, MathContext.DECIMAL128);
- return ret;
+ if (x.equals(BigDecimal.ONE)) {
+ return BigDecimal.ZERO;
+ }
+ x = x.subtract(BigDecimal.ONE);
+ int iterations = 1000;
+ BigDecimal ret = new BigDecimal(iterations + 1);
+ for (long i = iterations; i >= 0; i--) {
+ BigDecimal N = new BigDecimal(i / 2 + 1).pow(2);
+ N = N.multiply(x, MathContext.DECIMAL128);
+ ret = N.divide(ret, MathContext.DECIMAL128);
+
+ N = new BigDecimal(i + 1);
+ ret = ret.add(N, MathContext.DECIMAL128);
+
+ }
+ ret = x.divide(ret, MathContext.DECIMAL128);
+ return ret;
}
-
- @Override
- public double calculateCostForItem(DoubleVector x, double y, int m,
DoubleVector theta) {
+ @Override
+ public double calculateCostForItem(DoubleVector x, double y, int m,
+ DoubleVector theta) {
return costFunction.calculateCostForItem(x, y, m, theta, this);
}
}
Modified:
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/RegressionModel.java
URL:
http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/RegressionModel.java?rev=1412417&r1=1412416&r2=1412417&view=diff
==============================================================================
---
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/RegressionModel.java
(original)
+++
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/RegressionModel.java
Thu Nov 22 04:13:05 2012
@@ -25,15 +25,16 @@ import org.apache.hama.ml.math.DoubleVec
public interface RegressionModel extends HypothesisFunction {
/**
- * Calculates the cost function for a given item (input x, output y) and
- * the model's parameters defined by the vector theta
- *
- * @param x the input vector
- * @param y the learned output for x
- * @param m the total number of existing items
+ * Calculates the cost function for a given item (input x, output y) and the
+ * model's parameters defined by the vector theta
+ *
+ * @param x the input vector
+ * @param y the learned output for x
+ * @param m the total number of existing items
* @param theta the parameters vector theta
* @return the calculated cost for input x and output y
*/
- public double calculateCostForItem(DoubleVector x, double y, int m,
DoubleVector theta);
+ public double calculateCostForItem(DoubleVector x, double y, int m,
+ DoubleVector theta);
}
Modified:
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/VectorDoubleFileInputFormat.java
URL:
http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/VectorDoubleFileInputFormat.java?rev=1412417&r1=1412416&r2=1412417&view=diff
==============================================================================
---
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/VectorDoubleFileInputFormat.java
(original)
+++
hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/VectorDoubleFileInputFormat.java
Thu Nov 22 04:13:05 2012
@@ -36,19 +36,23 @@ import java.io.IOException;
import java.io.InputStream;
/**
- * A {@link FileInputFormat} for files containing one vector and one double
per line
+ * A {@link FileInputFormat} for files containing one vector and one double per
+ * line
*/
-public class VectorDoubleFileInputFormat extends
FileInputFormat<VectorWritable, DoubleWritable> {
+public class VectorDoubleFileInputFormat extends
+ FileInputFormat<VectorWritable, DoubleWritable> {
@Override
- public RecordReader<VectorWritable, DoubleWritable>
getRecordReader(InputSplit split, BSPJob job) throws IOException {
- return new VectorDoubleRecorderReader(job.getConf(), (FileSplit) split);
+ public RecordReader<VectorWritable, DoubleWritable> getRecordReader(
+ InputSplit split, BSPJob job) throws IOException {
+ return new VectorDoubleRecorderReader(job.getConfiguration(), (FileSplit) split);
}
- static class VectorDoubleRecorderReader implements
RecordReader<VectorWritable, DoubleWritable> {
+ static class VectorDoubleRecorderReader implements
+ RecordReader<VectorWritable, DoubleWritable> {
- private static final Log LOG =
LogFactory.getLog(VectorDoubleRecorderReader.class
- .getName());
+ private static final Log LOG = LogFactory
+ .getLog(VectorDoubleRecorderReader.class.getName());
private CompressionCodecFactory compressionCodecs = null;
private long start;
@@ -75,9 +79,9 @@ public class VectorDoubleFileInputFormat
}
public VectorDoubleRecorderReader(Configuration job, FileSplit split)
- throws IOException {
+ throws IOException {
this.maxLineLength = job.getInt("bsp.linerecordreader.maxlength",
- Integer.MAX_VALUE);
+ Integer.MAX_VALUE);
start = split.getStart();
end = start + split.getLength();
final Path file = split.getPath();
@@ -101,13 +105,13 @@ public class VectorDoubleFileInputFormat
}
if (skipFirstLine) { // skip first line and re-establish "start".
start += in.readLine(new Text(), 0,
- (int) Math.min(Integer.MAX_VALUE, end - start));
+ (int) Math.min(Integer.MAX_VALUE, end - start));
}
this.pos = start;
}
- public VectorDoubleRecorderReader(InputStream in, long offset, long
endOffset,
- int maxLineLength) {
+ public VectorDoubleRecorderReader(InputStream in, long offset,
+ long endOffset, int maxLineLength) {
this.maxLineLength = maxLineLength;
this.in = new LineReader(in);
this.start = offset;
@@ -115,10 +119,10 @@ public class VectorDoubleFileInputFormat
this.end = endOffset;
}
- public VectorDoubleRecorderReader(InputStream in, long offset, long
endOffset,
- Configuration job) throws IOException {
+ public VectorDoubleRecorderReader(InputStream in, long offset,
+ long endOffset, Configuration job) throws IOException {
this.maxLineLength = job.getInt("bsp.linerecordreader.maxlength",
- Integer.MAX_VALUE);
+ Integer.MAX_VALUE);
this.in = new LineReader(in, job);
this.start = offset;
this.pos = offset;
@@ -140,13 +144,13 @@ public class VectorDoubleFileInputFormat
*/
@Override
public synchronized boolean next(VectorWritable key, DoubleWritable value)
- throws IOException {
+ throws IOException {
while (pos < end) {
Text textVal = new Text();
int newSize = in.readLine(textVal, maxLineLength, Math.max(
- (int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
+ (int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
if (newSize == 0) {
return false;
}
@@ -168,7 +172,8 @@ public class VectorDoubleFileInputFormat
}
// line too long. try again
- LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
+ LOG.info("Skipped line of size " + newSize + " at pos "
+ + (pos - newSize));
}
return false;
Modified:
hama/trunk/ml/src/main/java/org/apache/hama/ml/writable/VectorWritable.java
URL:
http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/writable/VectorWritable.java?rev=1412417&r1=1412416&r2=1412417&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/writable/VectorWritable.java
(original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/writable/VectorWritable.java
Thu Nov 22 04:13:05 2012
@@ -20,11 +20,10 @@ package org.apache.hama.ml.writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-import java.util.Iterator;
import org.apache.hadoop.io.WritableComparable;
-import org.apache.hama.ml.math.DoubleVector;
import org.apache.hama.ml.math.DenseDoubleVector;
+import org.apache.hama.ml.math.DoubleVector;
/**
* Writable for dense vectors.