Author: ssc
Date: Mon Dec 12 16:08:36 2011
New Revision: 1213291
URL: http://svn.apache.org/viewvc?rev=1213291&view=rev
Log:
MAHOUT-924 Allow creation of symmetric adjacency matrices
Added:
mahout/trunk/core/src/test/java/org/apache/mahout/graph/AdjacencyMatrixJobTest.java
- copied, changed from r1212189,
mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/AdjacencyMatrixJobTest.java
Removed:
mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java?rev=1213291&r1=1213290&r2=1213291&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java
Mon Dec 12 16:08:36 2011
@@ -31,15 +31,19 @@ import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.common.mapreduce.VectorSumReducer;
-import org.apache.mahout.math.RandomAccessSparseVector;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.map.OpenIntIntHashMap;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
@@ -47,16 +51,17 @@ import java.util.Map;
import java.util.regex.Pattern;
/**
- * <p>Distributed computation of the adjacency matrix of a directed graph, see
http://en.wikipedia.org/wiki/Adjacency_matrix
+ * <p>Distributed computation of the adjacency matrix of a graph, see
http://en.wikipedia.org/wiki/Adjacency_matrix
*
* <p>This job outputs {@link org.apache.hadoop.io.SequenceFile}s an {@link
IntWritable} as key and a {@link VectorWritable} as value</p>
*
* <p>Command line arguments specific to this class are:</p>
*
* <ol>
- * <li>--output=(path): output path where the resulting matrix should be
written</li>
- * <li>--vertices=(path): file containing a list of all vertices</li>
- * <li>--edges=(path): Directory containing edges of the graph</li>
+ * <li>--output=(path): output path where the resulting matrix and the
number of vertices should be written</li>
+ * <li>--vertices=(path): file containing a list of all vertices</li>
+ * <li>--edges=(path): Directory containing edges of the graph</li>
+ * <li>--symmetric = (boolean) produce a symmetric adjacency matrix
(corresponds to an undirected graph)</li>
* </ol>
*
* <p>General command line options are documented in {@link AbstractJob}.</p>
@@ -65,31 +70,46 @@ import java.util.regex.Pattern;
*/
public class AdjacencyMatrixJob extends AbstractJob {
+ private static final Logger log =
LoggerFactory.getLogger(AdjacencyMatrixJob.class);
+
public static final String NUM_VERTICES = "numVertices.bin";
public static final String ADJACENCY_MATRIX = "adjacencyMatrix";
public static final String VERTEX_INDEX = "vertexIndex";
static final String NUM_VERTICES_PARAM = AdjacencyMatrixJob.class.getName()
+ ".numVertices";
static final String VERTEX_INDEX_PARAM = AdjacencyMatrixJob.class.getName()
+ ".vertexIndex";
+ static final String SYMMETRIC_PARAM = AdjacencyMatrixJob.class.getName() +
".symmetric";
+
+ public static void main(String[] args) throws Exception {
+ ToolRunner.run(new AdjacencyMatrixJob(), args);
+ }
@Override
public int run(String[] args) throws Exception {
addOption("vertices", null, "a text file containing all vertices of the
graph (one per line)", true);
- addOption("edges", null, "text files containing the edges of the graph
(vertexA,vertextB per line)", true);
+ addOption("edges", null, "text files containing the edges of the graph
(vertexA,vertexB per line)", true);
+ addOption("symmetric", null, "produce a symmetric adjacency matrix
(corresponds to an undirected graph)",
+ String.valueOf(false));
+
addOutputOption();
Map<String, String> parsedArgs = parseArguments(args);
+ if (parsedArgs == null) {
+ return -1;
+ }
Path vertices = new Path(parsedArgs.get("--vertices"));
Path edges = new Path(parsedArgs.get("--edges"));
+ boolean symmetric = Boolean.parseBoolean(parsedArgs.get("--symmetric"));
+ log.info("Indexing vertices sequentially, this might take a while...");
int numVertices = indexVertices(vertices, getOutputPath(VERTEX_INDEX));
HadoopUtil.writeInt(numVertices, getOutputPath(NUM_VERTICES), getConf());
-
Preconditions.checkArgument(numVertices > 0);
+ log.info("Found " + numVertices + " vertices, creating adjacency
matrix...");
Job createAdjacencyMatrix = prepareJob(edges,
getOutputPath(ADJACENCY_MATRIX), TextInputFormat.class,
VectorizeEdgesMapper.class, IntWritable.class, VectorWritable.class,
VectorSumReducer.class,
IntWritable.class, VectorWritable.class,
SequenceFileOutputFormat.class);
@@ -97,6 +117,7 @@ public class AdjacencyMatrixJob extends
Configuration createAdjacencyMatrixConf =
createAdjacencyMatrix.getConfiguration();
createAdjacencyMatrixConf.set(NUM_VERTICES_PARAM,
String.valueOf(numVertices));
createAdjacencyMatrixConf.set(VERTEX_INDEX_PARAM,
getOutputPath(VERTEX_INDEX).toString());
+ createAdjacencyMatrixConf.setBoolean(SYMMETRIC_PARAM, symmetric);
createAdjacencyMatrix.waitForCompletion(true);
return 0;
@@ -133,6 +154,7 @@ public class AdjacencyMatrixJob extends
private int numVertices;
private OpenIntIntHashMap vertexIDsToIndex;
+ private boolean symmetric;
private final IntWritable row = new IntWritable();
@@ -142,6 +164,7 @@ public class AdjacencyMatrixJob extends
protected void setup(Context ctx) throws IOException, InterruptedException
{
Configuration conf = ctx.getConfiguration();
numVertices = Integer.parseInt(conf.get(NUM_VERTICES_PARAM));
+ symmetric = conf.getBoolean(SYMMETRIC_PARAM, false);
Path vertexIndexPath = new Path(conf.get(VERTEX_INDEX_PARAM));
vertexIDsToIndex = new OpenIntIntHashMap(numVertices);
for (Pair<IntWritable,IntWritable> indexAndVertexID :
@@ -157,13 +180,19 @@ public class AdjacencyMatrixJob extends
String[] tokens = SEPARATOR.split(line.toString());
int rowIndex = vertexIDsToIndex.get(Integer.parseInt(tokens[0]));
int columnIndex = vertexIDsToIndex.get(Integer.parseInt(tokens[1]));
- RandomAccessSparseVector partialTransitionMatrixRow = new
RandomAccessSparseVector(numVertices, 1);
+ Vector partialTransitionMatrixRow = new
SequentialAccessSparseVector(numVertices, 1);
row.set(rowIndex);
partialTransitionMatrixRow.setQuick(columnIndex, 1);
-
ctx.write(row, new VectorWritable(partialTransitionMatrixRow));
+
+ if (symmetric && rowIndex != columnIndex) {
+ partialTransitionMatrixRow = new
SequentialAccessSparseVector(numVertices, 1);
+ row.set(columnIndex);
+ partialTransitionMatrixRow.setQuick(rowIndex, 1);
+ ctx.write(row, new VectorWritable(partialTransitionMatrixRow));
+ }
}
}
-}
\ No newline at end of file
+}
Copied:
mahout/trunk/core/src/test/java/org/apache/mahout/graph/AdjacencyMatrixJobTest.java
(from r1212189,
mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/AdjacencyMatrixJobTest.java)
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/graph/AdjacencyMatrixJobTest.java?p2=mahout/trunk/core/src/test/java/org/apache/mahout/graph/AdjacencyMatrixJobTest.java&p1=mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/AdjacencyMatrixJobTest.java&r1=1212189&r2=1213291&rev=1213291&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/AdjacencyMatrixJobTest.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/graph/AdjacencyMatrixJobTest.java
Mon Dec 12 16:08:36 2011
@@ -15,13 +15,12 @@
* limitations under the License.
*/
-package org.apache.mahout.graph.preprocessing;
+package org.apache.mahout.graph;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.graph.AdjacencyMatrixJob;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.hadoop.MathHelper;
@@ -82,4 +81,51 @@ public class AdjacencyMatrixJobTest exte
MathHelper.assertMatrixEquals(expectedAdjacencyMatrix,
actualAdjacencyMatrix);
}
+
+ @Test
+ public void symmetricAdjacencyMatrix() throws Exception {
+ File verticesFile = getTestTempFile("vertices.txt");
+ File edgesFile = getTestTempFile("edges.txt");
+ File outputDir = getTestTempDir("output");
+ outputDir.delete();
+
+ Configuration conf = new Configuration();
+
+ writeLines(verticesFile, "12", "34", "56", "78");
+
+ writeLines(edgesFile,
+ "12,34",
+ "12,56",
+ "34,34",
+ "34,78",
+ "56,34",
+ "56,56",
+ "56,78");
+
+ Matrix expectedAdjacencyMatrix = new DenseMatrix(new double[][] {
+ { 0, 1, 1, 0 },
+ { 1, 1, 1, 1 },
+ { 1, 1, 1, 1 },
+ { 0, 1, 1, 0 } });
+
+ AdjacencyMatrixJob createAdjacencyMatrix = new AdjacencyMatrixJob();
+ createAdjacencyMatrix.setConf(conf);
+ createAdjacencyMatrix.run(new String[] { "--vertices",
verticesFile.getAbsolutePath(),
+ "--edges", edgesFile.getAbsolutePath(), "--symmetric",
String.valueOf(true),
+ "--output", outputDir.getAbsolutePath() });
+
+ int numVertices = HadoopUtil.readInt(new Path(outputDir.getAbsolutePath(),
AdjacencyMatrixJob.NUM_VERTICES), conf);
+ Matrix actualAdjacencyMatrix = MathHelper.readMatrix(conf, new
Path(outputDir.getAbsolutePath(),
+ AdjacencyMatrixJob.ADJACENCY_MATRIX + "/part-r-00000"), numVertices,
numVertices);
+
+ StringBuilder info = new StringBuilder();
+ info.append("expected adjacency matrix:\n");
+ info.append(MathHelper.nice(expectedAdjacencyMatrix));
+ info.append("actual adjacency matrix:\n");
+ info.append(MathHelper.nice(actualAdjacencyMatrix));
+
+ log.info(info.toString());
+
+ MathHelper.assertMatrixEquals(expectedAdjacencyMatrix,
actualAdjacencyMatrix);
+ }
}
\ No newline at end of file