Updated Branches: refs/heads/trunk 06d64a29d -> 89bba9534
GIRAPH-520: ReverseEdgeDuplicator (nitay) Project: http://git-wip-us.apache.org/repos/asf/giraph/repo Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/89bba953 Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/89bba953 Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/89bba953 Branch: refs/heads/trunk Commit: 89bba9534b092d85027c8de81bf7f174760bc24f Parents: 06d64a2 Author: Nitay Joffe <[email protected]> Authored: Sat Feb 16 23:28:38 2013 -0500 Committer: Nitay Joffe <[email protected]> Committed: Sun Feb 17 18:11:31 2013 -0500 ---------------------------------------------------------------------- CHANGELOG | 2 + .../apache/giraph/graph/ReverseEdgeDuplicator.java | 115 +++++++++++++++ .../formats/IntNullReverseTextEdgeInputFormat.java | 47 ++++++ .../java/org/apache/giraph/io/TestEdgeInput.java | 34 ++++- 4 files changed, 197 insertions(+), 1 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/giraph/blob/89bba953/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index 2fb626e..5204023 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ Giraph Change Log Release 0.2.0 - unreleased + GIRAPH-520: ReverseEdgeDuplicator (nitay) + GIRAPH-522: JMap Dumper (nitay) GIRAPH-517: Use stable hcatalog 0.5.0-incubating (nitay) http://git-wip-us.apache.org/repos/asf/giraph/blob/89bba953/giraph-core/src/main/java/org/apache/giraph/graph/ReverseEdgeDuplicator.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/graph/ReverseEdgeDuplicator.java b/giraph-core/src/main/java/org/apache/giraph/graph/ReverseEdgeDuplicator.java new file mode 100644 index 0000000..4415cc2 --- /dev/null +++ b/giraph-core/src/main/java/org/apache/giraph/graph/ReverseEdgeDuplicator.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.graph; + +import org.apache.giraph.io.EdgeReader; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +import java.io.IOException; + +/** + * An EdgeReader that creates the opposite direction edge for each edge read. + * Used to create an undirected graph from a directed input. + * This class is a decorator around any other EdgeReader. + * + * @param <I> Vertex ID + * @param <E> Edge Value + */ +public class ReverseEdgeDuplicator<I extends WritableComparable, + E extends Writable> implements EdgeReader<I, E> { + /** The underlying EdgeReader to wrap */ + private final EdgeReader<I, E> baseReader; + + /** Whether the reverse edge stored currently is valid */ + private boolean haveReverseEdge = true; + /** Reverse of the edge last read */ + private Edge<I, E> reverseEdge; + /** Reverse source of last edge, in other words last edge's target */ + private I reverseSourceId; + + /** + * Constructor + * @param baseReader EdgeReader to wrap + */ + public ReverseEdgeDuplicator(EdgeReader<I, E> baseReader) { + this.baseReader = baseReader; + } + + /** + * Get wrapped EdgeReader + * @return EdgeReader + */ + public EdgeReader<I, E> getBaseReader() { + return baseReader; + } + + @Override + public void initialize(InputSplit inputSplit, TaskAttemptContext context) + throws IOException, InterruptedException { + baseReader.initialize(inputSplit, context); + haveReverseEdge = true; + } + + @Override + public boolean nextEdge() throws IOException, InterruptedException { + boolean result = true; + if (haveReverseEdge) { + result = baseReader.nextEdge(); + haveReverseEdge = false; + } else { + Edge<I, E> currentEdge = baseReader.getCurrentEdge(); + reverseSourceId = currentEdge.getTargetVertexId(); + reverseEdge = EdgeFactory.create(baseReader.getCurrentSourceId(), + currentEdge.getValue()); + haveReverseEdge = true; + } + return result; + } + + @Override + public I getCurrentSourceId() throws IOException, InterruptedException { + if (haveReverseEdge) { + return reverseSourceId; + } else { + return baseReader.getCurrentSourceId(); + } + } + + @Override + public Edge<I, E> getCurrentEdge() throws IOException, InterruptedException { + if (haveReverseEdge) { + return reverseEdge; + } else { + return baseReader.getCurrentEdge(); + } + } + + @Override + public void close() throws IOException { + baseReader.close(); + } + + @Override + public float getProgress() throws IOException, InterruptedException { + return baseReader.getProgress(); + } +} http://git-wip-us.apache.org/repos/asf/giraph/blob/89bba953/giraph-core/src/main/java/org/apache/giraph/io/formats/IntNullReverseTextEdgeInputFormat.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/io/formats/IntNullReverseTextEdgeInputFormat.java b/giraph-core/src/main/java/org/apache/giraph/io/formats/IntNullReverseTextEdgeInputFormat.java new file mode 100644 index 0000000..1e3b643 --- /dev/null +++ b/giraph-core/src/main/java/org/apache/giraph/io/formats/IntNullReverseTextEdgeInputFormat.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.io.formats; + +import org.apache.giraph.graph.ReverseEdgeDuplicator; +import org.apache.giraph.io.EdgeReader; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +import java.io.IOException; + +/** + * Simple text-based {@link org.apache.giraph.io.EdgeInputFormat} for + * unweighted graphs with int ids. + * + * Each line consists of: source_vertex, target_vertex + * + * This version also creates the reverse edges. + */ +public class IntNullReverseTextEdgeInputFormat + extends IntNullTextEdgeInputFormat { + @Override + public EdgeReader<IntWritable, NullWritable> createEdgeReader( + InputSplit split, TaskAttemptContext context) throws IOException { + EdgeReader<IntWritable, NullWritable> edgeReader = + super.createEdgeReader(split, context); + return new ReverseEdgeDuplicator<IntWritable, NullWritable>(edgeReader); + } +} http://git-wip-us.apache.org/repos/asf/giraph/blob/89bba953/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java b/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java index 112a267..c080622 100644 --- a/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java +++ b/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java @@ -20,11 +20,12 @@ package org.apache.giraph.io; import org.apache.giraph.BspCase; import org.apache.giraph.conf.GiraphClasses; -import org.apache.giraph.vertex.EdgeListVertex; import org.apache.giraph.io.formats.IdWithValueTextOutputFormat; import org.apache.giraph.io.formats.IntIntTextVertexValueInputFormat; +import org.apache.giraph.io.formats.IntNullReverseTextEdgeInputFormat; import org.apache.giraph.io.formats.IntNullTextEdgeInputFormat; import org.apache.giraph.utils.InternalVertexRunner; +import org.apache.giraph.vertex.EdgeListVertex; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.NullWritable; import org.junit.Test; @@ -75,6 +76,37 @@ public class TestEdgeInput extends BspCase { assertEquals(1, (int) values.get(4)); } + // It should be able to build a graph starting from the edges only. + // Using ReverseEdgeDuplicator it should also create the reverse edges. + // Vertices should be implicitly created with default values. + @Test + public void testEdgesOnlyWithReverse() throws Exception { + String[] edges = new String[] { + "1 2", + "2 3", + "2 4", + "4 1" + }; + + GiraphClasses classes = new GiraphClasses(); + classes.setVertexClass(TestVertexWithNumEdges.class); + classes.setEdgeInputFormatClass(IntNullReverseTextEdgeInputFormat.class); + classes.setVertexOutputFormatClass(IdWithValueTextOutputFormat.class); + Map<String, String> params = ImmutableMap.of(); + Iterable<String> results = InternalVertexRunner.run(classes, params, + null, edges); + + Map<Integer, Integer> values = parseResults(results); + + // Check that all vertices with outgoing edges have been created + assertEquals(4, values.size()); + // Check the number of edges for each vertex + assertEquals(2, (int) values.get(1)); + assertEquals(3, (int) values.get(2)); + assertEquals(1, (int) values.get(3)); + assertEquals(2, (int) values.get(4)); + } + // It should be able to build a graph by specifying vertex data and edges // as separate input formats. @Test
