Repository: giraph Updated Branches: refs/heads/trunk 2ce3efbb7 -> 61cd43b19
GIRAPH-895 : Trim the edges in Giraph (edunov via pavanka) Project: http://git-wip-us.apache.org/repos/asf/giraph/repo Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/61cd43b1 Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/61cd43b1 Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/61cd43b1 Branch: refs/heads/trunk Commit: 61cd43b19f55bf15dfb07f8cb5c785a357660a0e Parents: 2ce3efb Author: Pavan Kumar <[email protected]> Authored: Tue May 13 14:54:32 2014 -0700 Committer: Pavan Kumar <[email protected]> Committed: Tue May 13 14:54:32 2014 -0700 ---------------------------------------------------------------------- CHANGELOG | 2 ++ .../org/apache/giraph/edge/ArrayListEdges.java | 8 ++++- .../org/apache/giraph/edge/ByteArrayEdges.java | 13 +++++++- .../java/org/apache/giraph/edge/EdgeStore.java | 7 +++++ .../apache/giraph/edge/HashMultimapEdges.java | 8 ++++- .../apache/giraph/edge/IntNullArrayEdges.java | 8 ++++- .../giraph/edge/LongDoubleArrayEdges.java | 13 ++++++-- .../giraph/edge/LongDoubleHashMapEdges.java | 8 ++++- .../apache/giraph/edge/LongNullArrayEdges.java | 12 +++++-- .../giraph/edge/LongNullHashSetEdges.java | 9 +++++- .../apache/giraph/graph/ComputeCallable.java | 5 +++ .../org/apache/giraph/graph/DefaultVertex.java | 10 +++++- .../org/apache/giraph/utils/EdgeIterables.java | 3 ++ .../java/org/apache/giraph/utils/Trimmable.java | 33 ++++++++++++++++++++ 14 files changed, 126 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index c38a65f..efc76b6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ Giraph Change Log Release 1.1.0 - unreleased + GIRAPH-895 : Trim the edges in Giraph (edunov via pavanka) + GIRAPH-889: Update Yourkit Profiler (yhdong via pavanka) GIRAPH-891: Make MessageStoreFactory configurable (rohankarwa via majakabiljo) http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/giraph-core/src/main/java/org/apache/giraph/edge/ArrayListEdges.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/edge/ArrayListEdges.java b/giraph-core/src/main/java/org/apache/giraph/edge/ArrayListEdges.java index 4eb3378..97134da 100644 --- a/giraph-core/src/main/java/org/apache/giraph/edge/ArrayListEdges.java +++ b/giraph-core/src/main/java/org/apache/giraph/edge/ArrayListEdges.java @@ -19,6 +19,7 @@ package org.apache.giraph.edge; import com.google.common.collect.Lists; +import org.apache.giraph.utils.Trimmable; import org.apache.giraph.utils.WritableUtils; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; @@ -38,7 +39,7 @@ import java.util.Iterator; */ public class ArrayListEdges<I extends WritableComparable, E extends Writable> extends ConfigurableOutEdges<I, E> - implements MutableOutEdges<I, E> { + implements MutableOutEdges<I, E>, Trimmable { /** List of edges. */ private ArrayList<Edge<I, E>> edgeList; @@ -116,4 +117,9 @@ public class ArrayListEdges<I extends WritableComparable, E extends Writable> edgeList.add(edge); } } + + @Override + public void trim() { + edgeList.trimToSize(); + } } http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/giraph-core/src/main/java/org/apache/giraph/edge/ByteArrayEdges.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/edge/ByteArrayEdges.java b/giraph-core/src/main/java/org/apache/giraph/edge/ByteArrayEdges.java index 3f69c5c..271e9c5 100644 --- a/giraph-core/src/main/java/org/apache/giraph/edge/ByteArrayEdges.java +++ b/giraph-core/src/main/java/org/apache/giraph/edge/ByteArrayEdges.java @@ -22,6 +22,7 @@ import com.google.common.collect.Iterators; import com.google.common.collect.UnmodifiableIterator; import org.apache.giraph.utils.ExtendedDataInput; import org.apache.giraph.utils.ExtendedDataOutput; +import org.apache.giraph.utils.Trimmable; import org.apache.giraph.utils.WritableUtils; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; @@ -29,6 +30,7 @@ import org.apache.hadoop.io.WritableComparable; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.Arrays; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -44,7 +46,7 @@ import java.util.List; */ public class ByteArrayEdges<I extends WritableComparable, E extends Writable> extends ConfigurableOutEdges<I, E> - implements ReuseObjectsOutEdges<I, E> { + implements ReuseObjectsOutEdges<I, E>, Trimmable { /** Serialized edges. */ private byte[] serializedEdges; /** Number of bytes used in serializedEdges. */ @@ -133,6 +135,15 @@ public class ByteArrayEdges<I extends WritableComparable, E extends Writable> return edgeCount; } + @Override + public void trim() { + if (serializedEdges != null && + serializedEdges.length > serializedEdgesBytesUsed) { + serializedEdges = + Arrays.copyOf(serializedEdges, serializedEdgesBytesUsed); + } + } + /** * Iterator that reuses the same Edge object. */ http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java b/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java index dd8f2a3..57ad387 100644 --- a/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java +++ b/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java @@ -25,6 +25,7 @@ import org.apache.giraph.partition.Partition; import org.apache.giraph.utils.ByteArrayVertexIdEdges; import org.apache.giraph.utils.CallableFactory; import org.apache.giraph.utils.ProgressableUtils; +import org.apache.giraph.utils.Trimmable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.util.Progressable; @@ -198,6 +199,9 @@ public class EdgeStore<I extends WritableComparable, vertex = configuration.createVertex(); vertex.initialize(vertexId, configuration.createVertexValue(), outEdges); + if (vertex instanceof Trimmable) { + ((Trimmable) vertex).trim(); + } partition.putVertex(vertex); } } else { @@ -210,6 +214,9 @@ public class EdgeStore<I extends WritableComparable, vertex.addEdge(edge); } } + if (vertex instanceof Trimmable) { + ((Trimmable) vertex).trim(); + } // Some Partition implementations (e.g. ByteArrayPartition) // require us to put back the vertex after modifying it. partition.saveVertex(vertex); http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/giraph-core/src/main/java/org/apache/giraph/edge/HashMultimapEdges.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/edge/HashMultimapEdges.java b/giraph-core/src/main/java/org/apache/giraph/edge/HashMultimapEdges.java index e5e39de..4f44a53 100644 --- a/giraph-core/src/main/java/org/apache/giraph/edge/HashMultimapEdges.java +++ b/giraph-core/src/main/java/org/apache/giraph/edge/HashMultimapEdges.java @@ -22,6 +22,7 @@ import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.UnmodifiableIterator; import org.apache.giraph.utils.EdgeIterables; +import org.apache.giraph.utils.Trimmable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; @@ -42,7 +43,7 @@ import java.util.Map; */ public class HashMultimapEdges<I extends WritableComparable, E extends Writable> extends ConfigurableOutEdges<I, E> - implements MultiRandomAccessOutEdges<I, E> { + implements MultiRandomAccessOutEdges<I, E>, Trimmable { /** Multimap from target vertex id to edge values. */ private ArrayListMultimap<I, E> edgeMultimap; @@ -148,4 +149,9 @@ public class HashMultimapEdges<I extends WritableComparable, E extends Writable> edgeMultimap.put(targetVertexId, edgeValue); } } + + @Override + public void trim() { + edgeMultimap.trimToSize(); + } } http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/giraph-core/src/main/java/org/apache/giraph/edge/IntNullArrayEdges.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/edge/IntNullArrayEdges.java b/giraph-core/src/main/java/org/apache/giraph/edge/IntNullArrayEdges.java index e54b41f..31cc611 100644 --- a/giraph-core/src/main/java/org/apache/giraph/edge/IntNullArrayEdges.java +++ b/giraph-core/src/main/java/org/apache/giraph/edge/IntNullArrayEdges.java @@ -19,6 +19,7 @@ package org.apache.giraph.edge; import org.apache.giraph.utils.EdgeIterables; +import org.apache.giraph.utils.Trimmable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.NullWritable; @@ -40,7 +41,7 @@ import java.util.Iterator; * but edge removals are expensive. */ public class IntNullArrayEdges - implements ReuseObjectsOutEdges<IntWritable, NullWritable> { + implements ReuseObjectsOutEdges<IntWritable, NullWritable>, Trimmable { /** Array of target vertex ids */ private IntArrayList neighbors; @@ -136,4 +137,9 @@ public class IntNullArrayEdges neighbors.add(in.readInt()); } } + + @Override + public void trim() { + neighbors.trim(); + } } http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/giraph-core/src/main/java/org/apache/giraph/edge/LongDoubleArrayEdges.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/edge/LongDoubleArrayEdges.java b/giraph-core/src/main/java/org/apache/giraph/edge/LongDoubleArrayEdges.java index 324a32c..8bd1cef 100644 --- a/giraph-core/src/main/java/org/apache/giraph/edge/LongDoubleArrayEdges.java +++ b/giraph-core/src/main/java/org/apache/giraph/edge/LongDoubleArrayEdges.java @@ -25,6 +25,7 @@ import it.unimi.dsi.fastutil.longs.LongArrayList; import it.unimi.dsi.fastutil.longs.LongIterator; import org.apache.giraph.utils.EdgeIterables; +import org.apache.giraph.utils.Trimmable; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.LongWritable; @@ -42,7 +43,7 @@ import java.util.Iterator; */ public class LongDoubleArrayEdges implements ReuseObjectsOutEdges<LongWritable, DoubleWritable>, - MutableOutEdges<LongWritable, DoubleWritable> { + MutableOutEdges<LongWritable, DoubleWritable>, Trimmable { /** Array of target vertex ids. */ private LongArrayList neighbors; /** Array of edge values. */ @@ -75,7 +76,7 @@ public class LongDoubleArrayEdges * If the backing arrays are more than four times as big as the number of * elements, halve their size. */ - private void trim() { + private void trimBack() { if (neighbors.elements().length > 4 * neighbors.size()) { neighbors.trim(neighbors.elements().length / 2); edgeValues.trim(neighbors.elements().length / 2); @@ -99,7 +100,7 @@ public class LongDoubleArrayEdges edgeValues.set(i, edgeValues.popDouble()); } // If needed after the removal, trim the arrays. - trim(); + trimBack(); } @Override @@ -228,4 +229,10 @@ public class LongDoubleArrayEdges edgeValues.add(in.readDouble()); } } + + @Override + public void trim() { + neighbors.trim(); + edgeValues.trim(); + } } http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/giraph-core/src/main/java/org/apache/giraph/edge/LongDoubleHashMapEdges.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/edge/LongDoubleHashMapEdges.java b/giraph-core/src/main/java/org/apache/giraph/edge/LongDoubleHashMapEdges.java index e6c8229..6bfbd20 100644 --- a/giraph-core/src/main/java/org/apache/giraph/edge/LongDoubleHashMapEdges.java +++ b/giraph-core/src/main/java/org/apache/giraph/edge/LongDoubleHashMapEdges.java @@ -24,6 +24,7 @@ import it.unimi.dsi.fastutil.longs.Long2DoubleOpenHashMap; import it.unimi.dsi.fastutil.objects.ObjectIterator; import org.apache.giraph.utils.EdgeIterables; +import org.apache.giraph.utils.Trimmable; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.LongWritable; @@ -43,7 +44,7 @@ import java.util.Iterator; public class LongDoubleHashMapEdges implements StrictRandomAccessOutEdges<LongWritable, DoubleWritable>, ReuseObjectsOutEdges<LongWritable, DoubleWritable>, - MutableOutEdges<LongWritable, DoubleWritable> { + MutableOutEdges<LongWritable, DoubleWritable>, Trimmable { /** Hash map from target vertex id to edge value. */ private Long2DoubleOpenHashMap edgeMap; /** Representative edge value object, used by getEdgeValue(). */ @@ -125,6 +126,11 @@ public class LongDoubleHashMapEdges }; } + @Override + public void trim() { + edgeMap.trim(); + } + /** Helper class for a mutable edge that modifies the backing map entry. */ private static class LongDoubleHashMapMutableEdge extends DefaultEdge<LongWritable, DoubleWritable> { http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/giraph-core/src/main/java/org/apache/giraph/edge/LongNullArrayEdges.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/edge/LongNullArrayEdges.java b/giraph-core/src/main/java/org/apache/giraph/edge/LongNullArrayEdges.java index b7b3af5..06815ef 100644 --- a/giraph-core/src/main/java/org/apache/giraph/edge/LongNullArrayEdges.java +++ b/giraph-core/src/main/java/org/apache/giraph/edge/LongNullArrayEdges.java @@ -22,6 +22,7 @@ import it.unimi.dsi.fastutil.longs.LongArrayList; import it.unimi.dsi.fastutil.longs.LongIterator; import org.apache.giraph.utils.EdgeIterables; +import org.apache.giraph.utils.Trimmable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; @@ -39,7 +40,7 @@ import java.util.Iterator; */ public class LongNullArrayEdges implements ReuseObjectsOutEdges<LongWritable, NullWritable>, - MutableOutEdges<LongWritable, NullWritable> { + MutableOutEdges<LongWritable, NullWritable>, Trimmable { /** Array of target vertex ids. */ private LongArrayList neighbors; @@ -67,7 +68,7 @@ public class LongNullArrayEdges * If the backing array is more than four times as big as the number of * elements, halve its size. */ - private void trim() { + private void trimBack() { if (neighbors.elements().length > 4 * neighbors.size()) { neighbors.trim(neighbors.elements().length / 2); } @@ -88,7 +89,7 @@ public class LongNullArrayEdges neighbors.set(i, neighbors.popLong()); } // If needed after the removal, trim the array. - trim(); + trimBack(); } @Override @@ -161,5 +162,10 @@ public class LongNullArrayEdges neighbors.add(in.readLong()); } } + + @Override + public void trim() { + neighbors.trim(); + } } http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/giraph-core/src/main/java/org/apache/giraph/edge/LongNullHashSetEdges.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/edge/LongNullHashSetEdges.java b/giraph-core/src/main/java/org/apache/giraph/edge/LongNullHashSetEdges.java index a21de91..0c6f76a 100644 --- a/giraph-core/src/main/java/org/apache/giraph/edge/LongNullHashSetEdges.java +++ b/giraph-core/src/main/java/org/apache/giraph/edge/LongNullHashSetEdges.java @@ -22,6 +22,7 @@ import it.unimi.dsi.fastutil.longs.LongIterator; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import org.apache.giraph.utils.EdgeIterables; +import org.apache.giraph.utils.Trimmable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; @@ -41,7 +42,8 @@ import java.util.Iterator; public class LongNullHashSetEdges implements ReuseObjectsOutEdges<LongWritable, NullWritable>, MutableOutEdges<LongWritable, NullWritable>, - StrictRandomAccessOutEdges<LongWritable, NullWritable> { + StrictRandomAccessOutEdges<LongWritable, NullWritable>, + Trimmable { /** Hash set of target vertex ids. */ private LongOpenHashSet neighbors; @@ -143,4 +145,9 @@ public class LongNullHashSetEdges // Only set value for an existing edge. // If the edge exist, the Null value is already there. } + + @Override + public void trim() { + neighbors.trim(); + } } http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/giraph-core/src/main/java/org/apache/giraph/graph/ComputeCallable.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/graph/ComputeCallable.java b/giraph-core/src/main/java/org/apache/giraph/graph/ComputeCallable.java index 0303530..86a1a52 100644 --- a/giraph-core/src/main/java/org/apache/giraph/graph/ComputeCallable.java +++ b/giraph-core/src/main/java/org/apache/giraph/graph/ComputeCallable.java @@ -34,6 +34,7 @@ import org.apache.giraph.time.Time; import org.apache.giraph.time.Times; import org.apache.giraph.utils.MemoryUtils; import org.apache.giraph.utils.TimedLogger; +import org.apache.giraph.utils.Trimmable; import org.apache.giraph.worker.WorkerContext; import org.apache.giraph.worker.WorkerProgress; import org.apache.giraph.worker.WorkerThreadAggregatorUsage; @@ -250,6 +251,10 @@ public class ComputeCallable<I extends WritableComparable, V extends Writable, } // Need to unwrap the mutated edges (possibly) vertex.unwrapMutableEdges(); + //Compact edges representation if possible + if (vertex instanceof Trimmable) { + ((Trimmable) vertex).trim(); + } // Write vertex to superstep output (no-op if it is not used) vertexWriter.writeVertex(vertex); // Need to save the vertex changes (possibly) http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/giraph-core/src/main/java/org/apache/giraph/graph/DefaultVertex.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/graph/DefaultVertex.java b/giraph-core/src/main/java/org/apache/giraph/graph/DefaultVertex.java index 03cb3c1..e704635 100644 --- a/giraph-core/src/main/java/org/apache/giraph/graph/DefaultVertex.java +++ b/giraph-core/src/main/java/org/apache/giraph/graph/DefaultVertex.java @@ -19,6 +19,7 @@ package org.apache.giraph.graph; import org.apache.giraph.conf.DefaultImmutableClassesGiraphConfigurable; import org.apache.giraph.edge.Edge; +import org.apache.giraph.utils.Trimmable; import org.apache.giraph.edge.MultiRandomAccessOutEdges; import org.apache.giraph.edge.MutableEdge; import org.apache.giraph.edge.MutableEdgesIterable; @@ -43,7 +44,7 @@ import java.util.Iterator; public class DefaultVertex<I extends WritableComparable, V extends Writable, E extends Writable> extends DefaultImmutableClassesGiraphConfigurable<I, V, E> - implements Vertex<I, V, E> { + implements Vertex<I, V, E>, Trimmable { /** Vertex id. */ private I id; /** Vertex value. */ @@ -215,6 +216,13 @@ public class DefaultVertex<I extends WritableComparable, } @Override + public void trim() { + if (edges instanceof Trimmable) { + ((Trimmable) edges).trim(); + } + } + + @Override public void addEdge(Edge<I, E> edge) { edges.add(edge); } http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/giraph-core/src/main/java/org/apache/giraph/utils/EdgeIterables.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/utils/EdgeIterables.java b/giraph-core/src/main/java/org/apache/giraph/utils/EdgeIterables.java index 0d2b810..3c2eb4d 100644 --- a/giraph-core/src/main/java/org/apache/giraph/utils/EdgeIterables.java +++ b/giraph-core/src/main/java/org/apache/giraph/utils/EdgeIterables.java @@ -151,5 +151,8 @@ public class EdgeIterables { for (Edge<I, E> edge : edgesIterable) { edges.add(edge); } + if (edges instanceof Trimmable) { + ((Trimmable) edges).trim(); + } } } http://git-wip-us.apache.org/repos/asf/giraph/blob/61cd43b1/giraph-core/src/main/java/org/apache/giraph/utils/Trimmable.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/utils/Trimmable.java b/giraph-core/src/main/java/org/apache/giraph/utils/Trimmable.java new file mode 100644 index 0000000..3966c71 --- /dev/null +++ b/giraph-core/src/main/java/org/apache/giraph/utils/Trimmable.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.utils; + +/** + * Interface for {@link org.apache.giraph.edge.OutEdges} and + * {@link org.apache.giraph.graph.Vertex} implementations capable to optimize + * in-memory data representation. + */ +public interface Trimmable { + + /** + * Compacts all recent updates to this object. + */ + void trim(); + +}
