Updated Branches: refs/heads/trunk adc87aee5 -> 9ded6c372
http://git-wip-us.apache.org/repos/asf/giraph/blob/9ded6c37/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/package-info.java ---------------------------------------------------------------------- diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/package-info.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/package-info.java new file mode 100644 index 0000000..02dd59f --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * Gora Utils for Giraph + */ +package org.apache.giraph.io.gora.utils; http://git-wip-us.apache.org/repos/asf/giraph/blob/9ded6c37/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexInputFormat.java ---------------------------------------------------------------------- diff --git a/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexInputFormat.java b/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexInputFormat.java new file mode 100644 index 0000000..7de9346 --- /dev/null +++ b/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexInputFormat.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.io.gora; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.apache.avro.util.Utf8; +import org.apache.giraph.edge.Edge; +import org.apache.giraph.edge.EdgeFactory; +import org.apache.giraph.graph.Vertex; +import org.apache.giraph.io.gora.generated.GVertex; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +/** + * Test vertex input format that reads generated GVertex data beans. + */ +public class GoraTestVertexInputFormat + extends GoraVertexInputFormat<LongWritable, DoubleWritable, + FloatWritable> { + + /** + * Default constructor. + */ + public GoraTestVertexInputFormat() { + } + + /** + * Stores the test data and creates the vertex reader used inside Hadoop. + * @param split split to be read. + * @param context Task attempt context to be used. + * @return GoraVertexReader Vertex reader to be used by Hadoop. + */ + @Override + public GoraVertexReader createVertexReader( + InputSplit split, TaskAttemptContext context) throws IOException { + putArtificialData(); + return new GoraGVertexVertexReader(); + } + + /** + * Puts three edge-less test vertices (keys 1, 10, 100) into the data store. + */ + @SuppressWarnings("unchecked") + private static void putArtificialData() { + getDataStore().put("1", createVertex("1", null)); + getDataStore().put("10", createVertex("10", null)); + getDataStore().put("100", createVertex("100", null)); + getDataStore().flush(); + } + + /** + * Creates a vertex using an id and a map of edges. + * @param id Vertex id. + * @param edges Map of edge ids to edge values; may be null for no edges. + * @return GVertex created. 
+ */ + public static GVertex createVertex(String id, Map<String, String> edges) { + GVertex newVrtx = new GVertex(); + newVrtx.setVertexId(new Utf8(id)); + if (edges != null) { + for (String edgeId : edges.keySet()) + newVrtx.putToEdges(new Utf8(edgeId), new Utf8(edges.get(edgeId))); + } + return newVrtx; + } + + /** + * Gora vertex reader that turns GVertex beans into Giraph vertices. + */ + protected class GoraGVertexVertexReader extends GoraVertexReader { + + /** + * Transforms a GoraObject into a Vertex object. + * @param goraObject Object from Gora to be translated; cast to GVertex. + * @return Vertex Result from transforming the gora object. + */ + @Override + protected Vertex<LongWritable, DoubleWritable, FloatWritable> + transformVertex(Object goraObject) { + Vertex<LongWritable, DoubleWritable, FloatWritable> vertex; + /* create the actual vertex */ + vertex = getConf().createVertex(); + GVertex tmpGVertex = (GVertex) goraObject; + + LongWritable vrtxId = new LongWritable( + Long.parseLong(tmpGVertex.getVertexId().toString())); + DoubleWritable vrtxValue = new DoubleWritable(tmpGVertex.getValue()); + vertex.initialize(vrtxId, vrtxValue); + if (tmpGVertex.getEdges() != null && !tmpGVertex.getEdges().isEmpty()) { + Set<Utf8> keyIt = tmpGVertex.getEdges().keySet(); + for (Utf8 key : keyIt) { + String keyVal = key.toString(); + String valVal = tmpGVertex.getEdges().get(key).toString(); + Edge<LongWritable, FloatWritable> edge; + if (!keyVal.contains("vertexId")) { + edge = EdgeFactory.create( + new LongWritable(Long.parseLong(keyVal)), + new FloatWritable(Float.parseFloat(valVal))); + vertex.addEdge(edge); + } + } + } + return vertex; + } + } +} http://git-wip-us.apache.org/repos/asf/giraph/blob/9ded6c37/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexOutputFormat.java ---------------------------------------------------------------------- diff --git a/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexOutputFormat.java 
b/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexOutputFormat.java new file mode 100644 index 0000000..5170d03 --- /dev/null +++ b/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexOutputFormat.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.io.gora; + +import java.io.IOException; +import java.util.Iterator; +import java.util.Map; + +import junit.framework.Assert; + +import org.apache.avro.util.Utf8; +import org.apache.giraph.edge.Edge; +import org.apache.giraph.graph.Vertex; +import org.apache.giraph.io.VertexWriter; +import org.apache.giraph.io.gora.generated.GVertex; +import org.apache.giraph.io.gora.generated.GVertexResult; +import org.apache.gora.persistency.Persistent; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +/** + * Implementation of a specific writer for a generated data bean. 
+ */ +public class GoraTestVertexOutputFormat + extends GoraVertexOutputFormat<LongWritable, DoubleWritable, + FloatWritable> { + + /** + * Default constructor. + */ + public GoraTestVertexOutputFormat() { + } + + @Override + public VertexWriter<LongWritable, DoubleWritable, FloatWritable> + createVertexWriter(TaskAttemptContext context) + throws IOException, InterruptedException { + return new GoraGVertexVertexWriter(); + } + + /** + * Gora vertex writer. + */ + protected class GoraGVertexVertexWriter extends GoraVertexWriter { + + @Override + protected Persistent getGoraVertex( + Vertex<LongWritable, DoubleWritable, FloatWritable> vertex) { + GVertexResult tmpGVertex = new GVertexResult(); + tmpGVertex.setVertexId(new Utf8(vertex.getId().toString())); + tmpGVertex.setValue(Float.parseFloat(vertex.getValue().toString())); + Iterator<Edge<LongWritable, FloatWritable>> it = + vertex.getEdges().iterator(); + while (it.hasNext()) { + Edge<LongWritable, FloatWritable> edge = it.next(); + tmpGVertex.putToEdges( + new Utf8(edge.getTargetVertexId().toString()), + new Utf8(edge.getValue().toString())); + } + getLogger().debug("GoraObject created: " + tmpGVertex.toString()); + return tmpGVertex; + } + + @Override + public void writeVertex( + Vertex<LongWritable, DoubleWritable, FloatWritable> vertex) + throws IOException, InterruptedException { + super.writeVertex(vertex); + // Asserting + Assert.assertEquals(createVertex(vertex.getId().toString(), null), + getDataStore().get(vertex.getId().toString())); + } + + /** + * Creates a vertex using an id and a map of edges. + * @param id Vertex id. + * @param edges Map of edge ids to edge values; may be null for no edges. + * @return GVertex created. 
+ */ + public GVertex createVertex(String id, Map<String, String> edges) { + GVertex newVrtx = new GVertex(); + newVrtx.setVertexId(new Utf8(id)); + if (edges != null) { + for (String edgeId : edges.keySet()) + newVrtx.putToEdges(new Utf8(edgeId), new Utf8(edges.get(edgeId))); + } + return newVrtx; + } + + @Override + protected Object getGoraKey( + Vertex<LongWritable, DoubleWritable, FloatWritable> vertex) { + String goraKey = String.valueOf(vertex.getId()); + return goraKey; + } + } +} http://git-wip-us.apache.org/repos/asf/giraph/blob/9ded6c37/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexInputFormat.java ---------------------------------------------------------------------- diff --git a/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexInputFormat.java b/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexInputFormat.java new file mode 100644 index 0000000..b42ab49 --- /dev/null +++ b/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexInputFormat.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.io.gora; + +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_DATASTORE_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_END_KEY; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEYS_FACTORY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_PERSISTENT_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_START_KEY; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; + +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.graph.BasicComputation; +import org.apache.giraph.graph.Vertex; +import org.apache.giraph.io.formats.IdWithValueTextOutputFormat; +import org.apache.giraph.utils.InternalVertexRunner; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.junit.Test; +import org.junit.Assert; + +/** + * Test class for the Gora vertex input format. + */ +public class TestGoraVertexInputFormat { + + @Test + public void getEmptyDb() throws Exception { + Iterable<String> results; + Iterator<String> result; + GiraphConfiguration conf = new GiraphConfiguration(); + GIRAPH_GORA_DATASTORE_CLASS. + set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_KEYS_FACTORY_CLASS. + set(conf,"org.apache.giraph.io.gora.utils.DefaultKeyFactory"); + GIRAPH_GORA_KEY_CLASS.set(conf,"java.lang.String"); + GIRAPH_GORA_PERSISTENT_CLASS. 
+ set(conf,"org.apache.giraph.io.gora.generated.GVertex"); + GIRAPH_GORA_START_KEY.set(conf,"1"); + GIRAPH_GORA_END_KEY.set(conf,"10"); + conf.set("io.serializations", + "org.apache.hadoop.io.serializer.WritableSerialization," + + "org.apache.hadoop.io.serializer.JavaSerialization"); + conf.setComputationClass(EmptyComputation.class); + conf.setVertexInputFormatClass(GoraTestVertexInputFormat.class); + results = InternalVertexRunner.run(conf, new String[0], new String[0]); + Assert.assertNotNull(results); + result = results.iterator(); + Assert.assertFalse(result.hasNext()); + } + + @Test + public void getTestDb() throws Exception { + Iterable<String> results; + GiraphConfiguration conf = new GiraphConfiguration(); + GIRAPH_GORA_DATASTORE_CLASS. + set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_KEYS_FACTORY_CLASS. + set(conf,"org.apache.giraph.io.gora.utils.DefaultKeyFactory"); + GIRAPH_GORA_KEY_CLASS.set(conf,"java.lang.String"); + GIRAPH_GORA_PERSISTENT_CLASS. + set(conf,"org.apache.giraph.io.gora.generated.GVertex"); + GIRAPH_GORA_START_KEY.set(conf,"1"); + GIRAPH_GORA_END_KEY.set(conf,"100"); + conf.set("io.serializations", + "org.apache.hadoop.io.serializer.WritableSerialization," + + "org.apache.hadoop.io.serializer.JavaSerialization"); + conf.setComputationClass(EmptyComputation.class); + conf.setVertexInputFormatClass(GoraTestVertexInputFormat.class); + conf.setVertexOutputFormatClass(IdWithValueTextOutputFormat.class); + results = InternalVertexRunner.run(conf, new String[0], new String[0]); + Assert.assertNotNull(results); + Assert.assertEquals(3, ((ArrayList<?>)results).size()); + if (results instanceof Collection<?> + & (((Collection<?>)results).size() == 3)) { + Assert.assertEquals("10\t0.0", + ((ArrayList<?>)results).get(0).toString()); + Assert.assertEquals("1\t0.0", + ((ArrayList<?>)results).get(1).toString()); + Assert.assertEquals("100\t0.0", + ((ArrayList<?>)results).get(2).toString()); + } + } + + /* + Test computation that 
only makes each vertex vote to halt; it sends no messages. + It does not modify vertex values or edges. + */ + public static class EmptyComputation + extends BasicComputation<LongWritable, DoubleWritable, + FloatWritable, LongWritable> { + + @Override + public void compute( + Vertex<LongWritable, DoubleWritable, FloatWritable> vertex, + Iterable<LongWritable> messages) throws IOException { + vertex.voteToHalt(); + } + } +} http://git-wip-us.apache.org/repos/asf/giraph/blob/9ded6c37/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexOutputFormat.java ---------------------------------------------------------------------- diff --git a/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexOutputFormat.java b/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexOutputFormat.java new file mode 100644 index 0000000..2c2cb5e --- /dev/null +++ b/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexOutputFormat.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.io.gora; + +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_DATASTORE_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_END_KEY; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEYS_FACTORY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_PERSISTENT_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_START_KEY; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_DATASTORE_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_KEY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_PERSISTENT_CLASS; + +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.io.gora.TestGoraVertexInputFormat.EmptyComputation; +import org.apache.giraph.utils.InternalVertexRunner; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test class for the Gora vertex output format, configured with a MemStore. + */ +public class TestGoraVertexOutputFormat { + + @Test + public void getWritingDb() throws Exception { + Iterable<String> results; + GiraphConfiguration conf = new GiraphConfiguration(); + GIRAPH_GORA_DATASTORE_CLASS. + set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_KEYS_FACTORY_CLASS. + set(conf,"org.apache.giraph.io.gora.utils.DefaultKeyFactory"); + GIRAPH_GORA_KEY_CLASS.set(conf,"java.lang.String"); + GIRAPH_GORA_PERSISTENT_CLASS. + set(conf,"org.apache.giraph.io.gora.generated.GVertex"); + GIRAPH_GORA_START_KEY.set(conf,"1"); + GIRAPH_GORA_END_KEY.set(conf,"10"); + GIRAPH_GORA_OUTPUT_DATASTORE_CLASS. 
+ set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_OUTPUT_KEY_CLASS.set(conf, "java.lang.String"); + GIRAPH_GORA_OUTPUT_PERSISTENT_CLASS. + set(conf, "org.apache.giraph.io.gora.generated.GVertex"); + conf.set("io.serializations", + "org.apache.hadoop.io.serializer.WritableSerialization," + + "org.apache.hadoop.io.serializer.JavaSerialization"); + conf.setComputationClass(EmptyComputation.class); + conf.setVertexInputFormatClass(GoraTestVertexInputFormat.class); + // Parameters for output + GIRAPH_GORA_OUTPUT_DATASTORE_CLASS. + set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_OUTPUT_KEY_CLASS.set(conf, "java.lang.String"); + GIRAPH_GORA_OUTPUT_PERSISTENT_CLASS. + set(conf,"org.apache.giraph.io.gora.generated.GVertex"); + conf.setVertexOutputFormatClass(GoraTestVertexOutputFormat.class); + results = InternalVertexRunner.run(conf, new String[0], new String[0]); + Assert.assertNotNull(results); + } +}
