adding Alfonso Nishikawa reviews and test cases
Project: http://git-wip-us.apache.org/repos/asf/gora/repo Commit: http://git-wip-us.apache.org/repos/asf/gora/commit/70637707 Tree: http://git-wip-us.apache.org/repos/asf/gora/tree/70637707 Diff: http://git-wip-us.apache.org/repos/asf/gora/diff/70637707 Branch: refs/heads/master Commit: 70637707188c30111862d716b9af94cf7c742d1a Parents: ac6c42b Author: Kevin <[email protected]> Authored: Mon Sep 19 12:06:08 2016 +0530 Committer: Kevin <[email protected]> Committed: Mon Sep 19 12:06:08 2016 +0530 ---------------------------------------------------------------------- .../mapreduce/MapReduceSerialization.java | 159 +++++++++++++++++++ .../TestHBaseStoreMapReduceSerialization.java | 56 +++++++ 2 files changed, 215 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/gora/blob/70637707/gora-core/src/examples/java/org/apache/gora/examples/mapreduce/MapReduceSerialization.java ---------------------------------------------------------------------- diff --git a/gora-core/src/examples/java/org/apache/gora/examples/mapreduce/MapReduceSerialization.java b/gora-core/src/examples/java/org/apache/gora/examples/mapreduce/MapReduceSerialization.java new file mode 100644 index 0000000..fd5c062 --- /dev/null +++ b/gora-core/src/examples/java/org/apache/gora/examples/mapreduce/MapReduceSerialization.java @@ -0,0 +1,159 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.gora.examples.mapreduce; + +import java.io.IOException; + +import org.apache.gora.examples.generated.WebPage; +import org.apache.gora.mapreduce.GoraMapper; +import org.apache.gora.mapreduce.GoraReducer; +import org.apache.gora.query.Query; +import org.apache.gora.store.DataStore; +import org.apache.gora.store.DataStoreFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Tests that serializing from Map to Reduce preserves the dirty state. + */ +public class MapReduceSerialization extends Configured implements Tool { + + private static final Logger LOG = LoggerFactory.getLogger(MapReduceSerialization.class); + + public MapReduceSerialization() { + + } + + public MapReduceSerialization(Configuration conf) { + setConf(conf); + } + + /** + * {@link CheckDirtyBitsSerializationMapper} gets a WebPage, sets + * the URL to the string "hola" and emits it with the same key. + */ + public static class CheckDirtyBitsSerializationMapper + extends GoraMapper<String, WebPage, Text, WebPage> { + + @Override + protected void map(String key, WebPage page, Context context) + throws IOException ,InterruptedException { + page.setUrl("hola") ; + context.write(new Text(key), page) ; + }; + } + + /** + * {@link CheckDirtyBytesSerializationReducer} just takes values, logs each one and its dirty flag, and emits + * them as is. 
+ */ + public static class CheckDirtyBytesSerializationReducer extends GoraReducer<Text, WebPage, + String, WebPage> { + + @Override + protected void reduce(Text key, Iterable<WebPage> values, Context context) + throws IOException ,InterruptedException { + for (WebPage val : values) { + LOG.info(key.toString()) ; + LOG.info(val.toString()) ; + LOG.info(String.valueOf(val.isDirty())) ; + context.write(key.toString(), val); + } + }; + + } + + /** + * Creates and returns the {@link Job} for submitting to Hadoop mapreduce; the job is initialized with {@code query} for the mapper and {@code outStore} for the reducer. + * @param inStore input data store (currently unused by this method) + * @param query query handed to {@link GoraMapper#initMapperJob} + * @return the created job + * @throws IOException + */ + public Job createJob(DataStore<String,WebPage> inStore, Query<String,WebPage> query + , DataStore<String,WebPage> outStore) throws IOException { + Job job = new Job(getConf()); + + job.setJobName("Check serialization of dirty bits"); + + job.setNumReduceTasks(1); + job.setJarByClass(getClass()); + + /* Mappers are initialized with GoraMapper#initMapperJob(). + * Instead of the CheckDirtyBitsSerializationMapper defined here, if the input is not + * obtained via Gora, any other mapper can be used, such as + * Hadoop-MapReduce's WordCount.TokenizerMapper. + */ + GoraMapper.initMapperJob(job, query, Text.class + , WebPage.class, CheckDirtyBitsSerializationMapper.class, true); + + /* Reducers are initialized with GoraReducer#initReducerJob(). + * If the output is not to be persisted via Gora, any reducer + * can be used instead. + */ + GoraReducer.initReducerJob(job, outStore, CheckDirtyBytesSerializationReducer.class); + + return job; + } + + public int mapReduceSerialization(DataStore<String,WebPage> inStore, + DataStore<String, WebPage> outStore) throws IOException, InterruptedException, ClassNotFoundException { + Query<String,WebPage> query = inStore.newQuery(); + query.setFields("url") ; + + Job job = createJob(inStore, query, outStore); + return job.waitForCompletion(true) ? 
0 : 1; + } + + @Override + public int run(String[] args) throws Exception { + + DataStore<String,WebPage> inStore; + DataStore<String,WebPage> outStore; + Configuration conf = new Configuration(); + if(args.length > 0) { + String dataStoreClass = args[0]; + inStore = DataStoreFactory.getDataStore(dataStoreClass, + String.class, WebPage.class, conf); + if(args.length > 1) { + dataStoreClass = args[1]; + } + outStore = DataStoreFactory.getDataStore(dataStoreClass, + String.class, WebPage.class, conf); + } else { + inStore = DataStoreFactory.getDataStore(String.class, WebPage.class, conf); + outStore = DataStoreFactory.getDataStore(String.class, WebPage.class, conf); + } + + return mapReduceSerialization(inStore, outStore); + } + + // Usage: MapReduceSerialization [<input datastore class> [<output datastore class>]] + public static void main(String[] args) throws Exception { + int ret = ToolRunner.run(new MapReduceSerialization(), args); + System.exit(ret); + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/gora/blob/70637707/gora-hbase/src/test/java/org/apache/gora/hbase/mapreduce/TestHBaseStoreMapReduceSerialization.java ---------------------------------------------------------------------- diff --git a/gora-hbase/src/test/java/org/apache/gora/hbase/mapreduce/TestHBaseStoreMapReduceSerialization.java b/gora-hbase/src/test/java/org/apache/gora/hbase/mapreduce/TestHBaseStoreMapReduceSerialization.java new file mode 100644 index 0000000..f73c77b --- /dev/null +++ b/gora-hbase/src/test/java/org/apache/gora/hbase/mapreduce/TestHBaseStoreMapReduceSerialization.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.gora.hbase.mapreduce; + +import org.apache.gora.examples.generated.WebPage; +import org.apache.gora.hbase.store.HBaseStore; +import org.apache.gora.hbase.util.HBaseClusterSingleton; +import org.apache.gora.mapreduce.MapReduceTestUtils; +import org.apache.gora.store.DataStoreFactory; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Runs the MapReduce serialization test against {@link HBaseStore}, using one store as both input and output. + */ +public class TestHBaseStoreMapReduceSerialization { + private static final HBaseClusterSingleton cluster = HBaseClusterSingleton.build(1); + + private HBaseStore<String, WebPage> webPageStore; + + @SuppressWarnings("unchecked") + @Before + public void setUp() throws Exception { + cluster.deleteAllTables(); + webPageStore = DataStoreFactory.getDataStore( + HBaseStore.class, String.class, WebPage.class, cluster.getConf()); + } + + @After + public void tearDown() throws Exception { + webPageStore.close(); + } + + @Test + public void testMapReduceSerialization() throws Exception { + MapReduceTestUtils.testMapReduceSerialization(cluster.getConf(), webPageStore, webPageStore); + } + +} \ No newline at end of file
