Renamed the setOutput method in GoraSparkEngine.java to generateOutputConf. Added a new method to GoraSparkEngine.java that creates a job and returns the necessary conf for Spark to use.
Project: http://git-wip-us.apache.org/repos/asf/gora/repo Commit: http://git-wip-us.apache.org/repos/asf/gora/commit/85849111 Tree: http://git-wip-us.apache.org/repos/asf/gora/tree/85849111 Diff: http://git-wip-us.apache.org/repos/asf/gora/diff/85849111 Branch: refs/heads/master Commit: 8584911176712f63e316707ace23a12060588cdc Parents: 62be0c3 Author: Furkan KAMACI <[email protected]> Authored: Mon Aug 17 23:25:15 2015 +0300 Committer: Furkan KAMACI <[email protected]> Committed: Mon Aug 17 23:25:15 2015 +0300 ---------------------------------------------------------------------- .../org/apache/gora/spark/GoraSparkEngine.java | 28 +++++++++++++++----- .../gora/tutorial/log/LogAnalyticsSpark.java | 5 +--- 2 files changed, 23 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/gora/blob/85849111/gora-core/src/main/java/org/apache/gora/spark/GoraSparkEngine.java ---------------------------------------------------------------------- diff --git a/gora-core/src/main/java/org/apache/gora/spark/GoraSparkEngine.java b/gora-core/src/main/java/org/apache/gora/spark/GoraSparkEngine.java index 9c64542..7a819fd 100644 --- a/gora-core/src/main/java/org/apache/gora/spark/GoraSparkEngine.java +++ b/gora-core/src/main/java/org/apache/gora/spark/GoraSparkEngine.java @@ -97,19 +97,35 @@ public class GoraSparkEngine<K, V extends Persistent> { } /** - * Sets the output parameters for the job + * Creates a job and sets the output parameters for the conf that Spark will use + * @param dataStore the datastore as the output + * @param reuseObjects whether to reuse objects in serialization + */ + public <K, V extends Persistent> Configuration generateOutputConf(DataStore<K, V> dataStore, + boolean reuseObjects) throws IOException { + + Configuration hadoopConf = new Configuration(); + GoraMapReduceUtils.setIOSerializations(hadoopConf, true); + Job job = Job.getInstance(hadoopConf); + + return 
generateOutputConf(job, dataStore.getClass(), dataStore.getKeyClass(), + dataStore.getPersistentClass(), reuseObjects); + } + + /** + * Sets the output parameters for the conf that Spark will use * @param job the job to set the properties for * @param dataStore the datastore as the output * @param reuseObjects whether to reuse objects in serialization */ - public <K, V extends Persistent> Configuration setOutput(Job job, + public <K, V extends Persistent> Configuration generateOutputConf(Job job, DataStore<K, V> dataStore, boolean reuseObjects) { - return setOutput(job, dataStore.getClass(), dataStore.getKeyClass(), - dataStore.getPersistentClass(), reuseObjects); + return generateOutputConf(job, dataStore.getClass(), dataStore.getKeyClass(), + dataStore.getPersistentClass(), reuseObjects); } /** - * Sets the output parameters for the job + * Sets the output parameters for the conf that Spark will use * * @param job the job to set the properties for * @param dataStoreClass the datastore class @@ -118,7 +134,7 @@ public class GoraSparkEngine<K, V extends Persistent> { * @param reuseObjects whether to reuse objects in serialization */ @SuppressWarnings("rawtypes") - public <K, V extends Persistent> Configuration setOutput(Job job, + public <K, V extends Persistent> Configuration generateOutputConf(Job job, Class<? 
extends DataStore> dataStoreClass, Class<K> keyClass, Class<V> persistentClass, boolean reuseObjects) { http://git-wip-us.apache.org/repos/asf/gora/blob/85849111/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java ---------------------------------------------------------------------- diff --git a/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java b/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java index d441de7..c66f7e4 100644 --- a/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java +++ b/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java @@ -190,10 +190,7 @@ public class LogAnalyticsSpark { // //write output to datastore - GoraMapReduceUtils.setIOSerializations(hadoopConf, true); - Job job = Job.getInstance(hadoopConf); - - Configuration sparkHadoopConf = goraSparkEngine.setOutput(job, outStore, true); + Configuration sparkHadoopConf = goraSparkEngine.generateOutputConf(outStore, true); reducedGoraRdd.saveAsNewAPIHadoopDataset(sparkHadoopConf); //
