Repository: giraph Updated Branches: refs/heads/trunk 2117d1dbb -> 984f45efe
GIRAPH-1111 - FileOutputFormat#setOutputPath is not always available Test Plan: mvn clean install + run a few jobs Reviewers: majakabiljo, dionysis.logothetis, maja.kabiljo Reviewed By: maja.kabiljo Differential Revision: https://reviews.facebook.net/D63837 Project: http://git-wip-us.apache.org/repos/asf/giraph/repo Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/984f45ef Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/984f45ef Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/984f45ef Branch: refs/heads/trunk Commit: 984f45efed6f79e41d7e5e684ea8705172c90436 Parents: 2117d1d Author: Sergey Edunov <edu...@fb.com> Authored: Wed Sep 14 10:20:25 2016 -0700 Committer: Sergey Edunov <edu...@fb.com> Committed: Wed Sep 14 10:20:25 2016 -0700 ---------------------------------------------------------------------- .../java/org/apache/giraph/GiraphRunner.java | 4 +- .../giraph/io/formats/FileOutputFormatUtil.java | 44 ++++++++++++++++++++ .../giraph/utils/InternalVertexRunner.java | 4 +- .../test/java/org/apache/giraph/BspCase.java | 4 +- .../giraph/examples/SimpleCheckpoint.java | 4 +- 5 files changed, 52 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/giraph/blob/984f45ef/giraph-core/src/main/java/org/apache/giraph/GiraphRunner.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/GiraphRunner.java b/giraph-core/src/main/java/org/apache/giraph/GiraphRunner.java index dc9384c..6980c17 100644 --- a/giraph-core/src/main/java/org/apache/giraph/GiraphRunner.java +++ b/giraph-core/src/main/java/org/apache/giraph/GiraphRunner.java @@ -18,6 +18,7 @@ package org.apache.giraph; import org.apache.commons.cli.CommandLine; +import org.apache.giraph.io.formats.FileOutputFormatUtil; import org.apache.giraph.utils.ConfigurationUtils; import org.apache.giraph.conf.GiraphConfiguration; import org.apache.giraph.job.GiraphJob; @@ -27,7 +28,6 @@ end[PURE_YARN]*/ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.filecache.DistributedCache; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Logger; @@ -104,7 +104,7 @@ else[PURE_YARN]*/ throws Exception { if (cmd.hasOption("vof") || cmd.hasOption("eof")) { if (cmd.hasOption("op")) { - FileOutputFormat.setOutputPath(job.getInternalJob(), + FileOutputFormatUtil.setOutputPath(job.getInternalJob(), new Path(cmd.getOptionValue("op"))); } } http://git-wip-us.apache.org/repos/asf/giraph/blob/984f45ef/giraph-core/src/main/java/org/apache/giraph/io/formats/FileOutputFormatUtil.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/io/formats/FileOutputFormatUtil.java b/giraph-core/src/main/java/org/apache/giraph/io/formats/FileOutputFormatUtil.java new file mode 100644 index 0000000..6abb1e4 --- /dev/null +++ b/giraph-core/src/main/java/org/apache/giraph/io/formats/FileOutputFormatUtil.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.io.formats; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.Job; + +/** + * Utility to access file output format in a + * way independent of current hadoop implementation. + */ +public class FileOutputFormatUtil { + + /** + * Private constructor for utility class. + */ + private FileOutputFormatUtil() { } + + /** + * Set the Path of the output directory for the map-reduce job. + * @param job The job to modify + * @param outputDir the Path of the output directory for the map-reduce job. + */ + public static void setOutputPath(Job job, Path outputDir) { + job.getConfiguration().set("mapred.output.dir", outputDir.toString()); + } + +} http://git-wip-us.apache.org/repos/asf/giraph/blob/984f45ef/giraph-core/src/main/java/org/apache/giraph/utils/InternalVertexRunner.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/utils/InternalVertexRunner.java b/giraph-core/src/main/java/org/apache/giraph/utils/InternalVertexRunner.java index 90a5859..1480d1a 100644 --- a/giraph-core/src/main/java/org/apache/giraph/utils/InternalVertexRunner.java +++ b/giraph-core/src/main/java/org/apache/giraph/utils/InternalVertexRunner.java @@ -23,6 +23,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.io.Files; import org.apache.giraph.conf.GiraphConfiguration; import org.apache.giraph.conf.GiraphConstants; +import org.apache.giraph.io.formats.FileOutputFormatUtil; import org.apache.giraph.io.formats.GiraphFileInputFormat; import org.apache.giraph.io.formats.InMemoryVertexOutputFormat; import org.apache.giraph.job.GiraphJob; @@ -32,7 +33,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.log4j.Logger; import java.io.File; @@ -197,7 +197,7 @@ public class InternalVertexRunner { GiraphFileInputFormat.setEdgeInputPath(internalJob.getConfiguration(), new Path(edgeInputFile.toString())); } - FileOutputFormat.setOutputPath(job.getInternalJob(), + FileOutputFormatUtil.setOutputPath(job.getInternalJob(), new Path(outputDir.toString())); // Configure a local zookeeper instance http://git-wip-us.apache.org/repos/asf/giraph/blob/984f45ef/giraph-core/src/test/java/org/apache/giraph/BspCase.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/test/java/org/apache/giraph/BspCase.java b/giraph-core/src/test/java/org/apache/giraph/BspCase.java index b372ab7..c5a9401 100644 --- a/giraph-core/src/test/java/org/apache/giraph/BspCase.java +++ b/giraph-core/src/test/java/org/apache/giraph/BspCase.java @@ -20,6 +20,7 @@ package org.apache.giraph; import org.apache.giraph.conf.GiraphConfiguration; import org.apache.giraph.conf.GiraphConstants; +import org.apache.giraph.io.formats.FileOutputFormatUtil; import org.apache.giraph.job.GiraphJob; import org.apache.giraph.utils.FileUtils; import org.apache.giraph.zk.ZooKeeperExt; @@ -29,7 +30,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.junit.After; @@ -339,7 +339,7 @@ public class BspCase implements Watcher { public static void removeAndSetOutput(GiraphJob job, Path outputPath) throws IOException { FileUtils.deletePath(job.getConfiguration(), outputPath); - FileOutputFormat.setOutputPath(job.getInternalJob(), outputPath); + FileOutputFormatUtil.setOutputPath(job.getInternalJob(), outputPath); } public static String getCallingMethodName() { http://git-wip-us.apache.org/repos/asf/giraph/blob/984f45ef/giraph-examples/src/main/java/org/apache/giraph/examples/SimpleCheckpoint.java ---------------------------------------------------------------------- diff --git a/giraph-examples/src/main/java/org/apache/giraph/examples/SimpleCheckpoint.java b/giraph-examples/src/main/java/org/apache/giraph/examples/SimpleCheckpoint.java index 005754e..862d164 100644 --- a/giraph-examples/src/main/java/org/apache/giraph/examples/SimpleCheckpoint.java +++ b/giraph-examples/src/main/java/org/apache/giraph/examples/SimpleCheckpoint.java @@ -27,6 +27,7 @@ import org.apache.giraph.aggregators.LongSumAggregator; import org.apache.giraph.graph.BasicComputation; import org.apache.giraph.edge.Edge; import org.apache.giraph.edge.EdgeFactory; +import org.apache.giraph.io.formats.FileOutputFormatUtil; import org.apache.giraph.io.formats.GeneratedVertexInputFormat; import org.apache.giraph.io.formats.IdWithValueTextOutputFormat; import org.apache.giraph.job.GiraphJob; @@ -38,7 +39,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Logger; @@ -248,7 +248,7 @@ public class SimpleCheckpoint implements Tool { bspJob.getConfiguration().setWorkerConfiguration( minWorkers, maxWorkers, 100.0f); - FileOutputFormat.setOutputPath(bspJob.getInternalJob(), + FileOutputFormatUtil.setOutputPath(bspJob.getInternalJob(), new Path(cmd.getOptionValue('o'))); boolean verbose = false; if (cmd.hasOption('v')) {