[FLINK-3059] Improve JavaDocs for DataSet.writeAsText() Currently the JavaDocs of writeAsText() state it simply generates a file, but this is not always true and it depends on the environment configuration. This commit improves the JavaDocs of writeAsText().
This closes #1392 Project: http://git-wip-us.apache.org/repos/asf/flink/repo Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/e03e60da Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/e03e60da Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/e03e60da Branch: refs/heads/master Commit: e03e60da208bc60dab477950cb2c7e329fe5ff57 Parents: c787a03 Author: jaoki <ja...@apache.org> Authored: Sun Nov 22 19:18:57 2015 -0800 Committer: Fabian Hueske <fhue...@apache.org> Committed: Wed Nov 25 23:48:24 2015 +0100 ---------------------------------------------------------------------- .../java/org/apache/flink/api/java/DataSet.java | 66 +++++++++++++++++--- .../org/apache/flink/api/scala/DataSet.scala | 4 +- 2 files changed, 59 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/flink/blob/e03e60da/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java ---------------------------------------------------------------------- diff --git a/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java b/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java index 81a747e..cd63bcb 100644 --- a/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java +++ b/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java @@ -1343,10 +1343,49 @@ public abstract class DataSet<T> { // -------------------------------------------------------------------------------------------- /** - * Writes a DataSet as a text file to the specified location.<br> - * For each element of the DataSet the result of {@link Object#toString()} is written. 
+ * Writes a DataSet as text file(s) to the specified location.<br> + * For each element of the DataSet the result of {@link Object#toString()} is written.<br/> + * <br/> + * <span class="strong">Output files and directories</span><br/> + * What output how writeAsText() method produces is depending on other circumstance + * <ul> + * <li> + * A directory is created and multiple files are written underneath. (Default behavior)<br/> + * This sink creates a directory called "path1", and files "1", "2" ... are writen underneath depending on <a href="https://flink.apache.org/faq.html#what-is-the-parallelism-how-do-i-set-it">parallelism</a> + * <pre>{@code . + * └── path1/ + * ├── 1 + * ├── 2 + * └── ...}</pre> + * Code Example + * <pre>{@code dataset.writeAsText("file:///path1");}</pre> + * </li> + * <li> + * A single file called "path1" is created when parallelism is set to 1 + * <pre>{@code . + * └── path1 }</pre> + * Code Example + * <pre>{@code // Parallelism is set to only this particular operation + *dataset.writeAsText("file:///path1").setParallelism(1); + * + * // This will creates the same effect but note all operators' parallelism are set to one + *env.setParallelism(1); + *... + *dataset.writeAsText("file:///path1"); }</pre> + * </li> + * <li> + * A directory is always created when <a href="https://ci.apache.org/projects/flink/flink-docs-master/setup/config.html#file-systems">fs.output.always-create-directory</a> + * is set to true in flink-conf.yaml file, even when parallelism is set to 1. + * <pre>{@code . + * └── path1/ + * └── 1 }</pre> + * Code Example + * <pre>{@code // fs.output.always-create-directory = true + *dataset.writeAsText("file:///path1").setParallelism(1); }</pre> + * </li> + * </ul> * - * @param filePath The path pointing to the location the text file is written to. + * @param filePath The path pointing to the location the text file or files under the directory is written to. * @return The DataSink that writes the DataSet. 
* * @see TextOutputFormat @@ -1356,7 +1395,7 @@ public abstract class DataSet<T> { } /** - * Writes a DataSet as a text file to the specified location.<br> + * Writes a DataSet as text file(s) to the specified location.<br> * For each element of the DataSet the result of {@link Object#toString()} is written. * * @param filePath The path pointing to the location the text file is written to. @@ -1364,6 +1403,7 @@ public abstract class DataSet<T> { * @return The DataSink that writes the DataSet. * * @see TextOutputFormat + * @see DataSet#writeAsText(String) Output files and directories */ public DataSink<T> writeAsText(String filePath, WriteMode writeMode) { TextOutputFormat<T> tof = new TextOutputFormat<T>(new Path(filePath)); @@ -1372,7 +1412,7 @@ public abstract class DataSet<T> { } /** - * Writes a DataSet as a text file to the specified location.<br> + * Writes a DataSet as text file(s) to the specified location.<br> * For each element of the DataSet the result of {@link TextFormatter#format(Object)} is written. * * @param filePath The path pointing to the location the text file is written to. @@ -1380,13 +1420,14 @@ public abstract class DataSet<T> { * @return The DataSink that writes the DataSet. * * @see TextOutputFormat + * @see DataSet#writeAsText(String) Output files and directories */ public DataSink<String> writeAsFormattedText(String filePath, TextFormatter<T> formatter) { return map(new FormattingMapper<T>(clean(formatter))).writeAsText(filePath); } /** - * Writes a DataSet as a text file to the specified location.<br> + * Writes a DataSet as text file(s) to the specified location.<br> * For each element of the DataSet the result of {@link TextFormatter#format(Object)} is written. * * @param filePath The path pointing to the location the text file is written to. @@ -1395,13 +1436,14 @@ public abstract class DataSet<T> { * @return The DataSink that writes the DataSet. 
* * @see TextOutputFormat + * @see DataSet#writeAsText(String) Output files and directories */ public DataSink<String> writeAsFormattedText(String filePath, WriteMode writeMode, TextFormatter<T> formatter) { return map(new FormattingMapper<T>(clean(formatter))).writeAsText(filePath, writeMode); } /** - * Writes a {@link Tuple} DataSet as a CSV file to the specified location.<br> + * Writes a {@link Tuple} DataSet as CSV file(s) to the specified location.<br> * <b>Note: Only a Tuple DataSet can written as a CSV file.</b><br> * For each Tuple field the result of {@link Object#toString()} is written. * Tuple fields are separated by the default field delimiter {@code "comma" (,)}.<br> @@ -1412,13 +1454,14 @@ public abstract class DataSet<T> { * * @see Tuple * @see CsvOutputFormat + * @see DataSet#writeAsText(String) Output files and directories */ public DataSink<T> writeAsCsv(String filePath) { return writeAsCsv(filePath, CsvOutputFormat.DEFAULT_LINE_DELIMITER, CsvOutputFormat.DEFAULT_FIELD_DELIMITER); } /** - * Writes a {@link Tuple} DataSet as a CSV file to the specified location.<br> + * Writes a {@link Tuple} DataSet as CSV file(s) to the specified location.<br> * <b>Note: Only a Tuple DataSet can written as a CSV file.</b><br> * For each Tuple field the result of {@link Object#toString()} is written. 
* Tuple fields are separated by the default field delimiter {@code "comma" (,)}.<br> @@ -1430,13 +1473,14 @@ public abstract class DataSet<T> { * * @see Tuple * @see CsvOutputFormat + * @see DataSet#writeAsText(String) Output files and directories */ public DataSink<T> writeAsCsv(String filePath, WriteMode writeMode) { return internalWriteAsCsv(new Path(filePath),CsvOutputFormat.DEFAULT_LINE_DELIMITER, CsvOutputFormat.DEFAULT_FIELD_DELIMITER, writeMode); } /** - * Writes a {@link Tuple} DataSet as a CSV file to the specified location with the specified field and line delimiters.<br> + * Writes a {@link Tuple} DataSet as CSV file(s) to the specified location with the specified field and line delimiters.<br> * <b>Note: Only a Tuple DataSet can written as a CSV file.</b><br> * For each Tuple field the result of {@link Object#toString()} is written. * @@ -1446,13 +1490,14 @@ public abstract class DataSet<T> { * * @see Tuple * @see CsvOutputFormat + * @see DataSet#writeAsText(String) Output files and directories */ public DataSink<T> writeAsCsv(String filePath, String rowDelimiter, String fieldDelimiter) { return internalWriteAsCsv(new Path(filePath), rowDelimiter, fieldDelimiter, null); } /** - * Writes a {@link Tuple} DataSet as a CSV file to the specified location with the specified field and line delimiters.<br> + * Writes a {@link Tuple} DataSet as CSV file(s) to the specified location with the specified field and line delimiters.<br> * <b>Note: Only a Tuple DataSet can written as a CSV file.</b><br> * For each Tuple field the result of {@link Object#toString()} is written. 
* @@ -1463,6 +1508,7 @@ public abstract class DataSet<T> { * * @see Tuple * @see CsvOutputFormat + * @see DataSet#writeAsText(String) Output files and directories */ public DataSink<T> writeAsCsv(String filePath, String rowDelimiter, String fieldDelimiter, WriteMode writeMode) { return internalWriteAsCsv(new Path(filePath), rowDelimiter, fieldDelimiter, writeMode); http://git-wip-us.apache.org/repos/asf/flink/blob/e03e60da/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala ---------------------------------------------------------------------- diff --git a/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala b/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala index a1587a4..350a86f 100644 --- a/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala +++ b/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala @@ -1461,6 +1461,7 @@ class DataSet[T: ClassTag](set: JavaDataSet[T]) { /** * Writes `this` DataSet to the specified location. This uses [[AnyRef.toString]] on * each element. + * @see org.apache.flink.api.java.DataSet#writeAsText(String) */ def writeAsText( filePath: String, @@ -1473,9 +1474,10 @@ class DataSet[T: ClassTag](set: JavaDataSet[T]) { } /** - * Writes `this` DataSet to the specified location as a CSV file. + * Writes `this` DataSet to the specified location as CSV file(s). * * This only works on Tuple DataSets. For individual tuple fields [[AnyRef.toString]] is used. + * @see org.apache.flink.api.java.DataSet#writeAsText(String) */ def writeAsCsv( filePath: String,