[6/8] flink git commit: [FLINK-3059] Improve JavaDocs for DataSet.writeAsText()

fhueske Wed, 25 Nov 2015 16:21:07 -0800

[FLINK-3059] Improve JavaDocs for DataSet.writeAsText()

Currently the JavaDocs of writeAsText() state it simply generates a file,
but this is not always true and it depends on the environment configuration.
This commit improves the JavaDocs of writeAsText().


This closes #1392


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/e03e60da
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/e03e60da
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/e03e60da

Branch: refs/heads/master
Commit: e03e60da208bc60dab477950cb2c7e329fe5ff57
Parents: c787a03
Author: jaoki <ja...@apache.org>
Authored: Sun Nov 22 19:18:57 2015 -0800
Committer: Fabian Hueske <fhue...@apache.org>
Committed: Wed Nov 25 23:48:24 2015 +0100

----------------------------------------------------------------------
 .../java/org/apache/flink/api/java/DataSet.java | 66 +++++++++++++++++---
 .../org/apache/flink/api/scala/DataSet.scala    |  4 +-
 2 files changed, 59 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/e03e60da/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java
----------------------------------------------------------------------
diff --git a/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java 
b/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java
index 81a747e..cd63bcb 100644
--- a/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java
+++ b/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java
@@ -1343,10 +1343,49 @@ public abstract class DataSet<T> {
        // 
--------------------------------------------------------------------------------------------
        
        /**
-        * Writes a DataSet as a text file to the specified location.<br>
-        * For each element of the DataSet the result of {@link 
Object#toString()} is written.  
+        * Writes a DataSet as text file(s) to the specified location.<br>
+        * For each element of the DataSet the result of {@link 
Object#toString()} is written.<br/>
+        * <br/>
+        * <span class="strong">Output files and directories</span><br/>
+        * What output the writeAsText() method produces depends on the 
circumstances:
+        * <ul>
+        *   <li>
+        * A directory is created and multiple files are written underneath. 
(Default behavior)<br/>
+        * This sink creates a directory called "path1", and files "1", "2" ... 
are written underneath depending on <a 
href="https://flink.apache.org/faq.html#what-is-the-parallelism-how-do-i-set-it">parallelism</a>
+        * <pre>{@code .
+        * └── path1/
+        *     ├── 1
+        *     ├── 2
+        *     └── ...}</pre>
+        * Code Example
+        * <pre>{@code dataset.writeAsText("file:///path1");}</pre>
+        *   </li>
+        *   <li>
+        * A single file called "path1" is created when parallelism is set to 1
+        * <pre>{@code .
+        * └── path1 }</pre>
+        * Code Example
+        * <pre>{@code // Parallelism is set to only this particular operation
+        *dataset.writeAsText("file:///path1").setParallelism(1);
+        *
+        * // This creates the same effect, but note that the parallelism of all 
operators is set to one
+        *env.setParallelism(1); 
+        *...
+        *dataset.writeAsText("file:///path1"); }</pre>
+        *   </li>
+        *   <li>
+        * A directory is always created when <a 
href="https://ci.apache.org/projects/flink/flink-docs-master/setup/config.html#file-systems">fs.output.always-create-directory</a>
+        * is set to true in the flink-conf.yaml file, even when parallelism is set 
to 1.
+        * <pre>{@code .
+        * └── path1/
+        *     └── 1 }</pre>
+        * Code Example
+        * <pre>{@code // fs.output.always-create-directory = true
+        *dataset.writeAsText("file:///path1").setParallelism(1); }</pre>
+        *   </li>
+        * </ul>
         * 
-        * @param filePath The path pointing to the location the text file is 
written to.
+        * @param filePath The path pointing to the location the text file or 
files under the directory are written to.
         * @return The DataSink that writes the DataSet.
         * 
         * @see TextOutputFormat
@@ -1356,7 +1395,7 @@ public abstract class DataSet<T> {
        }
        
        /**
-        * Writes a DataSet as a text file to the specified location.<br>
+        * Writes a DataSet as text file(s) to the specified location.<br>
         * For each element of the DataSet the result of {@link 
Object#toString()} is written.  
         * 
         * @param filePath The path pointing to the location the text file is 
written to.
@@ -1364,6 +1403,7 @@ public abstract class DataSet<T> {
         * @return The DataSink that writes the DataSet.
         * 
         * @see TextOutputFormat
+        * @see DataSet#writeAsText(String) Output files and directories
         */
        public DataSink<T> writeAsText(String filePath, WriteMode writeMode) {
                TextOutputFormat<T> tof = new TextOutputFormat<T>(new 
Path(filePath));
@@ -1372,7 +1412,7 @@ public abstract class DataSet<T> {
        }
        
        /**
-        * Writes a DataSet as a text file to the specified location.<br>
+        * Writes a DataSet as text file(s) to the specified location.<br>
         * For each element of the DataSet the result of {@link 
TextFormatter#format(Object)} is written.
         *
         * @param filePath The path pointing to the location the text file is 
written to.
@@ -1380,13 +1420,14 @@ public abstract class DataSet<T> {
         * @return The DataSink that writes the DataSet.
         *
         * @see TextOutputFormat
+        * @see DataSet#writeAsText(String) Output files and directories
         */
        public DataSink<String> writeAsFormattedText(String filePath, 
TextFormatter<T> formatter) {
                return map(new 
FormattingMapper<T>(clean(formatter))).writeAsText(filePath);
        }
 
        /**
-        * Writes a DataSet as a text file to the specified location.<br>
+        * Writes a DataSet as text file(s) to the specified location.<br>
         * For each element of the DataSet the result of {@link 
TextFormatter#format(Object)} is written.
         *
         * @param filePath The path pointing to the location the text file is 
written to.
@@ -1395,13 +1436,14 @@ public abstract class DataSet<T> {
         * @return The DataSink that writes the DataSet.
         *
         * @see TextOutputFormat
+        * @see DataSet#writeAsText(String) Output files and directories
         */
        public DataSink<String> writeAsFormattedText(String filePath, WriteMode 
writeMode, TextFormatter<T> formatter) {
                return map(new 
FormattingMapper<T>(clean(formatter))).writeAsText(filePath, writeMode);
        }
        
        /**
-        * Writes a {@link Tuple} DataSet as a CSV file to the specified 
location.<br>
+        * Writes a {@link Tuple} DataSet as CSV file(s) to the specified 
location.<br>
         * <b>Note: Only a Tuple DataSet can be written as a CSV file.</b><br>
         * For each Tuple field the result of {@link Object#toString()} is 
written.
         * Tuple fields are separated by the default field delimiter {@code 
"comma" (,)}.<br>
@@ -1412,13 +1454,14 @@ public abstract class DataSet<T> {
         * 
         * @see Tuple
         * @see CsvOutputFormat
+        * @see DataSet#writeAsText(String) Output files and directories 
         */
        public DataSink<T> writeAsCsv(String filePath) {
                return writeAsCsv(filePath, 
CsvOutputFormat.DEFAULT_LINE_DELIMITER, 
CsvOutputFormat.DEFAULT_FIELD_DELIMITER);
        }
        
        /**
-        * Writes a {@link Tuple} DataSet as a CSV file to the specified 
location.<br>
+        * Writes a {@link Tuple} DataSet as CSV file(s) to the specified 
location.<br>
         * <b>Note: Only a Tuple DataSet can be written as a CSV file.</b><br>
         * For each Tuple field the result of {@link Object#toString()} is 
written.
         * Tuple fields are separated by the default field delimiter {@code 
"comma" (,)}.<br>
@@ -1430,13 +1473,14 @@ public abstract class DataSet<T> {
         * 
         * @see Tuple
         * @see CsvOutputFormat
+        * @see DataSet#writeAsText(String) Output files and directories
         */
        public DataSink<T> writeAsCsv(String filePath, WriteMode writeMode) {
                return internalWriteAsCsv(new 
Path(filePath),CsvOutputFormat.DEFAULT_LINE_DELIMITER, 
CsvOutputFormat.DEFAULT_FIELD_DELIMITER, writeMode);
        }
        
        /**
-        * Writes a {@link Tuple} DataSet as a CSV file to the specified 
location with the specified field and line delimiters.<br>
+        * Writes a {@link Tuple} DataSet as CSV file(s) to the specified 
location with the specified field and line delimiters.<br>
         * <b>Note: Only a Tuple DataSet can be written as a CSV file.</b><br>
         * For each Tuple field the result of {@link Object#toString()} is 
written.
         * 
@@ -1446,13 +1490,14 @@ public abstract class DataSet<T> {
         * 
         * @see Tuple
         * @see CsvOutputFormat
+        * @see DataSet#writeAsText(String) Output files and directories
         */
        public DataSink<T> writeAsCsv(String filePath, String rowDelimiter, 
String fieldDelimiter) {
                return internalWriteAsCsv(new Path(filePath), rowDelimiter, 
fieldDelimiter, null);
        }
 
        /**
-        * Writes a {@link Tuple} DataSet as a CSV file to the specified 
location with the specified field and line delimiters.<br>
+        * Writes a {@link Tuple} DataSet as CSV file(s) to the specified 
location with the specified field and line delimiters.<br>
         * <b>Note: Only a Tuple DataSet can be written as a CSV file.</b><br>
         * For each Tuple field the result of {@link Object#toString()} is 
written.
         * 
@@ -1463,6 +1508,7 @@ public abstract class DataSet<T> {
         * 
         * @see Tuple
         * @see CsvOutputFormat
+        * @see DataSet#writeAsText(String) Output files and directories
         */
        public DataSink<T> writeAsCsv(String filePath, String rowDelimiter, 
String fieldDelimiter, WriteMode writeMode) {
                return internalWriteAsCsv(new Path(filePath), rowDelimiter, 
fieldDelimiter, writeMode);

http://git-wip-us.apache.org/repos/asf/flink/blob/e03e60da/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala
----------------------------------------------------------------------
diff --git 
a/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala 
b/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala
index a1587a4..350a86f 100644
--- a/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala
+++ b/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala
@@ -1461,6 +1461,7 @@ class DataSet[T: ClassTag](set: JavaDataSet[T]) {
   /**
    * Writes `this` DataSet to the specified location. This uses 
[[AnyRef.toString]] on
    * each element.
+   * @see org.apache.flink.api.java.DataSet#writeAsText(String)
    */
   def writeAsText(
       filePath: String,
@@ -1473,9 +1474,10 @@ class DataSet[T: ClassTag](set: JavaDataSet[T]) {
   }
 
   /**
-   * Writes `this` DataSet to the specified location as a CSV file.
+   * Writes `this` DataSet to the specified location as CSV file(s).
    *
    * This only works on Tuple DataSets. For individual tuple fields 
[[AnyRef.toString]] is used.
+   * @see org.apache.flink.api.java.DataSet#writeAsText(String)
    */
   def writeAsCsv(
       filePath: String,

[6/8] flink git commit: [FLINK-3059] Improve JavaDocs for DataSet.writeAsText()

Reply via email to