[FLINK-8475][config][docs] Integrate optimizer options

This closes #5387.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/b5d7d7c6
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/b5d7d7c6
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/b5d7d7c6

Branch: refs/heads/master
Commit: b5d7d7c6bf2b5c4ae157358aecb335a7c06dd41b
Parents: 7910d05
Author: zentol <ches...@apache.org>
Authored: Tue Jan 23 13:20:08 2018 +0100
Committer: zentol <ches...@apache.org>
Committed: Mon Feb 12 17:55:19 2018 +0100

----------------------------------------------------------------------
 .../generated/optimizer_configuration.html      | 26 ++++++++
 docs/ops/config.md                              |  6 +-
 .../api/common/io/DelimitedInputFormat.java     | 21 +++----
 .../flink/configuration/ConfigConstants.java    | 18 ++++++
 .../flink/configuration/OptimizerOptions.java   | 65 ++++++++++++++++++++
 .../io/DelimitedInputFormatSamplingTest.java    |  8 +--
 6 files changed, 123 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/b5d7d7c6/docs/_includes/generated/optimizer_configuration.html
----------------------------------------------------------------------
diff --git a/docs/_includes/generated/optimizer_configuration.html 
b/docs/_includes/generated/optimizer_configuration.html
new file mode 100644
index 0000000..8253db8
--- /dev/null
+++ b/docs/_includes/generated/optimizer_configuration.html
@@ -0,0 +1,26 @@
+<table class="table table-bordered">
+    <thead>
+        <tr>
+            <th class="text-left" style="width: 20%">Key</th>
+            <th class="text-left" style="width: 15%">Default</th>
+            <th class="text-left" style="width: 65%">Description</th>
+        </tr>
+    </thead>
+    <tbody>
+        <tr>
+            <td><h5>compiler.delimited-informat.max-line-samples</h5></td>
+            <td>10</td>
+            <td>The maximum number of line samples taken by the compiler for 
delimited inputs. The samples are used to estimate the number of records. This 
value can be overridden for a specific input with the input format’s 
parameters.</td>
+        </tr>
+        <tr>
+            <td><h5>compiler.delimited-informat.max-sample-len</h5></td>
+            <td>2097152</td>
+            <td>The maximal length of a line sample that the compiler takes 
for delimited inputs. If the length of a single sample exceeds this value 
(possible because of misconfiguration of the parser), the sampling aborts. This 
value can be overridden for a specific input with the input format’s 
parameters.</td>
+        </tr>
+        <tr>
+            <td><h5>compiler.delimited-informat.min-line-samples</h5></td>
+            <td>2</td>
+            <td>The minimum number of line samples taken by the compiler for 
delimited inputs. The samples are used to estimate the number of records. This 
value can be overridden for a specific input with the input format’s 
parameters.</td>
+        </tr>
+    </tbody>
+</table>

http://git-wip-us.apache.org/repos/asf/flink/blob/b5d7d7c6/docs/ops/config.md
----------------------------------------------------------------------
diff --git a/docs/ops/config.md b/docs/ops/config.md
index da35048..cf77546 100644
--- a/docs/ops/config.md
+++ b/docs/ops/config.md
@@ -373,11 +373,7 @@ definition. This scheme is used **ONLY** if no other 
scheme is specified (explic
 
 ### Compiler/Optimizer
 
-- `compiler.delimited-informat.max-line-samples`: The maximum number of line 
samples taken by the compiler for delimited inputs. The samples are used to 
estimate the number of records. This value can be overridden for a specific 
input with the input format's parameters (DEFAULT: 10).
-
-- `compiler.delimited-informat.min-line-samples`: The minimum number of line 
samples taken by the compiler for delimited inputs. The samples are used to 
estimate the number of records. This value can be overridden for a specific 
input with the input format's parameters (DEFAULT: 2).
-
-- `compiler.delimited-informat.max-sample-len`: The maximal length of a line 
sample that the compiler takes for delimited inputs. If the length of a single 
sample exceeds this value (possible because of misconfiguration of the parser), 
the sampling aborts. This value can be overridden for a specific input with the 
input format's parameters (DEFAULT: 2097152 (= 2 MiBytes)).
+{% include generated/optimizer_configuration.html %}
 
 ### Runtime Algorithms
 

http://git-wip-us.apache.org/repos/asf/flink/blob/b5d7d7c6/flink-core/src/main/java/org/apache/flink/api/common/io/DelimitedInputFormat.java
----------------------------------------------------------------------
diff --git 
a/flink-core/src/main/java/org/apache/flink/api/common/io/DelimitedInputFormat.java
 
b/flink-core/src/main/java/org/apache/flink/api/common/io/DelimitedInputFormat.java
index d6397e6..885f537 100644
--- 
a/flink-core/src/main/java/org/apache/flink/api/common/io/DelimitedInputFormat.java
+++ 
b/flink-core/src/main/java/org/apache/flink/api/common/io/DelimitedInputFormat.java
@@ -21,9 +21,9 @@ package org.apache.flink.api.common.io;
 import org.apache.flink.annotation.Public;
 import org.apache.flink.annotation.PublicEvolving;
 import org.apache.flink.api.common.io.statistics.BaseStatistics;
-import org.apache.flink.configuration.ConfigConstants;
 import org.apache.flink.configuration.Configuration;
 import org.apache.flink.configuration.GlobalConfiguration;
+import org.apache.flink.configuration.OptimizerOptions;
 import org.apache.flink.core.fs.FileInputSplit;
 import org.apache.flink.core.fs.FileStatus;
 import org.apache.flink.core.fs.FileSystem;
@@ -97,20 +97,18 @@ public abstract class DelimitedInputFormat<OT> extends 
FileInputFormat<OT> imple
        }
 
        protected static void loadConfigParameters(Configuration parameters) {
-               int maxSamples = 
parameters.getInteger(ConfigConstants.DELIMITED_FORMAT_MAX_LINE_SAMPLES_KEY,
-                               
ConfigConstants.DEFAULT_DELIMITED_FORMAT_MAX_LINE_SAMPLES);
-               int minSamples = 
parameters.getInteger(ConfigConstants.DELIMITED_FORMAT_MIN_LINE_SAMPLES_KEY,
-                       
ConfigConstants.DEFAULT_DELIMITED_FORMAT_MIN_LINE_SAMPLES);
+               int maxSamples = 
parameters.getInteger(OptimizerOptions.DELIMITED_FORMAT_MAX_LINE_SAMPLES);
+               int minSamples = 
parameters.getInteger(OptimizerOptions.DELIMITED_FORMAT_MIN_LINE_SAMPLES);
                
                if (maxSamples < 0) {
                        LOG.error("Invalid default maximum number of line 
samples: " + maxSamples + ". Using default value of " +
-                               
ConfigConstants.DEFAULT_DELIMITED_FORMAT_MAX_LINE_SAMPLES);
-                       maxSamples = 
ConfigConstants.DEFAULT_DELIMITED_FORMAT_MAX_LINE_SAMPLES;
+                               
OptimizerOptions.DELIMITED_FORMAT_MAX_LINE_SAMPLES.defaultValue());
+                       maxSamples = 
OptimizerOptions.DELIMITED_FORMAT_MAX_LINE_SAMPLES.defaultValue();
                }
                if (minSamples < 0) {
                        LOG.error("Invalid default minimum number of line 
samples: " + minSamples + ". Using default value of " +
-                               
ConfigConstants.DEFAULT_DELIMITED_FORMAT_MIN_LINE_SAMPLES);
-                       minSamples = 
ConfigConstants.DEFAULT_DELIMITED_FORMAT_MIN_LINE_SAMPLES;
+                               
OptimizerOptions.DELIMITED_FORMAT_MIN_LINE_SAMPLES.defaultValue());
+                       minSamples = 
OptimizerOptions.DELIMITED_FORMAT_MIN_LINE_SAMPLES.defaultValue();
                }
                
                DEFAULT_MAX_NUM_SAMPLES = maxSamples;
@@ -123,10 +121,9 @@ public abstract class DelimitedInputFormat<OT> extends 
FileInputFormat<OT> imple
                        DEFAULT_MIN_NUM_SAMPLES = minSamples;
                }
                
-               int maxLen = 
parameters.getInteger(ConfigConstants.DELIMITED_FORMAT_MAX_SAMPLE_LENGTH_KEY,
-                               
ConfigConstants.DEFAULT_DELIMITED_FORMAT_MAX_SAMPLE_LEN);
+               int maxLen = 
parameters.getInteger(OptimizerOptions.DELIMITED_FORMAT_MAX_SAMPLE_LEN);
                if (maxLen <= 0) {
-                       maxLen = 
ConfigConstants.DEFAULT_DELIMITED_FORMAT_MAX_SAMPLE_LEN;
+                       maxLen = 
OptimizerOptions.DELIMITED_FORMAT_MAX_SAMPLE_LEN.defaultValue();
                        LOG.error("Invalid value for the maximum sample record 
length. Using default value of " + maxLen + '.');
                } else if (maxLen < DEFAULT_READ_BUFFER_SIZE) {
                        maxLen = DEFAULT_READ_BUFFER_SIZE;

http://git-wip-us.apache.org/repos/asf/flink/blob/b5d7d7c6/flink-core/src/main/java/org/apache/flink/configuration/ConfigConstants.java
----------------------------------------------------------------------
diff --git 
a/flink-core/src/main/java/org/apache/flink/configuration/ConfigConstants.java 
b/flink-core/src/main/java/org/apache/flink/configuration/ConfigConstants.java
index 9d70f01..105ee22 100644
--- 
a/flink-core/src/main/java/org/apache/flink/configuration/ConfigConstants.java
+++ 
b/flink-core/src/main/java/org/apache/flink/configuration/ConfigConstants.java
@@ -656,18 +656,27 @@ public final class ConfigConstants {
        /**
         * The maximum number of line samples to be taken by the delimited 
input format, if no
         * other value is specified for the data source.
+        *
+        * @deprecated use {@link 
OptimizerOptions#DELIMITED_FORMAT_MAX_LINE_SAMPLES} instead
         */
+       @Deprecated
        public static final String DELIMITED_FORMAT_MAX_LINE_SAMPLES_KEY = 
"compiler.delimited-informat.max-line-samples";
 
        /**
         * The minimum number of line samples to be taken by the delimited 
input format, if no
         * other value is specified for the data source.
+        *
+        * @deprecated use {@link 
OptimizerOptions#DELIMITED_FORMAT_MIN_LINE_SAMPLES} instead
         */
+       @Deprecated
        public static final String DELIMITED_FORMAT_MIN_LINE_SAMPLES_KEY = 
"compiler.delimited-informat.min-line-samples";
 
        /**
         * The maximum length of a single sampled record before the sampling is 
aborted.
+        *
+        * @deprecated use {@link 
OptimizerOptions#DELIMITED_FORMAT_MAX_SAMPLE_LEN} instead
         */
+       @Deprecated
        public static final String DELIMITED_FORMAT_MAX_SAMPLE_LENGTH_KEY = 
"compiler.delimited-informat.max-sample-len";
 
 
@@ -1554,17 +1563,26 @@ public final class ConfigConstants {
 
        /**
         * The default maximum number of line samples taken by the delimited 
input format.
+        *
+        * @deprecated use {@link 
OptimizerOptions#DELIMITED_FORMAT_MAX_LINE_SAMPLES} instead
         */
+       @Deprecated
        public static final int DEFAULT_DELIMITED_FORMAT_MAX_LINE_SAMPLES = 10;
 
        /**
         * The default minimum number of line samples taken by the delimited 
input format.
+        *
+        * @deprecated use {@link 
OptimizerOptions#DELIMITED_FORMAT_MIN_LINE_SAMPLES} instead
         */
+       @Deprecated
        public static final int DEFAULT_DELIMITED_FORMAT_MIN_LINE_SAMPLES = 2;
 
        /**
         * The default maximum sample length before sampling is aborted (2 
MiBytes).
+        *
+        * @deprecated use {@link 
OptimizerOptions#DELIMITED_FORMAT_MAX_SAMPLE_LEN} instead
         */
+       @Deprecated
        public static final int DEFAULT_DELIMITED_FORMAT_MAX_SAMPLE_LEN = 2 * 
1024 * 1024;
 
 

http://git-wip-us.apache.org/repos/asf/flink/blob/b5d7d7c6/flink-core/src/main/java/org/apache/flink/configuration/OptimizerOptions.java
----------------------------------------------------------------------
diff --git 
a/flink-core/src/main/java/org/apache/flink/configuration/OptimizerOptions.java 
b/flink-core/src/main/java/org/apache/flink/configuration/OptimizerOptions.java
new file mode 100644
index 0000000..9e7ff68
--- /dev/null
+++ 
b/flink-core/src/main/java/org/apache/flink/configuration/OptimizerOptions.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.configuration;
+
+import org.apache.flink.annotation.PublicEvolving;
+
+import static org.apache.flink.configuration.ConfigOptions.key;
+
+/**
+ * Configuration options for the optimizer.
+ */
+@PublicEvolving
+public class OptimizerOptions {
+
+       /**
+        * The maximum number of line samples taken by the compiler for 
delimited inputs. The samples are used to estimate
+        * the number of records. This value can be overridden for a specific 
input with the input format’s parameters.
+        */
+       public static final ConfigOption<Integer> 
DELIMITED_FORMAT_MAX_LINE_SAMPLES =
+               key("compiler.delimited-informat.max-line-samples")
+                       .defaultValue(10)
+                       .withDescription("The maximum number of line samples 
taken by the compiler for delimited inputs. The samples" +
+                               " are used to estimate the number of records. 
This value can be overridden for a specific input with the" +
+                               " input format’s parameters.");
+
+       /**
+        * The minimum number of line samples taken by the compiler for 
delimited inputs. The samples are used to estimate
+        * the number of records. This value can be overridden for a specific 
input with the input format’s parameters.
+        */
+       public static final ConfigOption<Integer> 
DELIMITED_FORMAT_MIN_LINE_SAMPLES =
+               key("compiler.delimited-informat.min-line-samples")
+                       .defaultValue(2)
+                       .withDescription("The minimum number of line samples 
taken by the compiler for delimited inputs. The samples" +
+                               " are used to estimate the number of records. 
This value can be overridden for a specific input with the" +
+                               " input format’s parameters.");
+
+       /**
+        * The maximal length of a line sample that the compiler takes for 
delimited inputs. If the length of a single
+        * sample exceeds this value (possible because of misconfiguration of 
the parser), the sampling aborts. This value
+        * can be overridden for a specific input with the input format’s 
parameters.
+        */
+       public static final ConfigOption<Integer> 
DELIMITED_FORMAT_MAX_SAMPLE_LEN =
+               key("compiler.delimited-informat.max-sample-len")
+                       .defaultValue(2097152)
+                       .withDescription("The maximal length of a line sample 
that the compiler takes for delimited inputs. If the" +
+                               " length of a single sample exceeds this value 
(possible because of misconfiguration of the parser)," +
+                               " the sampling aborts. This value can be 
overridden for a specific input with the input format’s" +
+                               " parameters.");
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/b5d7d7c6/flink-core/src/test/java/org/apache/flink/api/common/io/DelimitedInputFormatSamplingTest.java
----------------------------------------------------------------------
diff --git 
a/flink-core/src/test/java/org/apache/flink/api/common/io/DelimitedInputFormatSamplingTest.java
 
b/flink-core/src/test/java/org/apache/flink/api/common/io/DelimitedInputFormatSamplingTest.java
index 01f8680..fe93332 100644
--- 
a/flink-core/src/test/java/org/apache/flink/api/common/io/DelimitedInputFormatSamplingTest.java
+++ 
b/flink-core/src/test/java/org/apache/flink/api/common/io/DelimitedInputFormatSamplingTest.java
@@ -20,8 +20,8 @@
 package org.apache.flink.api.common.io;
 
 import org.apache.flink.api.common.io.statistics.BaseStatistics;
-import org.apache.flink.configuration.ConfigConstants;
 import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.OptimizerOptions;
 import org.apache.flink.testutils.TestConfigUtils;
 import org.apache.flink.testutils.TestFileSystem;
 import org.apache.flink.testutils.TestFileUtils;
@@ -78,8 +78,8 @@ public class DelimitedInputFormatSamplingTest {
                try {
                        // make sure we do 4 samples
                        CONFIG = TestConfigUtils.loadGlobalConf(
-                               new String[] { 
ConfigConstants.DELIMITED_FORMAT_MIN_LINE_SAMPLES_KEY,
-                                                               
ConfigConstants.DELIMITED_FORMAT_MAX_LINE_SAMPLES_KEY },
+                               new String[] { 
OptimizerOptions.DELIMITED_FORMAT_MIN_LINE_SAMPLES.key(),
+                                                               
OptimizerOptions.DELIMITED_FORMAT_MAX_LINE_SAMPLES.key() },
                                new String[] { "4", "4" });
 
 
@@ -230,7 +230,7 @@ public class DelimitedInputFormatSamplingTest {
        @Test
        public void testSamplingOverlyLongRecord() {
                try {
-                       final String tempFile = TestFileUtils.createTempFile(2 
* ConfigConstants.DEFAULT_DELIMITED_FORMAT_MAX_SAMPLE_LEN);
+                       final String tempFile = TestFileUtils.createTempFile(2 
* OptimizerOptions.DELIMITED_FORMAT_MAX_SAMPLE_LEN.defaultValue());
                        final Configuration conf = new Configuration();
                        
                        final TestDelimitedInputFormat format = new 
TestDelimitedInputFormat(CONFIG);

Reply via email to