[22/39] kylin git commit: KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs
KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/f6dda4fe Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/f6dda4fe Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/f6dda4fe Branch: refs/heads/spark-it Commit: f6dda4fecd5a882a17379987445a5aa0a347ff63 Parents: 4c33e38 Author: kangkaisen Authored: Sat Dec 17 14:12:48 2016 +0800 Committer: shaofengshi Committed: Mon Jan 23 16:23:56 2017 +0800 -- .../apache/kylin/common/util/HadoopUtil.java | 16 ++ .../kylin/engine/mr/JobBuilderSupport.java | 2 +- .../kylin/engine/mr/common/BatchConstants.java | 9 +- .../engine/mr/steps/CreateDictionaryJob.java | 43 ++-- .../engine/mr/steps/FactDistinctColumnsJob.java | 32 ++- .../mr/steps/FactDistinctColumnsReducer.java | 240 +++ .../engine/mr/steps/SaveStatisticsStep.java | 10 +- .../mr/steps/UpdateCubeInfoAfterBuildStep.java | 10 +- 8 files changed, 175 insertions(+), 187 deletions(-) -- http://git-wip-us.apache.org/repos/asf/kylin/blob/f6dda4fe/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java -- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java index bdc4c3e..b9ffe38 100644 --- a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java +++ b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java @@ -26,8 +26,10 @@ import java.net.URISyntaxException; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.io.Writable; import org.apache.kylin.common.KylinConfig; import org.slf4j.Logger; @@ -140,4 +142,18 @@ public class HadoopUtil { } } +public static Path 
getFilterOnlyPath(FileSystem fs, Path baseDir, final String filter) throws IOException { +FileStatus[] fileStatus = fs.listStatus(baseDir, new PathFilter() { +@Override +public boolean accept(Path path) { +return path.getName().startsWith(filter); +} +}); + +if (fileStatus.length == 1) { +return fileStatus[0].getPath(); +} else { +return null; +} +} } http://git-wip-us.apache.org/repos/asf/kylin/blob/f6dda4fe/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java index 696b22a..c34a904 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java @@ -171,7 +171,7 @@ public class JobBuilderSupport { } public String getStatisticsPath(String jobId) { -return getRealizationRootPath(jobId) + "/statistics"; +return getRealizationRootPath(jobId) + "/fact_distinct_columns/" + BatchConstants.CFG_OUTPUT_STATISTICS; } // http://git-wip-us.apache.org/repos/asf/kylin/blob/f6dda4fe/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java index 0281539..602b4bb 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java @@ -53,9 +53,16 @@ public interface BatchConstants { String CFG_STATISTICS_ENABLED = "statistics.enabled"; String CFG_STATISTICS_OUTPUT = "statistics.ouput";//spell error, for compatibility issue better not change it String CFG_STATISTICS_SAMPLING_PERCENT = "statistics.sampling.percent"; -String CFG_STATISTICS_CUBE_ESTIMATION_FILENAME = "cube_statistics.txt"; String 
CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME = "cuboid_statistics.seq"; +String CFG_MAPRED_OUTPUT_COMPRESS = "mapred.output.compress"; + +String CFG_OUTPUT_COLUMN = "column"; +String CFG_OUTPUT_DICT = "dict"; +String CFG_OUTPUT_STATISTICS
[09/39] kylin git commit: KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs
KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/7de8aa12 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/7de8aa12 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/7de8aa12 Branch: refs/heads/spark-it Commit: 7de8aa1203a72bad105ed692f7100535939b03af Parents: c2229c9 Author: kangkaisen Authored: Sat Dec 17 14:12:48 2016 +0800 Committer: kangkaisen Committed: Sat Jan 21 23:19:50 2017 +0800 -- .../apache/kylin/common/util/HadoopUtil.java | 16 ++ .../kylin/engine/mr/JobBuilderSupport.java | 2 +- .../kylin/engine/mr/common/BatchConstants.java | 9 +- .../engine/mr/steps/CreateDictionaryJob.java | 43 ++-- .../engine/mr/steps/FactDistinctColumnsJob.java | 32 ++- .../mr/steps/FactDistinctColumnsReducer.java | 240 +++ .../engine/mr/steps/SaveStatisticsStep.java | 10 +- .../mr/steps/UpdateCubeInfoAfterBuildStep.java | 10 +- 8 files changed, 175 insertions(+), 187 deletions(-) -- http://git-wip-us.apache.org/repos/asf/kylin/blob/7de8aa12/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java -- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java index bdc4c3e..b9ffe38 100644 --- a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java +++ b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java @@ -26,8 +26,10 @@ import java.net.URISyntaxException; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.io.Writable; import org.apache.kylin.common.KylinConfig; import org.slf4j.Logger; @@ -140,4 +142,18 @@ public class HadoopUtil { } } +public static Path 
getFilterOnlyPath(FileSystem fs, Path baseDir, final String filter) throws IOException { +FileStatus[] fileStatus = fs.listStatus(baseDir, new PathFilter() { +@Override +public boolean accept(Path path) { +return path.getName().startsWith(filter); +} +}); + +if (fileStatus.length == 1) { +return fileStatus[0].getPath(); +} else { +return null; +} +} } http://git-wip-us.apache.org/repos/asf/kylin/blob/7de8aa12/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java index 696b22a..c34a904 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java @@ -171,7 +171,7 @@ public class JobBuilderSupport { } public String getStatisticsPath(String jobId) { -return getRealizationRootPath(jobId) + "/statistics"; +return getRealizationRootPath(jobId) + "/fact_distinct_columns/" + BatchConstants.CFG_OUTPUT_STATISTICS; } // http://git-wip-us.apache.org/repos/asf/kylin/blob/7de8aa12/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java index 0281539..602b4bb 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java @@ -53,9 +53,16 @@ public interface BatchConstants { String CFG_STATISTICS_ENABLED = "statistics.enabled"; String CFG_STATISTICS_OUTPUT = "statistics.ouput";//spell error, for compatibility issue better not change it String CFG_STATISTICS_SAMPLING_PERCENT = "statistics.sampling.percent"; -String CFG_STATISTICS_CUBE_ESTIMATION_FILENAME = "cube_statistics.txt"; String 
CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME = "cuboid_statistics.seq"; +String CFG_MAPRED_OUTPUT_COMPRESS = "mapred.output.compress"; + +String CFG_OUTPUT_COLUMN = "column"; +String CFG_OUTPUT_DICT = "dict"; +String CFG_OUTPUT_STATISTICS =
[16/47] kylin git commit: KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs
KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/7de8aa12 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/7de8aa12 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/7de8aa12 Branch: refs/heads/KYLIN-2361 Commit: 7de8aa1203a72bad105ed692f7100535939b03af Parents: c2229c9 Author: kangkaisen Authored: Sat Dec 17 14:12:48 2016 +0800 Committer: kangkaisen Committed: Sat Jan 21 23:19:50 2017 +0800 -- .../apache/kylin/common/util/HadoopUtil.java | 16 ++ .../kylin/engine/mr/JobBuilderSupport.java | 2 +- .../kylin/engine/mr/common/BatchConstants.java | 9 +- .../engine/mr/steps/CreateDictionaryJob.java | 43 ++-- .../engine/mr/steps/FactDistinctColumnsJob.java | 32 ++- .../mr/steps/FactDistinctColumnsReducer.java | 240 +++ .../engine/mr/steps/SaveStatisticsStep.java | 10 +- .../mr/steps/UpdateCubeInfoAfterBuildStep.java | 10 +- 8 files changed, 175 insertions(+), 187 deletions(-) -- http://git-wip-us.apache.org/repos/asf/kylin/blob/7de8aa12/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java -- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java index bdc4c3e..b9ffe38 100644 --- a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java +++ b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java @@ -26,8 +26,10 @@ import java.net.URISyntaxException; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.io.Writable; import org.apache.kylin.common.KylinConfig; import org.slf4j.Logger; @@ -140,4 +142,18 @@ public class HadoopUtil { } } +public static Path 
getFilterOnlyPath(FileSystem fs, Path baseDir, final String filter) throws IOException { +FileStatus[] fileStatus = fs.listStatus(baseDir, new PathFilter() { +@Override +public boolean accept(Path path) { +return path.getName().startsWith(filter); +} +}); + +if (fileStatus.length == 1) { +return fileStatus[0].getPath(); +} else { +return null; +} +} } http://git-wip-us.apache.org/repos/asf/kylin/blob/7de8aa12/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java index 696b22a..c34a904 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java @@ -171,7 +171,7 @@ public class JobBuilderSupport { } public String getStatisticsPath(String jobId) { -return getRealizationRootPath(jobId) + "/statistics"; +return getRealizationRootPath(jobId) + "/fact_distinct_columns/" + BatchConstants.CFG_OUTPUT_STATISTICS; } // http://git-wip-us.apache.org/repos/asf/kylin/blob/7de8aa12/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java index 0281539..602b4bb 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java @@ -53,9 +53,16 @@ public interface BatchConstants { String CFG_STATISTICS_ENABLED = "statistics.enabled"; String CFG_STATISTICS_OUTPUT = "statistics.ouput";//spell error, for compatibility issue better not change it String CFG_STATISTICS_SAMPLING_PERCENT = "statistics.sampling.percent"; -String CFG_STATISTICS_CUBE_ESTIMATION_FILENAME = "cube_statistics.txt"; String 
CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME = "cuboid_statistics.seq"; +String CFG_MAPRED_OUTPUT_COMPRESS = "mapred.output.compress"; + +String CFG_OUTPUT_COLUMN = "column"; +String CFG_OUTPUT_DICT = "dict"; +String CFG_OUTPUT_STATISTICS =
[31/47] kylin git commit: KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs
KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/f6dda4fe Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/f6dda4fe Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/f6dda4fe Branch: refs/heads/KYLIN-2361 Commit: f6dda4fecd5a882a17379987445a5aa0a347ff63 Parents: 4c33e38 Author: kangkaisen Authored: Sat Dec 17 14:12:48 2016 +0800 Committer: shaofengshi Committed: Mon Jan 23 16:23:56 2017 +0800 -- .../apache/kylin/common/util/HadoopUtil.java | 16 ++ .../kylin/engine/mr/JobBuilderSupport.java | 2 +- .../kylin/engine/mr/common/BatchConstants.java | 9 +- .../engine/mr/steps/CreateDictionaryJob.java | 43 ++-- .../engine/mr/steps/FactDistinctColumnsJob.java | 32 ++- .../mr/steps/FactDistinctColumnsReducer.java | 240 +++ .../engine/mr/steps/SaveStatisticsStep.java | 10 +- .../mr/steps/UpdateCubeInfoAfterBuildStep.java | 10 +- 8 files changed, 175 insertions(+), 187 deletions(-) -- http://git-wip-us.apache.org/repos/asf/kylin/blob/f6dda4fe/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java -- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java index bdc4c3e..b9ffe38 100644 --- a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java +++ b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java @@ -26,8 +26,10 @@ import java.net.URISyntaxException; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.io.Writable; import org.apache.kylin.common.KylinConfig; import org.slf4j.Logger; @@ -140,4 +142,18 @@ public class HadoopUtil { } } +public static Path 
getFilterOnlyPath(FileSystem fs, Path baseDir, final String filter) throws IOException { +FileStatus[] fileStatus = fs.listStatus(baseDir, new PathFilter() { +@Override +public boolean accept(Path path) { +return path.getName().startsWith(filter); +} +}); + +if (fileStatus.length == 1) { +return fileStatus[0].getPath(); +} else { +return null; +} +} } http://git-wip-us.apache.org/repos/asf/kylin/blob/f6dda4fe/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java index 696b22a..c34a904 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java @@ -171,7 +171,7 @@ public class JobBuilderSupport { } public String getStatisticsPath(String jobId) { -return getRealizationRootPath(jobId) + "/statistics"; +return getRealizationRootPath(jobId) + "/fact_distinct_columns/" + BatchConstants.CFG_OUTPUT_STATISTICS; } // http://git-wip-us.apache.org/repos/asf/kylin/blob/f6dda4fe/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java index 0281539..602b4bb 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java @@ -53,9 +53,16 @@ public interface BatchConstants { String CFG_STATISTICS_ENABLED = "statistics.enabled"; String CFG_STATISTICS_OUTPUT = "statistics.ouput";//spell error, for compatibility issue better not change it String CFG_STATISTICS_SAMPLING_PERCENT = "statistics.sampling.percent"; -String CFG_STATISTICS_CUBE_ESTIMATION_FILENAME = "cube_statistics.txt"; String 
CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME = "cuboid_statistics.seq"; +String CFG_MAPRED_OUTPUT_COMPRESS = "mapred.output.compress"; + +String CFG_OUTPUT_COLUMN = "column"; +String CFG_OUTPUT_DICT = "dict"; +String
[06/10] kylin git commit: KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs
KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/f6dda4fe Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/f6dda4fe Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/f6dda4fe Branch: refs/heads/master-hbase0.98 Commit: f6dda4fecd5a882a17379987445a5aa0a347ff63 Parents: 4c33e38 Author: kangkaisen Authored: Sat Dec 17 14:12:48 2016 +0800 Committer: shaofengshi Committed: Mon Jan 23 16:23:56 2017 +0800 -- .../apache/kylin/common/util/HadoopUtil.java | 16 ++ .../kylin/engine/mr/JobBuilderSupport.java | 2 +- .../kylin/engine/mr/common/BatchConstants.java | 9 +- .../engine/mr/steps/CreateDictionaryJob.java | 43 ++-- .../engine/mr/steps/FactDistinctColumnsJob.java | 32 ++- .../mr/steps/FactDistinctColumnsReducer.java | 240 +++ .../engine/mr/steps/SaveStatisticsStep.java | 10 +- .../mr/steps/UpdateCubeInfoAfterBuildStep.java | 10 +- 8 files changed, 175 insertions(+), 187 deletions(-) -- http://git-wip-us.apache.org/repos/asf/kylin/blob/f6dda4fe/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java -- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java index bdc4c3e..b9ffe38 100644 --- a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java +++ b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java @@ -26,8 +26,10 @@ import java.net.URISyntaxException; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.io.Writable; import org.apache.kylin.common.KylinConfig; import org.slf4j.Logger; @@ -140,4 +142,18 @@ public class HadoopUtil { } } +public 
static Path getFilterOnlyPath(FileSystem fs, Path baseDir, final String filter) throws IOException { +FileStatus[] fileStatus = fs.listStatus(baseDir, new PathFilter() { +@Override +public boolean accept(Path path) { +return path.getName().startsWith(filter); +} +}); + +if (fileStatus.length == 1) { +return fileStatus[0].getPath(); +} else { +return null; +} +} } http://git-wip-us.apache.org/repos/asf/kylin/blob/f6dda4fe/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java index 696b22a..c34a904 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java @@ -171,7 +171,7 @@ public class JobBuilderSupport { } public String getStatisticsPath(String jobId) { -return getRealizationRootPath(jobId) + "/statistics"; +return getRealizationRootPath(jobId) + "/fact_distinct_columns/" + BatchConstants.CFG_OUTPUT_STATISTICS; } // http://git-wip-us.apache.org/repos/asf/kylin/blob/f6dda4fe/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java index 0281539..602b4bb 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java @@ -53,9 +53,16 @@ public interface BatchConstants { String CFG_STATISTICS_ENABLED = "statistics.enabled"; String CFG_STATISTICS_OUTPUT = "statistics.ouput";//spell error, for compatibility issue better not change it String CFG_STATISTICS_SAMPLING_PERCENT = "statistics.sampling.percent"; -String CFG_STATISTICS_CUBE_ESTIMATION_FILENAME = "cube_statistics.txt"; 
String CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME = "cuboid_statistics.seq"; +String CFG_MAPRED_OUTPUT_COMPRESS = "mapred.output.compress"; + +String CFG_OUTPUT_COLUMN = "column"; +String CFG_OUTPUT_DICT = "dict"; +String
[2/6] kylin git commit: KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs
KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/f6dda4fe Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/f6dda4fe Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/f6dda4fe Branch: refs/heads/master Commit: f6dda4fecd5a882a17379987445a5aa0a347ff63 Parents: 4c33e38 Author: kangkaisen Authored: Sat Dec 17 14:12:48 2016 +0800 Committer: shaofengshi Committed: Mon Jan 23 16:23:56 2017 +0800 -- .../apache/kylin/common/util/HadoopUtil.java | 16 ++ .../kylin/engine/mr/JobBuilderSupport.java | 2 +- .../kylin/engine/mr/common/BatchConstants.java | 9 +- .../engine/mr/steps/CreateDictionaryJob.java | 43 ++-- .../engine/mr/steps/FactDistinctColumnsJob.java | 32 ++- .../mr/steps/FactDistinctColumnsReducer.java | 240 +++ .../engine/mr/steps/SaveStatisticsStep.java | 10 +- .../mr/steps/UpdateCubeInfoAfterBuildStep.java | 10 +- 8 files changed, 175 insertions(+), 187 deletions(-) -- http://git-wip-us.apache.org/repos/asf/kylin/blob/f6dda4fe/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java -- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java index bdc4c3e..b9ffe38 100644 --- a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java +++ b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java @@ -26,8 +26,10 @@ import java.net.URISyntaxException; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.io.Writable; import org.apache.kylin.common.KylinConfig; import org.slf4j.Logger; @@ -140,4 +142,18 @@ public class HadoopUtil { } } +public static Path 
getFilterOnlyPath(FileSystem fs, Path baseDir, final String filter) throws IOException { +FileStatus[] fileStatus = fs.listStatus(baseDir, new PathFilter() { +@Override +public boolean accept(Path path) { +return path.getName().startsWith(filter); +} +}); + +if (fileStatus.length == 1) { +return fileStatus[0].getPath(); +} else { +return null; +} +} } http://git-wip-us.apache.org/repos/asf/kylin/blob/f6dda4fe/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java index 696b22a..c34a904 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java @@ -171,7 +171,7 @@ public class JobBuilderSupport { } public String getStatisticsPath(String jobId) { -return getRealizationRootPath(jobId) + "/statistics"; +return getRealizationRootPath(jobId) + "/fact_distinct_columns/" + BatchConstants.CFG_OUTPUT_STATISTICS; } // http://git-wip-us.apache.org/repos/asf/kylin/blob/f6dda4fe/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java index 0281539..602b4bb 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java @@ -53,9 +53,16 @@ public interface BatchConstants { String CFG_STATISTICS_ENABLED = "statistics.enabled"; String CFG_STATISTICS_OUTPUT = "statistics.ouput";//spell error, for compatibility issue better not change it String CFG_STATISTICS_SAMPLING_PERCENT = "statistics.sampling.percent"; -String CFG_STATISTICS_CUBE_ESTIMATION_FILENAME = "cube_statistics.txt"; String 
CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME = "cuboid_statistics.seq"; +String CFG_MAPRED_OUTPUT_COMPRESS = "mapred.output.compress"; + +String CFG_OUTPUT_COLUMN = "column"; +String CFG_OUTPUT_DICT = "dict"; +String CFG_OUTPUT_STATISTICS =
[05/14] kylin git commit: KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs
KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/7de8aa12 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/7de8aa12 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/7de8aa12 Branch: refs/heads/master-hbase1.x Commit: 7de8aa1203a72bad105ed692f7100535939b03af Parents: c2229c9 Author: kangkaisen Authored: Sat Dec 17 14:12:48 2016 +0800 Committer: kangkaisen Committed: Sat Jan 21 23:19:50 2017 +0800 -- .../apache/kylin/common/util/HadoopUtil.java | 16 ++ .../kylin/engine/mr/JobBuilderSupport.java | 2 +- .../kylin/engine/mr/common/BatchConstants.java | 9 +- .../engine/mr/steps/CreateDictionaryJob.java | 43 ++-- .../engine/mr/steps/FactDistinctColumnsJob.java | 32 ++- .../mr/steps/FactDistinctColumnsReducer.java | 240 +++ .../engine/mr/steps/SaveStatisticsStep.java | 10 +- .../mr/steps/UpdateCubeInfoAfterBuildStep.java | 10 +- 8 files changed, 175 insertions(+), 187 deletions(-) -- http://git-wip-us.apache.org/repos/asf/kylin/blob/7de8aa12/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java -- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java index bdc4c3e..b9ffe38 100644 --- a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java +++ b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java @@ -26,8 +26,10 @@ import java.net.URISyntaxException; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.io.Writable; import org.apache.kylin.common.KylinConfig; import org.slf4j.Logger; @@ -140,4 +142,18 @@ public class HadoopUtil { } } +public static 
Path getFilterOnlyPath(FileSystem fs, Path baseDir, final String filter) throws IOException { +FileStatus[] fileStatus = fs.listStatus(baseDir, new PathFilter() { +@Override +public boolean accept(Path path) { +return path.getName().startsWith(filter); +} +}); + +if (fileStatus.length == 1) { +return fileStatus[0].getPath(); +} else { +return null; +} +} } http://git-wip-us.apache.org/repos/asf/kylin/blob/7de8aa12/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java index 696b22a..c34a904 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java @@ -171,7 +171,7 @@ public class JobBuilderSupport { } public String getStatisticsPath(String jobId) { -return getRealizationRootPath(jobId) + "/statistics"; +return getRealizationRootPath(jobId) + "/fact_distinct_columns/" + BatchConstants.CFG_OUTPUT_STATISTICS; } // http://git-wip-us.apache.org/repos/asf/kylin/blob/7de8aa12/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java index 0281539..602b4bb 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java @@ -53,9 +53,16 @@ public interface BatchConstants { String CFG_STATISTICS_ENABLED = "statistics.enabled"; String CFG_STATISTICS_OUTPUT = "statistics.ouput";//spell error, for compatibility issue better not change it String CFG_STATISTICS_SAMPLING_PERCENT = "statistics.sampling.percent"; -String CFG_STATISTICS_CUBE_ESTIMATION_FILENAME = "cube_statistics.txt"; String 
CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME = "cuboid_statistics.seq"; +String CFG_MAPRED_OUTPUT_COMPRESS = "mapred.output.compress"; + +String CFG_OUTPUT_COLUMN = "column"; +String CFG_OUTPUT_DICT = "dict"; +String
kylin git commit: KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs
Repository: kylin Updated Branches: refs/heads/master c2229c9c7 -> 7de8aa120 KYLIN-2242 write multiple files in FactDistinctColumnsReducer with MultipleOutputs Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/7de8aa12 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/7de8aa12 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/7de8aa12 Branch: refs/heads/master Commit: 7de8aa1203a72bad105ed692f7100535939b03af Parents: c2229c9 Author: kangkaisen Authored: Sat Dec 17 14:12:48 2016 +0800 Committer: kangkaisen Committed: Sat Jan 21 23:19:50 2017 +0800 -- .../apache/kylin/common/util/HadoopUtil.java | 16 ++ .../kylin/engine/mr/JobBuilderSupport.java | 2 +- .../kylin/engine/mr/common/BatchConstants.java | 9 +- .../engine/mr/steps/CreateDictionaryJob.java | 43 ++-- .../engine/mr/steps/FactDistinctColumnsJob.java | 32 ++- .../mr/steps/FactDistinctColumnsReducer.java | 240 +++ .../engine/mr/steps/SaveStatisticsStep.java | 10 +- .../mr/steps/UpdateCubeInfoAfterBuildStep.java | 10 +- 8 files changed, 175 insertions(+), 187 deletions(-) -- http://git-wip-us.apache.org/repos/asf/kylin/blob/7de8aa12/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java -- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java index bdc4c3e..b9ffe38 100644 --- a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java +++ b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java @@ -26,8 +26,10 @@ import java.net.URISyntaxException; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.io.Writable; import org.apache.kylin.common.KylinConfig; import 
org.slf4j.Logger; @@ -140,4 +142,18 @@ public class HadoopUtil { } } +public static Path getFilterOnlyPath(FileSystem fs, Path baseDir, final String filter) throws IOException { +FileStatus[] fileStatus = fs.listStatus(baseDir, new PathFilter() { +@Override +public boolean accept(Path path) { +return path.getName().startsWith(filter); +} +}); + +if (fileStatus.length == 1) { +return fileStatus[0].getPath(); +} else { +return null; +} +} } http://git-wip-us.apache.org/repos/asf/kylin/blob/7de8aa12/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java index 696b22a..c34a904 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java @@ -171,7 +171,7 @@ public class JobBuilderSupport { } public String getStatisticsPath(String jobId) { -return getRealizationRootPath(jobId) + "/statistics"; +return getRealizationRootPath(jobId) + "/fact_distinct_columns/" + BatchConstants.CFG_OUTPUT_STATISTICS; } // http://git-wip-us.apache.org/repos/asf/kylin/blob/7de8aa12/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java -- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java index 0281539..602b4bb 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java @@ -53,9 +53,16 @@ public interface BatchConstants { String CFG_STATISTICS_ENABLED = "statistics.enabled"; String CFG_STATISTICS_OUTPUT = "statistics.ouput";//spell error, for compatibility issue better not change it String CFG_STATISTICS_SAMPLING_PERCENT = "statistics.sampling.percent"; 
-String CFG_STATISTICS_CUBE_ESTIMATION_FILENAME = "cube_statistics.txt"; String CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME = "cuboid_statistics.seq"; +String CFG_MAPRED_OUTPUT_COMPRESS = "mapred.output.compress"; + +String CFG_OUTPUT_COLUMN =