Repository: systemml Updated Branches: refs/heads/master 9f7fae6e6 -> ee2aa4e13
[SYSTEMML-1899] Fix preads in jmlc/mlcontext w/ non-default formats So far JMLC and MLContext disabled the read of meta data files for persistent reads because these meta data files do not exist when data is passed through the programmatic APIs. However, both APIs also support traditional persistent reads, which currently fail because the meta data files are not read, replacing the format with default textcell. This patch makes persistent reads more robust by reading the meta data file whenever it exists. Furthermore, this patch also includes a minor modification of the codegen optimizer to only apply partial cost when cost-based pruning is enabled. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ee2aa4e1 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ee2aa4e1 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ee2aa4e1 Branch: refs/heads/master Commit: ee2aa4e1335756470d084b0bd715209407ac1f78 Parents: 9f7fae6 Author: Matthias Boehm <[email protected]> Authored: Sun Sep 10 19:28:22 2017 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sun Sep 10 19:28:22 2017 -0700 ---------------------------------------------------------------------- .../sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java | 7 ++++--- src/main/java/org/apache/sysml/parser/DataExpression.java | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/ee2aa4e1/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java index 60cfd0c..a8f7365 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java +++ b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java @@ -92,8 +92,8 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection private static final double SPARSE_SAFE_SPARSITY_EST = 0.1; //optimizer configuration - private static final boolean USE_COST_PRUNING = true; - private static final boolean USE_STRUCTURAL_PRUNING = true; + public static boolean USE_COST_PRUNING = true; + public static boolean USE_STRUCTURAL_PRUNING = true; private static final IDSequence COST_ID = new IDSequence(); private static final TemplateRow ROW_TPL = new TemplateRow(); @@ -235,7 +235,8 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection } //cost assignment on hops. Stop early if exceeds bestC. - double C = getPlanCost(memo, part, matPoints, plan, costs._computeCosts, bestC); + double pCBound = USE_COST_PRUNING ? bestC : Double.MAX_VALUE; + double C = getPlanCost(memo, part, matPoints, plan, costs._computeCosts, pCBound); if (LOG.isTraceEnabled()) LOG.trace("Enum: " + Arrays.toString(plan) + " -> " + C); numEvalPartPlans += (C==Double.POSITIVE_INFINITY) ? 1 : 0; http://git-wip-us.apache.org/repos/asf/systemml/blob/ee2aa4e1/src/main/java/org/apache/sysml/parser/DataExpression.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/DataExpression.java b/src/main/java/org/apache/sysml/parser/DataExpression.java index d487020..d1d896c 100644 --- a/src/main/java/org/apache/sysml/parser/DataExpression.java +++ b/src/main/java/org/apache/sysml/parser/DataExpression.java @@ -599,8 +599,9 @@ public class DataExpression extends DataIdentifier String mtdFileName = getMTDFileName(inputFileName); // track whether should attempt to read MTD file or not - boolean shouldReadMTD = _checkMetadata && !ConfigurationManager - .getCompilerConfigFlag(ConfigType.IGNORE_READ_WRITE_METADATA); + boolean shouldReadMTD = _checkMetadata + && (!ConfigurationManager.getCompilerConfigFlag(ConfigType.IGNORE_READ_WRITE_METADATA) + || MapReduceTool.existsFileOnHDFS(mtdFileName)); // existing mtd file // Check for file existence (before metadata parsing for meaningful error messages) if( shouldReadMTD //skip check for jmlc/mlcontext
