[SYSTEMML-383] Integrated sparse block memory estimates / valid nnz Incl new sparse block test for relative memory estimates between MCSR, CSR, COO, and dense formats.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/9c422821 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/9c422821 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/9c422821 Branch: refs/heads/master Commit: 9c4228215a2cb9b2df351a4660f36d9b63d252c8 Parents: a19a14c Author: Matthias Boehm <[email protected]> Authored: Sat Jan 23 01:02:32 2016 -0800 Committer: Matthias Boehm <[email protected]> Committed: Sat Jan 23 16:08:22 2016 -0800 ---------------------------------------------------------------------- .../org/apache/sysml/hops/OptimizerUtils.java | 9 +- .../sysml/runtime/matrix/data/MatrixBlock.java | 15 +-- .../runtime/matrix/data/SparseBlockCOO.java | 25 +++++ .../runtime/matrix/data/SparseBlockCSR.java | 25 +++++ .../runtime/matrix/data/SparseBlockFactory.java | 18 ++++ .../runtime/matrix/data/SparseBlockMCSR.java | 26 +++++ .../sparse/SparseBlockMemEstimate.java | 99 ++++++++++++++++++++ .../functions/sparse/ZPackageSuite.java | 1 + 8 files changed, 203 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/hops/OptimizerUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java index 19acbd1..57d38d7 100644 --- a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java +++ b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java @@ -23,7 +23,6 @@ import java.util.HashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - import org.apache.sysml.api.DMLScript; import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM; import org.apache.sysml.conf.ConfigurationManager; @@ -42,6 +41,7 @@ import org.apache.sysml.runtime.instructions.cp.ScalarObject; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.OutputInfo; +import org.apache.sysml.runtime.matrix.data.SparseBlock; import org.apache.sysml.runtime.matrix.data.SparseRow; import org.apache.sysml.runtime.util.UtilFunctions; import org.apache.sysml.yarn.ropt.YarnClusterAnalyzer; @@ -73,7 +73,10 @@ public class OptimizerUtils public static final double BIT_SIZE = (double)1/8; public static final double INVALID_SIZE = -1d; // memory estimate not computed + //constants for valid CP matrix dimension sizes / nnz (dense/sparse) public static final long MAX_NUMCELLS_CP_DENSE = Integer.MAX_VALUE; + public static final long MAX_NNZ_CP_SPARSE = (MatrixBlock.DEFAULT_SPARSEBLOCK == + SparseBlock.Type.MCSR) ? Long.MAX_VALUE : Integer.MAX_VALUE; /** * Enables/disables dynamic re-compilation of lops/instructions. @@ -862,8 +865,8 @@ public class OptimizerUtils if( sparse ) //SPARSE { - //check max nnz - ret = (nnz <= Long.MAX_VALUE); + //check max nnz (dependent on sparse block format) + ret = (nnz <= MAX_NNZ_CP_SPARSE); } else //DENSE { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index 22c6347..9a7bfcb 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ -2556,18 +2556,9 @@ public class MatrixBlock extends MatrixValue implements Externalizable // basic variables and references sizes double size = 44; - //NOTES: - // * Each sparse row has a fixed overhead of 8B (reference) + 32B (object) + - // 12B (3 int members), 32B (overhead int array), 32B (overhead double array), - // * Each non-zero value requires 12B for the column-index/value pair. - // * Overheads for arrays, objects, and references refer to 64bit JVMs - // * If nnz < than rows we have only also empty rows. - - // account for sparsity and initial capacity - double cnnz = Math.max(SparseRow.initialCapacity, Math.ceil(sparsity*ncols)); - double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols)); - size += rlen * ( 116 + 12 * cnnz ); //sparse row - size += nrows * 8d; //empty rows + // delegate memory estimate to individual sparse blocks + size += SparseBlockFactory.estimateSizeSparseInMemory( + DEFAULT_SPARSEBLOCK, nrows, ncols, sparsity); // robustness for long overflows return (long) Math.min(size, Long.MAX_VALUE); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java index eea0754..5643850 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java @@ -125,6 +125,31 @@ public class SparseBlockCOO extends SparseBlock } } } + + /** + * Get the estimated in-memory size of the sparse block in COO + * with the given dimensions w/o accounting for overallocation. + * + * @param nrows + * @param ncols + * @param sparsity + * @return + */ + public static long estimateMemory(long nrows, long ncols, double sparsity) { + double lnnz = Math.max(INIT_CAPACITY, Math.ceil(sparsity*nrows*ncols)); + + //32B overhead per array, int/int/double arr in nnz + double size = 16 + 8; //object + 2 int fields + size += 32 + lnnz * 4d; //rindexes array (row indexes) + size += 32 + lnnz * 4d; //cindexes array (column indexes) + size += 32 + lnnz * 8d; //values array (non-zero values) + + //robustness for long overflows + return (long) Math.min(size, Long.MAX_VALUE); + } + + /////////////////// + //SparseBlock implementation @Override public void allocate(int r) { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java index 876b0ed..c599753 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java @@ -128,6 +128,31 @@ public class SparseBlockCSR extends SparseBlock } } + /** + * Get the estimated in-memory size of the sparse block in CSR + * with the given dimensions w/o accounting for overallocation. + * + * @param nrows + * @param ncols + * @param sparsity + * @return + */ + public static long estimateMemory(long nrows, long ncols, double sparsity) { + double lnnz = Math.max(INIT_CAPACITY, Math.ceil(sparsity*nrows*ncols)); + + //32B overhead per array, int arr in nrows, int/double arr in nnz + double size = 16 + 4; //object + int field + size += 32 + (nrows+1) * 4d; //ptr array (row pointers) + size += 32 + lnnz * 4d; //indexes array (column indexes) + size += 32 + lnnz * 8d; //values array (non-zero values) + + //robustness for long overflows + return (long) Math.min(size, Long.MAX_VALUE); + } + + /////////////////// + //SparseBlock implementation + @Override public void allocate(int r) { //do nothing everything preallocated http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java index 1ac8f16..67b3eae 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java @@ -87,4 +87,22 @@ public abstract class SparseBlockFactory throw new RuntimeException("Unexpected sparse block type: "+type.toString()); } } + + /** + * + * @param type + * @param nrows + * @param ncols + * @param sparsity + * @return + */ + public static long estimateSizeSparseInMemory(SparseBlock.Type type, long nrows, long ncols, double sparsity) { + switch( type ) { + case MCSR: return SparseBlockMCSR.estimateMemory(nrows, ncols, sparsity); + case CSR: return SparseBlockCSR.estimateMemory(nrows, ncols, sparsity); + case COO: return SparseBlockCOO.estimateMemory(nrows, ncols, sparsity); + default: + throw new RuntimeException("Unexpected sparse block type: "+type.toString()); + } + } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java index 378a3f4..6e1cded 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java @@ -80,6 +80,32 @@ public class SparseBlockMCSR extends SparseBlock public SparseBlockMCSR(int rlen, int clen) { _rows = new SparseRow[rlen]; } + + /** + * Get the estimated in-memory size of the sparse block in MCSR + * with the given dimensions w/o accounting for overallocation. + * + * @param nrows + * @param ncols + * @param sparsity + * @return + */ + public static long estimateMemory(long nrows, long ncols, double sparsity) { + double cnnz = Math.max(SparseRow.initialCapacity, Math.ceil(sparsity*ncols)); + double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols)); + + //Each sparse row has a fixed overhead of 8B (reference) + 32B (object) + + //12B (3 int members), 32B (overhead int array), 32B (overhead double array), + //Each non-zero value requires 12B for the column-index/value pair. + //Overheads for arrays, objects, and references refer to 64bit JVMs + //If nnz < than rows we have only also empty rows. + double size = 16; //object + size += rlen * (116 + cnnz * 12); //sparse rows + size += 32 + nrows * 8d; //references + + // robustness for long overflows + return (long) Math.min(size, Long.MAX_VALUE); + } /////////////////// //SparseBlock implementation http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java new file mode 100644 index 0000000..8d2fbd0 --- /dev/null +++ b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.test.integration.functions.sparse; + +import org.junit.Assert; +import org.junit.Test; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.data.SparseBlock; +import org.apache.sysml.runtime.matrix.data.SparseBlockFactory; +import org.apache.sysml.test.integration.AutomatedTestBase; +import org.apache.sysml.test.utils.TestUtils; + +/** + * This is a sparse matrix block component test for sparse block memory + * estimation functionality. + * + */ +public class SparseBlockMemEstimate extends AutomatedTestBase +{ + private final static int rows = 662; + private final static int cols = 444; + private final static double sparsity1 = 0.39; + private final static double sparsity2 = 0.0001; + + @Override + public void setUp() { + TestUtils.clearAssertionInformation(); + } + + @Test + public void testSparseBlockSparse() { + runSparseBlockMemoryTest(sparsity1); + } + + @Test + public void testSparseBlockUltraSparse() { + runSparseBlockMemoryTest(sparsity2); + } + + /** + * + * @param btype + * @param sparsity + */ + private void runSparseBlockMemoryTest( double sparsity) + { + double memMCSR = SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.MCSR, rows, cols, sparsity); + double memCSR = SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.CSR, rows, cols, sparsity); + double memCOO = SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.COO, rows, cols, sparsity); + double memDense = MatrixBlock.estimateSizeDenseInMemory(rows, cols); + + //check negative estimate + if( memMCSR <= 0 ) + Assert.fail("SparseBlockMCSR memory estimate <= 0."); + if( memCSR <= 0 ) + Assert.fail("SparseBlockCSR memory estimate <= 0."); + if( memCOO <= 0 ) + Assert.fail("SparseBlockCOO memory estimate <= 0."); + + //check dense estimate + if( memMCSR > memDense ) + Assert.fail("SparseBlockMCSR memory estimate larger than dense estimate."); + if( memCSR > memDense ) + Assert.fail("SparseBlockCSR memory estimate larger than dense estimate."); + if( memCOO > memDense ) + Assert.fail("SparseBlockCOO memory estimate larger than dense estimate."); + + //check sparse estimates relations + if( sparsity == sparsity1 ) { //sparse (pref CSR) + if( memMCSR < memCSR ) + Assert.fail("SparseBlockMCSR memory estimate smaller than SparseBlockCSR estimate."); + if( memCOO < memCSR ) + Assert.fail("SparseBlockCOO memory estimate smaller than SparseBlockCSR estimate."); + } + else { //ultra-sparse (pref COO) + if( memMCSR < memCOO ) + Assert.fail("SparseBlockMCSR memory estimate smaller than SparseBlockCOO estimate."); + if( memCSR < memCOO ) + Assert.fail("SparseBlockCSR memory estimate smaller than SparseBlockCOO estimate."); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java ---------------------------------------------------------------------- diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java index 633e152..01fc461 100644 --- a/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java +++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java @@ -32,6 +32,7 @@ import org.junit.runners.Suite; SparseBlockGetSet.class, SparseBlockIndexRange.class, SparseBlockIterator.class, + SparseBlockMemEstimate.class, SparseBlockScan.class, SparseBlockSize.class, })
