Repository: systemml Updated Branches: refs/heads/master 660ba7630 -> 8d8004121
[SYSTEMML-2051] Fix ultra-sparse matrix block merge COO format This patch fixes the the matrix block merge primitive - as used for matrix reblock - for ultra-sparse blocks in COO format. Furthermore, this also includes a set of tests for all combinations of sparse formats and different sparsity configurations. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/8d800412 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/8d800412 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/8d800412 Branch: refs/heads/master Commit: 8d80041211919b60417d773a53580cc82dcc7f70 Parents: 660ba76 Author: Matthias Boehm <[email protected]> Authored: Sun Dec 24 12:06:22 2017 +0100 Committer: Matthias Boehm <[email protected]> Committed: Sun Dec 24 12:09:50 2017 +0100 ---------------------------------------------------------------------- .../sysml/runtime/matrix/data/MatrixBlock.java | 63 ++--- .../runtime/matrix/data/SparseBlockCOO.java | 28 ++- .../functions/sparse/SparseBlockMerge.java | 235 +++++++++++++++++++ .../functions/sparse/ZPackageSuite.java | 1 + 4 files changed, 286 insertions(+), 41 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/8d800412/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index 7a334a4..1245475 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ -533,6 +533,10 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab public SparseBlock getSparseBlock() { return sparseBlock; } + + public void setSparseBlock(SparseBlock sblock) { + sparseBlock = sblock; + } public Iterator<IJV> getSparseBlockIterator() { //check for valid format, should have been checked from outside @@ -1682,7 +1686,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab if( sparse ) mergeIntoSparse(that, appendOnly); else - mergeIntoDense(that); + mergeIntoDense(that); //maintain number of nonzeros nonZeros = nnz; @@ -1722,49 +1726,46 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab private void mergeIntoSparse(MatrixBlock that, boolean appendOnly) { + SparseBlock a = sparseBlock; + final boolean COO = (a instanceof SparseBlockCOO); + final int m = rlen; + final int n = clen; + if( that.sparse ) //SPARSE <- SPARSE { - SparseBlock a = sparseBlock; SparseBlock b = that.sparseBlock; - int m = rlen; for( int i=0; i<m; i++ ) { - if( !b.isEmpty(i) ) + if( b.isEmpty(i) ) continue; + if( !COO && a.isEmpty(i) ) { + //copy entire sparse row (no sort required) + a.set(i, b.get(i), true); + } + else { - if( a.isEmpty(i) ) { - //copy entire sparse row (no sort required) - a.set(i, b.get(i), true); - } - else - { - boolean appended = false; - int bpos = b.pos(i); - int blen = b.size(i); - int[] bix = b.indexes(i); - double[] bval = b.values(i); - for( int j=bpos; j<bpos+blen; j++ ) { - if( bval[j] != 0 ) { - a.append(i, bix[j], bval[j]); - appended = true; - } + boolean appended = false; + int bpos = b.pos(i); + int blen = b.size(i); + int[] bix = b.indexes(i); + double[] bval = b.values(i); + for( int j=bpos; j<bpos+blen; j++ ) { + if( bval[j] != 0 ) { + a.append(i, bix[j], bval[j]); + appended = true; } - //only sort if value appended - if( !appendOnly && appended ) - a.sort(i); } + //only sort if value appended + if( !appendOnly && appended ) + a.sort(i); } } } else //SPARSE <- DENSE { - SparseBlock a = sparseBlock; double[] b = that.getDenseBlockValues(); - int m = rlen; - int n = clen; - for( int i=0, bix=0; i<m; i++, bix+=n ) - { + for( int i=0, bix=0; i<m; i++, bix+=n ) { boolean appended = false; for( int j=0; j<n; j++ ) { if( b[bix+j] != 0 ) { @@ -1773,10 +1774,14 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab } } //only sort if value appended - if( !appendOnly && appended ) + if( !COO && !appendOnly && appended ) a.sort(i); } } + + //full sort of coordinate blocks + if( COO && !appendOnly ) + a.sort(); } //////// http://git-wip-us.apache.org/repos/asf/systemml/blob/8d800412/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java index de43855..8c2604b 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java @@ -277,7 +277,7 @@ public class SparseBlockCOO extends SparseBlock //scan to begin of row index partition while( index>0 && _rindexes[index-1]==r ) index--; - return index; + return index; } @Override @@ -295,7 +295,7 @@ public class SparseBlockCOO extends SparseBlock shiftLeftAndDelete(index); return true; // nnz-- } - else { + else { _values[index] = v; return false; } @@ -319,12 +319,16 @@ public class SparseBlockCOO extends SparseBlock int alen = row.size(); int[] aix = row.indexes(); double[] avals = row.values(); + //delete existing values in range if necessary deleteIndexRange(r, aix[0], aix[alen-1]+1); + //prepare free space (allocate and shift) + int lsize = _size+alen; + if( _values.length < lsize ) + resize(lsize); shiftRightByN(pos, alen); Arrays.fill(_rindexes, pos, pos+alen, r); System.arraycopy(aix, 0, _cindexes, pos, alen); System.arraycopy(avals, 0, _values, pos, alen); - _size+=alen; } @Override @@ -334,7 +338,7 @@ public class SparseBlockCOO extends SparseBlock if( _size==_values.length ) resize(); - insert(_size, r, c, v); + insert(_size, r, c, v); } @Override @@ -390,10 +394,11 @@ public class SparseBlockCOO extends SparseBlock //sort _cindexes/_values by _cindexes per row partition int index = 0; while( index < _size ){ - int r = _rindexes[index]; + int r = _rindexes[index]; int len = 0; while( r == _rindexes[index] ) { - len ++; index ++; + len ++; + index ++; } SortUtils.sortByIndex(index-len, index, _cindexes, _values); } @@ -403,7 +408,7 @@ public class SparseBlockCOO extends SparseBlock public void sort(int r) { int pos = pos(r); int len = size(r); - + if( len<=100 || !SortUtils.isSorted(pos, pos+len, _cindexes) ) SortUtils.sortByIndex(pos, pos+len, _cindexes, _values); } @@ -530,7 +535,7 @@ public class SparseBlockCOO extends SparseBlock private void resize() { //compute new size - double tmpCap = _values.length * RESIZE_FACTOR1; + double tmpCap = Math.ceil(_values.length * RESIZE_FACTOR1); int newCap = (int)Math.min(tmpCap, Integer.MAX_VALUE); resize(newCap); @@ -545,11 +550,11 @@ public class SparseBlockCOO extends SparseBlock private void resizeAndInsert(int ix, int r, int c, double v) { //compute new size - double tmpCap = _values.length * RESIZE_FACTOR1; + double tmpCap = Math.ceil(_values.length * RESIZE_FACTOR1); int newCap = (int)Math.min(tmpCap, Integer.MAX_VALUE); int[] oldrindexes = _rindexes; - int[] oldcindexes = _cindexes; + int[] oldcindexes = _cindexes; double[] oldvalues = _values; _rindexes = new int[newCap]; _cindexes = new int[newCap]; @@ -588,8 +593,7 @@ public class SparseBlockCOO extends SparseBlock _size--; } - private void shiftRightByN(int ix, int n) - { + private void shiftRightByN(int ix, int n) { //overlapping array copy (shift rhs values right by 1) System.arraycopy(_rindexes, ix, _rindexes, ix+n, _size-ix); System.arraycopy(_cindexes, ix, _cindexes, ix+n, _size-ix); http://git-wip-us.apache.org/repos/asf/systemml/blob/8d800412/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMerge.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMerge.java b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMerge.java new file mode 100644 index 0000000..019b89b --- /dev/null +++ b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMerge.java @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.test.integration.functions.sparse; + +import org.junit.Assert; +import org.junit.Test; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.data.SparseBlock; +import org.apache.sysml.runtime.matrix.data.SparseBlockFactory; +import org.apache.sysml.runtime.util.DataConverter; +import org.apache.sysml.test.integration.AutomatedTestBase; +import org.apache.sysml.test.utils.TestUtils; + +public class SparseBlockMerge extends AutomatedTestBase +{ + private final static int rows = 1000; + private final static int cols = 1000; + private final static double sparsity1 = 0.001; + private final static double sparsity2 = 0.01; + private final static double sparsity3 = 0.1; + + @Override + public void setUp() { + TestUtils.clearAssertionInformation(); + } + + @Test + public void testMergeMCSR_MCSR_1() { + runSparseBlockMergeTest(SparseBlock.Type.MCSR, SparseBlock.Type.MCSR, sparsity1); + } + + @Test + public void testMergeMCSR_MCSR_2() { + runSparseBlockMergeTest(SparseBlock.Type.MCSR, SparseBlock.Type.MCSR, sparsity2); + } + + @Test + public void testMergeMCSR_MCSR_3() { + runSparseBlockMergeTest(SparseBlock.Type.MCSR, SparseBlock.Type.MCSR, sparsity3); + } + + @Test + public void testMergeMCSR_CSR_1() { + runSparseBlockMergeTest(SparseBlock.Type.MCSR, SparseBlock.Type.CSR, sparsity1); + } + + @Test + public void testMergeMCSR_CSR_2() { + runSparseBlockMergeTest(SparseBlock.Type.MCSR, SparseBlock.Type.CSR, sparsity2); + } + + @Test + public void testMergeMCSR_CSR_3() { + runSparseBlockMergeTest(SparseBlock.Type.MCSR, SparseBlock.Type.CSR, sparsity3); + } + + @Test + public void testMergeMCSR_COO_1() { + runSparseBlockMergeTest(SparseBlock.Type.MCSR, SparseBlock.Type.COO, sparsity1); + } + + @Test + public void testMergeMCSR_COO_2() { + runSparseBlockMergeTest(SparseBlock.Type.MCSR, SparseBlock.Type.COO, sparsity2); + } + + @Test + public void testMergeMCSR_COO_3() { + runSparseBlockMergeTest(SparseBlock.Type.MCSR, SparseBlock.Type.COO, sparsity3); + } + + @Test + public void testMergeCSR_CSR_1() { + runSparseBlockMergeTest(SparseBlock.Type.CSR, SparseBlock.Type.CSR, sparsity1); + } + + @Test + public void testMergeCSR_CSR_2() { + runSparseBlockMergeTest(SparseBlock.Type.CSR, SparseBlock.Type.CSR, sparsity2); + } + + @Test + public void testMergeCSR_CSR_3() { + runSparseBlockMergeTest(SparseBlock.Type.CSR, SparseBlock.Type.CSR, sparsity3); + } + + @Test + public void testMergeCSR_MCSR_1() { + runSparseBlockMergeTest(SparseBlock.Type.CSR, SparseBlock.Type.MCSR, sparsity1); + } + + @Test + public void testMergeCSR_MCSR_2() { + runSparseBlockMergeTest(SparseBlock.Type.CSR, SparseBlock.Type.MCSR, sparsity2); + } + + @Test + public void testMergeCSR_MCSR_3() { + runSparseBlockMergeTest(SparseBlock.Type.CSR, SparseBlock.Type.MCSR, sparsity3); + } + + @Test + public void testMergeCSR_COO_1() { + runSparseBlockMergeTest(SparseBlock.Type.CSR, SparseBlock.Type.COO, sparsity1); + } + + @Test + public void testMergeCSR_COO_2() { + runSparseBlockMergeTest(SparseBlock.Type.CSR, SparseBlock.Type.COO, sparsity2); + } + + @Test + public void testMergeCSR_COO_3() { + runSparseBlockMergeTest(SparseBlock.Type.CSR, SparseBlock.Type.COO, sparsity3); + } + + @Test + public void testMergeCOO_COO_1() { + runSparseBlockMergeTest(SparseBlock.Type.COO, SparseBlock.Type.COO, sparsity1); + } + + @Test + public void testMergeCOO_COO_2() { + runSparseBlockMergeTest(SparseBlock.Type.COO, SparseBlock.Type.COO, sparsity2); + } + + @Test + public void testMergeCOO_COO_3() { + runSparseBlockMergeTest(SparseBlock.Type.COO, SparseBlock.Type.COO, sparsity3); + } + + @Test + public void testMergeCOO_MCSR_1() { + runSparseBlockMergeTest(SparseBlock.Type.COO, SparseBlock.Type.MCSR, sparsity1); + } + + @Test + public void testMergeCOO_MCSR_2() { + runSparseBlockMergeTest(SparseBlock.Type.COO, SparseBlock.Type.MCSR, sparsity2); + } + + @Test + public void testMergeCOO_MCSR_3() { + runSparseBlockMergeTest(SparseBlock.Type.COO, SparseBlock.Type.MCSR, sparsity3); + } + + @Test + public void testMergeCOO_CSR_1() { + runSparseBlockMergeTest(SparseBlock.Type.COO, SparseBlock.Type.CSR, sparsity1); + } + + @Test + public void testMergeCOO_CSR_2() { + runSparseBlockMergeTest(SparseBlock.Type.COO, SparseBlock.Type.CSR, sparsity2); + } + + @Test + public void testMergeCOO_CSR_3() { + runSparseBlockMergeTest(SparseBlock.Type.COO, SparseBlock.Type.CSR, sparsity3); + } + + private void runSparseBlockMergeTest( SparseBlock.Type btype1, SparseBlock.Type btype2, double sparsity) + { + try + { + //data generation + double[][] A = getRandomMatrix(rows, cols, -10, 10, sparsity, 1234); + double[][] B1 = new double[A.length][]; + double[][] B2 = new double[A.length][]; + for(int i=0; i<A.length; i++) { + B1[i] = new double[A[i].length]; + B2[i] = new double[A[2].length]; + for(int j=0; j<A[i].length; j++) { + if( j%2 == 0 ) + B1[i][j] = A[i][j]; + else + B2[i][j] = A[i][j]; + } + } + + //init sparse block + MatrixBlock mb1 = DataConverter.convertToMatrixBlock(B1); + MatrixBlock mb2 = DataConverter.convertToMatrixBlock(B2); + long nnz = mb1.getNonZeros() + mb2.getNonZeros(); + mb1.setSparseBlock(SparseBlockFactory.copySparseBlock(btype1, mb1.getSparseBlock(), false)); + mb2.setSparseBlock(SparseBlockFactory.copySparseBlock(btype2, mb2.getSparseBlock(), false)); + + //execute merge + mb1.merge(mb2, false); + + //check for correct number of non-zeros + if( nnz != mb1.getNonZeros() ) + Assert.fail("Wrong number of non-zeros: "+mb1.getNonZeros()+", expected: "+nnz); + + //check correct values + long count = 0; + SparseBlock sblock = mb1.getSparseBlock(); + for( int i=0; i<rows; i++) { + if( sblock.isEmpty(i) ) continue; + int alen = sblock.size(i); + int apos = sblock.pos(i); + int[] aix = sblock.indexes(i); + double[] avals = sblock.values(i); + for( int j=0; j<alen; j++ ) { + if( avals[apos+j] != A[i][aix[apos+j]] ) + Assert.fail("Wrong value returned by scan: "+avals[apos+j]+", expected: "+A[i][apos+aix[j]]); + count++; + } + } + if( count != nnz ) + Assert.fail("Wrong number of values returned by merge: "+count+", expected: "+nnz); + } + catch(Exception ex) { + ex.printStackTrace(); + throw new RuntimeException(ex); + } + } +} http://git-wip-us.apache.org/repos/asf/systemml/blob/8d800412/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java ---------------------------------------------------------------------- diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java index 0196da1..7ce92d1 100644 --- a/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java +++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java @@ -34,6 +34,7 @@ import org.junit.runners.Suite; SparseBlockIndexRange.class, SparseBlockIterator.class, SparseBlockMemEstimate.class, + SparseBlockMerge.class, SparseBlockScan.class, SparseBlockSize.class, })
