This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 387bc2cd30 [SYSTEMDS-3457] MatrixBlock equals Sparse Specialization
387bc2cd30 is described below
commit 387bc2cd3075acec3ab209c6dcd3606115e0ee5f
Author: baunsgaard <[email protected]>
AuthorDate: Thu Oct 27 18:34:08 2022 +0200
[SYSTEMDS-3457] MatrixBlock equals Sparse Specialization
This commit adds support for efficient sparse comparison of equality,
Supported with individual kernels is now sparse-sparse and sparse-dense.
With of cause the best performance if both sides are allocated in the
same way.
Closes #1713
---
.../java/org/apache/sysds/runtime/data/Block.java | 31 ++++++++
.../org/apache/sysds/runtime/data/DenseBlock.java | 11 ++-
.../org/apache/sysds/runtime/data/SparseBlock.java | 82 +++++++++++++++++++++-
.../sysds/runtime/matrix/data/LibMatrixEquals.java | 8 ++-
.../sysds/runtime/matrix/data/MatrixBlock.java | 2 +-
src/test/java/org/apache/sysds/test/TestUtils.java | 28 ++++++++
.../sysds/test/component/matrix/EqualsTest.java | 20 ++++++
7 files changed, 178 insertions(+), 4 deletions(-)
diff --git a/src/main/java/org/apache/sysds/runtime/data/Block.java
b/src/main/java/org/apache/sysds/runtime/data/Block.java
new file mode 100644
index 0000000000..a0a693530a
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/data/Block.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.data;
+
+public interface Block {
+ /**
+ * Get value of matrix cell (r,c). In case of non existing values this
call returns 0.
+ *
+ * @param r row index starting at 0
+ * @param c column index starting at 0
+ * @return value of cell at position (r,c)
+ */
+ public abstract double get(int r, int c);
+}
diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
b/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
index bd0a888b2f..c0833ef8ac 100644
--- a/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
@@ -35,7 +35,7 @@ import org.apache.sysds.utils.MemoryEstimates;
* one or many contiguous rows.
*
*/
-public abstract class DenseBlock implements Serializable
+public abstract class DenseBlock implements Serializable, Block
{
private static final long serialVersionUID = 7517220490270237832L;
@@ -187,6 +187,15 @@ public abstract class DenseBlock implements Serializable
public final int numRows() {
return _rlen;
}
+
+ /**
+ * Get the number of columns / first dimension
+ *
+ * @return number of columns
+ */
+ public final int numCols(){
+ return _odims[0];
+ }
/**
* Get the number of dimensions.
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
index af43124cc6..e5310dc23c 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
@@ -20,6 +20,7 @@
package org.apache.sysds.runtime.data;
import java.io.Serializable;
+import java.util.Arrays;
import java.util.Iterator;
import org.apache.sysds.runtime.matrix.data.IJV;
@@ -35,7 +36,7 @@ import org.apache.sysds.runtime.matrix.data.IJV;
* CSR, MCSR, and - with performance drawbacks - COO.
*
*/
-public abstract class SparseBlock implements Serializable
+public abstract class SparseBlock implements Serializable, Block
{
private static final long serialVersionUID = -5008747088111141395L;
@@ -535,6 +536,85 @@ public abstract class SparseBlock implements Serializable
@Override
public abstract String toString();
+
+ @Override
+ public boolean equals(Object o) {
+ if(o instanceof SparseBlock)
+ return equals((SparseBlock) o, Double.MIN_NORMAL *
1024);
+ return false;
+ }
+
+ /**
+ * Verify if the values in this sparse block is equivalent to that
sparse block, not taking into account the
+ * dimensions of the contained values.
+ *
+ * @param o Other block
+ * @param eps Epsilon allowed
+ * @return If the blocs are equivalent.
+ */
+ public boolean equals(SparseBlock o, double eps) {
+ for(int r = 0; r < numRows(); r++){
+ if(isEmpty(r) != o.isEmpty(r))
+ return false;
+ if(isEmpty(r))
+ continue;
+
+ final int apos = pos(r);
+ final int alen = apos + size(r);
+
+ final int aposO = o.pos(r);
+ final int alenO = aposO + o.size(r);
+
+ if(! Arrays.equals(indexes(r), apos, alen,
o.indexes(r), aposO, alenO))
+ return false;
+ if(! Arrays.equals(values(r), apos, alen, o.values(r),
aposO, alenO))
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Get if the dense double array is equivalent to this sparse Block.
+ *
+ * @param denseValues row major double values same dimensions of sparse
Block.
+ * @param nCol Number of columns in dense values (and hopefully
in this sparse block)
+ * @param eps Epsilon allowed to be off. Note we treat zero
differently and it must be zero.
+ * @return If the dense array is equivalent
+ */
+ public boolean equals(double[] denseValues, int nCol, double eps) {
+ for(int r = 0; r < numRows(); r++) {
+ final int off = r * nCol;
+ final int offEnd = off + nCol;
+ if(isEmpty(r)) {
+ // all in row should be zero.
+ for(int i = off; i < offEnd; i++)
+ if(denseValues[i] != 0)
+ return false;
+ }
+ else {
+ final int apos = pos(r);
+ final int alen = apos + size(r);
+ final double[] avals = values(r);
+ final int[] aix = indexes(r);
+ int j = apos;
+ int i = off;
+ for(int k = 0; i < offEnd && j < alen; i++,
k++) {
+ if(aix[j] == k) {
+ if(Math.abs(denseValues[i] -
avals[j]) > eps)
+ return false;
+ j++;
+ }
+ else if(denseValues[i] != 0.0)
+ return false;
+ }
+ for(; i < offEnd; i++)
+ if(denseValues[i] != 0)
+ return false;
+ }
+ }
+ return true;
+ }
+
/**
* Default sparse block iterator implemented against the sparse block
* api in an implementation-agnostic manner.
diff --git
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixEquals.java
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixEquals.java
index 1862ff8a4e..63536d4c04 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixEquals.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixEquals.java
@@ -148,6 +148,13 @@ public class LibMatrixEquals {
if(a.denseBlock != null && b.denseBlock != null)
return a.denseBlock.equals(b.denseBlock, eps);
+ if(a.sparseBlock != null && b.sparseBlock != null)
+ return a.sparseBlock.equals(b.sparseBlock, eps);
+ if(a.sparseBlock != null && b.denseBlock != null &&
b.denseBlock.isContiguous())
+ return a.sparseBlock.equals(b.denseBlock.values(0),
b.getNumColumns(), eps);
+ if(b.sparseBlock != null && a.denseBlock != null &&
a.denseBlock.isContiguous())
+ return b.sparseBlock.equals(a.denseBlock.values(0),
a.getNumColumns(), eps);
+
return genericEquals();
}
@@ -195,7 +202,6 @@ public class LibMatrixEquals {
LOG.warn("Using generic equals, potential optimizations are
possible");
final int rows = a.getNumRows();
final int cols = a.getNumColumns();
-
for(int i = 0; i < rows; i++)
for(int j = 0; j < cols; j++)
if(Math.abs(a.quickGetValue(i, j) -
b.quickGetValue(i, j)) > eps)
diff --git
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index 7da4acae46..08d6eb4044 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -53,6 +53,7 @@ import
org.apache.sysds.runtime.compress.DMLCompressionException;
import org.apache.sysds.runtime.controlprogram.caching.CacheBlock;
import org.apache.sysds.runtime.controlprogram.caching.MatrixObject.UpdateType;
import
org.apache.sysds.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
+import org.apache.sysds.runtime.data.Block;
import org.apache.sysds.runtime.data.DenseBlock;
import org.apache.sysds.runtime.data.DenseBlockFP64;
import org.apache.sysds.runtime.data.DenseBlockFactory;
@@ -551,7 +552,6 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
////////
// Data handling
-
public DenseBlock getDenseBlock() {
return denseBlock;
}
diff --git a/src/test/java/org/apache/sysds/test/TestUtils.java
b/src/test/java/org/apache/sysds/test/TestUtils.java
index 2b9eacf788..ede17bd229 100644
--- a/src/test/java/org/apache/sysds/test/TestUtils.java
+++ b/src/test/java/org/apache/sysds/test/TestUtils.java
@@ -65,6 +65,7 @@ import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.sysds.common.Types.FileFormat;
import org.apache.sysds.common.Types.ValueType;
import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysds.runtime.data.DenseBlockFP64;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.data.TensorBlock;
import org.apache.sysds.runtime.functionobjects.Builtin;
@@ -3519,4 +3520,31 @@ public class TestUtils
// FIXME: Fails to skip if gpu available but no libraries
return 1; //return false for now
}
+
+ public static MatrixBlock mockNonContiguousMatrix(MatrixBlock db){
+ db.sparseToDense();
+ double[] vals = db.getDenseBlockValues();
+ int[] dims = new int[]{db.getNumRows(), db.getNumColumns()};
+ MatrixBlock m = new MatrixBlock(db.getNumRows(),
db.getNumColumns(), new DenseBlockFP64Mock(dims, vals));
+ m.setNonZeros(db.getNonZeros());
+ return m;
+ }
+
+ private static class DenseBlockFP64Mock extends DenseBlockFP64 {
+ private static final long serialVersionUID =
-3601232958390554672L;
+
+ public DenseBlockFP64Mock(int[] dims, double[] data) {
+ super(dims, data);
+ }
+
+ @Override
+ public boolean isContiguous() {
+ return false;
+ }
+
+ @Override
+ public int numBlocks() {
+ return 2;
+ }
+ }
}
diff --git
a/src/test/java/org/apache/sysds/test/component/matrix/EqualsTest.java
b/src/test/java/org/apache/sysds/test/component/matrix/EqualsTest.java
index 36fe9914da..f64035997d 100644
--- a/src/test/java/org/apache/sysds/test/component/matrix/EqualsTest.java
+++ b/src/test/java/org/apache/sysds/test/component/matrix/EqualsTest.java
@@ -288,4 +288,24 @@ public class EqualsTest {
assertFalse(LibMatrixEquals.equals(m1, m2, 0.00000001));
assertFalse(LibMatrixEquals.equals(m2, m1, 0.00000001));
}
+
+ @Test
+ public void testForcedNonContiguousNotEqual() {
+ MatrixBlock m1 = TestUtils.generateTestMatrixBlock(100, 100, 0,
100, 0.05, 231);
+ MatrixBlock m2 = TestUtils.mockNonContiguousMatrix( //
+ TestUtils.generateTestMatrixBlock(100, 100, 0, 100,
0.05, 231));
+ m1.getSparseBlock().get(13).values()[2] = 1324;
+ assertFalse(LibMatrixEquals.equals(m1, m2, 0.00000001));
+ assertFalse(LibMatrixEquals.equals(m2, m1, 0.00000001));
+ }
+
+ @Test
+ public void testForcedNonContiguousEqual() {
+ MatrixBlock m1 = TestUtils.generateTestMatrixBlock(100, 100, 0,
100, 0.05, 231);
+ MatrixBlock m2 = TestUtils.mockNonContiguousMatrix( //
+ TestUtils.generateTestMatrixBlock(100, 100, 0, 100,
0.05, 231));
+ assertTrue(LibMatrixEquals.equals(m1, m2, 0.00000001));
+ assertTrue(LibMatrixEquals.equals(m2, m1, 0.00000001));
+ }
+
}