This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 2889c0d37d5cfa50c3ee130ac5d40f9d50cd485b Author: Matthias Boehm <[email protected]> AuthorDate: Thu Oct 5 16:00:30 2023 +0200 [MINOR] Fix text/matrix-market readers counting nnz for symmetric data For symmetric inputs, non-zeros on the diagonal were counted twice, leading to unnecessary allocation and partially incorrect metadata. --- .../sysds/runtime/io/ReaderTextCellParallel.java | 34 +++++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/io/ReaderTextCellParallel.java b/src/main/java/org/apache/sysds/runtime/io/ReaderTextCellParallel.java index e34d96a2a9..23b6b738ca 100644 --- a/src/main/java/org/apache/sysds/runtime/io/ReaderTextCellParallel.java +++ b/src/main/java/org/apache/sysds/runtime/io/ReaderTextCellParallel.java @@ -259,24 +259,42 @@ public class ReaderTextCellParallel extends ReaderTextCell RecordReader<LongWritable,Text> reader = _informat.getRecordReader(_split, _job, Reporter.NULL); try { - //counting without locking as conflicts unlikely + //skip matrix-matrix meta data + boolean foundComment = false; while( reader.next(key, value) ) { if( value.toString().charAt(0) == '%' ) - continue; - st.reset( value.toString() ); - int nv = (int)st.nextLong()-1; - if(nv >= 0){ - _rNnz[nv] ++; - if( _isSymmetric ) - _rNnz[(int)st.nextLong()-1] ++; + foundComment = true; + else if( foundComment ) + break; //skip meta data + else { + countCell(st, value.toString()); + break; } } + + //counting without locking as conflicts unlikely + while( reader.next(key, value) ) { + countCell(st, value.toString()); + } } finally { IOUtilFunctions.closeSilently(reader); } return null; } + + private void countCell(FastStringTokenizer st, String value) { + st.reset( value ); + int rix = (int)st.nextLong()-1; + if(rix >= 0){ + _rNnz[rix] ++; + if( _isSymmetric ) { + int cix = (int)st.nextLong()-1; + if(rix != cix) + _rNnz[cix] ++; + } + } + } } /**
