Baunsgaard commented on code in PR #2226: URL: https://github.com/apache/systemds/pull/2226#discussion_r1955731013
########## src/main/java/org/apache/sysds/hops/rewrite/RewriteQuantizationFusedCompression.java: ########## @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.hops.rewrite; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map.Entry; + +import org.apache.sysds.common.Types.OpOp1; +import org.apache.sysds.common.Types.OpOp2; +import org.apache.sysds.hops.UnaryOp; +import org.apache.sysds.runtime.instructions.cp.DoubleObject; +import org.apache.sysds.runtime.instructions.cp.ScalarObject; +import org.apache.sysds.hops.BinaryOp; + +import org.apache.sysds.common.Types.DataType; +import org.apache.sysds.common.Types.ValueType; + +import org.apache.sysds.hops.Hop; + +/** + * Rule: RewriteFloorCompress. Detects the sequence `M2 = floor(M * S)` followed by `C = compress(M2)` and prepares for + * fusion into a single operation. This rewrite improves performance by avoiding intermediate results. Currently, it + * identifies the pattern without applying fusion. + */ +public class RewriteQuantizationFusedCompression extends HopRewriteRule { + @Override Review Comment: change indentation to tabs. ########## src/main/java/org/apache/sysds/parser/BuiltinFunctionExpression.java: ########## @@ -752,6 +752,31 @@ else if(((ConstIdentifier) getThirdExpr().getOutput()) raiseValidateError("Compress/DeCompress instruction not allowed in dml script"); break; + case QUANTIZE_COMPRESS: // this is not used when with two arguments it seems Review Comment: if it does not hit here, remove, you will see shortly (if all tests pass) a code coverage report, that will indicate if this is used. ########## src/main/java/org/apache/sysds/runtime/instructions/InstructionParser.java: ########## @@ -65,8 +68,10 @@ public static Instruction[] parseMixedInstructions ( String str ) { return null; String[] strlist = str.split(Instruction.INSTRUCTION_DELIM); Instruction[] inst = new Instruction[strlist.length]; - for ( int i=0; i < inst.length; i++ ) + for ( int i=0; i < inst.length; i++ ) { + System.out.println(strlist[i]); Review Comment: remove printing. If you want outputs, use LOG.debug ########## src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java: ########## @@ -254,26 +258,51 @@ else if(g instanceof ColGroupSDC) private AColGroup compress(CompressedSizeInfoColGroup cg) throws Exception { final IColIndex colIndexes = cg.getColumns(); - final CompressionType ct = cg.getBestCompressionType(); + final CompressionType ct = cg.getBestCompressionType(); // TODO: handle quantization-fused compression Review Comment: the best compression, should already at this point be correct subject to quantization, assuming the estimators behave correctly. Therefore, remove this TODO. ########## src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java: ########## @@ -29,6 +29,9 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; +import javax.swing.plaf.basic.BasicInternalFrameTitlePane.SystemMenuBar; Review Comment: please remove this import ########## src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java: ########## @@ -69,6 +70,11 @@ public CompressionSettingsBuilder() { } + public CompressionSettingsBuilder setScaleFactor(double[] scaleFactors) { Review Comment: add a doc string please, (probably just copy from other setter method with minor modifications.) ########## src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java: ########## @@ -41,6 +41,8 @@ import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import scala.NotImplementedError; Review Comment: Use Java NotImplementedException. ########## src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java: ########## @@ -269,9 +338,17 @@ private static IEncode createFromDense(MatrixBlock m, int col) { final AMapToData d = MapToFactory.create(nV, nUnique - 1); int di = 0; for(int i = off, r = 0; i < end; i += nCol, r++) { - if(vals[i] != 0) { + double value = vals[i]; Review Comment: make two ########## src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java: ########## @@ -253,9 +303,28 @@ private static IEncode createFromDense(MatrixBlock m, int col) { final int end = off + nRow * nCol; final double[] vals = m.getDenseBlockValues(); + // Validate scaleFactors + boolean useSingleScalar = false; + if(scaleFactors != null) { + if(scaleFactors.length == 1) { + useSingleScalar = true; + } + } + // Iteration 1, make Count HashMap. - for(int i = off; i < end; i += nCol) // jump down through rows. - map.increment(vals[i]); + for(int i = off, r = 0; i < end; i += nCol, r++) {// jump down through rows. + double value = vals[i]; Review Comment: these modification of inner for loops slow things down, please branch out before the for loop and make two versions, one with scaling one without. ########## src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java: ########## @@ -285,9 +362,21 @@ private static IEncode createFromDense(MatrixBlock m, int col) { // Allocate counts, and iterate once to replace counts with u ids final AMapToData d = MapToFactory.create(nRow, nUnique); + // Iteration 2, make final map - for(int i = off, r = 0; i < end; i += nCol, r++) - d.set(r, map.getId(vals[i])); + for(int i = off, r = 0; i < end; i += nCol, r++) { + double value = vals[i]; Review Comment: make two ########## src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java: ########## @@ -137,6 +138,25 @@ public static Pair<MatrixBlock, CompressionStatistics> compress(MatrixBlock mb, return compress(mb, k, new CompressionSettingsBuilder(), root); } + public static Pair<MatrixBlock, CompressionStatistics> compress(MatrixBlock mb, MatrixBlock sf, int k, WTreeRoot root) { + // Handle only row vectors, as column-wise quantization is not allowed. + // The restriction is handled upstream + LOG.debug("Number of columns of sf " + sf.getNumColumns()); + double[] scaleFactors = new double[sf.getNumRows()]; Review Comment: good solution to extract the double vector, However, in this case if the MatrixBlock is dense, please use sf.getDenseblockValues(), then you do not have to allocate anything. ########## src/main/java/org/apache/sysds/runtime/instructions/InstructionParser.java: ########## @@ -26,6 +26,8 @@ import org.apache.sysds.runtime.instructions.gpu.GPUInstruction.GPUINSTRUCTION_TYPE; import org.apache.sysds.runtime.instructions.spark.SPInstruction.SPType; +import com.esotericsoftware.minlog.Log; Review Comment: correct the logging framework. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@systemds.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org