This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new ea86d4b93d [MINOR] Cleanup transform encoders (separated federated 
utils)
ea86d4b93d is described below

commit ea86d4b93d1847320c62a5421143fcfe3ebcc2f1
Author: OlgaOvcharenko <[email protected]>
AuthorDate: Sun Jun 5 23:48:55 2022 +0200

    [MINOR] Cleanup transform encoders (separated federated utils)
    
    Closes #1626.
---
 ...tiReturnParameterizedBuiltinFEDInstruction.java |  2 +-
 .../runtime/transform/encode/ColumnEncoderBin.java | 50 +++++++---------------
 2 files changed, 17 insertions(+), 35 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/instructions/fed/MultiReturnParameterizedBuiltinFEDInstruction.java
 
b/src/main/java/org/apache/sysds/runtime/instructions/fed/MultiReturnParameterizedBuiltinFEDInstruction.java
index 00e7b6fafc..da51164012 100644
--- 
a/src/main/java/org/apache/sysds/runtime/instructions/fed/MultiReturnParameterizedBuiltinFEDInstruction.java
+++ 
b/src/main/java/org/apache/sysds/runtime/instructions/fed/MultiReturnParameterizedBuiltinFEDInstruction.java
@@ -239,7 +239,7 @@ public class MultiReturnParameterizedBuiltinFEDInstruction 
extends ComputationFE
                                for(Encoder compositeEncoder : 
((ColumnEncoderComposite) enc).getEncoders())
                                        if(compositeEncoder instanceof 
ColumnEncoderBin && ((ColumnEncoderBin) compositeEncoder)
                                                .getBinMethod() == 
ColumnEncoderBin.BinMethod.EQUI_HEIGHT)
-                                               ((ColumnEncoderBin) 
compositeEncoder).buildEquiHeight(equiHeightBinsPerColumn
+                                               ((ColumnEncoderBin) 
compositeEncoder).build(null, equiHeightBinsPerColumn
                                                        
.get(((ColumnEncoderBin) compositeEncoder).getColID() - 1));
                                ((ColumnEncoderComposite) 
enc).updateAllDCEncoders();
                        }
diff --git 
a/src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderBin.java 
b/src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderBin.java
index 2f5f6d4297..4f8914f5cb 100644
--- 
a/src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderBin.java
+++ 
b/src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderBin.java
@@ -112,26 +112,13 @@ public class ColumnEncoderBin extends ColumnEncoder {
                }
                else if(_binMethod == BinMethod.EQUI_HEIGHT) {
                        double[] sortedCol = prepareDataForEqualHeightBins(in, 
_colID, 0, -1);
-                       computeEqualHeightBins(sortedCol);
+                       computeEqualHeightBins(sortedCol, false);
                }
 
                if(DMLScript.STATISTICS)
                        
TransformStatistics.incBinningBuildTime(System.nanoTime()-t0);
        }
 
-       //TODO move federated things outside the location-agnostic encoder,
-       // and/or generalize to fit the existing mergeAt and similar methods
-       public void buildEquiHeight(double[] equiHeightMaxs) {
-               long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
-               if(!isApplicable())
-                       return;
-               if(_binMethod == BinMethod.EQUI_HEIGHT)
-                       computeFedEqualHeightBins(equiHeightMaxs);
-
-               if(DMLScript.STATISTICS)
-                       
TransformStatistics.incBinningBuildTime(System.nanoTime()-t0);
-       }
-
        public void build(CacheBlock in, double[] equiHeightMaxs) {
                long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
                if(!isApplicable())
@@ -141,7 +128,7 @@ public class ColumnEncoderBin extends ColumnEncoder {
                        computeBins(pairMinMax[0], pairMinMax[1]);
                }
                else if(_binMethod == BinMethod.EQUI_HEIGHT) {
-                       computeFedEqualHeightBins(equiHeightMaxs);
+                       computeEqualHeightBins(equiHeightMaxs, true);
                }
 
                if(DMLScript.STATISTICS)
@@ -264,30 +251,25 @@ public class ColumnEncoderBin extends ColumnEncoder {
                }
        }
 
-       private void computeEqualHeightBins(double[] sortedCol) {
+       private void computeEqualHeightBins(double[] sortedCol, boolean 
isSorted) {
                if(_binMins == null || _binMaxs == null) {
                        _binMins = new double[_numBin];
                        _binMaxs = new double[_numBin];
                }
-               int n = sortedCol.length;
-               for(int i = 0; i < _numBin; i++) {
-                       double pos = n * (i + 1d) / _numBin;
-                       _binMaxs[i] = (pos % 1 == 0) ? // pos is integer
-                               sortedCol[(int) pos-1] :
-                               sortedCol[(int) Math.floor(pos)];
-               }
-               _binMaxs[_numBin-1] = sortedCol[n-1];
-               _binMins[0] = sortedCol[0];
-               System.arraycopy(_binMaxs, 0, _binMins, 1, _numBin - 1);
-       }
+               if(!isSorted) {
+                       int n = sortedCol.length;
+                       for(int i = 0; i < _numBin; i++) {
+                               double pos = n * (i + 1d) / _numBin;
+                               _binMaxs[i] = (pos % 1 == 0) ? // pos is integer
+                                       sortedCol[(int) pos - 1] : 
sortedCol[(int) Math.floor(pos)];
+                       }
+                       _binMaxs[_numBin - 1] = sortedCol[n - 1];
 
-       private void computeFedEqualHeightBins(double[] binMaxs) {
-               if(_binMins == null || _binMaxs == null) {
-                       _binMins = new double[_numBin];
-                       _binMaxs = new double[_numBin];
+               } else {
+                       System.arraycopy(sortedCol, 1, _binMaxs, 0, _numBin);
                }
-               System.arraycopy(binMaxs, 1, _binMaxs, 0, _numBin);
-               _binMins[0] = binMaxs[0];
+
+               _binMins[0] = sortedCol[0];
                System.arraycopy(_binMaxs, 0, _binMins, 1, _numBin - 1);
        }
 
@@ -539,7 +521,7 @@ public class ColumnEncoderBin extends ColumnEncoder {
                                // TODO: Derive bin boundaries from partial 
aggregates, avoiding
                                // materializing the sorted arrays (e.g. 
federated quantile)
                                double[] sortedRes = 
mergeKSortedArrays(allParts);
-                               _encoder.computeEqualHeightBins(sortedRes);
+                               _encoder.computeEqualHeightBins(sortedRes, 
false);
                        }
 
                        if(DMLScript.STATISTICS)

Reply via email to