This is an automated email from the ASF dual-hosted git repository.

yuyuankang pushed a commit to branch research/auto-aligned
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/research/auto-aligned by this 
push:
     new a180a71d6c add column-groups storage with exact and approximate 
evaluations (#5599)
a180a71d6c is described below

commit a180a71d6cb924d1f84b0fba6d776e1fcd9b4fa6
Author: iotdbColumnGroup <[email protected]>
AuthorDate: Tue Apr 19 19:56:27 2022 +0800

    add column-groups storage with exact and approximate evaluations (#5599)
---
 .../flush/ApproximateFlushGroupingEngine.java      | 717 +++++++++++++++++++++
 1 file changed, 717 insertions(+)

diff --git 
a/server/src/main/java/org/apache/iotdb/db/engine/flush/ApproximateFlushGroupingEngine.java
 
b/server/src/main/java/org/apache/iotdb/db/engine/flush/ApproximateFlushGroupingEngine.java
new file mode 100644
index 0000000000..f0831fbe9a
--- /dev/null
+++ 
b/server/src/main/java/org/apache/iotdb/db/engine/flush/ApproximateFlushGroupingEngine.java
@@ -0,0 +1,717 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.engine.flush;
+
+import org.apache.iotdb.db.utils.datastructure.AlignedTVList;
+import org.apache.iotdb.tsfile.utils.BitMap;
+import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import static org.apache.iotdb.db.rescon.PrimitiveArrayManager.ARRAY_SIZE;
+
+public class ApproximateFlushGroupingEngine {
+  private static final Logger LOGGER = 
LoggerFactory.getLogger(MemTableFlushTask.class);
+
+  public static class ColumnGroupAppr {
+    public ArrayList<Integer> columns;
+    public int maxGain = -1;
+    public int maxGainPosition = -1;
+    public long lastTimestampIdx;
+    public long startTimestampIdx;
+    public int timeSeriesLength; // number of exist timestamps, i.e., zeros in 
bitmap
+    public long interval;
+    public ArrayList<BitMap> bitmap;
+    public int size; // total length of the bitmap
+    public ArrayList<Integer> gains;
+    public ArrayList<Feature> features;
+    public ArrayList<Long> rangeMap;
+    public ArrayList<ArrayList<Integer>> rangeMapCols;
+
+    public ColumnGroupAppr(
+        ArrayList<Integer> columns,
+        int maxGain,
+        ArrayList<BitMap> bitmap,
+        int size,
+        int GroupNum,
+        AlignedTVList dataList) {
+      this.columns = columns;
+      this.maxGain = maxGain;
+      this.bitmap = bitmap;
+      this.size = size;
+      this.gains = new ArrayList<>(Collections.nCopies(GroupNum, -1));
+      this.updateTimestampInfo(dataList);
+    }
+
+    public ColumnGroupAppr(
+        ArrayList<Integer> columns,
+        int maxGain,
+        int timeSeriesLength,
+        int GroupNum,
+        ArrayList<Feature> features,
+        ArrayList<Long> rangeMap,
+        ArrayList<ArrayList<Integer>> rangeMapCols) {
+      this.columns = columns;
+      this.maxGain = maxGain;
+      this.timeSeriesLength = timeSeriesLength;
+      this.gains = new ArrayList<>(Collections.nCopies(GroupNum, -1));
+      this.features = features;
+      this.rangeMap = rangeMap;
+      this.rangeMapCols = rangeMapCols;
+    }
+
+    public void updateTimestampInfo(AlignedTVList dataList) {
+      int onesCount = 0;
+      this.startTimestampIdx = -1;
+      this.lastTimestampIdx = -1;
+      ArrayList<Long> timestamps = new ArrayList<>();
+      int bitmapSize = ARRAY_SIZE;
+      int intervalTimes = 1000;
+      if (this.bitmap == null) {
+        for (int i = 0; i < size; i++) {
+          timestamps.add(dataList.getTime(i) / intervalTimes);
+        }
+      } else {
+        for (int i = 0; i < this.bitmap.size(); i++) {
+          if (this.bitmap.get(i) == null) {
+            if (this.startTimestampIdx == -1) {
+              this.startTimestampIdx = i * bitmapSize;
+            }
+            if (i * bitmapSize + bitmapSize - 1 < size) {
+              this.lastTimestampIdx = i * bitmapSize + bitmapSize - 1;
+              for (int k = i * bitmapSize; k <= i * bitmapSize + bitmapSize - 
1; k++) {
+                timestamps.add(dataList.getTime(k) / intervalTimes);
+              }
+            } else {
+              if (i * bitmapSize - 1 < size) {
+                this.lastTimestampIdx = size - 1;
+                for (int k = i * bitmapSize; k <= size - 1; k++) {
+                  timestamps.add(dataList.getTime(k) / intervalTimes);
+                }
+              }
+            }
+          } else {
+            BitMap bm = this.bitmap.get(i);
+            for (int j = 0; j < bm.getSize() / Byte.SIZE; j++) {
+              int onesNumber = countOnesForByte(bm.getByteArray()[j]);
+              if (this.startTimestampIdx == -1 && onesNumber != 8) {
+                this.startTimestampIdx = i * bitmapSize + j * 8 + 
firstZero(bm.getByteArray()[j]);
+              }
+              if (onesNumber != 8) {
+                if (i * bitmapSize + j * 8 + lastZero(bm.getByteArray()[j]) < 
size) {
+                  this.lastTimestampIdx = i * bitmapSize + j * 8 + 
lastZero(bm.getByteArray()[j]);
+                  for (int k = i * bitmapSize + j * 8 + 
firstZero(bm.getByteArray()[j]);
+                      (k < size) && (k <= i * bitmapSize + j * 8 + 
lastZero(bm.getByteArray()[j]));
+                      k++) {
+                    if (!bm.isMarked(k - i * bitmapSize)) {
+                      timestamps.add(dataList.getTime(k) / intervalTimes);
+                    }
+                  }
+                }
+              }
+              onesCount += countOnesForByte(bm.getByteArray()[j]);
+            }
+          }
+        }
+      }
+      int intervalGran = 1;
+      long intervalSum = 0;
+      int intervalCount = 0;
+      int intervalMax = 1000;
+
+      for (int i = 1; i < timestamps.size(); i++) {
+        long interval_ = timestamps.get(i) - timestamps.get(i - 1);
+        if (interval_ < intervalMax) {
+          intervalSum += interval_;
+          intervalCount++;
+        }
+      }
+      long interval = intervalSum / intervalCount;
+      interval = interval / intervalGran * intervalGran;
+
+      // compute n
+      this.timeSeriesLength = this.size - onesCount;
+      // compute start
+      long start = timestamps.get(0);
+      double sigma = 0;
+      double sigmaSum = 0;
+      ArrayList<Double> sigmaList = new ArrayList<>();
+      long offset = 0;
+      for (int i = 0; i < timestamps.size(); i++) {
+        sigma = Math.abs(timestamps.get(i) - start - i * interval - offset);
+        if (sigma > 10 * interval) {
+          sigma = Math.abs(timestamps.get(i) - start - i * interval) % 
interval;
+          offset = Math.abs(timestamps.get(i) - start - i * interval) / 
interval * interval;
+        }
+        sigmaSum += sigma;
+      }
+      sigma = sigmaSum / timestamps.size();
+
+      this.features = new ArrayList<>();
+      this.features.add(new Feature(interval, this.timeSeriesLength, start, 
sigma));
+
+      this.rangeMap = new ArrayList<>();
+      this.rangeMapCols = new ArrayList<>();
+      rangeMap.add(start);
+      rangeMap.add(start + interval * (this.timeSeriesLength - 1));
+      ArrayList<Integer> overlapGroup = new ArrayList<>();
+      overlapGroup.add(this.columns.get(0));
+      rangeMapCols.add(overlapGroup);
+    }
+
+    public static ColumnGroupAppr mergeColumnGroup(ColumnGroupAppr g1, 
ColumnGroupAppr g2) {
+      // merge bitmap
+      ArrayList<BitMap> newBitmap = new ArrayList<>();
+
+      // merge columns, features;
+
+      ArrayList<Integer> newColumns = (ArrayList<Integer>) g1.columns.clone();
+      newColumns.addAll(g2.columns);
+      ArrayList<Feature> features = (ArrayList<Feature>) g1.features.clone();
+      features.addAll(g2.features);
+
+      int overlap = estimateOverlap(g1, g2);
+      int timeSeriesLength = g1.timeSeriesLength + g2.timeSeriesLength - 
overlap;
+      int GroupNum = g1.gains.size() - 1;
+
+      // merge rangeMap
+      ArrayList<Long> map1;
+      ArrayList<Long> map2;
+      ArrayList<ArrayList<Integer>> mapCols1;
+      ArrayList<ArrayList<Integer>> mapCols2;
+      ArrayList<Long> newMap = new ArrayList<>();
+      ArrayList<ArrayList<Integer>> newMapCols = new ArrayList<>();
+
+      if (g1.rangeMap.get(0) < g2.rangeMap.get(0)) {
+        map1 = g1.rangeMap;
+        map2 = g2.rangeMap;
+        mapCols1 = g1.rangeMapCols;
+        mapCols2 = g2.rangeMapCols;
+      } else {
+        map1 = g2.rangeMap;
+        map2 = g1.rangeMap;
+        mapCols1 = g2.rangeMapCols;
+        mapCols2 = g1.rangeMapCols;
+      }
+
+      int i = 0;
+      int j = 0;
+
+      int lastPoint = map1.get(0).equals(map2.get(0)) ? 3 : 1; // 1: map1, 
2:map2, 3: equal
+      newMap.add(map1.get(i));
+
+      while (true) {
+        if (lastPoint == 1) {
+          i += 1;
+          if (i >= map1.size()) {
+            break;
+          }
+          if (map1.get(i) < map2.get(j)) {
+            newMap.add(map1.get(i));
+            newMapCols.add(mapCols1.get(i - 1));
+          } else if (map1.get(i).equals(map2.get(j))) {
+            lastPoint = 3;
+            newMap.add(map1.get(i));
+            if (j == 0) {
+              newMapCols.add(mapCols1.get(i - 1));
+            } else {
+              ArrayList<Integer> tmpCols = (ArrayList<Integer>) mapCols1.get(i 
- 1).clone();
+              tmpCols.addAll(mapCols2.get(j - 1));
+              newMapCols.add(tmpCols);
+            }
+          } else {
+            lastPoint = 2;
+            newMap.add(map2.get(j));
+            if (j == 0) {
+              newMapCols.add(mapCols1.get(i - 1));
+            } else {
+              ArrayList<Integer> tmpCols = (ArrayList<Integer>) mapCols1.get(i 
- 1).clone();
+              tmpCols.addAll(mapCols2.get(j - 1));
+              newMapCols.add(tmpCols);
+            }
+          }
+
+        } else if (lastPoint == 2) {
+          j += 1;
+          if (j >= map2.size()) {
+            break;
+          }
+          if (map2.get(j) < map1.get(i)) {
+            newMap.add(map2.get(j));
+            newMapCols.add(mapCols2.get(j - 1));
+          } else if (map1.get(i).equals(map2.get(j))) {
+            lastPoint = 3;
+            newMap.add(map2.get(j));
+            if (i == 0) {
+              newMapCols.add(mapCols2.get(j - 1));
+            } else {
+              ArrayList<Integer> tmpCols = (ArrayList<Integer>) mapCols2.get(j 
- 1).clone();
+              tmpCols.addAll(mapCols1.get(i - 1));
+              newMapCols.add(tmpCols);
+            }
+          } else {
+            lastPoint = 1;
+            newMap.add(map1.get(i));
+            if (i == 0) {
+              newMapCols.add(mapCols2.get(j - 1));
+            } else {
+              ArrayList<Integer> tmpCols = (ArrayList<Integer>) mapCols2.get(j 
- 1).clone();
+              tmpCols.addAll(mapCols1.get(i - 1));
+              newMapCols.add(tmpCols);
+            }
+          }
+        } else {
+          // lastPoint == 3
+          i += 1;
+          j += 1;
+          if ((i >= map1.size()) || (j >= map2.size())) {
+            break;
+          }
+          if (map1.get(i) < map2.get(j)) {
+            lastPoint = 1;
+            newMap.add(map1.get(i));
+          } else if (map1.get(i) == map2.get(j)) {
+            newMap.add(map1.get(i));
+          } else {
+            lastPoint = 3;
+            newMap.add(map2.get(j));
+          }
+          ArrayList<Integer> tmpCols = (ArrayList<Integer>) mapCols1.get(i - 
1).clone();
+          tmpCols.addAll(mapCols2.get(j - 1));
+          newMapCols.add(tmpCols);
+        }
+      }
+
+      if (i >= map1.size()) {
+        if (lastPoint == 1) {
+          while (j < map2.size()) {
+            newMap.add(map2.get(j));
+            newMapCols.add(mapCols2.get(j - 1));
+            j += 1;
+          }
+        } else {
+          j += 1;
+          while (j < map2.size()) {
+            newMap.add(map2.get(j));
+            newMapCols.add(mapCols2.get(j - 1));
+            j += 1;
+          }
+        }
+      } else if (j >= map2.size()) {
+        if (lastPoint == 2) {
+          while (i < map1.size()) {
+            newMap.add(map1.get(i));
+            newMapCols.add(mapCols1.get(i - 1));
+            i += 1;
+          }
+        } else {
+          i += 1;
+          while (i < map1.size()) {
+            newMap.add(map1.get(i));
+            newMapCols.add(mapCols1.get(i - 1));
+            i += 1;
+          }
+        }
+      }
+
+      return new ColumnGroupAppr(
+          newColumns, -1, timeSeriesLength, GroupNum, features, newMap, 
newMapCols);
+    }
+  }
+
+  /** autoaligned grouping method * */
+  public static void grouping(AlignedTVList dataList, List<IMeasurementSchema> 
schemaList) {
+    try {
+      if (dataList.getBitMaps() == null) {
+        return;
+      }
+
+      int timeSeriesNumber = dataList.getValues().size();
+      int size = dataList.rowCount();
+      ArrayList<ColumnGroupAppr> columnGroups = new ArrayList<>();
+
+      if (timeSeriesNumber == 0) {
+        return;
+      }
+      if (timeSeriesNumber == 1) {
+        return;
+      }
+
+      // init posMap, maxGain, groupingResult
+      for (int i = 0; i < timeSeriesNumber; i++) {
+        ArrayList<Integer> newGroup = new ArrayList<>();
+        newGroup.add(i);
+        ArrayList<BitMap> bitmap = (ArrayList<BitMap>) 
dataList.getBitMaps().get(i);
+        if (bitmap == null) {
+          bitmap = new ArrayList<>();
+          for (int j = 0; j < dataList.getBitMaps().size(); j++) {
+            if (dataList.getBitMaps().get(j) != null) {
+              int sizeBitmap = dataList.getBitMaps().get(j).size();
+              for (int k = 0; k < sizeBitmap; k++) {
+                bitmap.add(null);
+              }
+            }
+          }
+        }
+        columnGroups.add(
+            new ColumnGroupAppr(newGroup, -1, bitmap, size, timeSeriesNumber, 
dataList));
+      }
+
+      // init gain matrix
+      for (int i = 0; i < timeSeriesNumber; i++) {
+        for (int j = i + 1; j < timeSeriesNumber; j++) {
+          int gain = computeGain(columnGroups.get(i), columnGroups.get(j), 
size);
+          columnGroups.get(i).gains.set(j, gain);
+          columnGroups.get(j).gains.set(i, gain);
+          if (columnGroups.get(i).maxGain < gain) {
+            columnGroups.get(i).maxGain = gain;
+            columnGroups.get(i).maxGainPosition = j;
+          }
+          if (columnGroups.get(j).maxGain < gain) {
+            columnGroups.get(j).maxGain = gain;
+            columnGroups.get(j).maxGainPosition = i;
+          }
+        }
+      }
+
+      while (true) {
+        /** merge * */
+
+        // find the main gain
+        int maxGainOfAll = -1;
+        int maxGainOfAllPos = -1;
+        for (int i = 0; i < columnGroups.size(); i++) {
+          if (columnGroups.get(i).maxGain > maxGainOfAll) {
+            maxGainOfAll = columnGroups.get(i).maxGain;
+            maxGainOfAllPos = i;
+          }
+        }
+        if (maxGainOfAll <= 0) {
+          break;
+        }
+
+        // merge the group, create a new group
+        int source = maxGainOfAllPos;
+        int target = columnGroups.get(source).maxGainPosition;
+
+        ColumnGroupAppr newGroup =
+            ColumnGroupAppr.mergeColumnGroup(columnGroups.get(source), 
columnGroups.get(target));
+
+        // remove the old groups
+        columnGroups.remove(target);
+        columnGroups.remove(source);
+
+        // load target into source
+        columnGroups.add(source, newGroup);
+
+        /** update * */
+
+        // update gains
+        // remove the target, and update the source
+        for (int i = 0; i < columnGroups.size(); i++) {
+          if (i != source) {
+            ColumnGroupAppr g = columnGroups.get(i);
+            ColumnGroupAppr gSource = columnGroups.get(source);
+            g.gains.remove(target);
+            int gain = computeGain(g, gSource, size);
+            g.gains.set(source, gain);
+            // update the maxgain in i
+            if ((g.maxGainPosition != source) && (g.maxGainPosition != 
target)) {
+              if (gain > g.maxGain) {
+                g.maxGain = gain;
+                g.maxGainPosition = source;
+              } else {
+                if (target < g.maxGainPosition) {
+                  g.maxGainPosition -= 1;
+                }
+              }
+            } else {
+              g.maxGain = gain;
+              g.maxGainPosition = source;
+              for (int j = 0; j < g.gains.size(); j++) {
+                if (g.gains.get(j) > g.maxGain) {
+                  g.maxGain = g.gains.get(j);
+                  g.maxGainPosition = j;
+                }
+              }
+            }
+            // update the maxgain and data in source
+            gSource.gains.set(i, gain);
+            if (gain > gSource.maxGain) {
+              gSource.maxGain = gain;
+              gSource.maxGainPosition = i;
+            }
+          }
+        }
+      }
+    } catch (Exception e) {
+      LOGGER.error(e.getMessage());
+    }
+  }
+
+  public static double computeProbability(Feature f1, Feature f2) {
+    int lambda = 3;
+    int tau = 10;
+    double prob = 0;
+    long lcmInterval = lcm(f1.epsilon, f2.epsilon);
+    long scenarios;
+    if (f1.epsilon > f2.epsilon) {
+      scenarios = lcmInterval / f1.epsilon;
+      for (int i = 0; i < scenarios; i++) {
+        int delta =
+            ((int) (f1.epsilon - f2.epsilon) * i + (int) f1.start - (int) 
f2.start)
+                % (int) f2.epsilon;
+        if (f1.sigma == 0 && f2.sigma == 0) {
+          if (delta == 0) {
+            prob += 1;
+          } else {
+            prob += 0;
+          }
+        } else {
+          double z1 =
+              ((lambda * tau) - delta) / Math.sqrt(f1.sigma * f1.sigma + 
f2.sigma * f2.sigma);
+          double z2 =
+              (-(lambda * tau) - delta) / Math.sqrt(f1.sigma * f1.sigma + 
f2.sigma * f2.sigma);
+          prob += NormSDist(z1) - NormSDist(z2);
+        }
+      }
+    } else {
+      scenarios = lcmInterval / f2.epsilon;
+      for (int i = 0; i < scenarios; i++) {
+        int delta =
+            ((int) (f2.epsilon - f1.epsilon) * i + (int) f2.start - (int) 
f1.start)
+                % (int) f1.epsilon;
+        if (f1.sigma == 0 && f2.sigma == 0) {
+          if (delta == 0) {
+            prob += 1;
+          } else {
+            prob += 0;
+          }
+        } else {
+          double z1 =
+              ((lambda * tau) - delta) / Math.sqrt(f1.sigma * f1.sigma + 
f2.sigma * f2.sigma);
+          double z2 =
+              (-(lambda * tau) - delta) / Math.sqrt(f1.sigma * f1.sigma + 
f2.sigma * f2.sigma);
+          prob += NormSDist(z1) - NormSDist(z2);
+        }
+      }
+    }
+    prob /= scenarios;
+    return prob;
+  }
+
+  public static int estimateOverlap(ColumnGroupAppr g1, ColumnGroupAppr g2) {
+    ArrayList<Double> overlapList = new ArrayList<>();
+
+    for (int i = 0; i < g2.columns.size(); i++) {
+      Feature f2 = g2.features.get(i);
+      long start2 = f2.start;
+      long end2 = f2.start + f2.epsilon * (f2.n - 1);
+      double overlapTmp = 0;
+
+      int t = 0;
+
+      while ((t < g1.rangeMap.size() - 1) && (g1.rangeMap.get(t) < start2)) {
+        t += 1;
+      }
+      if (t == g1.rangeMap.size() - 1) {
+        overlapList.add(0.0);
+        continue;
+      } else {
+        if ((t > 0) && (g1.rangeMap.get(t - 1) < start2)) {
+          int n2Tmp = (int) ((start2 - g1.rangeMap.get(t - 1)) / f2.epsilon);
+          double prob = 1;
+          for (int col : g1.rangeMapCols.get(t - 1)) {
+            for (int k = 0; k < g1.columns.size(); k++) {
+              if (g1.columns.get(k) == col) {
+                Feature f1 = g1.features.get(k);
+                prob *= (1 - computeProbability(f1, f2));
+                break;
+              }
+            }
+          }
+          prob -= 1;
+          overlapTmp += prob * n2Tmp;
+        }
+      }
+
+      if (g1.rangeMap.get(t) >= start2) {
+        int j = t;
+        while ((j < g1.rangeMap.size() - 1) && (g1.rangeMap.get(j) < end2)) {
+          j += 1;
+          int n2Tmp;
+          if (g1.rangeMap.get(j) < end2) {
+            n2Tmp = (int) ((g1.rangeMap.get(j) - g1.rangeMap.get(j - 1)) / 
f2.epsilon);
+          } else {
+            n2Tmp = (int) ((end2 - g1.rangeMap.get(j - 1)) / f2.epsilon);
+          }
+          double prob = 1;
+          for (int col : g1.rangeMapCols.get(j - 1)) {
+            for (int k = 0; k < g1.columns.size(); k++) {
+              if (g1.columns.get(k) == col) {
+                Feature f1 = g1.features.get(k);
+                prob *= (1 - computeProbability(f1, f2));
+                break;
+              }
+            }
+          }
+          prob = 1 - prob;
+          overlapTmp += prob * n2Tmp;
+        }
+      }
+      overlapList.add(overlapTmp);
+    }
+
+    double sumOverlap = 0;
+    for (double overlap : overlapList) {
+      sumOverlap += overlap;
+    }
+    int sumN = 0;
+    for (Feature f : g2.features) {
+      sumN += f.n;
+    }
+
+    int overlapEstimation = (int) ((sumOverlap / sumN) * g2.timeSeriesLength);
+    return overlapEstimation;
+  }
+
+  public static int computeGain(ColumnGroupAppr g1, ColumnGroupAppr g2, int 
size) {
+    int overlap = estimateOverlap(g1, g2);
+    //    int overlap = computeOverlap(g1.bitmap, g2.bitmap, size);
+    if (g1.columns.size() == 1 && g2.columns.size() == 1) {
+      // col - col
+      int gain = overlap * Long.SIZE - 2 * (g1.timeSeriesLength + 
g2.timeSeriesLength - overlap);
+      return gain;
+    }
+    if (g1.columns.size() == 1) {
+      // col - group
+      int m_s_a = g1.timeSeriesLength;
+      int n_g_a = g2.columns.size();
+      int m_g_a = g2.timeSeriesLength;
+      int gain = overlap * Long.SIZE + n_g_a * m_g_a - (n_g_a + 1) * (m_s_a + 
m_g_a - overlap);
+      return gain;
+    }
+    if (g2.columns.size() == 1) {
+      // group - col
+      int m_s_a = g2.timeSeriesLength;
+      int n_g_a = g1.columns.size();
+      int m_g_a = g1.timeSeriesLength;
+      int gain = overlap * Long.SIZE + n_g_a * m_g_a - (n_g_a + 1) * (m_s_a + 
m_g_a - overlap);
+      return gain;
+    }
+
+    // group - group
+    int n_g_a = g1.columns.size();
+    int m_g_a = g1.timeSeriesLength;
+    int n_g_b = g2.columns.size();
+    int m_g_b = g2.timeSeriesLength;
+
+    int gain = overlap * Long.SIZE + (n_g_a + n_g_b) * overlap - n_g_a * m_g_b 
- n_g_b * m_g_a;
+    return gain;
+  }
+
+  /** belows are some util functions* */
+  public static class Feature {
+    public long epsilon;
+    public int n;
+    public long start;
+    public double sigma;
+
+    public Feature(long epsilon_, int n_, long start_, double sigma_) {
+      this.epsilon = epsilon_;
+      this.n = n_;
+      this.start = start_;
+      this.sigma = sigma_;
+    }
+  }
+
+  public static double NormSDist(double z) {
+    // compute approximate normal distribution cdf F(z)
+    if (z > 6) return 1;
+    if (z < -6) return 0;
+    double gamma = 0.231641900,
+        a1 = 0.319381530,
+        a2 = -0.356563782,
+        a3 = 1.781477973,
+        a4 = -1.821255978,
+        a5 = 1.330274429;
+    double x = Math.abs(z);
+    double t = 1 / (1 + gamma * x);
+    double n =
+        1
+            - (1 / (Math.sqrt(2 * Math.PI)) * Math.exp(-z * z / 2))
+                * (a1 * t
+                    + a2 * Math.pow(t, 2)
+                    + a3 * Math.pow(t, 3)
+                    + a4 * Math.pow(t, 4)
+                    + a5 * Math.pow(t, 5));
+    if (z < 0) return 1.0 - n;
+    return n;
+  }
+
+  public static long lcm(long number1, long number2) {
+    if (number1 == 0 || number2 == 0) {
+      return 0;
+    }
+    long absNumber1 = Math.abs(number1);
+    long absNumber2 = Math.abs(number2);
+    long absHigherNumber = Math.max(absNumber1, absNumber2);
+    long absLowerNumber = Math.min(absNumber1, absNumber2);
+    long lcm = absHigherNumber;
+    while (lcm % absLowerNumber != 0) {
+      lcm += absHigherNumber;
+    }
+    return lcm;
+  }
+
+  public static int countOnesForByte(byte x) {
+    return ((x >> 7) & 1)
+        + ((x >> 6) & 1)
+        + ((x >> 5) & 1)
+        + ((x >> 4) & 1)
+        + ((x >> 3) & 1)
+        + ((x >> 2) & 1)
+        + ((x >> 1) & 1)
+        + (x & 1);
+  }
+
+  public static int firstZero(byte x) {
+    for (int i = 0; i <= 7; i++) {
+      if (((x >> i) & 1) == 0) {
+        return i;
+      }
+    }
+    return -1;
+  }
+
+  public static int lastZero(byte x) {
+    for (int i = 7; i >= 0; i--) {
+      if (((x >> i) & 1) == 0) {
+        return i;
+      }
+    }
+    return -1;
+  }
+}

Reply via email to