This is an automated email from the ASF dual-hosted git repository.

leirui pushed a commit to branch research/area-visualization
in repository https://gitbox.apache.org/repos/asf/iotdb.git

commit e6bd68ed6a2419bf2b0059d38d31343ae55082cd
Author: Lei Rui <[email protected]>
AuthorDate: Tue Jan 28 07:28:38 2025 +0800

    rdp
---
 server/pom.xml                                     |   6 +-
 server/sample_minmax-jar-with-dependencies.jar     | Bin 40761381 -> 40768753 
bytes
 ...es.jar => sample_rdp-jar-with-dependencies.jar} | Bin 40761381 -> 40768959 
bytes
 .../java/org/apache/iotdb/db/query/eBUG/Rdp.java   | 286 +++++++++++++++++++++
 .../apache/iotdb/db/query/eBUG/sample_MinMax.java  |   3 +-
 .../eBUG/{sample_MinMax.java => sampled_Rdp.java}  |  48 ++--
 6 files changed, 318 insertions(+), 25 deletions(-)

diff --git a/server/pom.xml b/server/pom.xml
index 1808c5f6791..998fcc8f41f 100644
--- a/server/pom.xml
+++ b/server/pom.xml
@@ -287,13 +287,15 @@
             <plugin>
                 <artifactId>maven-assembly-plugin</artifactId>
                 <configuration>
-                    <finalName>sample_minmax</finalName>
+                    <finalName>sample_rdp</finalName>
+                    <!--                    
<finalName>sample_minmax</finalName>-->
                     <!--                    
<finalName>sample_fsw</finalName>-->
                     <!--                    
<finalName>sample_eBUG</finalName>-->
                     <!--                    
<finalName>sample_BUYdiff</finalName>-->
                     <archive>
                         <manifest>
-                            
<mainClass>org.apache.iotdb.db.query.eBUG.sample_MinMax</mainClass>
+                            
<mainClass>org.apache.iotdb.db.query.eBUG.sampled_Rdp</mainClass>
+                            <!--                            
<mainClass>org.apache.iotdb.db.query.eBUG.sample_MinMax</mainClass>-->
                             <!--                            
<mainClass>org.apache.iotdb.db.query.eBUG.sample_FSW</mainClass>-->
                             <!--                            
<mainClass>org.apache.iotdb.db.query.eBUG.sample_eBUG</mainClass>-->
                             <!--                            
<mainClass>org.apache.iotdb.db.query.eBUG.sample_bottomUpYdiff</mainClass>-->
diff --git a/server/sample_minmax-jar-with-dependencies.jar 
b/server/sample_minmax-jar-with-dependencies.jar
index cd09c6c3bbf..1cf9f21e3f1 100644
Binary files a/server/sample_minmax-jar-with-dependencies.jar and 
b/server/sample_minmax-jar-with-dependencies.jar differ
diff --git a/server/sample_minmax-jar-with-dependencies.jar 
b/server/sample_rdp-jar-with-dependencies.jar
similarity index 99%
copy from server/sample_minmax-jar-with-dependencies.jar
copy to server/sample_rdp-jar-with-dependencies.jar
index cd09c6c3bbf..4f69e1781d5 100644
Binary files a/server/sample_minmax-jar-with-dependencies.jar and 
b/server/sample_rdp-jar-with-dependencies.jar differ
diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Rdp.java 
b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Rdp.java
new file mode 100644
index 00000000000..64cfc12e889
--- /dev/null
+++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Rdp.java
@@ -0,0 +1,286 @@
+package org.apache.iotdb.db.query.eBUG;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.Stack;
+
+public class Rdp {
+  // Ramer–Douglas–Peucker algorithm (RDP)
+  // top-down, for min perpendicular distance
+  // worst case complexity n2
+  // best case complexity nlogn
+
+  public static double point_line_distance(Point point, Point lineStart, Point 
lineEnd) {
+    // 计算点到直线的垂直正交距离
+    // (x0,y0)到直线(经过(x1,y1)、(x2,y2)点的直线)的正交距离
+    double x0 = point.x;
+    double y0 = point.y;
+    double x1 = lineStart.x;
+    double y1 = lineStart.y;
+    double x2 = lineEnd.x;
+    double y2 = lineEnd.y;
+
+    double numerator = Math.abs((x2 - x1) * (y1 - y0) - (x1 - x0) * (y2 - y1));
+    double denominator = Math.sqrt(Math.pow(x2 - x1, 2) + Math.pow(y2 - y1, 
2));
+    return numerator / denominator;
+  }
+
+  //  public static List<Point> reducePoints(List<Point> points, double 
epsilon, Object... kwargs) {
+  //    if (points == null || points.size() < 3) {
+  //      return points;
+  //    }
+  //
+  //    // 找到距离最远的点
+  //    double maxDistance = 0;
+  //    int index = 0;
+  //    Point start = points.get(0);
+  //    Point end = points.get(points.size() - 1);
+  //
+  //    for (int i = 1; i < points.size() - 1; i++) {
+  //      double distance = point_line_distance(points.get(i), start, end);
+  //      if (distance > maxDistance) {
+  //        maxDistance = distance;
+  //        index = i;
+  //      }
+  //    }
+  //
+  //    List<Point> result = new ArrayList<>();
+  //    if (maxDistance > epsilon) {
+  //      List<Point> firstPart = reducePoints(points.subList(0, index + 1), 
epsilon);
+  //      List<Point> secondPart = reducePoints(points.subList(index, 
points.size()), epsilon);
+  //
+  //      // 合并结果
+  //      result.addAll(firstPart.subList(0, firstPart.size() - 1)); // 
避免重复添加中间点
+  //      result.addAll(secondPart);
+  //    } else {
+  //      // 如果最远距离小于阈值,则只保留起点和终点
+  //      result.add(start);
+  //      result.add(end);
+  //    }
+  //
+  //    return result;
+  //  }
+
+  /**
+   * 使用迭代实现 RDP 算法。避免递归深度过大。
+   *
+   * @param points 点集
+   * @param epsilon 简化阈值
+   * @return 简化后的点集
+   */
+  public static List<Point> reducePoints(List<Point> points, double epsilon, 
Object... kwargs) {
+    if (points.size() <= 2) {
+      return points;
+    }
+
+    // 使用栈模拟递归
+    Stack<int[]> stack = new Stack<>();
+    stack.push(new int[] {0, points.size() - 1});
+
+    // 用于标记需要保留的点
+    boolean[] keep = new boolean[points.size()];
+    keep[0] = true;
+    keep[points.size() - 1] = true;
+
+    while (!stack.isEmpty()) {
+      int[] range = stack.pop();
+      int start = range[0];
+      int end = range[1];
+
+      // 找到距离最远的点
+      double maxDistance = 0;
+      int index = start;
+      Point startPoint = points.get(start);
+      Point endPoint = points.get(end);
+
+      for (int i = start + 1; i < end; i++) {
+        double distance = point_line_distance(points.get(i), startPoint, 
endPoint);
+        if (distance > maxDistance) {
+          maxDistance = distance;
+          index = i;
+        }
+      }
+
+      // 如果最远距离大于阈值,则继续分割
+      if (maxDistance > epsilon) {
+        keep[index] = true; // 保留当前点
+        stack.push(new int[] {start, index});
+        stack.push(new int[] {index, end});
+      }
+    }
+
+    // 构建简化后的点集
+    List<Point> result = new ArrayList<>();
+    for (int i = 0; i < points.size(); i++) {
+      if (keep[i]) {
+        result.add(points.get(i));
+      }
+    }
+
+    return result;
+  }
+
+  public static Object[] normalizePoints(List<Point> points) {
+    // 提取 x 和 y 值
+    double[] xValues = points.stream().mapToDouble(p -> p.x).toArray();
+    double[] yValues = points.stream().mapToDouble(p -> p.y).toArray();
+
+    // 计算 x 和 y 的最小值和最大值
+    double xMin = Double.MAX_VALUE, xMax = Double.MIN_VALUE;
+    double yMin = Double.MAX_VALUE, yMax = Double.MIN_VALUE;
+
+    for (double x : xValues) {
+      if (x < xMin) xMin = x;
+      if (x > xMax) xMax = x;
+    }
+    for (double y : yValues) {
+      if (y < yMin) yMin = y;
+      if (y > yMax) yMax = y;
+    }
+
+    // 标准化
+    List<Point> normalizedPoints = new ArrayList<>();
+    for (Point p : points) {
+      double xNormalized = (xMax != xMin) ? (p.x - xMin) / (xMax - xMin) : 0;
+      double yNormalized = (yMax != yMin) ? (p.y - yMin) / (yMax - yMin) : 0;
+      normalizedPoints.add(new Point(xNormalized, yNormalized));
+    }
+
+    return new Object[] {normalizedPoints, xMin, xMax, yMin, yMax};
+  }
+
+  public static List<Point> denormalizePoints(List<Point> points, double[] 
originalScale) {
+    double xMin = originalScale[0];
+    double xMax = originalScale[1];
+    double yMin = originalScale[2];
+    double yMax = originalScale[3];
+
+    List<Point> denormalizedPoints = new ArrayList<>();
+    for (Point p : points) {
+      double xOriginal = p.x * (xMax - xMin) + xMin;
+      double yOriginal = p.y * (yMax - yMin) + yMin;
+      denormalizedPoints.add(new Point(xOriginal, yOriginal));
+    }
+
+    return denormalizedPoints;
+  }
+
+  public static void main(String[] args) throws IOException {
+    Random rand = new Random(10);
+    String input = "D:\\datasets\\regular\\tmp2.csv";
+    boolean hasHeader = true;
+    int timeIdx = 0;
+    int valueIdx = 1;
+    int N = 1000_0000;
+    List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, 
valueIdx, N);
+    //        Polyline polyline = new Polyline();
+    //        for (int i = 0; i < N; i += 1) {
+    //            double v = rand.nextInt(1000);
+    //            polyline.addVertex(new Point(i, v));
+    //        }
+    //        List<Point> points = polyline.getVertices();
+    try (FileWriter writer = new FileWriter("raw.csv")) {
+      // 写入CSV头部
+      writer.append("x,y,z\n");
+
+      // 写入每个点的数据
+      for (Point point : points) {
+        writer.append(point.x + "," + point.y + "," + point.z + "\n");
+      }
+      System.out.println(points.size() + " Data has been written");
+    } catch (IOException e) {
+      System.out.println("Error writing to CSV file: " + e.getMessage());
+    }
+
+    // 标准化
+    Object[] normalizedData = normalizePoints(points);
+    List<Point> normalizedPoints = (List<Point>) normalizedData[0];
+    double xMin = (double) normalizedData[1];
+    double xMax = (double) normalizedData[2];
+    double yMin = (double) normalizedData[3];
+    double yMax = (double) normalizedData[4];
+
+    int m = 10;
+    double tolerantPts = 0;
+    double epsilon = Tool.getParam(normalizedPoints, m, Rdp::reducePoints, 
tolerantPts);
+    //        double epsilon = 0.001;
+
+    long startTime = System.currentTimeMillis();
+    List<Point> sampled_tmp = reducePoints(normalizedPoints, epsilon);
+    long endTime = System.currentTimeMillis();
+    System.out.println("Time taken: " + (endTime - startTime) + "ms");
+
+    // 还原到原始尺度
+    double[] originalScale = {xMin, xMax, yMin, yMax};
+    List<Point> sampled = denormalizePoints(sampled_tmp, originalScale);
+
+    //        for (Point p : sampled) {
+    //            System.out.println(p);
+    //        }
+    System.out.println(sampled.size());
+
+    try (PrintWriter writer = new PrintWriter(new File("output.csv"))) {
+      // 写入字符串
+      for (int i = 0; i < sampled.size(); i++) {
+        writer.println(sampled.get(i).x + "," + sampled.get(i).y);
+      }
+
+    } catch (FileNotFoundException e) {
+      e.printStackTrace();
+    }
+  }
+
+  //    public static void reducePoints(List<Point> points, double epsilon, 
int s, int e,
+  // List<Point> resultList) {
+  //        double dmax = 0;
+  //        int index = 0;
+  //
+  //        final int start = s;
+  //        final int end = e - 1;
+  //        for (int i = start + 1; i < end; i++) {
+  //            // Point
+  //            final double px = list.get(i)[0];
+  //            final double py = list.get(i)[1];
+  //            // Start
+  //            final double vx = list.get(start)[0];
+  //            final double vy = list.get(start)[1];
+  //            // End
+  //            final double wx = list.get(end)[0];
+  //            final double wy = list.get(end)[1];
+  //            final double d = perpendicularDistance(px, py, vx, vy, wx, wy);
+  //            if (d > dmax) {
+  //                index = i;
+  //                dmax = d;
+  //            }
+  //        }
+  //        // If max distance is greater than epsilon, recursively simplify
+  //        if (dmax > epsilon) {
+  //            // Recursive call
+  //            douglasPeucker(list, s, index, epsilon, resultList);
+  //            douglasPeucker(list, index, e, epsilon, resultList);
+  //        } else {
+  //            if ((end - start) > 0) {
+  //                resultList.add(list.get(start));
+  //                resultList.add(list.get(end));
+  //            } else {
+  //                resultList.add(list.get(start));
+  //            }
+  //        }
+  //    }
+
+  //    /**
+  //     * Given a curve composed of line segments find a similar curve with 
fewer points.
+  //     *
+  //     * @param list    List of Double[] points (x,y)
+  //     * @param epsilon Distance dimension
+  //     * @return Similar curve with fewer points
+  //     */
+  //    public static final List<Double[]> douglasPeucker(List<Double[]> list, 
double epsilon) {
+  //        final List<Double[]> resultList = new ArrayList<Double[]>();
+  //        douglasPeucker(list, 0, list.size(), epsilon, resultList);
+  //        return resultList;
+  //    }
+
+}
diff --git 
a/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java 
b/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java
index 767b7f7151e..5d841a62e87 100644
--- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java
+++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java
@@ -49,7 +49,8 @@ public class sample_MinMax {
     List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, 
valueIdx, N);
 
     String outputFile =
-        generateOutputFileName(input, outDir, "-" + bucketType + "-n" + 
points.size() + "-m" + m);
+        generateOutputFileName(
+            input, outDir, "-MinMax-" + bucketType + "-n" + points.size() + 
"-m" + m);
     System.out.println("Output file: " + outputFile); // do not modify this 
hint string log
 
     // 采样
diff --git 
a/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java 
b/server/src/main/java/org/apache/iotdb/db/query/eBUG/sampled_Rdp.java
similarity index 60%
copy from server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java
copy to server/src/main/java/org/apache/iotdb/db/query/eBUG/sampled_Rdp.java
index 767b7f7151e..834f84d09b7 100644
--- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java
+++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/sampled_Rdp.java
@@ -5,13 +5,16 @@ import java.io.FileWriter;
 import java.io.IOException;
 import java.util.List;
 
+import static org.apache.iotdb.db.query.eBUG.Rdp.denormalizePoints;
+import static org.apache.iotdb.db.query.eBUG.Rdp.normalizePoints;
 import static org.apache.iotdb.db.query.eBUG.Tool.generateOutputFileName;
+import static org.apache.iotdb.db.query.eBUG.Tool.getParam;
 
-public class sample_MinMax {
+public class sampled_Rdp {
   public static void main(String[] args) throws IOException {
     if (args.length < 7) {
       System.out.println(
-          "Usage: Please provide arguments: 
inputFilePath,hasHeader,timeIdx,valueIdx,N,m,bucketType,(outDir)");
+          "Usage: Please provide arguments: 
inputFilePath,hasHeader,timeIdx,valueIdx,N,m,tolerantRatio,(outDir)");
     }
     String input = args[0];
     boolean hasHeader = Boolean.parseBoolean(args[1]);
@@ -19,15 +22,7 @@ public class sample_MinMax {
     int valueIdx = Integer.parseInt(args[3]);
     int N = Integer.parseInt(args[4]); // N<=0表示读全部行,N>0表示读最多N行
     int m = Integer.parseInt(args[5]);
-    String bucketTypeStr = args[6];
-    MinMax.fixedBUCKETtype bucketType;
-    if (bucketTypeStr.equals("width")) {
-      bucketType = MinMax.fixedBUCKETtype.width;
-    } else if (bucketTypeStr.equals("frequency")) {
-      bucketType = MinMax.fixedBUCKETtype.frequency;
-    } else {
-      throw new IOException("please input fixed bucket type as 
width/frequency");
-    }
+    double tolerantRatio = Double.parseDouble(args[6]); // 
查找参数epsilon以使得输出采样点数接近m的程度,比如0.01
     String outDir;
     if (args.length > 7) {
       outDir = args[7]; // 表示输出文件保存到指定的文件夹
@@ -42,33 +37,42 @@ public class sample_MinMax {
     System.out.println("Value index: " + valueIdx);
     System.out.println("N: " + N);
     System.out.println("m: " + m);
-    System.out.println("bucketType: " + bucketType);
+    System.out.println("tolerantRatio: " + tolerantRatio);
     System.out.println("outDir: " + outDir);
 
     // 读取原始序列
     List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, 
valueIdx, N);
-
     String outputFile =
-        generateOutputFileName(input, outDir, "-" + bucketType + "-n" + 
points.size() + "-m" + m);
-    System.out.println("Output file: " + outputFile); // do not modify this 
hint string log
+        generateOutputFileName(input, outDir, "-Rdp" + "-n" + points.size() + 
"-m" + m);
+    System.out.println("Output file: " + outputFile);
+
+    // 标准化,避免非常大的时间戳主导perpendicular distance
+    Object[] normalizedData = normalizePoints(points);
+    List<Point> normalizedPoints = (List<Point>) normalizedData[0];
+    double xMin = (double) normalizedData[1];
+    double xMax = (double) normalizedData[2];
+    double yMin = (double) normalizedData[3];
+    double yMax = (double) normalizedData[4];
+
+    // 查找epsilon参数使得采样点数接近m
+    double epsilon = getParam(normalizedPoints, m, Rdp::reducePoints, m * 
tolerantRatio);
 
     // 采样
-    List<Point> results;
     long startTime = System.currentTimeMillis();
-    if (bucketType == MinMax.fixedBUCKETtype.width) {
-      results = MinMax.reducePoints_equalWidthBucket(points, m, false);
-    } else {
-      results = MinMax.reducePoints_equalFrequencyBucket(points, m, false);
-    }
+    List<Point> results_tmp = Rdp.reducePoints(normalizedPoints, epsilon);
     long endTime = System.currentTimeMillis();
 
+    // 还原到原始尺度
+    double[] originalScale = {xMin, xMax, yMin, yMax};
+    List<Point> results = denormalizePoints(results_tmp, originalScale);
+
     System.out.println("result point number: " + results.size());
     System.out.println(
         "n="
             + points.size()
             + ", m="
             + m
-            + ", Time taken to reduce points: " // do not modify this hint 
string log
+            + ", Time taken to reduce points: "
             + (endTime - startTime)
             + "ms");
 

Reply via email to