This is an automated email from the ASF dual-hosted git repository. leirui pushed a commit to branch research/area-visualization in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 5d649f12ea6b4d480cb93b3fbbd1a62ed830f783 Author: Lei Rui <[email protected]> AuthorDate: Tue Jan 28 02:11:14 2025 +0800 bottomUpYdiff --- server/pom.xml | 4 +- ...ar => sample_BUYdiff-jar-with-dependencies.jar} | Bin 38716084 -> 38729082 bytes server/sample_eBUG-jar-with-dependencies.jar | Bin 38716020 -> 38729070 bytes server/sample_fsw-jar-with-dependencies.jar | Bin 38715995 -> 38729071 bytes .../apache/iotdb/db/query/eBUG/BottomUpYdiff.java | 245 +++++++ .../java/org/apache/iotdb/db/query/eBUG/DNSL1.java | 154 ++--- .../java/org/apache/iotdb/db/query/eBUG/DP.java | 513 +++++++------- .../java/org/apache/iotdb/db/query/eBUG/SWAB.java | 428 ++++++------ .../org/apache/iotdb/db/query/eBUG/Test10.java | 95 ++- .../java/org/apache/iotdb/db/query/eBUG/Tool.java | 754 ++++++++++----------- ...e_bottomUpL2.java => sample_bottomUpYdiff.java} | 31 +- 11 files changed, 1228 insertions(+), 996 deletions(-) diff --git a/server/pom.xml b/server/pom.xml index b0b6be3c539..8ea227c6cd3 100644 --- a/server/pom.xml +++ b/server/pom.xml @@ -283,12 +283,12 @@ <configuration> <!-- <finalName>sample_fsw</finalName>--> <finalName>sample_eBUG</finalName> - <!-- <finalName>sample_BUL2</finalName>--> + <!-- <finalName>sample_BUYdiff</finalName>--> <archive> <manifest> <!-- <mainClass>org.apache.iotdb.db.query.eBUG.sample_FSW</mainClass>--> <mainClass>org.apache.iotdb.db.query.eBUG.sample_eBUG</mainClass> - <!-- <mainClass>org.apache.iotdb.db.query.eBUG.sample_bottomUpL2</mainClass>--> + <!-- <mainClass>org.apache.iotdb.db.query.eBUG.sample_bottomUpYdiff</mainClass>--> </manifest> </archive> <descriptorRefs> diff --git a/server/sample_BUL2-jar-with-dependencies.jar b/server/sample_BUYdiff-jar-with-dependencies.jar similarity index 96% rename from server/sample_BUL2-jar-with-dependencies.jar rename to server/sample_BUYdiff-jar-with-dependencies.jar index d592875251c..d7360b7038a 100644 Binary files a/server/sample_BUL2-jar-with-dependencies.jar and b/server/sample_BUYdiff-jar-with-dependencies.jar differ diff --git a/server/sample_eBUG-jar-with-dependencies.jar b/server/sample_eBUG-jar-with-dependencies.jar index 0df972fa4a5..f442da1afb8 100644 Binary files a/server/sample_eBUG-jar-with-dependencies.jar and b/server/sample_eBUG-jar-with-dependencies.jar differ diff --git a/server/sample_fsw-jar-with-dependencies.jar b/server/sample_fsw-jar-with-dependencies.jar index c1981d2a984..3ca5857146d 100644 Binary files a/server/sample_fsw-jar-with-dependencies.jar and b/server/sample_fsw-jar-with-dependencies.jar differ diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/BottomUpYdiff.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/BottomUpYdiff.java new file mode 100644 index 00000000000..6090415241f --- /dev/null +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/BottomUpYdiff.java @@ -0,0 +1,245 @@ +package org.apache.iotdb.db.query.eBUG; + +import java.io.FileWriter; +import java.io.IOException; +import java.util.*; + +public class BottomUpYdiff { + + public static List<Point> reducePoints( + List<Point> points, int m, DP.ERRORtype errorType, boolean debug) throws IOException { + if (m <= 2) { + throw new IOException("please make m>2"); + } + // 直接控制采样点数 + int total = points.size(); + if (total < 3) { + return points; // 不足 3 个点无法形成三角形 + } + + // 每个triangle对应两个相邻的待合并的分段 + int nTriangles = total - 2; + Triangle[] triangles = new Triangle[nTriangles]; + + // 创建所有三角形并计算初始面积 + points.get(0).prev = null; + points.get(0).next = points.get(1); + // points.get(0).index = 0; + points.get(total - 1).next = null; + points.get(total - 1).prev = points.get(total - 2); + // points.get(total - 1).index = points.size() - 1; + for (int i = 1; i < total - 1; i++) { + int index1 = i - 1, index2 = i, index3 = i + 1; + + double mc = DP.joint_segment_error(points, index1, index3, errorType); + + triangles[i - 1] = new Triangle(index1, index2, index3, mc); + + // 初始化点的状态 用于最后在线采样结果顺序输出未淘汰点 + points.get(i).prev = points.get(i - 1); + points.get(i).next = points.get(i + 1); + // points.get(i).index = i; // for swab usage + } + + // 设置三角形的前后关系 + for (int i = 0; i < nTriangles; i++) { // TODO 这个可以和上一个循环合并吗??好像不可以 + triangles[i].prev = (i == 0 ? null : triangles[i - 1]); + triangles[i].next = (i == nTriangles - 1 ? null : triangles[i + 1]); + } + + // 使用优先队列构建 minHeap + PriorityQueue<Triangle> triangleHeap = + new PriorityQueue<>(Comparator.comparingDouble(t -> t.area)); + Collections.addAll(triangleHeap, triangles); // complexity TODO O(n) or O(nlogn)? + + int remainNonTerminalPoint = m - 2; + int fakeCnt = 0; + // while (!triangleHeap.isEmpty() && triangleHeap.peek().area < maxError) { // TODO + while (triangleHeap.size() - fakeCnt > remainNonTerminalPoint) { + // 注意peek只需要直接访问该位置的元素,不涉及任何重排或堆化操作 + // 而poll是删除堆顶元素,需要重新堆化以维护堆的性质,复杂度是O(logk),k是当前堆的大小 + Triangle tri = triangleHeap.poll(); // O(logn) + + // 如果该三角形已经被删除,跳过. Avoid using heap.remove(x) as it is O(n) complexity + // 而且除了heap里,已经没有其他节点会和它关联了,所有的connections关系已经迁移到新的角色替代节点上 + if (tri.isDeleted) { + fakeCnt--; // 取出了一个被标记删除点 + if (debug) { + System.out.println( + ">>>bottom-up, remaining " + + triangleHeap.size() + + " triangles (including those marked for removal)"); + } + continue; + } + + // 真正的淘汰点 + // 记录最近淘汰的点,注意不要重复记录也就是在上面执行之后再确认淘汰 + if (debug) { + System.out.println("(1) eliminate " + points.get(tri.indices[1])); + } + points.get(tri.indices[1]).markEliminated(); + + // 更新相邻三角形 + if (tri.prev != null) { + // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 + + // 1. 处理旧的tri.prev被标记删除的事情(角色替代) + // triangleHeap.remove(tri.prev); // Avoid using heap.remove(x) as it is O(n) complexity! + tri.prev.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只要heap poll到它就跳过就可以 + + Triangle newPre = new Triangle(tri.prev); // deep copy and inherit connection + // tri.prev = newPre; // can omit, because already done by new Triangle(tri.prev) + + // 2. 处理pi被淘汰引起tri.prev被更新的事情 + // 前一个三角形连到后一个三角形 + tri.prev.next = + tri.next; // ATTENTION!!!: 这里的tri.next后面可能会因为处理旧的tri.next被标记删除的事情被换掉!到时候要重新赋值! + tri.prev.indices[2] = tri.indices[2]; + + double mc = + DP.joint_segment_error(points, tri.prev.indices[0], tri.prev.indices[2], errorType); + + tri.prev.area = mc; + if (debug) { + System.out.println( + "(2) updating point on the left " + + points.get(tri.prev.indices[1]) + + ", ranging [" + + tri.prev.indices[0] + + "," + + tri.prev.indices[2] + + "]"); + } + + // 重新加入堆 + // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 + // 所以必须通过add来显式重新插入修改后的元素 + triangleHeap.add(tri.prev); // O(logn) 注意加入的是一个新的对象isDeleted=false + fakeCnt++; // 表示heap里多了一个被标记删除的假点 + } + + if (tri.next != null) { + // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 + + // 1. 处理旧的tri.next被标记删除的事情(角色替代) + // triangleHeap.remove(tri.next); // Avoid using heap.remove(x) as it is O(n) complexity + tri.next.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只有poll到它就跳过就可以 + + Triangle newNext = new Triangle(tri.next); // deep copy and inherit connection + // tri.next = newNext; // omit, because already done by new Triangle(tri.prev) + + if (tri.prev != null) { + tri.prev.next = tri.next; // ATTENTION!!!: 这里的tri.next已经被换掉!所以之前的要重新赋值! + } + + // 2. 处理pi被淘汰引起tri.next被更新的事情 + tri.next.prev = tri.prev; // 注意此时tri.prev已经是替代后的节点,tri.next也是,从而被标记为废点的前后关联真正砍断 + tri.next.indices[0] = tri.indices[0]; + + double mc = + DP.joint_segment_error(points, tri.next.indices[0], tri.next.indices[2], errorType); + + tri.next.area = mc; + if (debug) { + System.out.println( + "(3) updating point on the right " + + points.get(tri.next.indices[1]) + + ", ranging [" + + tri.next.indices[0] + + "," + + tri.next.indices[2] + + "]"); + } + + // 重新加入堆 + // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 + // 所以必须通过add 来显式重新插入修改后的元素 + triangleHeap.add(tri.next); // 注意加入的是一个新的对象isDeleted=false + fakeCnt++; // 表示heap里多了一个被标记删除的假点 + } + if (debug) { + System.out.println( + ">>>bottom-up, remaining " + + triangleHeap.size() + + " triangles (including those marked for removal)"); + } + } + + List<Point> onlineSampled = new ArrayList<>(); + Point start = points.get(0); + Point end = points.get(points.size() - 1); + while (start + != end) { // Point类里增加prev&next指针,这样T'_max{0,k-e}里点的连接关系就有了,这样从Pa开始沿着指针,遍历点数一定不超过e+3 + onlineSampled.add(start); // 注意未淘汰的点的Dominated significance尚未赋值,还都是infinity + start = start.next; // when e=0, only traversing three points pa pi pb + } + onlineSampled.add(end); + return onlineSampled; + } + + public static void main(String[] args) throws IOException { + Random rand = new Random(10); + String input = "D:\\datasets\\regular\\tmp2.csv"; + boolean hasHeader = false; + int timeIdx = 0; + int valueIdx = 1; + int N = 1000_00; + // List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); + Polyline polyline = new Polyline(); + for (int i = 0; i < N; i += 1) { + double v = rand.nextInt(1000); + polyline.addVertex(new Point(i, v)); + } + List<Point> points = polyline.getVertices(); + try (FileWriter writer = new FileWriter("raw.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); + + // 写入每个点的数据 + for (int i = 0; i < points.size(); i++) { + Point point = points.get(i); + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println(points.size() + " Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); + } + + int m = 100; + + long startTime = System.currentTimeMillis(); + List<Point> sampled = reducePoints(points, m, DP.ERRORtype.L_infy, false); + long endTime = System.currentTimeMillis(); + System.out.println("Time taken: " + (endTime - startTime) + "ms"); + // for (Point p : sampled) { + // System.out.println(p); + // } + System.out.println(sampled.size()); + + // startTime = System.currentTimeMillis(); + //// List<Point> sampled2 = eBUG.buildEffectiveArea(points, 100000000, false, m); + // List<Point> sampled2 = SWAB.seg_bottomUp_m_withTimestamps(points, m, null, false); + // endTime = System.currentTimeMillis(); + // System.out.println("Time taken: " + (endTime - startTime) + "ms"); + //// for (Point p : sampled2) { + //// System.out.println(p); + //// } + // System.out.println(sampled2.size()); + // for (int i = 0; i < sampled2.size(); i++) { + // if (sampled.get(i).x != sampled2.get(i).x) { + // throw new IOException("wrong!"); + // } + // } + + // try (PrintWriter writer = new PrintWriter(new File("output.csv"))) { + // // 写入字符串 + // for (int i = 0; i < sampled.size(); i++) { + // writer.println(sampled.get(i).x + "," + sampled.get(i).y); + // } + // + // } catch (FileNotFoundException e) { + // e.printStackTrace(); + // } + } +} diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/DNSL1.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/DNSL1.java index df2c09d35ab..be4ffe2d6f1 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/DNSL1.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/DNSL1.java @@ -9,89 +9,91 @@ import java.util.Random; import static org.apache.iotdb.db.query.eBUG.DP.dynamic_programming; public class DNSL1 { - // 默认使用linear interpolation连接分段首尾点来近似 - // 默认使用L1 error - // 使用divide and conquer算法 - public static List<Point> reducePoints(List<Point> points, int m, boolean debug) { - int n = points.size(); - int k = m - 1; // 分段数 - int intervalPts = (int) Math.pow((double) n / k, (double) 2 / 3); // divide batch点数 + // 默认使用linear interpolation连接分段首尾点来近似 + // 默认使用L1 error + // 使用divide and conquer算法 + public static List<Point> reducePoints(List<Point> points, int m, boolean debug) { + int n = points.size(); + int k = m - 1; // 分段数 + int intervalPts = (int) Math.pow((double) n / k, (double) 2 / 3); // divide batch点数 - if (debug) { - System.out.println("interval point length=" + intervalPts); - } + if (debug) { + System.out.println("interval point length=" + intervalPts); + } - // divide into intervalPts equal-length intervals - List<Point> allSampledPoints = new ArrayList<>(); - for (int start = 0; start < n; start += intervalPts) { - int end = Math.min(start + intervalPts, n); - List<Point> batch = points.subList(start, end); // 左闭右开 - if (debug) { - System.out.println("Processing batch: [" + start + "," + end + ")"); - } - List<Point> sampledForBatch = dynamic_programming(batch, k, DP.ERROR.L1, false); - allSampledPoints.addAll(sampledForBatch); // 把上一步的结果加到大的容器里 - } + // divide into intervalPts equal-length intervals + List<Point> allSampledPoints = new ArrayList<>(); + for (int start = 0; start < n; start += intervalPts) { + int end = Math.min(start + intervalPts, n); + List<Point> batch = points.subList(start, end); // 左闭右开 + if (debug) { + System.out.println("Processing batch: [" + start + "," + end + ")"); + } + List<Point> sampledForBatch = dynamic_programming(batch, k, DP.ERRORtype.L1, false); + allSampledPoints.addAll(sampledForBatch); // 把上一步的结果加到大的容器里 + } - // 在大的容器上最后执行DP.dynamic_programming得到最后的m个采样点 - if (debug) { - System.out.println("number of points from batch sampling: " + allSampledPoints.size()); - } - List<Point> finalSampledPoints = dynamic_programming(allSampledPoints, k, DP.ERROR.L1, false); - if (debug) { - System.out.println("result point number: " + finalSampledPoints.size()); - } - return finalSampledPoints; + // 在大的容器上最后执行DP.dynamic_programming得到最后的m个采样点 + if (debug) { + System.out.println("number of points from batch sampling: " + allSampledPoints.size()); + } + List<Point> finalSampledPoints = + dynamic_programming(allSampledPoints, k, DP.ERRORtype.L1, false); + if (debug) { + System.out.println("result point number: " + finalSampledPoints.size()); } + return finalSampledPoints; + } - public static void main(String[] args) { - Random rand = new Random(10); - String input = "raw.csv"; - boolean hasHeader = true; - int timeIdx = 0; - int valueIdx = 1; - int N = 1000; -// List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); - Polyline polyline = new Polyline(); - for (int i = 0; i < N; i += 1) { - double v = rand.nextInt(1000); - polyline.addVertex(new Point(i, v)); - } - List<Point> points = polyline.getVertices(); - try (FileWriter writer = new FileWriter("raw.csv")) { - // 写入CSV头部 - writer.append("x,y,z\n"); + public static void main(String[] args) { + Random rand = new Random(10); + String input = "raw.csv"; + boolean hasHeader = true; + int timeIdx = 0; + int valueIdx = 1; + int N = 1000; + // List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); + Polyline polyline = new Polyline(); + for (int i = 0; i < N; i += 1) { + double v = rand.nextInt(1000); + polyline.addVertex(new Point(i, v)); + } + List<Point> points = polyline.getVertices(); + try (FileWriter writer = new FileWriter("raw.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); - // 写入每个点的数据 - for (Point point : points) { - writer.append(point.x + "," + point.y + "," + point.z + "\n"); - } - System.out.println(points.size() + " Data has been written"); - } catch (IOException e) { - System.out.println("Error writing to CSV file: " + e.getMessage()); - } + // 写入每个点的数据 + for (Point point : points) { + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println(points.size() + " Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); + } -// int m = (int) Math.pow(points.size(), 0.5); - int m = 10; + // int m = (int) Math.pow(points.size(), 0.5); + int m = 10; -// long startTime = System.currentTimeMillis(); -// List<Point> sampled = dynamic_programming(points, m - 1, DP.ERROR.area, false); -// long endTime = System.currentTimeMillis(); -// System.out.println("Time taken: " + (endTime - startTime) + "ms"); -// for (Point p : sampled) { -// System.out.println(p); -// } -// System.out.println(sampled.size()); + // long startTime = System.currentTimeMillis(); + // List<Point> sampled = dynamic_programming(points, m - 1, DP.ERROR.area, false); + // long endTime = System.currentTimeMillis(); + // System.out.println("Time taken: " + (endTime - startTime) + "ms"); + // for (Point p : sampled) { + // System.out.println(p); + // } + // System.out.println(sampled.size()); - long startTime2 = System.currentTimeMillis(); -// System.out.println(points.size() * m / (Math.pow(points.size() * 1.0 / (m - 1), 2 / 3.0))); -// System.out.println(10000000 * (2 / (Math.pow(10000000 * 1.0 / (2 - 1), 2 / 3.0)))); - List<Point> sampled2 = reducePoints(points, m, true); - long endTime2 = System.currentTimeMillis(); - System.out.println("Time taken: " + (endTime2 - startTime2) + "ms"); -// for (Point p : sampled2) { -// System.out.println(p); -// } -// System.out.println(sampled2.size()); - } + long startTime2 = System.currentTimeMillis(); + // System.out.println(points.size() * m / (Math.pow(points.size() * 1.0 / (m - 1), 2 / + // 3.0))); + // System.out.println(10000000 * (2 / (Math.pow(10000000 * 1.0 / (2 - 1), 2 / 3.0)))); + List<Point> sampled2 = reducePoints(points, m, true); + long endTime2 = System.currentTimeMillis(); + System.out.println("Time taken: " + (endTime2 - startTime2) + "ms"); + // for (Point p : sampled2) { + // System.out.println(p); + // } + // System.out.println(sampled2.size()); + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/DP.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/DP.java index 62c81cd10ea..1cd10c19fcc 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/DP.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/DP.java @@ -10,284 +10,305 @@ import java.util.*; public class DP { // Dynamic-Programming - // Enum to represent error types - public enum ERROR { - L1, L2, L_infy, area - } - - // precomputing error measures of all possible segments, e.g., ad2 table - public static double[][] prepareCostTable(List<Point> points, ERROR errorType, boolean debug) { - int N = points.size(); - double[][] dists = new double[N][N]; // 默认0,对角线元素已经都是0了 - // 一个 double[47700][47700] 矩阵大约需要 16.94 GB 的内存。 - - // 外层循环i:range长度=right boundary-left boundary - // 内层循环j: 矩阵逐行 - for (int i = 1; i < N; i++) { // TODO - for (int j = 0; j < N - i; j++) { - // j = left boundary, r = right boundary, 左闭右闭 - int r = i + j; // r<N - if (debug) { - System.out.println(">>>> i=" + i + ", j=" + j + ", r=" + r); - } + // Enum to represent error types + public enum ERRORtype { + L1, + L2, + L_infy, + area + } - // lx=j,rx=r, 从lx=j到rx=r(左闭右闭)的linear interpolation(即连接lx和rx点)的errorType类型的误差 - double mc = joint_segment_error(points, j, r, errorType); - dists[j][r] = mc; - if (debug) { - System.out.println("dists="); - Tool.printArray(dists); - System.out.println("---------------------"); - } - } - } - return dists; - } + // precomputing error measures of all possible segments, e.g., ad2 table + public static double[][] prepareCostTable( + List<Point> points, ERRORtype errorType, boolean debug) { + int N = points.size(); + double[][] dists = new double[N][N]; // 默认0,对角线元素已经都是0了 + // 一个 double[47700][47700] 矩阵大约需要 16.94 GB 的内存。 - // 注意k是分段数,不是采点数 - public static List<Point> dynamic_programming(List<Point> points, int k, ERROR errorType, boolean debug) { - int N = points.size(); - - // 预计算dists(即ad2 table),dists[i][j]代表对子序列i:j左闭右闭直接连接pi和pj的近似误差 - double[][] dists = prepareCostTable(points, errorType, debug); + // 外层循环i:range长度=right boundary-left boundary + // 内层循环j: 矩阵逐行 + for (int i = 1; i < N; i++) { // TODO + for (int j = 0; j < N - i; j++) { + // j = left boundary, r = right boundary, 左闭右闭 + int r = i + j; // r<N if (debug) { - Tool.printArray(dists); - System.out.println("----------------------------------"); + System.out.println(">>>> i=" + i + ", j=" + j + ", r=" + r); } - // 创建dp table。kSegDist[i][j]代表把子序列T[0:j]左闭右闭分成i段(java从0开始计数)的最小误差 - double[][] kSegDist = new double[k][N]; - // Initialize the case k=1 directly from the pre-computed distances - // 注意java从0开始,所以是0 index第一行代表分段数1 - System.arraycopy(dists[0], 0, kSegDist[0], 0, kSegDist[0].length); + // lx=j,rx=r, 从lx=j到rx=r(左闭右闭)的linear interpolation(即连接lx和rx点)的errorType类型的误差 + double mc = joint_segment_error(points, j, r, errorType); + dists[j][r] = mc; if (debug) { - System.out.println("k_seg_dist="); - Tool.printArray(kSegDist); - } - - // 创建path table。记录子问题的解。kSegPath[i][j]代表把T[0:j]分成i段这个问题的最优子问题的位置 - // 子问题是把T[0:kSegPath[i][j]]分成i-1段,以及最后一段T[kSegPath[i][j]:end] - int[][] kSegPath = new int[k][N]; - // 第一行只分一段,所以不管终点是哪个(列),左边的闭合起点都是0 - // 虽然java数组默认初始是0,但是这里还是显式赋值 - Arrays.fill(kSegPath[0], 0); - if (debug) { - System.out.println("k_seg_path="); - Tool.printArray(kSegPath); + System.out.println("dists="); + Tool.printArray(dists); + System.out.println("---------------------"); } + } + } + return dists; + } - // 外层循环i:分段数,注意python从0开始,所以实际含义是2~k个分段数 - for (int i = 1; i < k; i++) { - // 内层循环j:从第一个点开始到j终点(闭合)的序列 - // 所以含义是:找到从第一个点开始到j终点(闭合)的序列的分成(i+1)段的最佳分段方案(误差最小) - for (int j = 0; j < N; j++) { - // 动态规划 - // 注意linear interpolation不需要单点成为一个分段的情况 - // TODO 误差类型 - // TODO ghost修复 - // 从0:j的序列分成i段的问题:遍历所有可能的子问题组合解 - if (debug) { - System.out.println(">>>分段数i+1" + (i + 1) + ",end pos j=" + j); - } - double[] choices = new double[j + 1]; // java从0开始 - int bestIndex = -1; - double bestVal = Double.MAX_VALUE; // 越小越好 - for (int xtmp = 0; xtmp < j + 1; xtmp++) { - if (errorType == ERROR.L_infy) { - choices[xtmp] = Math.max(kSegDist[i - 1][xtmp], dists[xtmp][j]); - } else { - choices[xtmp] = kSegDist[i - 1][xtmp] + dists[xtmp][j]; - } - if (choices[xtmp] < bestVal) { - bestVal = choices[xtmp]; - bestIndex = xtmp; - } - } - if (debug) { - for (int xtmp = 0; xtmp < j + 1; xtmp++) { // 遍历从 0 到 j 的每个元素 - if (errorType == ERROR.L_infy) { - System.out.printf(" max((k_seg_dist[%d, %d] = %f), (dists[%d, %d] = %f)) --> %f%n", - i - 1, xtmp, kSegDist[i - 1][xtmp], - xtmp, j, dists[xtmp][j], - Math.max(kSegDist[i - 1][xtmp], dists[xtmp][j])); - } else { - System.out.printf(" (k_seg_dist[%d, %d] = %f) + (dists[%d, %d] = %f) --> %f%n", - i - 1, xtmp, kSegDist[i - 1][xtmp], - xtmp, j, dists[xtmp][j], - kSegDist[i - 1][xtmp] + dists[xtmp][j]); - } - } - } + // 注意k是分段数,不是采点数 + public static List<Point> dynamic_programming( + List<Point> points, int k, ERRORtype errorType, boolean debug) { + int N = points.size(); - // Store the sub-problem solution - kSegPath[i][j] = bestIndex; - kSegDist[i][j] = bestVal; + // 预计算dists(即ad2 table),dists[i][j]代表对子序列i:j左闭右闭直接连接pi和pj的近似误差 + double[][] dists = prepareCostTable(points, errorType, debug); + if (debug) { + Tool.printArray(dists); + System.out.println("----------------------------------"); + } - if (debug) { - System.out.println("kSegDist[" + i + "][" + j + "] = " + bestVal); - System.out.println("kSegDist="); - Tool.printArray(kSegDist); - System.out.println("kSegPath="); - Tool.printArray(kSegPath); - } - } - } + // 创建dp table。kSegDist[i][j]代表把子序列T[0:j]左闭右闭分成i段(java从0开始计数)的最小误差 + double[][] kSegDist = new double[k][N]; + // Initialize the case k=1 directly from the pre-computed distances + // 注意java从0开始,所以是0 index第一行代表分段数1 + System.arraycopy(dists[0], 0, kSegDist[0], 0, kSegDist[0].length); + if (debug) { + System.out.println("k_seg_dist="); + Tool.printArray(kSegDist); + } - // 开始回溯构建采样结果 - List<Point> sDs = new ArrayList<>(); // k+1个采样点 - List<Integer> sDs_idx = new ArrayList<>(); - int rhs = N - 1; - sDs.add(points.get(rhs)); // 全局尾点 - sDs_idx.add(rhs); + // 创建path table。记录子问题的解。kSegPath[i][j]代表把T[0:j]分成i段这个问题的最优子问题的位置 + // 子问题是把T[0:kSegPath[i][j]]分成i-1段,以及最后一段T[kSegPath[i][j]:end] + int[][] kSegPath = new int[k][N]; + // 第一行只分一段,所以不管终点是哪个(列),左边的闭合起点都是0 + // 虽然java数组默认初始是0,但是这里还是显式赋值 + Arrays.fill(kSegPath[0], 0); + if (debug) { + System.out.println("k_seg_path="); + Tool.printArray(kSegPath); + } - for (int i = k - 1; i >= 0; i--) { - int lhs = kSegPath[i][rhs]; - sDs.add(points.get(lhs)); - sDs_idx.add(lhs); - rhs = lhs; + // 外层循环i:分段数,注意python从0开始,所以实际含义是2~k个分段数 + for (int i = 1; i < k; i++) { + // 内层循环j:从第一个点开始到j终点(闭合)的序列 + // 所以含义是:找到从第一个点开始到j终点(闭合)的序列的分成(i+1)段的最佳分段方案(误差最小) + for (int j = 0; j < N; j++) { + // 动态规划 + // 注意linear interpolation不需要单点成为一个分段的情况 + // TODO 误差类型 + // TODO ghost修复 + // 从0:j的序列分成i段的问题:遍历所有可能的子问题组合解 + if (debug) { + System.out.println(">>>分段数i+1" + (i + 1) + ",end pos j=" + j); + } + double[] choices = new double[j + 1]; // java从0开始 + int bestIndex = -1; + double bestVal = Double.MAX_VALUE; // 越小越好 + for (int xtmp = 0; xtmp < j + 1; xtmp++) { + if (errorType == ERRORtype.L_infy) { + choices[xtmp] = Math.max(kSegDist[i - 1][xtmp], dists[xtmp][j]); + } else { + choices[xtmp] = kSegDist[i - 1][xtmp] + dists[xtmp][j]; + } + if (choices[xtmp] < bestVal) { + bestVal = choices[xtmp]; + bestIndex = xtmp; + } + } + if (debug) { + for (int xtmp = 0; xtmp < j + 1; xtmp++) { // 遍历从 0 到 j 的每个元素 + if (errorType == ERRORtype.L_infy) { + System.out.printf( + " max((k_seg_dist[%d, %d] = %f), (dists[%d, %d] = %f)) --> %f%n", + i - 1, + xtmp, + kSegDist[i - 1][xtmp], + xtmp, + j, + dists[xtmp][j], + Math.max(kSegDist[i - 1][xtmp], dists[xtmp][j])); + } else { + System.out.printf( + " (k_seg_dist[%d, %d] = %f) + (dists[%d, %d] = %f) --> %f%n", + i - 1, + xtmp, + kSegDist[i - 1][xtmp], + xtmp, + j, + dists[xtmp][j], + kSegDist[i - 1][xtmp] + dists[xtmp][j]); + } + } } - // 反转列表 - Collections.reverse(sDs); - Collections.reverse(sDs_idx); + // Store the sub-problem solution + kSegPath[i][j] = bestIndex; + kSegDist[i][j] = bestVal; if (debug) { - System.out.println(sDs); - System.out.println(">>>>>ad2[][]="); - Tool.printArray(dists); - System.out.println(">>>>>dp[][]="); - Tool.printTransposeArray(kSegDist); - System.out.println(">>>>>path[][]="); - Tool.printTransposeArray(kSegPath); - System.out.println(error(points, sDs_idx, errorType)); + System.out.println("kSegDist[" + i + "][" + j + "] = " + bestVal); + System.out.println("kSegDist="); + Tool.printArray(kSegDist); + System.out.println("kSegPath="); + Tool.printArray(kSegPath); } + } + } + + // 开始回溯构建采样结果 + List<Point> sDs = new ArrayList<>(); // k+1个采样点 + List<Integer> sDs_idx = new ArrayList<>(); + int rhs = N - 1; + sDs.add(points.get(rhs)); // 全局尾点 + sDs_idx.add(rhs); - return sDs; + for (int i = k - 1; i >= 0; i--) { + int lhs = kSegPath[i][rhs]; + sDs.add(points.get(lhs)); + sDs_idx.add(lhs); + rhs = lhs; } -// // Helper method to get index of minimum value -// private static int getIndexOfMin(double[] array) { -// double minVal = array[0]; -// int minIndex = 0; -// for (int i = 1; i < array.length; i++) { -// if (array[i] < minVal) { -// minVal = array[i]; -// minIndex = i; -// } -// } -// return minIndex; -// } + // 反转列表 + Collections.reverse(sDs); + Collections.reverse(sDs_idx); - // Method to calculate joint segment error based on error type - private static double joint_segment_error(List<Point> points, int lx, int rx, ERROR errorType) { - // 默认joint=true, residual=true,即使用linear interpolation近似分段 - // lx~rx 左闭右闭 - if (lx == rx) { - return 0; - } + if (debug) { + System.out.println(sDs); + System.out.println(">>>>>ad2[][]="); + Tool.printArray(dists); + System.out.println(">>>>>dp[][]="); + Tool.printTransposeArray(kSegDist); + System.out.println(">>>>>path[][]="); + Tool.printTransposeArray(kSegPath); + System.out.println(error(points, sDs_idx, errorType)); + } - if (errorType != ERROR.area) { - double x1 = points.get(lx).x; - double y1 = points.get(lx).y; - double x2 = points.get(rx).x; - double y2 = points.get(rx).y; + return sDs; + } - // linear interpolation: - double k = (y2 - y1) / (x2 - x1); - double b = (y1 * x2 - y2 * x1) / (x2 - x1); + // // Helper method to get index of minimum value + // private static int getIndexOfMin(double[] array) { + // double minVal = array[0]; + // int minIndex = 0; + // for (int i = 1; i < array.length; i++) { + // if (array[i] < minVal) { + // minVal = array[i]; + // minIndex = i; + // } + // } + // return minIndex; + // } - double tmp = 0; - if (errorType == ERROR.L2) { - for (int i = lx; i <= rx; i++) { - double e = (k * points.get(i).x + b - points.get(i).y) * (k * points.get(i).x + b - points.get(i).y); - tmp += e; - } - } else if (errorType == ERROR.L1) { - for (int i = lx; i <= rx; i++) { - double e = Math.abs(k * points.get(i).x + b - points.get(i).y); - tmp += e; - } - } else if (errorType == ERROR.L_infy) { - for (int i = lx; i <= rx; i++) { - double e = Math.abs(k * points.get(i).x + b - points.get(i).y); // 注意绝对值 - if (e > tmp) { - tmp = e; - } - } - } - return tmp; - } else { // AD - List<Point> linearInterpolation = new ArrayList<>(); - linearInterpolation.add(points.get(lx)); - linearInterpolation.add(points.get(rx)); - return Tool.total_areal_displacement(points.subList(lx, rx + 1), // 注意lx,rx左闭右闭 - linearInterpolation, false); - } + // Method to calculate joint segment error based on error type + public static double joint_segment_error( + List<Point> points, int lx, int rx, ERRORtype errorType) { + // 默认joint=true, residual=true,即使用linear interpolation近似分段 + // lx~rx 左闭右闭 + if (lx == rx) { + return 0; } - public static double error(List<Point> points, List<Integer> sampledIdx, ERROR errorType) { - double res = 0; - for (int i = 0; i < sampledIdx.size() - 1; i++) { - int lx = sampledIdx.get(i); - int rx = sampledIdx.get(i + 1); - double e = joint_segment_error(points, lx, rx, errorType); - if (errorType == ERROR.L_infy) { - res = Math.max(res, e); - } else { - res += e; - } + if (errorType != ERRORtype.area) { + double x1 = points.get(lx).x; + double y1 = points.get(lx).y; + double x2 = points.get(rx).x; + double y2 = points.get(rx).y; + + // linear interpolation: + double k = (y2 - y1) / (x2 - x1); + double b = (y1 * x2 - y2 * x1) / (x2 - x1); + + double tmp = 0; + if (errorType == ERRORtype.L2) { + for (int i = lx; i <= rx; i++) { + double e = + (k * points.get(i).x + b - points.get(i).y) + * (k * points.get(i).x + b - points.get(i).y); + tmp += e; } - return res; + } else if (errorType == ERRORtype.L1) { + for (int i = lx; i <= rx; i++) { + double e = Math.abs(k * points.get(i).x + b - points.get(i).y); + tmp += e; + } + } else if (errorType == ERRORtype.L_infy) { + for (int i = lx; i <= rx; i++) { + double e = Math.abs(k * points.get(i).x + b - points.get(i).y); // 注意绝对值 + if (e > tmp) { + tmp = e; + } + } + } + return tmp; + } else { // AD + List<Point> linearInterpolation = new ArrayList<>(); + linearInterpolation.add(points.get(lx)); + linearInterpolation.add(points.get(rx)); + return Tool.total_areal_displacement( + points.subList(lx, rx + 1), // 注意lx,rx左闭右闭 + linearInterpolation, + false); } + } - // Example usage - public static void main(String[] args) { - Random rand = new Random(10); - String input = "D:\\LabSync\\iotdb\\我的Gitbook基地\\RUI Lei gitbook\\士论\\49. visval改进\\notebook\\raw.csv"; - boolean hasHeader = true; - int timeIdx = 0; - int valueIdx = 1; - int N = 100; -// List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); - Polyline polyline = new Polyline(); - for (int i = 0; i < N; i += 1) { - double v = rand.nextInt(1000); - polyline.addVertex(new Point(i, v)); - } - List<Point> points = polyline.getVertices(); - try (FileWriter writer = new FileWriter("raw.csv")) { - // 写入CSV头部 - writer.append("x,y,z\n"); + public static double error(List<Point> points, List<Integer> sampledIdx, ERRORtype errorType) { + double res = 0; + for (int i = 0; i < sampledIdx.size() - 1; i++) { + int lx = sampledIdx.get(i); + int rx = sampledIdx.get(i + 1); + double e = joint_segment_error(points, lx, rx, errorType); + if (errorType == ERRORtype.L_infy) { + res = Math.max(res, e); + } else { + res += e; + } + } + return res; + } - // 写入每个点的数据 - for (Point point : points) { - writer.append(point.x + "," + point.y + "," + point.z + "\n"); - } - System.out.println(points.size() + " Data has been written"); - } catch (IOException e) { - System.out.println("Error writing to CSV file: " + e.getMessage()); - } + // Example usage + public static void main(String[] args) { + Random rand = new Random(10); + String input = + "D:\\LabSync\\iotdb\\我的Gitbook基地\\RUI Lei gitbook\\士论\\49. visval改进\\notebook\\raw.csv"; + boolean hasHeader = true; + int timeIdx = 0; + int valueIdx = 1; + int N = 100; + // List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); + Polyline polyline = new Polyline(); + for (int i = 0; i < N; i += 1) { + double v = rand.nextInt(1000); + polyline.addVertex(new Point(i, v)); + } + List<Point> points = polyline.getVertices(); + try (FileWriter writer = new FileWriter("raw.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); - long startTime = System.currentTimeMillis(); -// double[][] ad2 = prepareKSegments(points, ERROR.L1, false); - int m = 10; - List<Point> sampled = dynamic_programming(points, m - 1, ERROR.area, false); - long endTime = System.currentTimeMillis(); - System.out.println("Time taken: " + (endTime - startTime) + "ms"); + // 写入每个点的数据 + for (Point point : points) { + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println(points.size() + " Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); + } - for (Point p : sampled) { - System.out.println(p); - } - System.out.println(sampled.size()); + long startTime = System.currentTimeMillis(); + // double[][] ad2 = prepareKSegments(points, ERROR.L1, false); + int m = 10; + List<Point> sampled = dynamic_programming(points, m - 1, ERRORtype.area, false); + long endTime = System.currentTimeMillis(); + System.out.println("Time taken: " + (endTime - startTime) + "ms"); -// try (PrintWriter writer = new PrintWriter(new File("output.csv"))) { -// // 写入字符串 -// for (int i = 0; i < sampled.size(); i++) { -// writer.println(sampled.get(i).x + "," + sampled.get(i).y); -// } -// -// } catch (FileNotFoundException e) { -// e.printStackTrace(); -// } + for (Point p : sampled) { + System.out.println(p); } + System.out.println(sampled.size()); + + // try (PrintWriter writer = new PrintWriter(new File("output.csv"))) { + // // 写入字符串 + // for (int i = 0; i < sampled.size(); i++) { + // writer.println(sampled.get(i).x + "," + sampled.get(i).y); + // } + // + // } catch (FileNotFoundException e) { + // e.printStackTrace(); + // } + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/SWAB.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/SWAB.java index f6d5e2fc6a3..5c29355fec1 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/SWAB.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/SWAB.java @@ -1,6 +1,6 @@ package org.apache.iotdb.db.query.eBUG; -import java.io.*; +import java.io.IOException; import java.util.*; public class SWAB { @@ -593,242 +593,194 @@ public class SWAB { return segTs; } - public static List<Point> seg_bottomUp_m_withTimestamps( - List<Point> points, int m, Object[] prefixSum, boolean debug) throws IOException { - if (m <= 2) { - throw new IOException("please make m>2"); - } - // 直接控制采样点数 - int total = points.size(); - if (total < 3) { - return points; // 不足 3 个点无法形成三角形 - } - - // 每个triangle对应两个相邻的待合并的分段 - int nTriangles = total - 2; - Triangle[] triangles = new Triangle[nTriangles]; - - // 创建所有三角形并计算初始面积 - points.get(0).prev = null; - points.get(0).next = points.get(1); - // points.get(0).index = 0; - points.get(total - 1).next = null; - points.get(total - 1).prev = points.get(total - 2); - // points.get(total - 1).index = points.size() - 1; - for (int i = 1; i < total - 1; i++) { - int index1 = i - 1, index2 = i, index3 = i + 1; - double mc; - if (prefixSum == null) { - mc = myLA_withTimestamps(points, index1, index3); - } else { - mc = myLA_withTimestamps(points, prefixSum, index1, index3); - } - triangles[i - 1] = new Triangle(index1, index2, index3, mc); - - // 初始化点的状态 用于最后在线采样结果顺序输出未淘汰点 - points.get(i).prev = points.get(i - 1); - points.get(i).next = points.get(i + 1); - // points.get(i).index = i; // for swab usage - } - - // 设置三角形的前后关系 - for (int i = 0; i < nTriangles; i++) { // TODO 这个可以和上一个循环合并吗??好像不可以 - triangles[i].prev = (i == 0 ? null : triangles[i - 1]); - triangles[i].next = (i == nTriangles - 1 ? null : triangles[i + 1]); - } - - // 使用优先队列构建 minHeap - PriorityQueue<Triangle> triangleHeap = - new PriorityQueue<>(Comparator.comparingDouble(t -> t.area)); - Collections.addAll(triangleHeap, triangles); // complexity TODO O(n) or O(nlogn)? - - int remainNonTerminalPoint = m - 2; - int fakeCnt = 0; - // while (!triangleHeap.isEmpty() && triangleHeap.peek().area < maxError) { // TODO - while (triangleHeap.size() - fakeCnt > remainNonTerminalPoint) { - // 注意peek只需要直接访问该位置的元素,不涉及任何重排或堆化操作 - // 而poll是删除堆顶元素,需要重新堆化以维护堆的性质,复杂度是O(logk),k是当前堆的大小 - Triangle tri = triangleHeap.poll(); // O(logn) - - // 如果该三角形已经被删除,跳过. Avoid using heap.remove(x) as it is O(n) complexity - // 而且除了heap里,已经没有其他节点会和它关联了,所有的connections关系已经迁移到新的角色替代节点上 - if (tri.isDeleted) { - fakeCnt--; // 取出了一个被标记删除点 - if (debug) { - System.out.println( - ">>>bottom-up, remaining " - + triangleHeap.size() - + " triangles (including those marked for removal)"); - } - continue; - } - - // 真正的淘汰点 - // 记录最近淘汰的点,注意不要重复记录也就是在上面执行之后再确认淘汰 - if (debug) { - System.out.println("(1) eliminate " + points.get(tri.indices[1])); - } - points.get(tri.indices[1]).markEliminated(); - - // 更新相邻三角形 - if (tri.prev != null) { - // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 - - // 1. 处理旧的tri.prev被标记删除的事情(角色替代) - // triangleHeap.remove(tri.prev); // Avoid using heap.remove(x) as it is O(n) complexity! - tri.prev.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只要heap poll到它就跳过就可以 - - Triangle newPre = new Triangle(tri.prev); // deep copy and inherit connection - // tri.prev = newPre; // can omit, because already done by new Triangle(tri.prev) - - // 2. 处理pi被淘汰引起tri.prev被更新的事情 - // 前一个三角形连到后一个三角形 - tri.prev.next = - tri.next; // ATTENTION!!!: 这里的tri.next后面可能会因为处理旧的tri.next被标记删除的事情被换掉!到时候要重新赋值! - tri.prev.indices[2] = tri.indices[2]; - - double mc; - if (prefixSum == null) { - mc = myLA_withTimestamps(points, tri.prev.indices[0], tri.prev.indices[2]); - } else { - mc = myLA_withTimestamps(points, prefixSum, tri.prev.indices[0], tri.prev.indices[2]); - } - tri.prev.area = mc; - if (debug) { - System.out.println( - "(2) updating point on the left " - + points.get(tri.prev.indices[1]) - + ", ranging [" - + tri.prev.indices[0] - + "," - + tri.prev.indices[2] - + "]"); - } - - // 重新加入堆 - // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 - // 所以必须通过add来显式重新插入修改后的元素 - triangleHeap.add(tri.prev); // O(logn) 注意加入的是一个新的对象isDeleted=false - fakeCnt++; // 表示heap里多了一个被标记删除的假点 - } - - if (tri.next != null) { - // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 - - // 1. 处理旧的tri.next被标记删除的事情(角色替代) - // triangleHeap.remove(tri.next); // Avoid using heap.remove(x) as it is O(n) complexity - tri.next.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只有poll到它就跳过就可以 - - Triangle newNext = new Triangle(tri.next); // deep copy and inherit connection - // tri.next = newNext; // omit, because already done by new Triangle(tri.prev) - - if (tri.prev != null) { - tri.prev.next = tri.next; // ATTENTION!!!: 这里的tri.next已经被换掉!所以之前的要重新赋值! - } - - // 2. 处理pi被淘汰引起tri.next被更新的事情 - tri.next.prev = tri.prev; // 注意此时tri.prev已经是替代后的节点,tri.next也是,从而被标记为废点的前后关联真正砍断 - tri.next.indices[0] = tri.indices[0]; - - double mc; - if (prefixSum == null) { - mc = myLA_withTimestamps(points, tri.next.indices[0], tri.next.indices[2]); - } else { - mc = myLA_withTimestamps(points, prefixSum, tri.next.indices[0], tri.next.indices[2]); - } - tri.next.area = mc; - if (debug) { - System.out.println( - "(3) updating point on the right " - + points.get(tri.next.indices[1]) - + ", ranging [" - + tri.next.indices[0] - + "," - + tri.next.indices[2] - + "]"); - } + // public static List<Point> seg_bottomUp_m_withTimestamps( + // List<Point> points, int m, Object[] prefixSum, boolean debug) throws IOException { + // if (m <= 2) { + // throw new IOException("please make m>2"); + // } + // // 直接控制采样点数 + // int total = points.size(); + // if (total < 3) { + // return points; // 不足 3 个点无法形成三角形 + // } + // + // // 每个triangle对应两个相邻的待合并的分段 + // int nTriangles = total - 2; + // Triangle[] triangles = new Triangle[nTriangles]; + // + // // 创建所有三角形并计算初始面积 + // points.get(0).prev = null; + // points.get(0).next = points.get(1); + // // points.get(0).index = 0; + // points.get(total - 1).next = null; + // points.get(total - 1).prev = points.get(total - 2); + // // points.get(total - 1).index = points.size() - 1; + // for (int i = 1; i < total - 1; i++) { + // int index1 = i - 1, index2 = i, index3 = i + 1; + // double mc; + // if (prefixSum == null) { + // mc = myLA_withTimestamps(points, index1, index3); + // } else { + // mc = myLA_withTimestamps(points, prefixSum, index1, index3); + // } + // triangles[i - 1] = new Triangle(index1, index2, index3, mc); + // + // // 初始化点的状态 用于最后在线采样结果顺序输出未淘汰点 + // points.get(i).prev = points.get(i - 1); + // points.get(i).next = points.get(i + 1); + // // points.get(i).index = i; // for swab usage + // } + // + // // 设置三角形的前后关系 + // for (int i = 0; i < nTriangles; i++) { // TODO 这个可以和上一个循环合并吗??好像不可以 + // triangles[i].prev = (i == 0 ? null : triangles[i - 1]); + // triangles[i].next = (i == nTriangles - 1 ? null : triangles[i + 1]); + // } + // + // // 使用优先队列构建 minHeap + // PriorityQueue<Triangle> triangleHeap = + // new PriorityQueue<>(Comparator.comparingDouble(t -> t.area)); + // Collections.addAll(triangleHeap, triangles); // complexity TODO O(n) or O(nlogn)? + // + // int remainNonTerminalPoint = m - 2; + // int fakeCnt = 0; + // // while (!triangleHeap.isEmpty() && triangleHeap.peek().area < maxError) { // + // TODO + // while (triangleHeap.size() - fakeCnt > remainNonTerminalPoint) { + // // 注意peek只需要直接访问该位置的元素,不涉及任何重排或堆化操作 + // // 而poll是删除堆顶元素,需要重新堆化以维护堆的性质,复杂度是O(logk),k是当前堆的大小 + // Triangle tri = triangleHeap.poll(); // O(logn) + // + // // 如果该三角形已经被删除,跳过. Avoid using heap.remove(x) as it is O(n) complexity + // // 而且除了heap里,已经没有其他节点会和它关联了,所有的connections关系已经迁移到新的角色替代节点上 + // if (tri.isDeleted) { + // fakeCnt--; // 取出了一个被标记删除点 + // if (debug) { + // System.out.println( + // ">>>bottom-up, remaining " + // + triangleHeap.size() + // + " triangles (including those marked for removal)"); + // } + // continue; + // } + // + // // 真正的淘汰点 + // // 记录最近淘汰的点,注意不要重复记录也就是在上面执行之后再确认淘汰 + // if (debug) { + // System.out.println("(1) eliminate " + points.get(tri.indices[1])); + // } + // points.get(tri.indices[1]).markEliminated(); + // + // // 更新相邻三角形 + // if (tri.prev != null) { + // // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 + // + // // 1. 处理旧的tri.prev被标记删除的事情(角色替代) + // // triangleHeap.remove(tri.prev); // Avoid using heap.remove(x) as it is O(n) + // complexity! + // tri.prev.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只要heap + // poll到它就跳过就可以 + // + // Triangle newPre = new Triangle(tri.prev); // deep copy and inherit connection + // // tri.prev = newPre; // can omit, because already done by new + // Triangle(tri.prev) + // + // // 2. 处理pi被淘汰引起tri.prev被更新的事情 + // // 前一个三角形连到后一个三角形 + // tri.prev.next = + // tri.next; // ATTENTION!!!: + // 这里的tri.next后面可能会因为处理旧的tri.next被标记删除的事情被换掉!到时候要重新赋值! + // tri.prev.indices[2] = tri.indices[2]; + // + // double mc; + // if (prefixSum == null) { + // mc = myLA_withTimestamps(points, tri.prev.indices[0], tri.prev.indices[2]); + // } else { + // mc = myLA_withTimestamps(points, prefixSum, tri.prev.indices[0], + // tri.prev.indices[2]); + // } + // tri.prev.area = mc; + // if (debug) { + // System.out.println( + // "(2) updating point on the left " + // + points.get(tri.prev.indices[1]) + // + ", ranging [" + // + tri.prev.indices[0] + // + "," + // + tri.prev.indices[2] + // + "]"); + // } + // + // // 重新加入堆 + // // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 + // // 所以必须通过add来显式重新插入修改后的元素 + // triangleHeap.add(tri.prev); // O(logn) 注意加入的是一个新的对象isDeleted=false + // fakeCnt++; // 表示heap里多了一个被标记删除的假点 + // } + // + // if (tri.next != null) { + // // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 + // + // // 1. 处理旧的tri.next被标记删除的事情(角色替代) + // // triangleHeap.remove(tri.next); // Avoid using heap.remove(x) as it is O(n) + // complexity + // tri.next.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只有poll到它就跳过就可以 + // + // Triangle newNext = new Triangle(tri.next); // deep copy and inherit connection + // // tri.next = newNext; // omit, because already done by new Triangle(tri.prev) + // + // if (tri.prev != null) { + // tri.prev.next = tri.next; // ATTENTION!!!: 这里的tri.next已经被换掉!所以之前的要重新赋值! + // } + // + // // 2. 处理pi被淘汰引起tri.next被更新的事情 + // tri.next.prev = tri.prev; // 注意此时tri.prev已经是替代后的节点,tri.next也是,从而被标记为废点的前后关联真正砍断 + // tri.next.indices[0] = tri.indices[0]; + // + // double mc; + // if (prefixSum == null) { + // mc = myLA_withTimestamps(points, tri.next.indices[0], tri.next.indices[2]); + // } else { + // mc = myLA_withTimestamps(points, prefixSum, tri.next.indices[0], + // tri.next.indices[2]); + // } + // tri.next.area = mc; + // if (debug) { + // System.out.println( + // "(3) updating point on the right " + // + points.get(tri.next.indices[1]) + // + ", ranging [" + // + tri.next.indices[0] + // + "," + // + tri.next.indices[2] + // + "]"); + // } + // + // // 重新加入堆 + // // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 + // // 所以必须通过add 来显式重新插入修改后的元素 + // triangleHeap.add(tri.next); // 注意加入的是一个新的对象isDeleted=false + // fakeCnt++; // 表示heap里多了一个被标记删除的假点 + // } + // if (debug) { + // System.out.println( + // ">>>bottom-up, remaining " + // + triangleHeap.size() + // + " triangles (including those marked for removal)"); + // } + // } + // + // List<Point> onlineSampled = new ArrayList<>(); + // Point start = points.get(0); + // Point end = points.get(points.size() - 1); + // while (start + // != end) { // + // Point类里增加prev&next指针,这样T'_max{0,k-e}里点的连接关系就有了,这样从Pa开始沿着指针,遍历点数一定不超过e+3 + // onlineSampled.add(start); // 注意未淘汰的点的Dominated significance尚未赋值,还都是infinity + // start = start.next; // when e=0, only traversing three points pa pi pb + // } + // onlineSampled.add(end); + // return onlineSampled; + // } - // 重新加入堆 - // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 - // 所以必须通过add 来显式重新插入修改后的元素 - triangleHeap.add(tri.next); // 注意加入的是一个新的对象isDeleted=false - fakeCnt++; // 表示heap里多了一个被标记删除的假点 - } - if (debug) { - System.out.println( - ">>>bottom-up, remaining " - + triangleHeap.size() - + " triangles (including those marked for removal)"); - } - } - - List<Point> onlineSampled = new ArrayList<>(); - Point start = points.get(0); - Point end = points.get(points.size() - 1); - while (start - != end) { // Point类里增加prev&next指针,这样T'_max{0,k-e}里点的连接关系就有了,这样从Pa开始沿着指针,遍历点数一定不超过e+3 - onlineSampled.add(start); // 注意未淘汰的点的Dominated significance尚未赋值,还都是infinity - start = start.next; // when e=0, only traversing three points pa pi pb - } - onlineSampled.add(end); - return onlineSampled; - } - - public static void main(String[] args) throws IOException { - Random rand = new Random(10); - String input = "D:\\datasets\\regular\\tmp2.csv"; - boolean hasHeader = false; - int timeIdx = 0; - int valueIdx = 1; - int N = 2000; - // List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); - Polyline polyline = new Polyline(); - for (int i = 0; i < N; i += 1) { - double v = rand.nextInt(1000); - polyline.addVertex(new Point(i, v)); - } - List<Point> points = polyline.getVertices(); - try (FileWriter writer = new FileWriter("raw.csv")) { - // 写入CSV头部 - writer.append("x,y,z\n"); - - // 写入每个点的数据 - for (int i = 0; i < points.size(); i++) { - Point point = points.get(i); - writer.append(point.x + "," + point.y + "," + point.z + "\n"); - } - System.out.println(points.size() + " Data has been written"); - } catch (IOException e) { - System.out.println("Error writing to CSV file: " + e.getMessage()); - } - - // long startTime = System.currentTimeMillis(); - Object[] prefixSum = prefixSum(polyline.getVertices()); - // long endTime = System.currentTimeMillis(); - // System.out.println("Time taken to precompute prefix sum: " + (endTime - startTime) + - // "ms"); - - int m = 100; - - // 33s worst-case O(n2)因为没有prefix sum加速计算L2误差,但是实际达不到worst-case所以实际运行不会那么大耗时 - long startTime = System.currentTimeMillis(); - List<Point> sampled = seg_bottomUp_m_withTimestamps(points, m, null, false); - long endTime = System.currentTimeMillis(); - System.out.println("Time taken: " + (endTime - startTime) + "ms"); - - // for (Point p : sampled) { - // System.out.println(p); - // } - System.out.println(sampled.size()); - - try (PrintWriter writer = new PrintWriter(new File("output.csv"))) { - // 写入字符串 - for (int i = 0; i < sampled.size(); i++) { - writer.println(sampled.get(i).x + "," + sampled.get(i).y); - } - - } catch (FileNotFoundException e) { - e.printStackTrace(); - } - } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test10.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test10.java index 4dba96d0e3e..31c1cff7352 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test10.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test10.java @@ -8,55 +8,54 @@ import java.util.Random; import static org.apache.iotdb.db.query.eBUG.DP.dynamic_programming; public class Test10 { - // 用于验证java DP正确性 - public static void main(String[] args) { - Random rand = new Random(10); - String input = "raw.csv"; - boolean hasHeader = true; - int timeIdx = 0; - int valueIdx = 1; - int N = 100; -// List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); - Polyline polyline = new Polyline(); - for (int i = 0; i < N; i += 1) { - double v = rand.nextInt(1000); - polyline.addVertex(new Point(i, v)); - } - List<Point> points = polyline.getVertices(); - try (FileWriter writer = new FileWriter("raw.csv")) { - // 写入CSV头部 - writer.append("x,y,z\n"); - - // 写入每个点的数据 - for (Point point : points) { - writer.append(point.x + "," + point.y + "," + point.z + "\n"); - } - System.out.println(points.size() + " Data has been written"); - } catch (IOException e) { - System.out.println("Error writing to CSV file: " + e.getMessage()); - } - - - long startTime = System.currentTimeMillis(); -// double[][] ad2 = prepareKSegments(points, ERROR.L1, false); - int m = 10; - List<Point> sampled = dynamic_programming(points, m - 1, DP.ERROR.area, false); - long endTime = System.currentTimeMillis(); - System.out.println("Time taken: " + (endTime - startTime) + "ms"); + // 用于验证java DP正确性 + public static void main(String[] args) { + Random rand = new Random(10); + String input = "raw.csv"; + boolean hasHeader = true; + int timeIdx = 0; + int valueIdx = 1; + int N = 100; + // List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); + Polyline polyline = new Polyline(); + for (int i = 0; i < N; i += 1) { + double v = rand.nextInt(1000); + polyline.addVertex(new Point(i, v)); + } + List<Point> points = polyline.getVertices(); + try (FileWriter writer = new FileWriter("raw.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); + + // 写入每个点的数据 + for (Point point : points) { + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println(points.size() + " Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); + } - for (Point p : sampled) { - System.out.println(p); - } - System.out.println(sampled.size()); + long startTime = System.currentTimeMillis(); + // double[][] ad2 = prepareKSegments(points, ERROR.L1, false); + int m = 10; + List<Point> sampled = dynamic_programming(points, m - 1, DP.ERRORtype.area, false); + long endTime = System.currentTimeMillis(); + System.out.println("Time taken: " + (endTime - startTime) + "ms"); -// try (PrintWriter writer = new PrintWriter(new File("output.csv"))) { -// // 写入字符串 -// for (int i = 0; i < sampled.size(); i++) { -// writer.println(sampled.get(i).x + "," + sampled.get(i).y); -// } -// -// } catch (FileNotFoundException e) { -// e.printStackTrace(); -// } + for (Point p : sampled) { + System.out.println(p); } + System.out.println(sampled.size()); + + // try (PrintWriter writer = new PrintWriter(new File("output.csv"))) { + // // 写入字符串 + // for (int i = 0; i < sampled.size(); i++) { + // writer.println(sampled.get(i).x + "," + sampled.get(i).y); + // } + // + // } catch (FileNotFoundException e) { + // e.printStackTrace(); + // } + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tool.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tool.java index 9a54f835ec3..f2e2c0d68d9 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tool.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tool.java @@ -12,406 +12,406 @@ import java.util.stream.Stream; public class Tool { - public static void printArray(double[][] array) { - // 遍历每一行 - for (double[] row : array) { - for (double value : row) { - // 格式化输出:%10.2f 表示右对齐,占10个字符,保留2位小数 - System.out.printf("%10.2f ", value); - } - System.out.println(); // 换行 - } + public static void printArray(double[][] array) { + // 遍历每一行 + for (double[] row : array) { + for (double value : row) { + // 格式化输出:%10.2f 表示右对齐,占10个字符,保留2位小数 + System.out.printf("%10.2f ", value); + } + System.out.println(); // 换行 } - - public static void printTransposeArray(double[][] array) { - // 遍历每一列 - for (int i = 0; i < array[0].length; i++) { - for (int j = 0; j < array.length; j++) { - // 格式化输出:%10.2f 表示右对齐,占10个字符,保留2位小数 - System.out.printf("%10.2f ", array[j][i]); - } - System.out.println(); // 换行 - } + } + + public static void printTransposeArray(double[][] array) { + // 遍历每一列 + for (int i = 0; i < array[0].length; i++) { + for (int j = 0; j < array.length; j++) { + // 格式化输出:%10.2f 表示右对齐,占10个字符,保留2位小数 + System.out.printf("%10.2f ", array[j][i]); + } + System.out.println(); // 换行 } - - public static void printArray(int[][] array) { - // 遍历每一行 - for (int[] row : array) { - for (int value : row) { - System.out.printf("%10d ", value); - } - System.out.println(); // 换行 - } + } + + public static void printArray(int[][] array) { + // 遍历每一行 + for (int[] row : array) { + for (int value : row) { + System.out.printf("%10d ", value); + } + System.out.println(); // 换行 } - - public static void printTransposeArray(int[][] array) { - // 遍历每一列 - for (int i = 0; i < array[0].length; i++) { - for (int j = 0; j < array.length; j++) { - // 格式化输出:%10.2f 表示右对齐,占10个字符,保留2位小数 - System.out.printf("%10d ", array[j][i]); - } - System.out.println(); // 换行 - } + } + + public static void printTransposeArray(int[][] array) { + // 遍历每一列 + for (int i = 0; i < array[0].length; i++) { + for (int j = 0; j < array.length; j++) { + // 格式化输出:%10.2f 表示右对齐,占10个字符,保留2位小数 + System.out.printf("%10d ", array[j][i]); + } + System.out.println(); // 换行 } - - // Assumed interface for the simplification function - interface SimplifyFunction { - List<Point> reducePoints(List<Point> points, double epsilon, Object... kwargs) - throws IOException; + } + + // Assumed interface for the simplification function + interface SimplifyFunction { + List<Point> reducePoints(List<Point> points, double epsilon, Object... kwargs) + throws IOException; + } + + // Method to generate the output file name based on input path + public static String generateOutputFileName( + String inputFilePath, String outDir, String customize) { + // Get the file name without extension + Path path = Paths.get(inputFilePath); + String fileNameWithoutExtension = path.getFileName().toString(); + String name = fileNameWithoutExtension.substring(0, fileNameWithoutExtension.lastIndexOf('.')); + + // Get the file extension + String extension = + path.getFileName().toString().substring(fileNameWithoutExtension.lastIndexOf('.')); + + // Create output file path by appending '-ds' before the extension + // String outputFile = name + "-ds-e" + e + extension; + String outputFile = name + customize + extension; + + if (outDir == null) { // 表示使用input文件所在的文件夹 + // Handle path compatibility for different operating systems (Linux/Windows) + return path.getParent().resolve(outputFile).toString(); + } else { + Path out = Paths.get(outDir); + return out.resolve(outputFile).toString(); } - - // Method to generate the output file name based on input path - public static String generateOutputFileName( - String inputFilePath, String outDir, String customize) { - // Get the file name without extension - Path path = Paths.get(inputFilePath); - String fileNameWithoutExtension = path.getFileName().toString(); - String name = fileNameWithoutExtension.substring(0, fileNameWithoutExtension.lastIndexOf('.')); - - // Get the file extension - String extension = - path.getFileName().toString().substring(fileNameWithoutExtension.lastIndexOf('.')); - - // Create output file path by appending '-ds' before the extension - // String outputFile = name + "-ds-e" + e + extension; - String outputFile = name + customize + extension; - - if (outDir == null) { // 表示使用input文件所在的文件夹 - // Handle path compatibility for different operating systems (Linux/Windows) - return path.getParent().resolve(outputFile).toString(); - } else { - Path out = Paths.get(outDir); - return out.resolve(outputFile).toString(); + } + + public static double getParam( + List<Point> points, + int m, + SimplifyFunction simplifyFunc, + double tolerantPts, + Object... kwargs) + throws IOException { + // double x = 1; 不要从1开始,因为对于swab来说使用如此小的maxError会很慢,宁可让maxError从大到小 + double x = 10; + boolean directLess = false; + boolean directMore = false; + + int iterNum = 0; + int maxIterNum = 50; + + // First binary search loop to find the initial range + double base = 2; + while (true) { + // System.out.println("x=" + x); + iterNum++; + if (iterNum > maxIterNum) { + // System.out.println("reach maxIterNum1"); + break; // Avoid infinite loops for special cases + } + List<Point> res = simplifyFunc.reducePoints(points, x, kwargs); + int length = res.size(); + // System.out.println(length); + + if (Math.abs(length - m) <= tolerantPts) { + return x; // Return the found parameter + } + + if (length > m) { + if (directMore) { + break; // Reach the first more } - } - - public static double getParam( - List<Point> points, - int m, - SimplifyFunction simplifyFunc, - double tolerantPts, - Object... kwargs) - throws IOException { - // double x = 1; 不要从1开始,因为对于swab来说使用如此小的maxError会很慢,宁可让maxError从大到小 - double x = 10; - boolean directLess = false; - boolean directMore = false; - - int iterNum = 0; - int maxIterNum = 50; - - // First binary search loop to find the initial range - double base = 2; - while (true) { - // System.out.println("x=" + x); - iterNum++; - if (iterNum > maxIterNum) { - // System.out.println("reach maxIterNum1"); - break; // Avoid infinite loops for special cases - } - List<Point> res = simplifyFunc.reducePoints(points, x, kwargs); - int length = res.size(); - // System.out.println(length); - - if (Math.abs(length - m) <= tolerantPts) { - return x; // Return the found parameter - } - - if (length > m) { - if (directMore) { - break; // Reach the first more - } - if (!directLess) { - directLess = true; - } - - x *= base; // Need less points, increase x - } else { - if (directLess) { - break; // Reach the first less - } - if (!directMore) { - directMore = true; - } - - x /= base; // Need more points, decrease x - } + if (!directLess) { + directLess = true; } - // Determine the initial left and right bounds - double left = (directLess) ? x / base : x; - double right = (directMore) ? x * base : x; - - // Refine the range with binary search - iterNum = 0; - while (true) { - iterNum++; - if (iterNum > maxIterNum) { - // System.out.println("reach maxIterNum"); - break; // Avoid infinite loops - } - - double mid = (left + right) / 2; - - List<Point> res = simplifyFunc.reducePoints(points, mid, kwargs); - int length = res.size(); - // System.out.println(length); - - if (Math.abs(length - m) <= tolerantPts) { - return mid; // Return the refined parameter - } - - if (length > m) { - left = mid; // Need less, narrow range to the left - } else { - right = mid; // Need more, narrow range to the right - } - // System.out.println(left + "," + right); + x *= base; // Need less points, increase x + } else { + if (directLess) { + break; // Reach the first less } - - return (left + right) / 2; // Return the average of left and right as the final parameter - } - - public static List<Point> readFromFile( - String input, boolean hasHeader, int timeIdx, int valueIdx, int N) { - List<Point> res = new ArrayList<>(); - - // Using Files.lines() for memory-efficient line-by-line reading - try (Stream<String> lines = Files.lines(Paths.get(input))) { - Iterator<String> iterator = lines.iterator(); - int lineNumber = 0; - - // Skip the header line if necessary - if (hasHeader && iterator.hasNext()) { - iterator.next(); // Skip the header - } - - // Process each line - while (iterator.hasNext()) { - lineNumber++; - String line = iterator.next(); - String[] columns = line.split(","); - - if (columns.length > Math.max(timeIdx, valueIdx)) { - double time; - if (timeIdx < 0) { - time = lineNumber; - } else { - time = Double.parseDouble(columns[timeIdx]); - } - double value = Double.parseDouble(columns[valueIdx]); - res.add(new Point(time, value)); - // System.out.println("Line " + lineNumber + " - Time: " + time + ", - // Value: " + value); - } else { - System.out.println("Line " + lineNumber + " is malformed (not enough columns)."); - } - if (N > 0 && lineNumber >= N) { - // N>0控制点数,否则N<=0表示读全部数据 - break; - } - } - } catch (IOException e) { - e.printStackTrace(); + if (!directMore) { + directMore = true; } - return res; - } - - // 计算三角形面积 - public static double triArea(Point d0, Point d1, Point d2) { - double dArea = ((d1.x - d0.x) * (d2.y - d0.y) - (d2.x - d0.x) * (d1.y - d0.y)) / 2.0; - return (dArea > 0.0) ? dArea : -dArea; // abs - } - // 计算简单多边形(边不交叉)的面积(鞋带公式) - public static double calculatePolygonArea(List<Point> points) { - // points多边形顶点,要求按照逆时针或者顺时针的顺序枚举,否则鞋带公式无法给出正确结果 - int n = points.size(); - double area = 0; - for (int i = 0; i < n; i++) { - int next = (i + 1) % n; - area += points.get(i).x * points.get(next).y - points.get(next).x * points.get(i).y; - } - return Math.abs(area) / 2.0; + x /= base; // Need more points, decrease x + } } - // 计算两个向量的叉积 - public static double crossProduct(double x1, double y1, double x2, double y2) { - // >0: (x2,y2)在(x1,y1)的逆时针方向 - // <0: (x2,y2)在(x1,y1)的顺时针方向 - // =0: 平行或共线 - return x1 * y2 - y1 * x2; + // Determine the initial left and right bounds + double left = (directLess) ? x / base : x; + double right = (directMore) ? x * base : x; + + // Refine the range with binary search + iterNum = 0; + while (true) { + iterNum++; + if (iterNum > maxIterNum) { + // System.out.println("reach maxIterNum"); + break; // Avoid infinite loops + } + + double mid = (left + right) / 2; + + List<Point> res = simplifyFunc.reducePoints(points, mid, kwargs); + int length = res.size(); + // System.out.println(length); + + if (Math.abs(length - m) <= tolerantPts) { + return mid; // Return the refined parameter + } + + if (length > m) { + left = mid; // Need less, narrow range to the left + } else { + right = mid; // Need more, narrow range to the right + } + // System.out.println(left + "," + right); } - // 判断两条线段是否相交并计算交点 - // L1包含一个线段的两个端点,L2包含另一个线段的两个端点 - public static Object[] lineIntersection(Point[] L1, Point[] L2) { - double x1 = L1[0].x, y1 = L1[0].y, x2 = L1[1].x, y2 = L1[1].y; - double x3 = L2[0].x, y3 = L2[0].y, x4 = L2[1].x, y4 = L2[1].y; - - // 判断是否相交(叉积) - double d1 = crossProduct(x2 - x1, y2 - y1, x3 - x1, y3 - y1); - double d2 = crossProduct(x2 - x1, y2 - y1, x4 - x1, y4 - y1); - double d3 = crossProduct(x4 - x3, y4 - y3, x1 - x3, y1 - y3); - double d4 = crossProduct(x4 - x3, y4 - y3, x2 - x3, y2 - y3); - - // 如果叉积条件满足,表示有交点 - // d1*d2<0意味着P3、P4分别在L12的两边 - // d3*d4<0意味着P1、P2分别在L34的两边 - // 两个同时满足说明有交点 - if (d1 * d2 < 0 && d3 * d4 < 0) { - double denominator = (y4 - y3) * (x2 - x1) - (x4 - x3) * (y2 - y1); // 不可能为0(平行或共线),因为已经判断相交了 - double t1 = ((x4 - x3) * (y1 - y3) - (y4 - y3) * (x1 - x3)) / denominator; - double x = x1 + t1 * (x2 - x1); - double y = y1 + t1 * (y2 - y1); - return new Object[]{true, new Point(x, y)}; - } - - // 检查是否起点或终点重合 - if ((x1 == x3 && y1 == y3) || (x1 == x4 && y1 == y4)) { - return new Object[]{true, new Point(x1, y1)}; + return (left + right) / 2; // Return the average of left and right as the final parameter + } + + public static List<Point> readFromFile( + String input, boolean hasHeader, int timeIdx, int valueIdx, int N) { + List<Point> res = new ArrayList<>(); + + // Using Files.lines() for memory-efficient line-by-line reading + try (Stream<String> lines = Files.lines(Paths.get(input))) { + Iterator<String> iterator = lines.iterator(); + int lineNumber = 0; + + // Skip the header line if necessary + if (hasHeader && iterator.hasNext()) { + iterator.next(); // Skip the header + } + + // Process each line + while (iterator.hasNext()) { + lineNumber++; + String line = iterator.next(); + String[] columns = line.split(","); + + if (columns.length > Math.max(timeIdx, valueIdx)) { + double time; + if (timeIdx < 0) { + time = lineNumber; + } else { + time = Double.parseDouble(columns[timeIdx]); + } + double value = Double.parseDouble(columns[valueIdx]); + res.add(new Point(time, value)); + // System.out.println("Line " + lineNumber + " - Time: " + time + ", + // Value: " + value); + } else { + System.out.println("Line " + lineNumber + " is malformed (not enough columns)."); } - if ((x2 == x3 && y2 == y3) || (x2 == x4 && y2 == y4)) { - return new Object[]{true, new Point(x2, y2)}; + if (N > 0 && lineNumber >= N) { + // N>0控制点数,否则N<=0表示读全部数据 + break; } + } + } catch (IOException e) { + e.printStackTrace(); + } + return res; + } + + // 计算三角形面积 + public static double triArea(Point d0, Point d1, Point d2) { + double dArea = ((d1.x - d0.x) * (d2.y - d0.y) - (d2.x - d0.x) * (d1.y - d0.y)) / 2.0; + return (dArea > 0.0) ? dArea : -dArea; // abs + } + + // 计算简单多边形(边不交叉)的面积(鞋带公式) + public static double calculatePolygonArea(List<Point> points) { + // points多边形顶点,要求按照逆时针或者顺时针的顺序枚举,否则鞋带公式无法给出正确结果 + int n = points.size(); + double area = 0; + for (int i = 0; i < n; i++) { + int next = (i + 1) % n; + area += points.get(i).x * points.get(next).y - points.get(next).x * points.get(i).y; + } + return Math.abs(area) / 2.0; + } + + // 计算两个向量的叉积 + public static double crossProduct(double x1, double y1, double x2, double y2) { + // >0: (x2,y2)在(x1,y1)的逆时针方向 + // <0: (x2,y2)在(x1,y1)的顺时针方向 + // =0: 平行或共线 + return x1 * y2 - y1 * x2; + } + + // 判断两条线段是否相交并计算交点 + // L1包含一个线段的两个端点,L2包含另一个线段的两个端点 + public static Object[] lineIntersection(Point[] L1, Point[] L2) { + double x1 = L1[0].x, y1 = L1[0].y, x2 = L1[1].x, y2 = L1[1].y; + double x3 = L2[0].x, y3 = L2[0].y, x4 = L2[1].x, y4 = L2[1].y; + + // 判断是否相交(叉积) + double d1 = crossProduct(x2 - x1, y2 - y1, x3 - x1, y3 - y1); + double d2 = crossProduct(x2 - x1, y2 - y1, x4 - x1, y4 - y1); + double d3 = crossProduct(x4 - x3, y4 - y3, x1 - x3, y1 - y3); + double d4 = crossProduct(x4 - x3, y4 - y3, x2 - x3, y2 - y3); + + // 如果叉积条件满足,表示有交点 + // d1*d2<0意味着P3、P4分别在L12的两边 + // d3*d4<0意味着P1、P2分别在L34的两边 + // 两个同时满足说明有交点 + if (d1 * d2 < 0 && d3 * d4 < 0) { + double denominator = (y4 - y3) * (x2 - x1) - (x4 - x3) * (y2 - y1); // 不可能为0(平行或共线),因为已经判断相交了 + double t1 = ((x4 - x3) * (y1 - y3) - (y4 - y3) * (x1 - x3)) / denominator; + double x = x1 + t1 * (x2 - x1); + double y = y1 + t1 * (y2 - y1); + return new Object[] {true, new Point(x, y)}; + } - return new Object[]{false, null}; + // 检查是否起点或终点重合 + if ((x1 == x3 && y1 == y3) || (x1 == x4 && y1 == y4)) { + return new Object[] {true, new Point(x1, y1)}; + } + if ((x2 == x3 && y2 == y3) || (x2 == x4 && y2 == y4)) { + return new Object[] {true, new Point(x2, y2)}; } - // 计算总的多边形面积(通过时间序列扫描交点),默认要求点按照时间戳严格递增排列 - public static double total_areal_displacement( - List<Point> points, List<Point> points2, boolean debug) { - double totalArea = 0; - int i = 0, j = 0; // i for points, j for points2 - Point prevIntersection = null; - int prevI = -1, prevJ = -1; - - // List<double[]> intersectionCoords = new ArrayList<>(); - List<Double> areaList = new ArrayList<>(); - - while (i < points.size() - 1 && j < points2.size() - 1) { - if (debug) { - System.out.println("--------- " + i + " " + j + " ------------"); - } - - // 当前线段 - Point[] L1 = {points.get(i), points.get(i + 1)}; - Point[] L2 = {points2.get(j), points2.get(j + 1)}; - - // 判断是否有交点 - Object[] result = lineIntersection(L1, L2); - boolean isIntersect = (boolean) result[0]; - Point intersection = (Point) result[1]; - - if (isIntersect) { - // intersectionCoords.add(intersection); - - if (prevIntersection != null) { - // 构造多边形点集 - List<Point> polygon = new ArrayList<>(); // 按照顺时针/逆时针几何连接顺序枚举多边形的顶点 - polygon.add(prevIntersection); - if (debug) { - System.out.println("- start intersection: " + prevIntersection); - } - polygon.addAll(points.subList(prevI, i + 1)); // 添加当前线段的点,左闭右开 - // polygon.addAll(Arrays.asList(Arrays.copyOfRange(points, prevI, i + - // 1))); // 添加当前线段的点,左闭右开 - if (debug) { - System.out.println("- one side: " + points.subList(prevI, i + 1)); - } - polygon.add(intersection); - if (debug) { - System.out.println("- end intersection: " + intersection); - } - List<Point> tempPoints2 = points2.subList(prevJ, j + 1); - Collections.reverse(tempPoints2); // 添加另一序列的点 - polygon.addAll(tempPoints2); - if (debug) { - System.out.println("- another side: " + tempPoints2); - } - - // double[][] polygonArray = new double[polygon.size()][2]; - // for (int k = 0; k < polygon.size(); k++) { - // polygonArray[k] = polygon.get(k); - // } - - // 计算多边形面积,注意polygon里的点一定是按照顺时针/逆时针几何连接顺序枚举的多边形顶点 - double area = calculatePolygonArea(polygon); - if (debug) { - System.out.println("Area = " + area); - } - totalArea += area; - areaList.add(area); - } - - prevIntersection = intersection; - prevI = i + 1; - prevJ = j + 1; - if (debug) { - System.out.println( - "This intersection = " - + intersection - + ", next polygon: side1 = " - + prevI - + ", side2 = " - + prevJ); - } - } - - int currentI = i; // 临时存储i - int currentJ = j; // 临时存储j - if (points.get(currentI + 1).x <= points2.get(currentJ + 1).x) { - i += 1; // 基于时间戳严格递增的假设,Line不会回头或者垂直 - } - if (points.get(currentI + 1).x >= points2.get(currentJ + 1).x) { - j += 1; // 基于时间戳严格递增的假设,Line不会回头或者垂直 - } - } // end of while + return new Object[] {false, null}; + } + + // 计算总的多边形面积(通过时间序列扫描交点),默认要求点按照时间戳严格递增排列 + public static double total_areal_displacement( + List<Point> points, List<Point> points2, boolean debug) { + double totalArea = 0; + int i = 0, j = 0; // i for points, j for points2 + Point prevIntersection = null; + int prevI = -1, prevJ = -1; + + // List<double[]> intersectionCoords = new ArrayList<>(); + List<Double> areaList = new ArrayList<>(); + + while (i < points.size() - 1 && j < points2.size() - 1) { + if (debug) { + System.out.println("--------- " + i + " " + j + " ------------"); + } + + // 当前线段 + Point[] L1 = {points.get(i), points.get(i + 1)}; + Point[] L2 = {points2.get(j), points2.get(j + 1)}; + + // 判断是否有交点 + Object[] result = lineIntersection(L1, L2); + boolean isIntersect = (boolean) result[0]; + Point intersection = (Point) result[1]; + + if (isIntersect) { + // intersectionCoords.add(intersection); + + if (prevIntersection != null) { + // 构造多边形点集 + List<Point> polygon = new ArrayList<>(); // 按照顺时针/逆时针几何连接顺序枚举多边形的顶点 + polygon.add(prevIntersection); + if (debug) { + System.out.println("- start intersection: " + prevIntersection); + } + polygon.addAll(points.subList(prevI, i + 1)); // 添加当前线段的点,左闭右开 + // polygon.addAll(Arrays.asList(Arrays.copyOfRange(points, prevI, i + + // 1))); // 添加当前线段的点,左闭右开 + if (debug) { + System.out.println("- one side: " + points.subList(prevI, i + 1)); + } + polygon.add(intersection); + if (debug) { + System.out.println("- end intersection: " + intersection); + } + List<Point> tempPoints2 = points2.subList(prevJ, j + 1); + Collections.reverse(tempPoints2); // 添加另一序列的点 + polygon.addAll(tempPoints2); + if (debug) { + System.out.println("- another side: " + tempPoints2); + } + + // double[][] polygonArray = new double[polygon.size()][2]; + // for (int k = 0; k < polygon.size(); k++) { + // polygonArray[k] = polygon.get(k); + // } + + // 计算多边形面积,注意polygon里的点一定是按照顺时针/逆时针几何连接顺序枚举的多边形顶点 + double area = calculatePolygonArea(polygon); + if (debug) { + System.out.println("Area = " + area); + } + totalArea += area; + areaList.add(area); + } + prevIntersection = intersection; + prevI = i + 1; + prevJ = j + 1; if (debug) { - System.out.println(areaList); + System.out.println( + "This intersection = " + + intersection + + ", next polygon: side1 = " + + prevI + + ", side2 = " + + prevJ); } - - return totalArea; + } + + int currentI = i; // 临时存储i + int currentJ = j; // 临时存储j + if (points.get(currentI + 1).x <= points2.get(currentJ + 1).x) { + i += 1; // 基于时间戳严格递增的假设,Line不会回头或者垂直 + } + if (points.get(currentI + 1).x >= points2.get(currentJ + 1).x) { + j += 1; // 基于时间戳严格递增的假设,Line不会回头或者垂直 + } + } // end of while + + if (debug) { + System.out.println(areaList); } - // 测试方法 - public static void main(String[] args) { - // 示例数据 - List<Point> points = new ArrayList<>(); - points.add(new Point(0, 0)); - points.add(new Point(1, 2)); - points.add(new Point(2, -20)); - points.add(new Point(3, 0)); - points.add(new Point(4, -1)); - points.add(new Point(5, -1.5)); - points.add(new Point(6, 0)); - - List<Point> points2 = new ArrayList<>(); - // points2.add(points.get(0)); - // points2.add(points.get(3)); - // points2.add(points.get(6)); - points2.add(new Point(1, -10)); - points2.add(new Point(3, 0)); - - double area = total_areal_displacement(points, points2, true); - System.out.println("Total area: " + area); - - // points = new ArrayList<>(); - // points.add(new Point(0, 0)); - // points.add(new Point(1, 2)); - // points.add(new Point(1.5, -10)); - // double area = calculatePolygonArea(points); - // System.out.println(area); - // - // Point[] L1 = new Point[2]; - // L1[0] = new Point(1, 2); - // L1[1] = new Point(2, -2); - // Point[] L2 = new Point[2]; - // L2[0] = new Point(0, 0); - // L2[1] = new Point(3, 0); - // Object[] res = lineIntersection(L1, L2); - // System.out.println(res[1]); - } + return totalArea; + } + + // 测试方法 + public static void main(String[] args) { + // 示例数据 + List<Point> points = new ArrayList<>(); + points.add(new Point(0, 0)); + points.add(new Point(1, 2)); + points.add(new Point(2, -20)); + points.add(new Point(3, 0)); + points.add(new Point(4, -1)); + points.add(new Point(5, -1.5)); + points.add(new Point(6, 0)); + + List<Point> points2 = new ArrayList<>(); + // points2.add(points.get(0)); + // points2.add(points.get(3)); + // points2.add(points.get(6)); + points2.add(new Point(1, -10)); + points2.add(new Point(3, 0)); + + double area = total_areal_displacement(points, points2, true); + System.out.println("Total area: " + area); + + // points = new ArrayList<>(); + // points.add(new Point(0, 0)); + // points.add(new Point(1, 2)); + // points.add(new Point(1.5, -10)); + // double area = calculatePolygonArea(points); + // System.out.println(area); + // + // Point[] L1 = new Point[2]; + // L1[0] = new Point(1, 2); + // L1[1] = new Point(2, -2); + // Point[] L2 = new Point[2]; + // L2[0] = new Point(0, 0); + // L2[1] = new Point(3, 0); + // Object[] res = lineIntersection(L1, L2); + // System.out.println(res[1]); + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_bottomUpL2.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_bottomUpYdiff.java similarity index 67% rename from server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_bottomUpL2.java rename to server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_bottomUpYdiff.java index 62c4b5ffe30..ec96d1d45e8 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_bottomUpL2.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_bottomUpYdiff.java @@ -7,11 +7,11 @@ import java.util.List; import static org.apache.iotdb.db.query.eBUG.Tool.generateOutputFileName; -public class sample_bottomUpL2 { +public class sample_bottomUpYdiff { public static void main(String[] args) throws IOException { - if (args.length < 6) { + if (args.length < 7) { System.out.println( - "Usage: Please provide arguments: inputFilePath,hasHeader,timeIdx,valueIdx,N,m,(outDir)"); + "Usage: Please provide arguments: inputFilePath,hasHeader,timeIdx,valueIdx,N,m,errorType,(outDir)"); } String input = args[0]; boolean hasHeader = Boolean.parseBoolean(args[1]); @@ -19,9 +19,20 @@ public class sample_bottomUpL2 { int valueIdx = Integer.parseInt(args[3]); int N = Integer.parseInt(args[4]); // N<=0表示读全部行,N>0表示读最多N行 int m = Integer.parseInt(args[5]); + String errorTypeStr = args[6]; + DP.ERRORtype errorType; + if (errorTypeStr.equals("L1")) { + errorType = DP.ERRORtype.L1; + } else if (errorTypeStr.equals("L2")) { + errorType = DP.ERRORtype.L2; + } else if (errorTypeStr.equals("L_infy")) { + errorType = DP.ERRORtype.L_infy; + } else { + throw new IOException("please input errorType as L1/L2/L_infy"); + } String outDir; - if (args.length > 6) { - outDir = args[6]; // 表示输出文件保存到指定的文件夹 + if (args.length > 7) { + outDir = args[7]; // 表示输出文件保存到指定的文件夹 } else { outDir = null; // 表示输出文件保存到input文件所在的文件夹 } @@ -33,18 +44,20 @@ public class sample_bottomUpL2 { System.out.println("Value index: " + valueIdx); System.out.println("N: " + N); System.out.println("m: " + m); + System.out.println("errorType: " + errorType); System.out.println("outDir: " + outDir); // 读取原始序列 List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); String outputFile = - generateOutputFileName(input, outDir, "-BUL2" + "-n" + points.size() + "-m" + m); - System.out.println("Output file: " + outputFile); + generateOutputFileName(input, outDir, "-BU" + errorType + "-n" + points.size() + "-m" + m); + System.out.println("Output file: " + outputFile); // do not modify this hint string log // 采样 long startTime = System.currentTimeMillis(); - List<Point> results = SWAB.seg_bottomUp_m_withTimestamps(points, m, null, false); + // List<Point> results = SWAB.seg_bottomUp_m_withTimestamps(points, m, null, false); + List<Point> results = BottomUpYdiff.reducePoints(points, m, errorType, false); long endTime = System.currentTimeMillis(); System.out.println("result point number: " + results.size()); @@ -53,7 +66,7 @@ public class sample_bottomUpL2 { + points.size() + ", m=" + m - + ", Time taken to reduce points: " + + ", Time taken to reduce points: " // do not modify this hint string log + (endTime - startTime) + "ms");
