This is an automated email from the ASF dual-hosted git repository. leirui pushed a commit to branch research/area-visualization in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 4f8e6f3264b995b27646cd86c495ed03a32d6b38 Author: Lei Rui <[email protected]> AuthorDate: Sun Jan 26 22:51:47 2025 +0800 swab --- .../java/org/apache/iotdb/db/query/eBUG/Point.java | 60 +- .../org/apache/iotdb/db/query/eBUG/Polyline.java | 56 +- .../java/org/apache/iotdb/db/query/eBUG/SWAB.java | 536 ++++++++++++++++ .../java/org/apache/iotdb/db/query/eBUG/Test1.java | 2 +- .../java/org/apache/iotdb/db/query/eBUG/Test4.java | 79 +++ .../java/org/apache/iotdb/db/query/eBUG/Test5.java | 64 ++ .../java/org/apache/iotdb/db/query/eBUG/Test6.java | 89 +++ .../java/org/apache/iotdb/db/query/eBUG/Tool.java | 418 +++++++------ .../java/org/apache/iotdb/db/query/eBUG/eBUG.java | 696 ++++++++++----------- .../org/apache/iotdb/db/query/eBUG/eBUG_Build.java | 236 +++---- 10 files changed, 1517 insertions(+), 719 deletions(-) diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Point.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Point.java index e9bcf241f0f..3d47e88f422 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Point.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Point.java @@ -2,33 +2,35 @@ package org.apache.iotdb.db.query.eBUG; public class Point { - double x, y, z; - - public Point prev; // 指向T'_max{0,k-e}里当前点的前一个点 for eBUG usage - public Point next; // 指向T'_max{0,k-e}里当前点的后一个点 for eBUG usage - // 注意Triangle里的prev&next用于最新状态下存在三角形之间的连接关系, - // 而这里Point的prev&next用于滞后e个淘汰点(也就是最近的e个淘汰点先不施加)状态下的未淘汰点之间的连接关系 - - public Point(double x, double y) { - this.x = x; - this.y = y; - this.z = Double.POSITIVE_INFINITY; // effective area - - // for eBUG usage - // this.eliminated = false; - this.prev = null; - this.next = null; - } - - public void markEliminated() { - // to avoid traversing each point between pa to pb, - // instead only traversing at most e most recently eliminated points lagged - prev.next = next; - next.prev = prev; - } - - @Override - public String toString() { - return "Point: (" + x + ", " + y + ", " + z + ")"; - } + double x, y, z; + + public Point prev; // 指向T'_max{0,k-e}里当前点的前一个点 for eBUG usage + public Point next; // 指向T'_max{0,k-e}里当前点的后一个点 for eBUG usage + // 注意Triangle里的prev&next用于最新状态下存在三角形之间的连接关系, + // 而这里Point的prev&next用于滞后e个淘汰点(也就是最近的e个淘汰点先不施加)状态下的未淘汰点之间的连接关系 + + public int index; // for swab usage + + public Point(double x, double y) { + this.x = x; + this.y = y; + this.z = Double.POSITIVE_INFINITY; // effective area + + // for eBUG usage + // this.eliminated = false; + this.prev = null; + this.next = null; + } + + public void markEliminated() { + // to avoid traversing each point between pa to pb, + // instead only traversing at most e most recently eliminated points lagged + prev.next = next; + next.prev = prev; + } + + @Override + public String toString() { + return "Point: (" + x + ", " + y + ", " + z + ")"; + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Polyline.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Polyline.java index 534c7a6f67e..5b1f5354871 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Polyline.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Polyline.java @@ -5,30 +5,34 @@ import java.util.List; public class Polyline { - private List<Point> vertices = new ArrayList<>(); - - public void addVertex(Point point) { - // if (!vertices.isEmpty()) { //before adding this point - // vertices.get(vertices.size() - 1).next = point; - // point.prev = vertices.get(vertices.size() - 1); - // } - vertices.add(point); - } - - public List<Point> getVertices() { - // return new ArrayList<>(vertices); - return vertices; - } - - public int size() { - return vertices.size(); - } - - public Point get(int index) { - return vertices.get(index); - } - - public void clear() { - vertices.clear(); - } + private List<Point> vertices = new ArrayList<>(); + + public void addVertex(Point point) { + // if (!vertices.isEmpty()) { //before adding this point + // vertices.get(vertices.size() - 1).next = point; + // point.prev = vertices.get(vertices.size() - 1); + // } + vertices.add(point); + } + + public List<Point> getVertices() { + // return new ArrayList<>(vertices); + return vertices; + } + + public void setVertices(List<Point> points) { + this.vertices = points; + } + + public int size() { + return vertices.size(); + } + + public Point get(int index) { + return vertices.get(index); + } + + public void clear() { + vertices.clear(); + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/SWAB.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/SWAB.java new file mode 100644 index 00000000000..574f59e3bf4 --- /dev/null +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/SWAB.java @@ -0,0 +1,536 @@ +package org.apache.iotdb.db.query.eBUG; + +import java.io.*; +import java.util.*; + + +public class SWAB { + public static Object[] prefixSum(List<Point> points) { + int n = points.size(); + + // 预计算prefix sum从而可以加速L2误差计算 + double[] prefixSumX = new double[n]; + double[] prefixSumY = new double[n]; + double[] prefixSumX2 = new double[n]; + double[] prefixSumY2 = new double[n]; + double[] prefixSumXY = new double[n]; + + Point pend = points.get(points.size() - 1); // 最后一个点,按照时间戳递增排序所以应该baseX是最大时间戳,从而归一化 + double baseX = pend.x; // 归一化避免数值溢出,主要就是这个时间戳太大 + double baseY = pend.y; // 归一化避免数值溢出 + if (baseX == 0) { + baseX = 1; + } + if (baseY == 0) { + baseY = 1; + } + + Point p0 = points.get(0); + prefixSumX[0] = p0.x / baseX; + prefixSumY[0] = p0.y / baseY; + prefixSumX2[0] = (p0.x * p0.x) / (baseX * baseX); + prefixSumY2[0] = (p0.y * p0.y) / (baseY * baseY); + prefixSumXY[0] = (p0.x * p0.y) / (baseX * baseY); + + for (int i = 1; i < n; i++) { + Point p = points.get(i); + prefixSumX[i] = prefixSumX[i - 1] + (p.x / baseX); + prefixSumY[i] = prefixSumY[i - 1] + (p.y / baseY); + prefixSumX2[i] = prefixSumX2[i - 1] + (p.x * p.x) / (baseX * baseX); + prefixSumY2[i] = prefixSumY2[i - 1] + (p.y * p.y) / (baseY * baseY); + prefixSumXY[i] = prefixSumXY[i - 1] + (p.x * p.y) / (baseX * baseY); + } + + return new Object[]{prefixSumX, prefixSumY, prefixSumX2, prefixSumY2, prefixSumXY, baseX, baseY}; + } + + public static double myLA_withTimestamps(List<Point> points, Object[] prefixSum, int lx, int rx) { + // 默认joint=true, residual=true,即使用linear interpolation近似分段,且使用L2误差 + // lx~rx 左闭右闭 + // 使用prefix sum加速,但是注意防止溢出处理 + double x1 = points.get(lx).x; + double y1 = points.get(lx).y; + double x2 = points.get(rx).x; + double y2 = points.get(rx).y; + + double k = (y2 - y1) / (x2 - x1); + double b = (y1 * x2 - y2 * x1) / (x2 - x1); + + // 计算L2误差,使用prefix sum加速计算 + double[] prefixSumX = (double[]) prefixSum[0]; + double[] prefixSumY = (double[]) prefixSum[1]; + double[] prefixSumX2 = (double[]) prefixSum[2]; + double[] prefixSumY2 = (double[]) prefixSum[3]; + double[] prefixSumXY = (double[]) prefixSum[4]; + double baseX = (double) prefixSum[5]; + double baseY = (double) prefixSum[6]; + + double sumX, sumY, sumX2, sumY2, sumXY; + + if (lx > 0) { + sumX = prefixSumX[rx] - prefixSumX[lx - 1]; + sumY = prefixSumY[rx] - prefixSumY[lx - 1]; + sumX2 = prefixSumX2[rx] - prefixSumX2[lx - 1]; + sumY2 = prefixSumY2[rx] - prefixSumY2[lx - 1]; + sumXY = prefixSumXY[rx] - prefixSumXY[lx - 1]; + } else { // lx=0 + sumX = prefixSumX[rx]; + sumY = prefixSumY[rx]; + sumX2 = prefixSumX2[rx]; + sumY2 = prefixSumY2[rx]; + sumXY = prefixSumXY[rx]; + } + + // 计算L2误差,注意归一化还原 + return sumY2 * baseY * baseY + + k * k * sumX2 * baseX * baseX + + 2 * k * b * sumX * baseX + - 2 * b * sumY * baseY + - 2 * k * sumXY * baseX * baseY + + b * b * (rx - lx + 1); + } + + public static double myLA_withTimestamps(List<Point> points, int lx, int rx) { + // 默认joint=true, residual=true,即使用linear interpolation近似分段,且使用L2误差 + // lx~rx 左闭右闭 + // 没有使用prefix sum加速 + double x1 = points.get(lx).x; + double y1 = points.get(lx).y; + double x2 = points.get(rx).x; + double y2 = points.get(rx).y; + + double k = (y2 - y1) / (x2 - x1); + double b = (y1 * x2 - y2 * x1) / (x2 - x1); + + double tmp = 0; + for (int i = lx; i <= rx; i++) { + double e = (k * points.get(i).x + b - points.get(i).y) * (k * points.get(i).x + b - points.get(i).y); + tmp += e; + } + return tmp; + } + + public static List<Point> seg_bottomUp_maxerror_withTimestamps( + List<Point> points, double maxError, Object[] prefixSum, boolean debug) { + int total = points.size(); + if (total < 3) { + return points; // 不足 3 个点无法形成三角形 + } + + // 每个triangle对应两个相邻的待合并的分段 + int nTriangles = total - 2; + Triangle[] triangles = new Triangle[nTriangles]; + + // 创建所有三角形并计算初始面积 + points.get(0).prev = null; + points.get(0).next = points.get(1); +// points.get(0).index = 0; + points.get(total - 1).next = null; + points.get(total - 1).prev = points.get(total - 2); +// points.get(total - 1).index = points.size() - 1; + for (int i = 1; i < total - 1; i++) { + int index1 = i - 1, index2 = i, index3 = i + 1; + double mc; + if (prefixSum == null) { + mc = myLA_withTimestamps(points, index1, index3); + } else { + mc = myLA_withTimestamps(points, prefixSum, index1, index3); + } + triangles[i - 1] = new Triangle(index1, index2, index3, mc); + + // 初始化点的状态 用于最后在线采样结果顺序输出未淘汰点 + points.get(i).prev = points.get(i - 1); + points.get(i).next = points.get(i + 1); +// points.get(i).index = i; // for swab usage + } + + // 设置三角形的前后关系 + for (int i = 0; i < nTriangles; i++) { // TODO 这个可以和上一个循环合并吗??好像不可以 + triangles[i].prev = (i == 0 ? null : triangles[i - 1]); + triangles[i].next = (i == nTriangles - 1 ? null : triangles[i + 1]); + } + + // 使用优先队列构建 minHeap + PriorityQueue<Triangle> triangleHeap = + new PriorityQueue<>(Comparator.comparingDouble(t -> t.area)); + Collections.addAll(triangleHeap, triangles); // complexity TODO O(n) or O(nlogn)? + + int fakeCnt = 0; + while (!triangleHeap.isEmpty() && triangleHeap.peek().area < maxError) { // TODO + // 注意peek只需要直接访问该位置的元素,不涉及任何重排或堆化操作 + // 而poll是删除堆顶元素,需要重新堆化以维护堆的性质,复杂度是O(logk),k是当前堆的大小 + Triangle tri = triangleHeap.poll(); // O(logn) + + // 如果该三角形已经被删除,跳过. Avoid using heap.remove(x) as it is O(n) complexity + // 而且除了heap里,已经没有其他节点会和它关联了,所有的connections关系已经迁移到新的角色替代节点上 + if (tri.isDeleted) { + fakeCnt--; // 取出了一个被标记删除点 + if (debug) { + System.out.println( + ">>>bottom-up, remaining " + + triangleHeap.size() + + " triangles (including those marked for removal)"); + } + continue; + } + + // 真正的淘汰点 + // 记录最近淘汰的点,注意不要重复记录也就是在上面执行之后再确认淘汰 + if (debug) { + System.out.println("(1) eliminate " + points.get(tri.indices[1])); + } + points.get(tri.indices[1]).markEliminated(); + + // 更新相邻三角形 + if (tri.prev != null) { + // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 + + // 1. 处理旧的tri.prev被标记删除的事情(角色替代) + // triangleHeap.remove(tri.prev); // Avoid using heap.remove(x) as it is O(n) complexity! + tri.prev.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只要heap poll到它就跳过就可以 + + Triangle newPre = new Triangle(tri.prev); // deep copy and inherit connection + // tri.prev = newPre; // can omit, because already done by new Triangle(tri.prev) + + // 2. 处理pi被淘汰引起tri.prev被更新的事情 + // 前一个三角形连到后一个三角形 + tri.prev.next = tri.next; // ATTENTION!!!: 这里的tri.next后面可能会因为处理旧的tri.next被标记删除的事情被换掉!到时候要重新赋值! + tri.prev.indices[2] = tri.indices[2]; + + double mc; + if (prefixSum == null) { + mc = myLA_withTimestamps(points, + tri.prev.indices[0], tri.prev.indices[2]); + } else { + mc = myLA_withTimestamps(points, + prefixSum, tri.prev.indices[0], tri.prev.indices[2]); + } + tri.prev.area = mc; + if (debug) { + System.out.println("(2) updating point on the left " + + points.get(tri.prev.indices[1]) + + ", ranging [" + tri.prev.indices[0] + "," + tri.prev.indices[2] + "]"); + } + + // 重新加入堆 + // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 + // 所以必须通过add来显式重新插入修改后的元素 + triangleHeap.add(tri.prev); // O(logn) 注意加入的是一个新的对象isDeleted=false + fakeCnt++; // 表示heap里多了一个被标记删除的假点 + } + + if (tri.next != null) { + // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 + + // 1. 处理旧的tri.next被标记删除的事情(角色替代) + // triangleHeap.remove(tri.next); // Avoid using heap.remove(x) as it is O(n) complexity + tri.next.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只有poll到它就跳过就可以 + + Triangle newNext = new Triangle(tri.next); // deep copy and inherit connection + // tri.next = newNext; // omit, because already done by new Triangle(tri.prev) + + if (tri.prev != null) { + tri.prev.next = tri.next; // ATTENTION!!!: 这里的tri.next已经被换掉!所以之前的要重新赋值! + } + + // 2. 处理pi被淘汰引起tri.next被更新的事情 + tri.next.prev = tri.prev; // 注意此时tri.prev已经是替代后的节点,tri.next也是,从而被标记为废点的前后关联真正砍断 + tri.next.indices[0] = tri.indices[0]; + + double mc; + if (prefixSum == null) { + mc = myLA_withTimestamps(points, + tri.next.indices[0], tri.next.indices[2]); + } else { + mc = myLA_withTimestamps(points, + prefixSum, tri.next.indices[0], tri.next.indices[2]); + } + tri.next.area = mc; + if (debug) { + System.out.println("(3) updating point on the right " + + points.get(tri.next.indices[1]) + + ", ranging [" + tri.next.indices[0] + "," + tri.next.indices[2] + "]"); + } + + // 重新加入堆 + // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 + // 所以必须通过add 来显式重新插入修改后的元素 + triangleHeap.add(tri.next); // 注意加入的是一个新的对象isDeleted=false + fakeCnt++; // 表示heap里多了一个被标记删除的假点 + } + if (debug) { + System.out.println( + ">>>bottom-up, remaining " + + triangleHeap.size() + + " triangles (including those marked for removal)"); + } + } + + List<Point> onlineSampled = new ArrayList<>(); + Point start = points.get(0); + Point end = points.get(points.size() - 1); + while (start != end) { // Point类里增加prev&next指针,这样T'_max{0,k-e}里点的连接关系就有了,这样从Pa开始沿着指针,遍历点数一定不超过e+3 + onlineSampled.add(start); // 注意未淘汰的点的Dominated significance尚未赋值,还都是infinity + start = start.next; // when e=0, only traversing three points pa pi pb + } + onlineSampled.add(end); + return onlineSampled; + } + + public static int nextSlidingWindowWithTimestamps(List<Point> points, double maxError, Object[] prefixSum) { + if (points.size() <= 1) { + return points.size() - 1; + } + int i = 2; // 从第二个点开始 + if (prefixSum == null) { + while (i < points.size() && myLA_withTimestamps(points, 0, i) < maxError) { + i++; // 窗口扩大 + } + } else { + while (i < points.size() && myLA_withTimestamps(points, prefixSum, 0, i) < maxError) { + i++; // 窗口扩大 + } + } + return i - 1; // 从0开始,并且是从0到i-1的左闭右闭 + } + + + public static List<Point> swab_framework(List<Point> points, double maxError, int m, boolean debug) { + // 在这里给Point添加全局排序idx, 用于全局定位 + for (int i = 0; i < points.size(); i++) { + points.get(i).index = i; + } + + if (debug) { + System.out.println("data length=" + points.size() + ", max_error=" + maxError + ", m=" + m); + } + // m在这里的含义是分段数,虽然SWAB不能直接控制分段数,但是也需要这个参数来确定buffer大小 + int bs = (int) Math.floor((double) points.size() / m * 6); // buffer size + int lb = bs / 2, ub = bs * 2; + if (debug) { + System.out.println("bs=" + bs + ", lb=" + lb + ", ub=" + ub); + } + + int buffer_startIdx = 0; // 左闭 + int buffer_endIdx = bs; // 右开。 + // 注意虽然joint,但是buffer和remaining points是断开的 + // buffer: [0:bs] 左闭右开 + // remaining: [bs:] 左闭 + +// List<Point> w = points.subList(0, bs); // 浅复制 左闭右开 +// List<Point> remaining_points = points.subList(bs, points.size()); // TODO + + List<Point> segTs = new ArrayList<>(); // TODO + segTs.add(points.get(0)); //全局首点 + +// int base = 0; // global base +// if (debug) { +// System.out.println("base=" + base); +// } + boolean needBottomUp = true; // false when only output, no input + int reuseIdx = 0; // TODO + List<Point> sampled = Collections.emptyList(); // TODO + while (true) { + if (debug) { + System.out.println("#####################################################"); + System.out.println("buffer data=" + (buffer_endIdx - buffer_startIdx) + + ", remaining data=" + (points.size() - buffer_endIdx)); + System.out.println("reBottomUp:" + needBottomUp); + } + + if (needBottomUp) { + + if (debug) { + System.out.println(">>>>bottom up on the buffer"); + } + reuseIdx = 0; + sampled = seg_bottomUp_maxerror_withTimestamps( + points.subList(buffer_startIdx, buffer_endIdx), + maxError, null, false); + // TODO prefix sum acc + + + // TODO eBUG also +// sampled = eBUG.buildEffectiveArea(points.subList(buffer_startIdx, buffer_endIdx), 0, false, m); + } + + if (sampled.size() - reuseIdx >= 2) { // 左边输出一个segment + if (debug) { + System.out.println(">>>>left output a segment:[" + + sampled.get(0 + reuseIdx).index + "," + sampled.get(1 + reuseIdx) + "]"); + } +// Map<String, Integer> firstSegment = segment.get(0); +// firstSegment.put("lx", firstSegment.get("lx") + base); +// firstSegment.put("rx", firstSegment.get("rx") + base); +// sampled.get(1).index += base; // # note that no need to add base later for segment(1) + segTs.add(sampled.get(1 + reuseIdx)); + + buffer_startIdx = sampled.get(1 + reuseIdx).index; + +// if (sampled.size() == 2) { +// // 更新buffer w +// buffer_startIdx = buffer_endIdx - 1; // 注意左开右闭 +//// w = w.subList(w.size() - 1, w.size()); // last point in buffer for joint +// +// // 更新base +// // NOTE python start from 0 not 1 +// // NOTE segment(0) rx already add base! i.e., segment(0) rx is already global position +//// base = sampled.get(1).index; +// } else { +// // 更新buffer w +// int out = sampled.get(1).index - sampled.get(0).index + 1; +// +// // 更新base +// w = w.subList(out - 1, w.size()); // last point in buffer for joint +// buffer_startIdx = sampled.get(1).index; +//// base = base + sampled.get(1).index; // 对于joint来说,这个既是第一段的结尾又是第二段的开始位置 +// } + } + + if (debug) { + System.out.println("buffer data: [" + buffer_startIdx + "," + buffer_endIdx + "]"); + System.out.println("---------------------------------"); + } + if (buffer_endIdx >= points.size()) { // TODO no more point added into buffer w + if (debug) { + System.out.println(">>>>no more data remaining"); + } + // Add remaining segments and break if no more input data + if (sampled.size() - reuseIdx > 2) { + // 注意此时segment并没有把上面“左边输出一个segment”的第一个segment pop出去 + // 更新base + // 这次不需要re-bottomUp了,所以回退到本次分段起点,这样后面的相对位置加上base才是绝对位置 + // NOTE segments does NOT have their index recalculated by bottom-up to start from 0, + // therefore minus the previously added segment[1]["lx"] +// base = base - sampled.get(1).index; + for (int k = 2 + reuseIdx; k < sampled.size(); k++) { +// Map<String, Integer> seg = segment.get(k); +// seg.put("lx", seg.get("lx") + base); +// seg.put("rx", seg.get("rx") + base); + // 注意此时segment并没有把上面“左边输出一个segment”的第一个segment pop出去! + // 所以要从第二个segment开始继续输出,同时注意python从0开始计数 +// sampled.get(k).index += base; + segTs.add(sampled.get(k)); + } + } + break; + } + + if ((buffer_endIdx - buffer_startIdx) >= lb) { + // buffer w上面已经更新了,已经是去除了左边输出的第一个segment之后的剩余的点 + if (debug) { + System.out.println(">>>>no need adding the sliding segment from right because len(w) >= lb"); + } + needBottomUp = false; + + // 更新base + // 这次不需要re-bottomUp了,所以回退到本次分段起点,这样后面的相对位置加上base才是绝对位置 + // NOTE segments does NOT have their index recalculated by bottom-up to start from 0, + // therefore minus the previously added segment[1]["lx"] +// base = base - sampled.get(1).index; + + // 注意此步骤不要在更新base之前执行!!!因为上面更新base时假设“左边输出一个segment”的第一个segment还没有pop + // 注意这里真的把上面“左边输出一个segment”的第一个segment pop出去了, + // 因为下一轮迭代要直接用剩余的segment +// segment.remove(0); // Pop the first segment +// sampled.remove(0); // TODO 这个复杂度? + reuseIdx++; // TODO + if (debug) { + System.out.println("next iteration uses the remaining segment without re-bottomUp, reuseIdx=" + reuseIdx); + } + + // # assume lb>1, at least two segments + } else { + if (debug) { + System.out.println(">>>>adding the sliding segment from right because len(w) < lb"); + } + needBottomUp = true; // 加了新的数据进来意味着下一轮里就要re-bottomUp + + while ((buffer_endIdx - buffer_startIdx) < lb && (points.size() - buffer_endIdx) > 0) { + // 从0开始,并且rx是从0开始的右闭 [0,rx]一共rx+1个点 + List<Point> remaining_points = points.subList(buffer_endIdx, points.size()); + int rx = nextSlidingWindowWithTimestamps(remaining_points, maxError, null); // TODO prefixsum加速 + if ((buffer_endIdx - buffer_startIdx) + (rx + 1) > ub) { + // # avoid more than ub + rx = ub - (buffer_endIdx - buffer_startIdx) - 1; + } + buffer_endIdx += (rx + 1); // TODO +// w.addAll(remaining_points.subList(0, rx + 1)); // TODO +// remaining_points = remaining_points.subList(rx + 1, remaining_points.size()); + if (debug) { + System.out.println("input len=" + (rx + 1)); + } + } + } + + if (debug) { + System.out.println("buffer data: [" + buffer_startIdx + "," + buffer_endIdx + "), " + + "remaining data: [" + buffer_endIdx + "," + points.size() + ")"); + if ((buffer_endIdx - buffer_startIdx) < lb) { + System.out.println("warn less"); + } + if ((buffer_endIdx - buffer_startIdx) > ub) { + System.out.println("warn more"); + } + } + } + return segTs; + } + + public static void main(String[] args) { + Random rand = new Random(10); + String input = "D:\\datasets\\regular\\tmp2.csv"; + boolean hasHeader = false; + int timeIdx = 0; + int valueIdx = 1; + int N = 100; +// Polyline polyline = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); + Polyline polyline = new Polyline(); + for (int i = 0; i < N; i += 1) { + double v = rand.nextInt(1000); + polyline.addVertex(new Point(i, v)); + } + try (FileWriter writer = new FileWriter("raw.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); + + // 写入每个点的数据 + for (int i = 0; i < polyline.size(); i++) { + Point point = polyline.get(i); + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println(polyline.size() + " Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); + } + + long startTime = System.currentTimeMillis(); + Object[] prefixSum = prefixSum(polyline.getVertices()); + long endTime = System.currentTimeMillis(); + System.out.println("Time taken to precompute prefix sum: " + (endTime - startTime) + "ms"); + + double maxError = 1000000000; + int m = 10; + + // 33s worst-case O(n2)因为没有prefix sum加速计算L2误差,但是实际达不到worst-case所以实际运行不会那么大耗时 + startTime = System.currentTimeMillis(); + List<Point> sampled = swab_framework(polyline.getVertices(), maxError, m, true); + endTime = System.currentTimeMillis(); + System.out.println("Time taken: " + (endTime - startTime) + "ms"); + + for (Point p : sampled) { + System.out.println(p); + } + System.out.println(sampled.size()); + + try (PrintWriter writer = new PrintWriter(new File("output.csv"))) { + // 写入字符串 + for (int i = 0; i < sampled.size(); i++) { + writer.println(sampled.get(i).x + "," + sampled.get(i).y); + } + + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } +} diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test1.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test1.java index 385362b9e6b..5823ca86c3d 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test1.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test1.java @@ -15,7 +15,7 @@ public class Test1 { Polyline polyline = new Polyline(); List<Polyline> polylineList = new ArrayList<>(); Random rand = new Random(10); - int n = 10000; + int n = 20000; int eParam = 1; int p = 10; diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test4.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test4.java new file mode 100644 index 00000000000..ada4b47b14e --- /dev/null +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test4.java @@ -0,0 +1,79 @@ +package org.apache.iotdb.db.query.eBUG; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import static org.apache.iotdb.db.query.eBUG.SWAB.myLA_withTimestamps; +import static org.apache.iotdb.db.query.eBUG.SWAB.prefixSum; + +public class Test4 { + // 用于测试使用prefix sum加速L2误差计算的加速效果 + public static void main(String[] args) { + Random rand = new Random(10); + String input = "D:\\datasets\\regular\\tmp2.csv"; + boolean hasHeader = false; + int timeIdx = 0; + int valueIdx = 1; + int N = 2000_0000; +// Polyline polyline = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); + Polyline polyline = new Polyline(); + for (int i = 0; i < N; i += 1) { + double v = rand.nextInt(1000); + polyline.addVertex(new Point(i, v)); + } +// try (FileWriter writer = new FileWriter("raw.csv")) { +// // 写入CSV头部 +// writer.append("x,y,z\n"); +// +// // 写入每个点的数据 +// for (int i = 0; i < polyline.size(); i++) { +// Point point = polyline.get(i); +// writer.append(point.x + "," + point.y + "," + point.z + "\n"); +// } +// System.out.println(polyline.size() + " Data has been written"); +// } catch (IOException e) { +// System.out.println("Error writing to CSV file: " + e.getMessage()); +// } + + long startTime = System.currentTimeMillis(); + Object[] prefixSum = prefixSum(polyline.getVertices()); + long endTime = System.currentTimeMillis(); + System.out.println("Time taken to precompute prefix sum: " + (endTime - startTime) + "ms"); + + double error; + + List<Double> error1 = new ArrayList<>(); + startTime = System.currentTimeMillis(); + for (int lx = 0; lx < polyline.size() / 2; lx += 10000) { + error = myLA_withTimestamps(polyline.getVertices(), prefixSum, lx, polyline.size() - 1); + System.out.println(lx + "," + error); + error1.add(error); + } + endTime = System.currentTimeMillis(); + System.out.println("Time taken to compute L2 error with prefix sum: " + (endTime - startTime) + "ms"); + + List<Double> error2 = new ArrayList<>(); + startTime = System.currentTimeMillis(); + for (int lx = 0; lx < polyline.size() / 2; lx += 10000) { + error = myLA_withTimestamps(polyline.getVertices(), lx, polyline.size() - 1); + System.out.println(lx + "," + error); + error2.add(error); + } + endTime = System.currentTimeMillis(); + System.out.println("Time taken to compute L2 error without prefix sum: " + (endTime - startTime) + "ms"); + + try (PrintWriter writer = new PrintWriter(new File("output.csv"))) { + // 写入字符串 + for (int i = 0; i < error1.size(); i++) { + writer.println(error1.get(i) + "," + error2.get(i) + "," + (error2.get(i) - error1.get(i))); + } + + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } +} diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test5.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test5.java new file mode 100644 index 00000000000..c9ac846a0b2 --- /dev/null +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test5.java @@ -0,0 +1,64 @@ +package org.apache.iotdb.db.query.eBUG; + +import java.io.*; +import java.util.List; +import java.util.Random; + +import static org.apache.iotdb.db.query.eBUG.SWAB.seg_bottomUp_maxerror_withTimestamps; + +public class Test5 { + // 用于验证java bottomUpMaxError实现正确性 + public static void main(String[] args) { + Random rand = new Random(10); + String input = "D:\\datasets\\regular\\tmp2.csv"; + boolean hasHeader = false; + int timeIdx = 0; + int valueIdx = 1; + int N = 2000; +// Polyline polyline = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); + Polyline polyline = new Polyline(); + for (int i = 0; i < N; i += 1) { + double v = rand.nextInt(1000); + polyline.addVertex(new Point(i, v)); + } + try (FileWriter writer = new FileWriter("raw.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); + + // 写入每个点的数据 + for (int i = 0; i < polyline.size(); i++) { + Point point = polyline.get(i); + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println(polyline.size() + " Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); + } + + long startTime = System.currentTimeMillis(); + Object[] prefixSum = SWAB.prefixSum(polyline.getVertices()); + long endTime = System.currentTimeMillis(); + System.out.println("Time taken to precompute prefix sum: " + (endTime - startTime) + "ms"); + + startTime = System.currentTimeMillis(); + double maxError = 5000; + List<Point> sampled = seg_bottomUp_maxerror_withTimestamps(polyline.getVertices(), maxError, prefixSum, false); + endTime = System.currentTimeMillis(); + System.out.println("Time taken to : " + (endTime - startTime) + "ms"); + + for (Point p : sampled) { + System.out.println(p); + } + System.out.println(sampled.size()); + + try (PrintWriter writer = new PrintWriter(new File("output.csv"))) { + // 写入字符串 + for (int i = 0; i < sampled.size(); i++) { + writer.println(sampled.get(i).x + "," + sampled.get(i).y); + } + + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } +} diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test6.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test6.java new file mode 100644 index 00000000000..75f288a061c --- /dev/null +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test6.java @@ -0,0 +1,89 @@ +package org.apache.iotdb.db.query.eBUG; + +import java.io.*; +import java.util.List; +import java.util.Random; + +import static org.apache.iotdb.db.query.eBUG.SWAB.prefixSum; +import static org.apache.iotdb.db.query.eBUG.SWAB.seg_bottomUp_maxerror_withTimestamps; + +public class Test6 { + // 用于实测bottomUp-L2-with/without prefix sum acceleration和eBUG算法的耗时比较 + public static void main(String[] args) { + Random rand = new Random(10); + String input = "D:\\datasets\\regular\\tmp2.csv"; + boolean hasHeader = false; + int timeIdx = 0; + int valueIdx = 1; + int N = 1000_0000; +// Polyline polyline = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); + Polyline polyline = new Polyline(); + for (int i = 0; i < N; i += 1) { + double v = rand.nextInt(1000); + polyline.addVertex(new Point(i, v)); + } + try (FileWriter writer = new FileWriter("raw.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); + + // 写入每个点的数据 + for (int i = 0; i < polyline.size(); i++) { + Point point = polyline.get(i); + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println(polyline.size() + " Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); + } + + long startTime = System.currentTimeMillis(); + Object[] prefixSum = prefixSum(polyline.getVertices()); + long endTime = System.currentTimeMillis(); + System.out.println("Time taken to precompute prefix sum: " + (endTime - startTime) + "ms"); + + double maxError = 500000; + + // 33s worst-case O(n2)因为没有prefix sum加速计算L2误差,但是实际达不到worst-case所以实际运行不会那么大耗时 + startTime = System.currentTimeMillis(); + List<Point> sampled = seg_bottomUp_maxerror_withTimestamps(polyline.getVertices(), maxError, null, false); + endTime = System.currentTimeMillis(); + System.out.println("Time taken to bottomUp-L2 without prefix sum acceleration: " + (endTime - startTime) + "ms"); + System.out.println(sampled.size()); + + // 30s worst case O(nlogn)主要就是heap的操作耗时了 + startTime = System.currentTimeMillis(); + sampled = seg_bottomUp_maxerror_withTimestamps(polyline.getVertices(), maxError, prefixSum, false); + endTime = System.currentTimeMillis(); + System.out.println("Time taken to bottomUp-L2 with prefix sum acceleration:" + (endTime - startTime) + "ms"); + System.out.println(sampled.size()); + + // 39s worst-case O(n2)但是实际达不到worst-case所以实际运行不会那么大耗时。 + // 不过还是会比L2误差的耗时要大,毕竟AD计算比L2更复杂一些虽然都是线性,但是常数大一些 + startTime = System.currentTimeMillis(); + sampled = eBUG.buildEffectiveArea(polyline, 1000_0000, false, 3591486); + endTime = System.currentTimeMillis(); + System.out.println("Time taken to eBUG with e>n-3: " + (endTime - startTime) + "ms"); + System.out.println(sampled.size()); + + // 33s worst case O(nlogn)主要就是heap的操作耗时了 + startTime = System.currentTimeMillis(); + sampled = eBUG.buildEffectiveArea(polyline, 0, false, 3591486); + endTime = System.currentTimeMillis(); + System.out.println("Time taken to eBUG with e=0: " + (endTime - startTime) + "ms"); + System.out.println(sampled.size()); + +// for (Point p : sampled) { +// System.out.println(p); +// } + + try (PrintWriter writer = new PrintWriter(new File("output.csv"))) { + // 写入字符串 + for (int i = 0; i < sampled.size(); i++) { + writer.println(sampled.get(i).x + "," + sampled.get(i).y); + } + + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } +} diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tool.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tool.java index 86658e24385..2f76097918e 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tool.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tool.java @@ -1,204 +1,254 @@ package org.apache.iotdb.db.query.eBUG; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; +import java.util.Iterator; import java.util.List; +import java.util.stream.Stream; public class Tool { - // 计算三角形面积 - public static double triArea(Point d0, Point d1, Point d2) { - double dArea = ((d1.x - d0.x) * (d2.y - d0.y) - (d2.x - d0.x) * (d1.y - d0.y)) / 2.0; - return (dArea > 0.0) ? dArea : -dArea; // abs - } - - // 计算简单多边形(边不交叉)的面积(鞋带公式) - public static double calculatePolygonArea(List<Point> points) { - // points多边形顶点,要求按照逆时针或者顺时针的顺序枚举,否则鞋带公式无法给出正确结果 - int n = points.size(); - double area = 0; - for (int i = 0; i < n; i++) { - int next = (i + 1) % n; - area += points.get(i).x * points.get(next).y - points.get(next).x * points.get(i).y; + + public static Polyline readFromFile(String input, boolean hasHeader, int timeIdx, int valueIdx, int N) { + Polyline polyline = new Polyline(); + + // Using Files.lines() for memory-efficient line-by-line reading + try (Stream<String> lines = Files.lines(Paths.get(input))) { + Iterator<String> iterator = lines.iterator(); + int lineNumber = 0; + + // Skip the header line if necessary + if (hasHeader && iterator.hasNext()) { + iterator.next(); // Skip the header + } + + // Process each line + while (iterator.hasNext()) { + lineNumber++; + String line = iterator.next(); + String[] columns = line.split(","); + + if (columns.length > Math.max(timeIdx, valueIdx)) { + double time; + if (timeIdx < 0) { + time = lineNumber; + } else { + time = Double.parseDouble(columns[timeIdx]); + } + double value = Double.parseDouble(columns[valueIdx]); + polyline.addVertex(new Point(time, value)); + // System.out.println("Line " + lineNumber + " - Time: " + time + ", + // Value: " + value); + } else { + System.out.println("Line " + lineNumber + " is malformed (not enough columns)."); + } + if (N > 0 && lineNumber >= N) { + // N>0控制点数,否则N<=0表示读全部数据 + break; + } + } + } catch (IOException e) { + e.printStackTrace(); + } + return polyline; } - return Math.abs(area) / 2.0; - } - - // 计算两个向量的叉积 - public static double crossProduct(double x1, double y1, double x2, double y2) { - // >0: (x2,y2)在(x1,y1)的逆时针方向 - // <0: (x2,y2)在(x1,y1)的顺时针方向 - // =0: 平行或共线 - return x1 * y2 - y1 * x2; - } - - // 判断两条线段是否相交并计算交点 - // L1包含一个线段的两个端点,L2包含另一个线段的两个端点 - public static Object[] lineIntersection(Point[] L1, Point[] L2) { - double x1 = L1[0].x, y1 = L1[0].y, x2 = L1[1].x, y2 = L1[1].y; - double x3 = L2[0].x, y3 = L2[0].y, x4 = L2[1].x, y4 = L2[1].y; - - // 判断是否相交(叉积) - double d1 = crossProduct(x2 - x1, y2 - y1, x3 - x1, y3 - y1); - double d2 = crossProduct(x2 - x1, y2 - y1, x4 - x1, y4 - y1); - double d3 = crossProduct(x4 - x3, y4 - y3, x1 - x3, y1 - y3); - double d4 = crossProduct(x4 - x3, y4 - y3, x2 - x3, y2 - y3); - - // 如果叉积条件满足,表示有交点 - // d1*d2<0意味着P3、P4分别在L12的两边 - // d3*d4<0意味着P1、P2分别在L34的两边 - // 两个同时满足说明有交点 - if (d1 * d2 < 0 && d3 * d4 < 0) { - double denominator = (y4 - y3) * (x2 - x1) - (x4 - x3) * (y2 - y1); // 不可能为0(平行或共线),因为已经判断相交了 - double t1 = ((x4 - x3) * (y1 - y3) - (y4 - y3) * (x1 - x3)) / denominator; - double x = x1 + t1 * (x2 - x1); - double y = y1 + t1 * (y2 - y1); - return new Object[] {true, new Point(x, y)}; + + // 计算三角形面积 + public static double triArea(Point d0, Point d1, Point d2) { + double dArea = ((d1.x - d0.x) * (d2.y - d0.y) - (d2.x - d0.x) * (d1.y - d0.y)) / 2.0; + return (dArea > 0.0) ? dArea : -dArea; // abs } - // 检查是否起点或终点重合 - if ((x1 == x3 && y1 == y3) || (x1 == x4 && y1 == y4)) { - return new Object[] {true, new Point(x1, y1)}; + // 计算简单多边形(边不交叉)的面积(鞋带公式) + public static double calculatePolygonArea(List<Point> points) { + // points多边形顶点,要求按照逆时针或者顺时针的顺序枚举,否则鞋带公式无法给出正确结果 + int n = points.size(); + double area = 0; + for (int i = 0; i < n; i++) { + int next = (i + 1) % n; + area += points.get(i).x * points.get(next).y - points.get(next).x * points.get(i).y; + } + return Math.abs(area) / 2.0; } - if ((x2 == x3 && y2 == y3) || (x2 == x4 && y2 == y4)) { - return new Object[] {true, new Point(x2, y2)}; + + // 计算两个向量的叉积 + public static double crossProduct(double x1, double y1, double x2, double y2) { + // >0: (x2,y2)在(x1,y1)的逆时针方向 + // <0: (x2,y2)在(x1,y1)的顺时针方向 + // =0: 平行或共线 + return x1 * y2 - y1 * x2; } - return new Object[] {false, null}; - } - - // 计算总的多边形面积(通过时间序列扫描交点),默认要求点按照时间戳严格递增排列 - public static double total_areal_displacement( - List<Point> points, List<Point> points2, boolean debug) { - double totalArea = 0; - int i = 0, j = 0; // i for points, j for points2 - Point prevIntersection = null; - int prevI = -1, prevJ = -1; - - // List<double[]> intersectionCoords = new ArrayList<>(); - List<Double> areaList = new ArrayList<>(); - - while (i < points.size() - 1 && j < points2.size() - 1) { - if (debug) { - System.out.println("--------- " + i + " " + j + " ------------"); - } - - // 当前线段 - Point[] L1 = {points.get(i), points.get(i + 1)}; - Point[] L2 = {points2.get(j), points2.get(j + 1)}; - - // 判断是否有交点 - Object[] result = lineIntersection(L1, L2); - boolean isIntersect = (boolean) result[0]; - Point intersection = (Point) result[1]; - - if (isIntersect) { - // intersectionCoords.add(intersection); - - if (prevIntersection != null) { - // 构造多边形点集 - List<Point> polygon = new ArrayList<>(); // 按照顺时针/逆时针几何连接顺序枚举多边形的顶点 - polygon.add(prevIntersection); - if (debug) { - System.out.println("- start intersection: " + prevIntersection); - } - polygon.addAll(points.subList(prevI, i + 1)); // 添加当前线段的点,左闭右开 - // polygon.addAll(Arrays.asList(Arrays.copyOfRange(points, prevI, i + - // 1))); // 添加当前线段的点,左闭右开 - if (debug) { - System.out.println("- one side: " + points.subList(prevI, i + 1)); - } - polygon.add(intersection); - if (debug) { - System.out.println("- end intersection: " + intersection); - } - List<Point> tempPoints2 = points2.subList(prevJ, j + 1); - Collections.reverse(tempPoints2); // 添加另一序列的点 - polygon.addAll(tempPoints2); - if (debug) { - System.out.println("- another side: " + tempPoints2); - } - - // double[][] polygonArray = new double[polygon.size()][2]; - // for (int k = 0; k < polygon.size(); k++) { - // polygonArray[k] = polygon.get(k); - // } - - // 计算多边形面积,注意polygon里的点一定是按照顺时针/逆时针几何连接顺序枚举的多边形顶点 - double area = calculatePolygonArea(polygon); - if (debug) { - System.out.println("Area = " + area); - } - totalArea += area; - areaList.add(area); + // 判断两条线段是否相交并计算交点 + // L1包含一个线段的两个端点,L2包含另一个线段的两个端点 + public static Object[] lineIntersection(Point[] L1, Point[] L2) { + double x1 = L1[0].x, y1 = L1[0].y, x2 = L1[1].x, y2 = L1[1].y; + double x3 = L2[0].x, y3 = L2[0].y, x4 = L2[1].x, y4 = L2[1].y; + + // 判断是否相交(叉积) + double d1 = crossProduct(x2 - x1, y2 - y1, x3 - x1, y3 - y1); + double d2 = crossProduct(x2 - x1, y2 - y1, x4 - x1, y4 - y1); + double d3 = crossProduct(x4 - x3, y4 - y3, x1 - x3, y1 - y3); + double d4 = crossProduct(x4 - x3, y4 - y3, x2 - x3, y2 - y3); + + // 如果叉积条件满足,表示有交点 + // d1*d2<0意味着P3、P4分别在L12的两边 + // d3*d4<0意味着P1、P2分别在L34的两边 + // 两个同时满足说明有交点 + if (d1 * d2 < 0 && d3 * d4 < 0) { + double denominator = (y4 - y3) * (x2 - x1) - (x4 - x3) * (y2 - y1); // 不可能为0(平行或共线),因为已经判断相交了 + double t1 = ((x4 - x3) * (y1 - y3) - (y4 - y3) * (x1 - x3)) / denominator; + double x = x1 + t1 * (x2 - x1); + double y = y1 + t1 * (y2 - y1); + return new Object[]{true, new Point(x, y)}; } - prevIntersection = intersection; - prevI = i + 1; - prevJ = j + 1; + // 检查是否起点或终点重合 + if ((x1 == x3 && y1 == y3) || (x1 == x4 && y1 == y4)) { + return new Object[]{true, new Point(x1, y1)}; + } + if ((x2 == x3 && y2 == y3) || (x2 == x4 && y2 == y4)) { + return new Object[]{true, new Point(x2, y2)}; + } + + return new Object[]{false, null}; + } + + // 计算总的多边形面积(通过时间序列扫描交点),默认要求点按照时间戳严格递增排列 + public static double total_areal_displacement( + List<Point> points, List<Point> points2, boolean debug) { + double totalArea = 0; + int i = 0, j = 0; // i for points, j for points2 + Point prevIntersection = null; + int prevI = -1, prevJ = -1; + + // List<double[]> intersectionCoords = new ArrayList<>(); + List<Double> areaList = new ArrayList<>(); + + while (i < points.size() - 1 && j < points2.size() - 1) { + if (debug) { + System.out.println("--------- " + i + " " + j + " ------------"); + } + + // 当前线段 + Point[] L1 = {points.get(i), points.get(i + 1)}; + Point[] L2 = {points2.get(j), points2.get(j + 1)}; + + // 判断是否有交点 + Object[] result = lineIntersection(L1, L2); + boolean isIntersect = (boolean) result[0]; + Point intersection = (Point) result[1]; + + if (isIntersect) { + // intersectionCoords.add(intersection); + + if (prevIntersection != null) { + // 构造多边形点集 + List<Point> polygon = new ArrayList<>(); // 按照顺时针/逆时针几何连接顺序枚举多边形的顶点 + polygon.add(prevIntersection); + if (debug) { + System.out.println("- start intersection: " + prevIntersection); + } + polygon.addAll(points.subList(prevI, i + 1)); // 添加当前线段的点,左闭右开 + // polygon.addAll(Arrays.asList(Arrays.copyOfRange(points, prevI, i + + // 1))); // 添加当前线段的点,左闭右开 + if (debug) { + System.out.println("- one side: " + points.subList(prevI, i + 1)); + } + polygon.add(intersection); + if (debug) { + System.out.println("- end intersection: " + intersection); + } + List<Point> tempPoints2 = points2.subList(prevJ, j + 1); + Collections.reverse(tempPoints2); // 添加另一序列的点 + polygon.addAll(tempPoints2); + if (debug) { + System.out.println("- another side: " + tempPoints2); + } + + // double[][] polygonArray = new double[polygon.size()][2]; + // for (int k = 0; k < polygon.size(); k++) { + // polygonArray[k] = polygon.get(k); + // } + + // 计算多边形面积,注意polygon里的点一定是按照顺时针/逆时针几何连接顺序枚举的多边形顶点 + double area = calculatePolygonArea(polygon); + if (debug) { + System.out.println("Area = " + area); + } + totalArea += area; + areaList.add(area); + } + + prevIntersection = intersection; + prevI = i + 1; + prevJ = j + 1; + if (debug) { + System.out.println( + "This intersection = " + + intersection + + ", next polygon: side1 = " + + prevI + + ", side2 = " + + prevJ); + } + } + + int currentI = i; // 临时存储i + int currentJ = j; // 临时存储j + if (points.get(currentI + 1).x <= points2.get(currentJ + 1).x) { + i += 1; // 基于时间戳严格递增的假设,Line不会回头或者垂直 + } + if (points.get(currentI + 1).x >= points2.get(currentJ + 1).x) { + j += 1; // 基于时间戳严格递增的假设,Line不会回头或者垂直 + } + } // end of while + if (debug) { - System.out.println( - "This intersection = " - + intersection - + ", next polygon: side1 = " - + prevI - + ", side2 = " - + prevJ); + System.out.println(areaList); } - } - - int currentI = i; // 临时存储i - int currentJ = j; // 临时存储j - if (points.get(currentI + 1).x <= points2.get(currentJ + 1).x) { - i += 1; // 基于时间戳严格递增的假设,Line不会回头或者垂直 - } - if (points.get(currentI + 1).x >= points2.get(currentJ + 1).x) { - j += 1; // 基于时间戳严格递增的假设,Line不会回头或者垂直 - } - } // end of while - - if (debug) { - System.out.println(areaList); + + return totalArea; } - return totalArea; - } - - // 测试方法 - public static void main(String[] args) { - // 示例数据 - List<Point> points = new ArrayList<>(); - points.add(new Point(0, 0)); - points.add(new Point(1, 2)); - points.add(new Point(2, -20)); - points.add(new Point(3, 0)); - points.add(new Point(4, -1)); - points.add(new Point(5, -1.5)); - points.add(new Point(6, 0)); - - List<Point> points2 = new ArrayList<>(); - // points2.add(points.get(0)); - // points2.add(points.get(3)); - // points2.add(points.get(6)); - points2.add(new Point(1, -10)); - points2.add(new Point(3, 0)); - - double area = total_areal_displacement(points, points2, true); - System.out.println("Total area: " + area); - - // points = new ArrayList<>(); - // points.add(new Point(0, 0)); - // points.add(new Point(1, 2)); - // points.add(new Point(1.5, -10)); - // double area = calculatePolygonArea(points); - // System.out.println(area); - // - // Point[] L1 = new Point[2]; - // L1[0] = new Point(1, 2); - // L1[1] = new Point(2, -2); - // Point[] L2 = new Point[2]; - // L2[0] = new Point(0, 0); - // L2[1] = new Point(3, 0); - // Object[] res = lineIntersection(L1, L2); - // System.out.println(res[1]); - } + // 测试方法 + public static void main(String[] args) { + // 示例数据 + List<Point> points = new ArrayList<>(); + points.add(new Point(0, 0)); + points.add(new Point(1, 2)); + points.add(new Point(2, -20)); + points.add(new Point(3, 0)); + points.add(new Point(4, -1)); + points.add(new Point(5, -1.5)); + points.add(new Point(6, 0)); + + List<Point> points2 = new ArrayList<>(); + // points2.add(points.get(0)); + // points2.add(points.get(3)); + // points2.add(points.get(6)); + points2.add(new Point(1, -10)); + points2.add(new Point(3, 0)); + + double area = total_areal_displacement(points, points2, true); + System.out.println("Total area: " + area); + + // points = new ArrayList<>(); + // points.add(new Point(0, 0)); + // points.add(new Point(1, 2)); + // points.add(new Point(1.5, -10)); + // double area = calculatePolygonArea(points); + // System.out.println(area); + // + // Point[] L1 = new Point[2]; + // L1[0] = new Point(1, 2); + // L1[1] = new Point(2, -2); + // Point[] L2 = new Point[2]; + // L2[0] = new Point(0, 0); + // L2[1] = new Point(3, 0); + // Object[] res = lineIntersection(L1, L2); + // System.out.println(res[1]); + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java index 201f99d2313..cc86a3fe8f9 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java @@ -28,389 +28,363 @@ import static org.apache.iotdb.db.query.eBUG.Tool.total_areal_displacement; import static org.apache.iotdb.db.query.eBUG.Tool.triArea; public class eBUG { - public static List<Point> findEliminated(Polyline lineToSimplify, int pa_idx, int pb_idx) { - // pa: left adjacent non-eliminated point of pi - // pb: right adjacent non-eliminated point of pi - // return a list of points, T'_max{0,k-e}[a:b] order by time in ascending order - - // 性质:最近淘汰的那一个点一定是位于pa~pb之间,因为正是这个点的淘汰才使得pi的significance要更新!至于其他全局更早淘汰的点则不一定位于a:b之间 - // pa,xxx,pi,xxx,pb - // when e=0,遍历点数就是3 - // when e>0,遍历点数大于等于4、小于等于e+3 - - List<Point> res = new ArrayList<>(); - Point start = lineToSimplify.get(pa_idx); - Point end = lineToSimplify.get(pb_idx); - // int cnt = 0; - while (start - != end) { // Point类里增加prev&next指针,这样T'_max{0,k-e}里点的连接关系就有了,这样从Pa开始沿着指针,遍历点数一定不超过e+3 - res.add(start); - start = start.next; // when e=0, only traversing three points pa pi pb - // cnt += 1; - } - res.add(end); - // cnt += 1; - // System.out.println(cnt); // 3<=cnt<=e+3 - - return res; - } - - public static List<Point> buildEffectiveArea(Polyline lineToSimplify, int e, boolean debug) { - // precomputation mode - return buildEffectiveArea(lineToSimplify, e, debug, 0); - } - - public static List<Point> buildEffectiveArea( - Polyline lineToSimplify, int e, boolean debug, int m) { - if (e < 0) { - throw new IllegalArgumentException("Parameter e must be non-negative."); - } + public static List<Point> findEliminated(Polyline lineToSimplify, int pa_idx, int pb_idx) { + // pa: left adjacent non-eliminated point of pi + // pb: right adjacent non-eliminated point of pi + // return a list of points, T'_max{0,k-e}[a:b] order by time in ascending order + + // 性质:最近淘汰的那一个点一定是位于pa~pb之间,因为正是这个点的淘汰才使得pi的significance要更新!至于其他全局更早淘汰的点则不一定位于a:b之间 + // pa,xxx,pi,xxx,pb + // when e=0,遍历点数就是3 + // when e>0,遍历点数大于等于4、小于等于e+3 + + List<Point> res = new ArrayList<>(); + Point start = lineToSimplify.get(pa_idx); + Point end = lineToSimplify.get(pb_idx); + // int cnt = 0; + while (start != end) { // Point类里增加prev&next指针,这样T'_max{0,k-e}里点的连接关系就有了,这样从Pa开始沿着指针,遍历点数一定不超过e+3 + res.add(start); + start = start.next; // when e=0, only traversing three points pa pi pb + // cnt += 1; + } + res.add(end); + // cnt += 1; + // System.out.println(cnt); // 3<=cnt<=e+3 - if (m > 2) { - System.out.println( - "online sampling mode, " - + "returning " - + m - + " sampled points sorted by time in ascending order"); - } else { - System.out.println( - "offline precomputation mode, " - + "returning each point sorted by dominated significance (DS) in ascending order"); + return res; } - // List<Point> results = lineToSimplify.getVertices(); // 浅复制 - // TODO 预计算结果改成按照bottom-up逐点淘汰顺序(DS递增)排列而不是按照时间戳,这样省去在线时对DS排序的过程 - // add的是Point引用,所以没有多用一倍的空间 - List<Point> resultsBottomUpEliminated = new ArrayList<>(); - - // 存储的是点的引用,这样可以修改原来序列里点的淘汰状态 - // 存储的是距离当前最新状态的滞后的尚未施加、待施加的e个淘汰点 - LinkedList<Point> laggedEliminatedPoints = new LinkedList<>(); - - int total = lineToSimplify.size(); - if (total < 3) { - return lineToSimplify.getVertices(); // 不足 3 个点无法形成三角形 + public static List<Point> buildEffectiveArea(Polyline lineToSimplify, int e, boolean debug) { + // precomputation mode + return buildEffectiveArea(lineToSimplify, e, debug, 0); } - int nTriangles = total - 2; - Triangle[] triangles = new Triangle[nTriangles]; - - // 创建所有三角形并计算初始面积 - lineToSimplify.get(0).prev = null; - lineToSimplify.get(0).next = lineToSimplify.get(1); - lineToSimplify.get(total - 1).next = null; - lineToSimplify.get(total - 1).prev = lineToSimplify.get(total - 2); - for (int i = 1; i < total - 1; i++) { - int index1 = i - 1, index2 = i, index3 = i + 1; - double area = - triArea( - lineToSimplify.get(index1), lineToSimplify.get(index2), lineToSimplify.get(index3)); - triangles[i - 1] = new Triangle(index1, index2, index3, area); - - // 初始化点的状态 for eBUG usage - lineToSimplify.get(i).prev = lineToSimplify.get(i - 1); - lineToSimplify.get(i).next = lineToSimplify.get(i + 1); + public static List<Point> buildEffectiveArea(List<Point> points, int e, boolean debug) { + // precomputation mode + Polyline polyline = new Polyline(); + polyline.setVertices(points); + return buildEffectiveArea(polyline, e, debug, 0); } - // 设置三角形的前后关系 - for (int i = 0; i < nTriangles; i++) { // TODO 这个可以和上一个循环合并吗??好像不可以 - triangles[i].prev = (i == 0 ? null : triangles[i - 1]); - triangles[i].next = (i == nTriangles - 1 ? null : triangles[i + 1]); + public static List<Point> buildEffectiveArea(List<Point> points, int e, boolean debug, int m) { + // precomputation mode + Polyline polyline = new Polyline(); + polyline.setVertices(points); + return buildEffectiveArea(polyline, e, debug, m); } - // 使用优先队列构建 minHeap - PriorityQueue<Triangle> triangleHeap = - new PriorityQueue<>(Comparator.comparingDouble(t -> t.area)); - Collections.addAll(triangleHeap, triangles); // complexity TODO O(n) or O(nlogn)? - - double previousEA = -1; - // while (!triangleHeap.isEmpty()) { // TODO 注意triangleHeap里装的是non-terminal point对应的三角形 - // TODO 在线采样m个点,也就是说最后留下m-2个non-terminal point - // 注意:triangleHeap里装的包括了标记删除的点!所以triangleHeap.size()不是真正的留下的未被淘汰点数! - // 因此需要用fakeCnt来统计heap里的非真实点数,这样triangleHeap.size()-fakeCnt就是真正的留下的未被淘汰点数 - int remainNonTerminalPoint = Math.max(0, m - 2); - int fakeCnt = 0; - while (triangleHeap.size() - fakeCnt > remainNonTerminalPoint) { - // 注意peek只需要直接访问该位置的元素,不涉及任何重排或堆化操作 - // 而poll是删除堆顶元素,需要重新堆化以维护堆的性质,复杂度是O(logk),k是当前堆的大小 - Triangle tri = triangleHeap.poll(); // O(logn) - - // 如果该三角形已经被删除,跳过. Avoid using heap.remove(x) as it is O(n) complexity - // 而且除了heap里,已经没有其他节点会和它关联了,所有的connections关系已经迁移到新的角色替代节点上 - if (tri.isDeleted) { - fakeCnt--; // 取出了一个被标记删除点 - if (debug) { - System.out.println( - ">>>bottom-up, remaining " - + triangleHeap.size() - + " triangles (including those marked for removal)"); - } - continue; - } - - // 真正的淘汰点 - // 记录最近淘汰的点,注意不要重复记录也就是在上面执行之后再确认淘汰 - if (debug) { - System.out.println("(1) eliminate " + lineToSimplify.get(tri.indices[1])); - } - if (e > 0) { - if (laggedEliminatedPoints.size() == e) { - // 已经有e个滞后淘汰点了,为了把最近淘汰点加入,得先把最老的淘汰点施加上去 - Point removedPoint = laggedEliminatedPoints.removeFirst(); // 取出最早加入的淘汰点 复杂度1 - removedPoint.markEliminated(); // 注意是引用,所以改的内容后面可以看到 - } - laggedEliminatedPoints.addLast(lineToSimplify.get(tri.indices[1])); // 加入最新的淘汰点,滞后在这里先不施加 - } else { // e=0 没有滞后 立即标记删除 T'_k - lineToSimplify.get(tri.indices[1]).markEliminated(); - } - if (debug) { - System.out.println( - "the e most recently eliminated points (lagged):" + laggedEliminatedPoints); - } - - // 更新当前点的重要性(z 轴存储effective area,这是一个单调增的指标) - if (tri.area > previousEA) { - previousEA = tri.area; - } - // results.get(tri.indices[1]).z = previousEA; // dominated significance - lineToSimplify.get(tri.indices[1]).z = previousEA; // dominated significance - resultsBottomUpEliminated.add( - lineToSimplify.get(tri.indices[1])); // TODO add的是Point引用,所以没有多用一倍的空间 - if (debug) { - System.out.println(Arrays.toString(tri.indices) + ", Dominated Sig=" + previousEA); - } - - // 更新相邻三角形 - if (tri.prev != null) { - // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 - - // 1. 处理旧的tri.prev被标记删除的事情(角色替代) - // triangleHeap.remove(tri.prev); // Avoid using heap.remove(x) as it is O(n) complexity! - tri.prev.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只要heap poll到它就跳过就可以 - - Triangle newPre = new Triangle(tri.prev); // deep copy and inherit connection - // tri.prev = newPre; // can omit, because already done by new Triangle(tri.prev) - - // 2. 处理pi被淘汰引起tri.prev被更新的事情 - // 前一个三角形连到后一个三角形 - tri.prev.next = - tri.next; // ATTENTION!!!: 这里的tri.next后面可能会因为处理旧的tri.next被标记删除的事情被换掉!到时候要重新赋值! - tri.prev.indices[2] = tri.indices[2]; - - // e parameter - List<Point> pointsForSig = - findEliminated(lineToSimplify, tri.prev.indices[0], tri.prev.indices[2]); - if (debug) { - System.out.println( - "(2) update point on the left " + lineToSimplify.get(tri.prev.indices[1])); - System.out.println("3<=cnt<=e+3. cnt=" + pointsForSig.size()); - for (Point point : pointsForSig) { - System.out.println("\t" + point); - } - } - List<Point> baseLine = new ArrayList<>(); - baseLine.add(pointsForSig.get(0)); - baseLine.add(pointsForSig.get(pointsForSig.size() - 1)); // 直接连接左右两边最近的未被淘汰的点 - double sig = total_areal_displacement(pointsForSig, baseLine, false); - tri.prev.area = sig; - - if (debug) { - System.out.println("sig=" + sig); - double tmpTri = - triArea( - lineToSimplify.get(tri.prev.indices[0]), - lineToSimplify.get(tri.prev.indices[1]), - lineToSimplify.get(tri.prev.indices[2])); - System.out.println( - "\t" - + "tri=" - + tmpTri - + ", " - + ((tmpTri > sig) ? "over-estimated" : "equal/less-estimated")); + public static List<Point> buildEffectiveArea(Polyline lineToSimplify, int e, boolean debug, int m) { + if (e < 0) { + throw new IllegalArgumentException("Parameter e must be non-negative."); } - // 重新加入堆 - // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 - // 所以必须通过add来显式重新插入修改后的元素 - triangleHeap.add(tri.prev); // O(logn) 注意加入的是一个新的对象isDeleted=false - fakeCnt++; // 表示heap里多了一个被标记删除的假点 - } - - if (tri.next != null) { - // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 + if (m > 2) { + System.out.println("online sampling mode, " + "returning " + m + " sampled points sorted by time in ascending order"); + } else { + System.out.println("offline precomputation mode, " + "returning each point sorted by dominated significance (DS) in ascending order"); + } - // 1. 处理旧的tri.next被标记删除的事情(角色替代) - // triangleHeap.remove(tri.next); // Avoid using heap.remove(x) as it is O(n) complexity - tri.next.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只有poll到它就跳过就可以 + // List<Point> results = lineToSimplify.getVertices(); // 浅复制 + // TODO 预计算结果改成按照bottom-up逐点淘汰顺序(DS递增)排列而不是按照时间戳,这样省去在线时对DS排序的过程 + // add的是Point引用,所以没有多用一倍的空间 + List<Point> resultsBottomUpEliminated = new ArrayList<>(); - Triangle newNext = new Triangle(tri.next); // deep copy and inherit connection - // tri.next = newNext; // omit, because already done by new Triangle(tri.prev) + // 存储的是点的引用,这样可以修改原来序列里点的淘汰状态 + // 存储的是距离当前最新状态的滞后的尚未施加、待施加的e个淘汰点 + LinkedList<Point> laggedEliminatedPoints = new LinkedList<>(); - if (tri.prev != null) { - tri.prev.next = tri.next; // ATTENTION!!!: 这里的tri.next已经被换掉!所以之前的要重新赋值! + int total = lineToSimplify.size(); + if (total < 3) { + return lineToSimplify.getVertices(); // 不足 3 个点无法形成三角形 } - // 2. 处理pi被淘汰引起tri.next被更新的事情 - tri.next.prev = tri.prev; // 注意此时tri.prev已经是替代后的节点,tri.next也是,从而被标记为废点的前后关联真正砍断 - tri.next.indices[0] = tri.indices[0]; - - // e parameter - List<Point> pointsForSig = - findEliminated(lineToSimplify, tri.next.indices[0], tri.next.indices[2]); - if (debug) { - System.out.println( - "(2) updating point on the right " + lineToSimplify.get(tri.next.indices[1])); - System.out.println("3<=cnt<=e+3. cnt=" + pointsForSig.size()); - for (Point point : pointsForSig) { - System.out.println("\t" + point); - } + int nTriangles = total - 2; + Triangle[] triangles = new Triangle[nTriangles]; + + // 创建所有三角形并计算初始面积 + lineToSimplify.get(0).prev = null; + lineToSimplify.get(0).next = lineToSimplify.get(1); + lineToSimplify.get(total - 1).next = null; + lineToSimplify.get(total - 1).prev = lineToSimplify.get(total - 2); + for (int i = 1; i < total - 1; i++) { + int index1 = i - 1, index2 = i, index3 = i + 1; + double area = triArea(lineToSimplify.get(index1), lineToSimplify.get(index2), lineToSimplify.get(index3)); + triangles[i - 1] = new Triangle(index1, index2, index3, area); + + // 初始化点的状态 for eBUG usage 用于快速按照按照时间顺序排列的滞后k个淘汰点状态下位于pa~pb的点;也用于最后在线采样结果顺序输出未淘汰点 + lineToSimplify.get(i).prev = lineToSimplify.get(i - 1); + lineToSimplify.get(i).next = lineToSimplify.get(i + 1); } - List<Point> baseLine = new ArrayList<>(); - baseLine.add(pointsForSig.get(0)); - baseLine.add(pointsForSig.get(pointsForSig.size() - 1)); // 直接连接左右两边最近的未被淘汰的点 - double sig = total_areal_displacement(pointsForSig, baseLine, false); - tri.next.area = sig; - - if (debug) { - System.out.println("sig=" + sig); - double tmpTri = - triArea( - lineToSimplify.get(tri.next.indices[0]), - lineToSimplify.get(tri.next.indices[1]), - lineToSimplify.get(tri.next.indices[2])); - System.out.println( - "\t" - + "tri=" - + tmpTri - + ", " - + ((tmpTri > sig) ? "over-estimated" : "equal/less-estimated")); + + // 设置三角形的前后关系 + for (int i = 0; i < nTriangles; i++) { // TODO 这个可以和上一个循环合并吗??好像不可以 + triangles[i].prev = (i == 0 ? null : triangles[i - 1]); + triangles[i].next = (i == nTriangles - 1 ? null : triangles[i + 1]); } - // 重新加入堆 - // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 - // 所以必须通过add 来显式重新插入修改后的元素 - triangleHeap.add(tri.next); // 注意加入的是一个新的对象isDeleted=false - fakeCnt++; // 表示heap里多了一个被标记删除的假点 - } - if (debug) { - System.out.println( - ">>>bottom-up, remaining " - + triangleHeap.size() - + " triangles (including those marked for removal)"); - } - } + // 使用优先队列构建 minHeap + PriorityQueue<Triangle> triangleHeap = new PriorityQueue<>(Comparator.comparingDouble(t -> t.area)); + Collections.addAll(triangleHeap, triangles); // complexity TODO O(n) or O(nlogn)? + + double previousEA = -1; + // while (!triangleHeap.isEmpty()) { // TODO 注意triangleHeap里装的是non-terminal point对应的三角形 + // TODO 在线采样m个点,也就是说最后留下m-2个non-terminal point + // 注意:triangleHeap里装的包括了标记删除的点!所以triangleHeap.size()不是真正的留下的未被淘汰点数! + // 因此需要用fakeCnt来统计heap里的非真实点数,这样triangleHeap.size()-fakeCnt就是真正的留下的未被淘汰点数 + int remainNonTerminalPoint = Math.max(0, m - 2); + int fakeCnt = 0; + while (triangleHeap.size() - fakeCnt > remainNonTerminalPoint) { + // 注意peek只需要直接访问该位置的元素,不涉及任何重排或堆化操作 + // 而poll是删除堆顶元素,需要重新堆化以维护堆的性质,复杂度是O(logk),k是当前堆的大小 + Triangle tri = triangleHeap.poll(); // O(logn) + + // 如果该三角形已经被删除,跳过. Avoid using heap.remove(x) as it is O(n) complexity + // 而且除了heap里,已经没有其他节点会和它关联了,所有的connections关系已经迁移到新的角色替代节点上 + if (tri.isDeleted) { + fakeCnt--; // 取出了一个被标记删除点 + if (debug) { + System.out.println(">>>bottom-up, remaining " + triangleHeap.size() + " triangles (including those marked for removal)"); + } + continue; + } + + // 真正的淘汰点 + // 记录最近淘汰的点,注意不要重复记录也就是在上面执行之后再确认淘汰 + if (debug) { + System.out.println("(1) eliminate " + lineToSimplify.get(tri.indices[1])); + } + if (e > 0) { + if (laggedEliminatedPoints.size() == e) { + // 已经有e个滞后淘汰点了,为了把最近淘汰点加入,得先把最老的淘汰点施加上去 + Point removedPoint = laggedEliminatedPoints.removeFirst(); // 取出最早加入的淘汰点 复杂度1 + removedPoint.markEliminated(); // 注意是引用,所以改的内容后面可以看到 + } + laggedEliminatedPoints.addLast(lineToSimplify.get(tri.indices[1])); // 加入最新的淘汰点,滞后在这里先不施加 + } else { // e=0 没有滞后 立即标记删除 T'_k + lineToSimplify.get(tri.indices[1]).markEliminated(); + } + if (debug) { + System.out.println("the e most recently eliminated points (lagged):" + laggedEliminatedPoints); + } + + // 更新当前点的重要性(z 轴存储effective area,这是一个单调增的指标) + if (tri.area > previousEA) { + previousEA = tri.area; + } + // results.get(tri.indices[1]).z = previousEA; // dominated significance + lineToSimplify.get(tri.indices[1]).z = previousEA; // dominated significance + resultsBottomUpEliminated.add(lineToSimplify.get(tri.indices[1])); // TODO add的是Point引用,所以没有多用一倍的空间 + if (debug) { + System.out.println(Arrays.toString(tri.indices) + ", Dominated Sig=" + previousEA); + } + + // 更新相邻三角形 + if (tri.prev != null) { + // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 + + // 1. 处理旧的tri.prev被标记删除的事情(角色替代) + // triangleHeap.remove(tri.prev); // Avoid using heap.remove(x) as it is O(n) complexity! + tri.prev.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只要heap poll到它就跳过就可以 + + Triangle newPre = new Triangle(tri.prev); // deep copy and inherit connection + // tri.prev = newPre; // can omit, because already done by new Triangle(tri.prev) + + // 2. 处理pi被淘汰引起tri.prev被更新的事情 + // 前一个三角形连到后一个三角形 + tri.prev.next = tri.next; // ATTENTION!!!: 这里的tri.next后面可能会因为处理旧的tri.next被标记删除的事情被换掉!到时候要重新赋值! + tri.prev.indices[2] = tri.indices[2]; + + // e parameter + double sig; + if (e > 0) { + List<Point> pointsForSig = findEliminated(lineToSimplify, tri.prev.indices[0], tri.prev.indices[2]); + if (debug) { + System.out.println("(2) update point on the left " + lineToSimplify.get(tri.prev.indices[1])); + System.out.println("3<=cnt<=e+3. cnt=" + pointsForSig.size()); + for (Point point : pointsForSig) { + System.out.println("\t" + point); + } + } + List<Point> baseLine = new ArrayList<>(); + baseLine.add(pointsForSig.get(0)); + baseLine.add(pointsForSig.get(pointsForSig.size() - 1)); // 直接连接左右两边最近的未被淘汰的点 + sig = total_areal_displacement(pointsForSig, baseLine, false); + } else { // 直接用三角形面积 + sig = triArea(lineToSimplify.get(tri.prev.indices[0]), lineToSimplify.get(tri.prev.indices[1]), lineToSimplify.get(tri.prev.indices[2])); + } + tri.prev.area = sig; + + if (debug) { + System.out.println("sig=" + sig); + double tmpTri = triArea(lineToSimplify.get(tri.prev.indices[0]), lineToSimplify.get(tri.prev.indices[1]), lineToSimplify.get(tri.prev.indices[2])); + System.out.println("\t" + "tri=" + tmpTri + ", " + ((tmpTri > sig) ? "over-estimated" : "equal/less-estimated")); + } + + // 重新加入堆 + // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 + // 所以必须通过add来显式重新插入修改后的元素 + triangleHeap.add(tri.prev); // O(logn) 注意加入的是一个新的对象isDeleted=false + fakeCnt++; // 表示heap里多了一个被标记删除的假点 + } + + if (tri.next != null) { + // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 + + // 1. 处理旧的tri.next被标记删除的事情(角色替代) + // triangleHeap.remove(tri.next); // Avoid using heap.remove(x) as it is O(n) complexity + tri.next.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只有poll到它就跳过就可以 + + Triangle newNext = new Triangle(tri.next); // deep copy and inherit connection + // tri.next = newNext; // omit, because already done by new Triangle(tri.prev) + + if (tri.prev != null) { + tri.prev.next = tri.next; // ATTENTION!!!: 这里的tri.next已经被换掉!所以之前的要重新赋值! + } + + // 2. 处理pi被淘汰引起tri.next被更新的事情 + tri.next.prev = tri.prev; // 注意此时tri.prev已经是替代后的节点,tri.next也是,从而被标记为废点的前后关联真正砍断 + tri.next.indices[0] = tri.indices[0]; + + // e parameter + double sig; + if (e > 0) { + List<Point> pointsForSig = findEliminated(lineToSimplify, tri.next.indices[0], tri.next.indices[2]); + if (debug) { + System.out.println("(2) updating point on the right " + lineToSimplify.get(tri.next.indices[1])); + System.out.println("3<=cnt<=e+3. cnt=" + pointsForSig.size()); + for (Point point : pointsForSig) { + System.out.println("\t" + point); + } + } + List<Point> baseLine = new ArrayList<>(); + baseLine.add(pointsForSig.get(0)); + baseLine.add(pointsForSig.get(pointsForSig.size() - 1)); // 直接连接左右两边最近的未被淘汰的点 + sig = total_areal_displacement(pointsForSig, baseLine, false); + } else { // 直接用三角形面积 + sig = triArea(lineToSimplify.get(tri.next.indices[0]), lineToSimplify.get(tri.next.indices[1]), lineToSimplify.get(tri.next.indices[2])); + } + tri.next.area = sig; + + if (debug) { + System.out.println("sig=" + sig); + double tmpTri = triArea(lineToSimplify.get(tri.next.indices[0]), lineToSimplify.get(tri.next.indices[1]), lineToSimplify.get(tri.next.indices[2])); + System.out.println("\t" + "tri=" + tmpTri + ", " + ((tmpTri > sig) ? "over-estimated" : "equal/less-estimated")); + } + + // 重新加入堆 + // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 + // 所以必须通过add 来显式重新插入修改后的元素 + triangleHeap.add(tri.next); // 注意加入的是一个新的对象isDeleted=false + fakeCnt++; // 表示heap里多了一个被标记删除的假点 + } + if (debug) { + System.out.println(">>>bottom-up, remaining " + triangleHeap.size() + " triangles (including those marked for removal)"); + } + } - if (m > 2) { // online sampling mode - // 把滞后的淘汰点施加上去,然后返回在线采样结果(也就是返回剩余未被淘汰的点) - for (Point p : laggedEliminatedPoints) { - p.markEliminated(); - if (debug) { - System.out.println("apply lagged elimination of " + p); + if (m > 2) { // online sampling mode + // 把滞后的淘汰点施加上去,然后返回在线采样结果(也就是返回剩余未被淘汰的点) + for (Point p : laggedEliminatedPoints) { + p.markEliminated(); + if (debug) { + System.out.println("apply lagged elimination of " + p); + } + } + List<Point> onlineSampled = new ArrayList<>(); + Point start = lineToSimplify.get(0); + Point end = lineToSimplify.get(lineToSimplify.size() - 1); + while (start != end) { // Point类里增加prev&next指针,这样T'_max{0,k-e}里点的连接关系就有了,这样从Pa开始沿着指针,遍历点数一定不超过e+3 + onlineSampled.add(start); // 注意未淘汰的点的Dominated significance尚未赋值,还都是infinity + start = start.next; // when e=0, only traversing three points pa pi pb + } + onlineSampled.add(end); + return onlineSampled; + } else { // offline precomputation mode, for precomputing the dominated significance of each + // point + // return results; // 注意这就是lineToSimplify.getVertices() + // TODO + resultsBottomUpEliminated.add(lineToSimplify.get(0)); // 全局首点 + resultsBottomUpEliminated.add(lineToSimplify.get(lineToSimplify.size() - 1)); // 全局尾点 + return resultsBottomUpEliminated; } - } - List<Point> onlineSampled = new ArrayList<>(); - Point start = lineToSimplify.get(0); - Point end = lineToSimplify.get(lineToSimplify.size() - 1); - while (start - != end) { // Point类里增加prev&next指针,这样T'_max{0,k-e}里点的连接关系就有了,这样从Pa开始沿着指针,遍历点数一定不超过e+3 - onlineSampled.add(start); // 注意未淘汰的点的Dominated significance尚未赋值,还都是infinity - start = start.next; // when e=0, only traversing three points pa pi pb - } - onlineSampled.add(end); - return onlineSampled; - } else { // offline precomputation mode, for precomputing the dominated significance of each - // point - // return results; // 注意这就是lineToSimplify.getVertices() - // TODO - resultsBottomUpEliminated.add(lineToSimplify.get(0)); // 全局首点 - resultsBottomUpEliminated.add(lineToSimplify.get(lineToSimplify.size() - 1)); // 全局尾点 - return resultsBottomUpEliminated; } - } - - public static void main(String[] args) { - // 实验:预计算耗时关于两个参数e、n的变化规律,是否符合复杂度理论建模 - - // int eParam = 0; - // int[] eParamList = {0, 0, 1, 2, 3, 4, 5, 10, 14, 15, 16, 20, 30, 40, 50, 100, 200, - // 500, 1000, 2000, 5000, - // 10000, 50000, 10_0000, 50_0000, 100_0000, 200_0000, 300_0000, - // 500_0000, 800_0000, 1000_0000, - // 1500_0000, 2000_0000, 2500_0000, 3000_0000}; - int[] eParamList = {100_0000}; - for (int eParam : eParamList) { - try (PrintWriter writer = new PrintWriter(new File("exp_varyN_e" + eParam + ".csv"))) { - for (int n = 100_0000; n < 2000_0000; n += 200_0000) { // 超过两千万就都变得慢多了,感觉可能是内存有限的原因,而不是算法复杂度 - // TODO 注意 要有一个点是n=300w - - Polyline polyline = new Polyline(); - int seed = 1; - Random rand = new Random(seed); // TODO 注意seed在每轮里重设 - for (int i = 0; i < n; i++) { - double v = rand.nextInt(1000000); - polyline.addVertex(new Point(i, v)); - } - - long startTime = System.currentTimeMillis(); - List<Point> results = buildEffectiveArea(polyline, eParam, false, 0); - long endTime = System.currentTimeMillis(); - - System.out.println( - "n=" - + n - + ", e=" - + eParam - + ", Time taken to reduce points: " - + (endTime - startTime) - + "ms"); - writer.println(n + "," + eParam + "," + (endTime - startTime)); - System.out.println(results.size()); - - // // clear - // polyline.clear(); - // results.clear(); - // polyline = null; - // results = null; - // System.gc(); + + public static void main(String[] args) { + // 实验:预计算耗时关于两个参数e、n的变化规律,是否符合复杂度理论建模 + + // int eParam = 0; + // int[] eParamList = {0, 0, 1, 2, 3, 4, 5, 10, 14, 15, 16, 20, 30, 40, 50, 100, 200, + // 500, 1000, 2000, 5000, + // 10000, 50000, 10_0000, 50_0000, 100_0000, 200_0000, 300_0000, + // 500_0000, 800_0000, 1000_0000, + // 1500_0000, 2000_0000, 2500_0000, 3000_0000}; + int[] eParamList = {100_0000}; + for (int eParam : eParamList) { + try (PrintWriter writer = new PrintWriter(new File("exp_varyN_e" + eParam + ".csv"))) { + for (int n = 100_0000; n < 2000_0000; n += 200_0000) { // 超过两千万就都变得慢多了,感觉可能是内存有限的原因,而不是算法复杂度 + // TODO 注意 要有一个点是n=300w + + Polyline polyline = new Polyline(); + int seed = 1; + Random rand = new Random(seed); // TODO 注意seed在每轮里重设 + for (int i = 0; i < n; i++) { + double v = rand.nextInt(1000000); + polyline.addVertex(new Point(i, v)); + } + + long startTime = System.currentTimeMillis(); + List<Point> results = buildEffectiveArea(polyline, eParam, false, 0); + long endTime = System.currentTimeMillis(); + + System.out.println("n=" + n + ", e=" + eParam + ", Time taken to reduce points: " + (endTime - startTime) + "ms"); + writer.println(n + "," + eParam + "," + (endTime - startTime)); + System.out.println(results.size()); + + // // clear + // polyline.clear(); + // results.clear(); + // polyline = null; + // results = null; + // System.gc(); + } + } catch (FileNotFoundException e) { + throw new RuntimeException(e); + } } - } catch (FileNotFoundException e) { - throw new RuntimeException(e); - } - } - // int n = 300_0000; - // - // int seed = 1; - // Random rand = new Random(seed); // TODO 注意seed在每轮里重设 - // Polyline polyline = new Polyline(); // 注意:如果是固定n变化e实验,这个数据要一次性生成,不然每次随机不一样 - // for (int i = 0; i < n; i++) { - // double v = rand.nextInt(1000000); - // polyline.addVertex(new Point(i, v)); // - // point的状态会在buildEffectiveArea里重置的,所以在下面的e遍历里可以复用这一份数据 - // } - // - // try (PrintWriter writer = new PrintWriter(new File("exp_varyE_n" + n + ".csv"))) { - // int[] eParamList = {0, 1, 2, 10, 1000, 10000, 10_0000, 30_0000, 50_0000, 60_0000, - // 80_0000, 100_0000, 150_0000, 200_0000}; - // for (int eParam : eParamList) { - // eParam *= 3; // TODO 注意 因为n=300w而不是100w - // - // long startTime = System.currentTimeMillis(); - // List<Point> results = buildEffectiveArea(polyline, eParam, false); - // long endTime = System.currentTimeMillis(); - // - // System.out.println("n=" + n + ", e=" + eParam + ", Time taken to reduce - // points: " + (endTime - startTime) + "ms"); - // writer.println(n + "," + eParam + "," + (endTime - startTime)); - // System.out.println(results.size()); - // - // // clear 注意不能把原始数据清除,还要接着用 - //// System.gc(); - // } - // } catch (FileNotFoundException e) { - // throw new RuntimeException(e); - // } - - System.out.println("finish"); - } + // int n = 300_0000; + // + // int seed = 1; + // Random rand = new Random(seed); // TODO 注意seed在每轮里重设 + // Polyline polyline = new Polyline(); // 注意:如果是固定n变化e实验,这个数据要一次性生成,不然每次随机不一样 + // for (int i = 0; i < n; i++) { + // double v = rand.nextInt(1000000); + // polyline.addVertex(new Point(i, v)); // + // point的状态会在buildEffectiveArea里重置的,所以在下面的e遍历里可以复用这一份数据 + // } + // + // try (PrintWriter writer = new PrintWriter(new File("exp_varyE_n" + n + ".csv"))) { + // int[] eParamList = {0, 1, 2, 10, 1000, 10000, 10_0000, 30_0000, 50_0000, 60_0000, + // 80_0000, 100_0000, 150_0000, 200_0000}; + // for (int eParam : eParamList) { + // eParam *= 3; // TODO 注意 因为n=300w而不是100w + // + // long startTime = System.currentTimeMillis(); + // List<Point> results = buildEffectiveArea(polyline, eParam, false); + // long endTime = System.currentTimeMillis(); + // + // System.out.println("n=" + n + ", e=" + eParam + ", Time taken to reduce + // points: " + (endTime - startTime) + "ms"); + // writer.println(n + "," + eParam + "," + (endTime - startTime)); + // System.out.println(results.size()); + // + // // clear 注意不能把原始数据清除,还要接着用 + //// System.gc(); + // } + // } catch (FileNotFoundException e) { + // throw new RuntimeException(e); + // } + + System.out.println("finish"); + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG_Build.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG_Build.java index 94713f71ec8..fd8c6399c8f 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG_Build.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG_Build.java @@ -3,137 +3,137 @@ package org.apache.iotdb.db.query.eBUG; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.Iterator; import java.util.List; -import java.util.stream.Stream; import static org.apache.iotdb.db.query.eBUG.eBUG.buildEffectiveArea; public class eBUG_Build { - // 输入一条时间序列 t,v - // 输出按照bottom-up淘汰顺序排列的dominated significance,t,v。 - // 用于后期在线采样时选取倒数m个点(也就是DS最大的m个点,或者最晚淘汰的m个点)作为采样结果(选出之后要自行把这m个点重新按照时间戳x递增排列) - public static void main(String[] args) { - if (args.length < 5) { - System.out.println( - "Usage: Please provide arguments: inputFilePath,hasHeader,timeIdx,valueIdx,eParam,(outDir)"); - } - String input = args[0]; - boolean hasHeader = Boolean.parseBoolean(args[1]); - int timeIdx = Integer.parseInt(args[2]); - int valueIdx = Integer.parseInt(args[3]); - int eParam = Integer.parseInt(args[4]); - String outDir; - if (args.length > 5) { - outDir = args[5]; // 表示输出文件保存到指定的文件夹 - } else { - outDir = null; // 表示输出文件保存到input文件所在的文件夹 - } - String outputFile = generateOutputFileName(input, eParam, outDir); - - // 打印信息供用户确认 - System.out.println("Input file: " + input); - System.out.println("Has header: " + hasHeader); - System.out.println("Time index: " + timeIdx); - System.out.println("Value index: " + valueIdx); - System.out.println("eParam: " + eParam); - System.out.println("outDir: " + outDir); - System.out.println("Output file: " + outputFile); - - // 开始运算 - Polyline polyline = new Polyline(); - - // Using Files.lines() for memory-efficient line-by-line reading - try (Stream<String> lines = Files.lines(Paths.get(input))) { - Iterator<String> iterator = lines.iterator(); - int lineNumber = 0; - - // Skip the header line if necessary - if (hasHeader && iterator.hasNext()) { - iterator.next(); // Skip the header - } - - // Process each line - while (iterator.hasNext()) { - lineNumber++; - String line = iterator.next(); - String[] columns = line.split(","); - - if (columns.length > Math.max(timeIdx, valueIdx)) { - double time; - if (timeIdx < 0) { - time = lineNumber; - } else { - time = Double.parseDouble(columns[timeIdx]); - } - double value = Double.parseDouble(columns[valueIdx]); - polyline.addVertex(new Point(time, value)); - // System.out.println("Line " + lineNumber + " - Time: " + time + ", - // Value: " + value); + // 输入一条时间序列 t,v + // 输出按照bottom-up淘汰顺序排列的dominated significance,t,v。 + // 用于后期在线采样时选取倒数m个点(也就是DS最大的m个点,或者最晚淘汰的m个点)作为采样结果(选出之后要自行把这m个点重新按照时间戳x递增排列) + public static void main(String[] args) { + if (args.length < 6) { + System.out.println( + "Usage: Please provide arguments: inputFilePath,hasHeader,timeIdx,valueIdx,N,eParam,(outDir)"); + } + String input = args[0]; + boolean hasHeader = Boolean.parseBoolean(args[1]); + int timeIdx = Integer.parseInt(args[2]); + int valueIdx = Integer.parseInt(args[3]); + int N = Integer.parseInt(args[4]); // N<=0表示读全部行,N>0表示读最多N行 + int eParam = Integer.parseInt(args[5]); + String outDir; + if (args.length > 6) { + outDir = args[6]; // 表示输出文件保存到指定的文件夹 } else { - System.out.println("Line " + lineNumber + " is malformed (not enough columns)."); + outDir = null; // 表示输出文件保存到input文件所在的文件夹 + } + String outputFile = generateOutputFileName(input, eParam, outDir); + + // 打印信息供用户确认 + System.out.println("Input file: " + input); + System.out.println("Has header: " + hasHeader); + System.out.println("Time index: " + timeIdx); + System.out.println("Value index: " + valueIdx); + System.out.println("N: " + N); + System.out.println("eParam: " + eParam); + System.out.println("outDir: " + outDir); + System.out.println("Output file: " + outputFile); + + // 开始运算 + Polyline polyline = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); +// Polyline polyline = new Polyline(); +// +// // Using Files.lines() for memory-efficient line-by-line reading +// try (Stream<String> lines = Files.lines(Paths.get(input))) { +// Iterator<String> iterator = lines.iterator(); +// int lineNumber = 0; +// +// // Skip the header line if necessary +// if (hasHeader && iterator.hasNext()) { +// iterator.next(); // Skip the header +// } +// +// // Process each line +// while (iterator.hasNext()) { +// lineNumber++; +// String line = iterator.next(); +// String[] columns = line.split(","); +// +// if (columns.length > Math.max(timeIdx, valueIdx)) { +// double time; +// if (timeIdx < 0) { +// time = lineNumber; +// } else { +// time = Double.parseDouble(columns[timeIdx]); +// } +// double value = Double.parseDouble(columns[valueIdx]); +// polyline.addVertex(new Point(time, value)); +// // System.out.println("Line " + lineNumber + " - Time: " + time + ", +// // Value: " + value); +// } else { +// System.out.println("Line " + lineNumber + " is malformed (not enough columns)."); +// } +// } +// } catch (IOException e) { +// e.printStackTrace(); +// } + + long startTime = System.currentTimeMillis(); + List<Point> results = buildEffectiveArea(polyline, eParam, false); // precomputation mode + long endTime = System.currentTimeMillis(); + + // System.out.println("result point number: " + results.size()); // precomputation + // mode所以自然是等于n个点 + System.out.println( + "n=" + + polyline.getVertices().size() + + ", e=" + + eParam + + ", Time taken to reduce points: " + + (endTime - startTime) + + "ms"); + + // 输出结果到csv,按照z,x,y三列,因为results结果已经按照z(即DS)递增排序,对应bottom-up的淘汰顺序,越小代表越早被淘汰 + try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) { + // 写入表头 + writer.write("z,x,y"); + writer.newLine(); + + // 写入数据行,按顺序 z, x, y + for (Point point : results) { + writer.write(point.z + "," + point.x + "," + point.y); + writer.newLine(); + } + + System.out.println("CSV file written successfully to " + outputFile); + } catch (IOException e) { + e.printStackTrace(); } - } - } catch (IOException e) { - e.printStackTrace(); - } - - long startTime = System.currentTimeMillis(); - List<Point> results = buildEffectiveArea(polyline, eParam, false); // precomputation mode - long endTime = System.currentTimeMillis(); - - // System.out.println("result point number: " + results.size()); // precomputation - // mode所以自然是等于n个点 - System.out.println( - "n=" - + polyline.getVertices().size() - + ", e=" - + eParam - + ", Time taken to reduce points: " - + (endTime - startTime) - + "ms"); - - // 输出结果到csv,按照z,x,y三列,因为results结果已经按照z(即DS)递增排序,对应bottom-up的淘汰顺序,越小代表越早被淘汰 - try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) { - // 写入表头 - writer.write("z,x,y"); - writer.newLine(); - - // 写入数据行,按顺序 z, x, y - for (Point point : results) { - writer.write(point.z + "," + point.x + "," + point.y); - writer.newLine(); - } - - System.out.println("CSV file written successfully to " + outputFile); - } catch (IOException e) { - e.printStackTrace(); } - } - // Method to generate the output file name based on input path - private static String generateOutputFileName(String inputFilePath, int e, String outDir) { - // Get the file name without extension - Path path = Paths.get(inputFilePath); - String fileNameWithoutExtension = path.getFileName().toString(); - String name = fileNameWithoutExtension.substring(0, fileNameWithoutExtension.lastIndexOf('.')); + // Method to generate the output file name based on input path + private static String generateOutputFileName(String inputFilePath, int e, String outDir) { + // Get the file name without extension + Path path = Paths.get(inputFilePath); + String fileNameWithoutExtension = path.getFileName().toString(); + String name = fileNameWithoutExtension.substring(0, fileNameWithoutExtension.lastIndexOf('.')); - // Get the file extension - String extension = - path.getFileName().toString().substring(fileNameWithoutExtension.lastIndexOf('.')); + // Get the file extension + String extension = + path.getFileName().toString().substring(fileNameWithoutExtension.lastIndexOf('.')); - // Create output file path by appending '-ds' before the extension - String outputFile = name + "-ds-e" + e + extension; + // Create output file path by appending '-ds' before the extension + String outputFile = name + "-ds-e" + e + extension; - if (outDir == null) { // 表示使用input文件所在的文件夹 - // Handle path compatibility for different operating systems (Linux/Windows) - return path.getParent().resolve(outputFile).toString(); - } else { - Path out = Paths.get(outDir); - return out.resolve(outputFile).toString(); + if (outDir == null) { // 表示使用input文件所在的文件夹 + // Handle path compatibility for different operating systems (Linux/Windows) + return path.getParent().resolve(outputFile).toString(); + } else { + Path out = Paths.get(outDir); + return out.resolve(outputFile).toString(); + } } - } }
