This is an automated email from the ASF dual-hosted git repository. leirui pushed a commit to branch research/area-visualization in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit d545f8db5f54b4a53d7018e446127d1ed1e889c4 Author: Lei Rui <[email protected]> AuthorDate: Thu Jan 23 22:35:46 2025 +0800 eBUG_build --- server/pom.xml | 23 + .../java/org/apache/iotdb/db/query/eBUG/Point.java | 48 +- .../org/apache/iotdb/db/query/eBUG/Polyline.java | 53 +- .../java/org/apache/iotdb/db/query/eBUG/Test1.java | 126 ++-- .../java/org/apache/iotdb/db/query/eBUG/Test2.java | 86 +-- .../java/org/apache/iotdb/db/query/eBUG/Test3.java | 91 +++ .../java/org/apache/iotdb/db/query/eBUG/Tmp.java | 58 +- .../java/org/apache/iotdb/db/query/eBUG/Tool.java | 365 +++++------ .../org/apache/iotdb/db/query/eBUG/Triangle.java | 76 +-- .../java/org/apache/iotdb/db/query/eBUG/eBUG.java | 681 +++++++++++---------- .../org/apache/iotdb/db/query/eBUG/eBUG_Build.java | 139 +++++ 11 files changed, 1035 insertions(+), 711 deletions(-) diff --git a/server/pom.xml b/server/pom.xml index 3ca3b695285..51f9a56c69a 100644 --- a/server/pom.xml +++ b/server/pom.xml @@ -278,6 +278,29 @@ </execution> </executions> </plugin> + <plugin> + <artifactId>maven-assembly-plugin</artifactId> + <configuration> + <finalName>eBUG_Build</finalName> + <archive> + <manifest> + <mainClass>org.apache.iotdb.db.query.eBUG.eBUG_Build</mainClass> + </manifest> + </archive> + <descriptorRefs> + <descriptorRef>jar-with-dependencies</descriptorRef> + </descriptorRefs> + </configuration> + <executions> + <execution> + <id>make-assembly</id> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + </execution> + </executions> + </plugin> </plugins> </build> <profiles> diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Point.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Point.java index 6e6fc3aa91e..e9bcf241f0f 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Point.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Point.java @@ -2,33 +2,33 @@ package org.apache.iotdb.db.query.eBUG; public class Point { - double x, y, z; + double x, y, z; - public Point prev; // 指向T'_max{0,k-e}里当前点的前一个点 for eBUG usage - public Point next; // 指向T'_max{0,k-e}里当前点的后一个点 for eBUG usage - // 注意Triangle里的prev&next用于最新状态下存在三角形之间的连接关系, - // 而这里Point的prev&next用于滞后e个淘汰点(也就是最近的e个淘汰点先不施加)状态下的未淘汰点之间的连接关系 + public Point prev; // 指向T'_max{0,k-e}里当前点的前一个点 for eBUG usage + public Point next; // 指向T'_max{0,k-e}里当前点的后一个点 for eBUG usage + // 注意Triangle里的prev&next用于最新状态下存在三角形之间的连接关系, + // 而这里Point的prev&next用于滞后e个淘汰点(也就是最近的e个淘汰点先不施加)状态下的未淘汰点之间的连接关系 - public Point(double x, double y) { - this.x = x; - this.y = y; - this.z = Double.POSITIVE_INFINITY; // effective area + public Point(double x, double y) { + this.x = x; + this.y = y; + this.z = Double.POSITIVE_INFINITY; // effective area - // for eBUG usage -// this.eliminated = false; - this.prev = null; - this.next = null; - } + // for eBUG usage + // this.eliminated = false; + this.prev = null; + this.next = null; + } - public void markEliminated() { - // to avoid traversing each point between pa to pb, - // instead only traversing at most e most recently eliminated points lagged - prev.next = next; - next.prev = prev; - } + public void markEliminated() { + // to avoid traversing each point between pa to pb, + // instead only traversing at most e most recently eliminated points lagged + prev.next = next; + next.prev = prev; + } - @Override - public String toString() { - return "Point: (" + x + ", " + y + ", " + z + ")"; - } + @Override + public String toString() { + return "Point: (" + x + ", " + y + ", " + z + ")"; + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Polyline.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Polyline.java index 1ddfe2ebbe1..534c7a6f67e 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Polyline.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Polyline.java @@ -5,31 +5,30 @@ import java.util.List; public class Polyline { - private List<Point> vertices = new ArrayList<>(); - - public void addVertex(Point point) { -// if (!vertices.isEmpty()) { //before adding this point -// vertices.get(vertices.size() - 1).next = point; -// point.prev = vertices.get(vertices.size() - 1); -// } - vertices.add(point); - } - - public List<Point> getVertices() { -// return new ArrayList<>(vertices); - return vertices; - } - - - public int size() { - return vertices.size(); - } - - public Point get(int index) { - return vertices.get(index); - } - - public void clear() { - vertices.clear(); - } + private List<Point> vertices = new ArrayList<>(); + + public void addVertex(Point point) { + // if (!vertices.isEmpty()) { //before adding this point + // vertices.get(vertices.size() - 1).next = point; + // point.prev = vertices.get(vertices.size() - 1); + // } + vertices.add(point); + } + + public List<Point> getVertices() { + // return new ArrayList<>(vertices); + return vertices; + } + + public int size() { + return vertices.size(); + } + + public Point get(int index) { + return vertices.get(index); + } + + public void clear() { + vertices.clear(); + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test1.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test1.java index 91df7751e50..385362b9e6b 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test1.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test1.java @@ -10,74 +10,82 @@ import java.util.Random; import static org.apache.iotdb.db.query.eBUG.eBUG.buildEffectiveArea; public class Test1 { - // 用于验证Java eBUG实现和python版本(e=0/1)结果的一致性 - public static void main(String[] args) { - Polyline polyline = new Polyline(); - List<Polyline> polylineList = new ArrayList<>(); - Random rand = new Random(10); - int n = 10000; - int eParam = 0; + // 用于验证Java eBUG实现和python版本(e=0/1)结果的一致性 + public static void main(String[] args) { + Polyline polyline = new Polyline(); + List<Polyline> polylineList = new ArrayList<>(); + Random rand = new Random(10); + int n = 10000; + int eParam = 1; - int p = 10; - for (int i = 0; i < n; i += p) { - Polyline polylineBatch = new Polyline(); - for (int j = i; j < Math.min(i + p, n); j++) { - double v = rand.nextInt(1000000); + int p = 10; + for (int i = 0; i < n; i += p) { + Polyline polylineBatch = new Polyline(); + for (int j = i; j < Math.min(i + p, n); j++) { + double v = rand.nextInt(1000000); - polyline.addVertex(new Point(j, v)); + polyline.addVertex(new Point(j, v)); - polylineBatch.addVertex(new Point(j, v)); - } - polylineList.add(polylineBatch); - } + polylineBatch.addVertex(new Point(j, v)); + } + polylineList.add(polylineBatch); + } - try (FileWriter writer = new FileWriter("raw.csv")) { - // 写入CSV头部 - writer.append("x,y,z\n"); + try (FileWriter writer = new FileWriter("raw.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); - // 写入每个点的数据 - for (int i = 0; i < polyline.size(); i++) { - Point point = polyline.get(i); - writer.append(point.x + "," + point.y + "," + point.z + "\n"); - } - System.out.println("Data has been written"); - } catch (IOException e) { - System.out.println("Error writing to CSV file: " + e.getMessage()); - } + // 写入每个点的数据 + for (int i = 0; i < polyline.size(); i++) { + Point point = polyline.get(i); + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println("Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); + } - System.out.println("---------------------------------"); - long startTime = System.currentTimeMillis(); - // 注意现在返回的结果是按照Sig递增也就是bottom-up淘汰的顺序排列的,而不是按照时间戳递增排列 - List<Point> results = buildEffectiveArea(polyline, eParam, false); - // 输出结果 - long endTime = System.currentTimeMillis(); - System.out.println("n=" + n + ", e=" + eParam + ", Time taken to reduce points: " + (endTime - startTime) + "ms"); - System.out.println(results.size()); + System.out.println("---------------------------------"); + long startTime = System.currentTimeMillis(); + // 注意现在返回的结果是按照Sig递增也就是bottom-up淘汰的顺序排列的,而不是按照时间戳递增排列 + List<Point> results = buildEffectiveArea(polyline, eParam, false); + // 输出结果 + long endTime = System.currentTimeMillis(); + System.out.println( + "n=" + + n + + ", e=" + + eParam + + ", Time taken to reduce points: " + + (endTime - startTime) + + "ms"); + System.out.println(results.size()); - // 注意现在返回的结果是按照Sig递增也就是bottom-up淘汰的顺序排列的,而不是按照时间戳递增排列 - // 按照时间戳递增排序整理: - results.sort(Comparator.comparingDouble(point -> point.x)); + // 注意现在返回的结果是按照Sig递增也就是bottom-up淘汰的顺序排列的,而不是按照时间戳递增排列 + // 按照时间戳递增排序整理: + System.out.println("make the bottom-up results sorted by time:"); + results.sort(Comparator.comparingDouble(point -> point.x)); - if (results.size() <= 100) { - System.out.println("+++++++++++++++++++"); - for (int i = 0; i < results.size(); i++) { - Point point = results.get(i); - System.out.println("Point: (" + point.x + ", " + point.y + ", " + point.z + ")"); - } - } + if (results.size() <= 100) { + System.out.println("+++++++++++++++++++"); + for (int i = 0; i < results.size(); i++) { + Point point = results.get(i); + System.out.println("Point: (" + point.x + ", " + point.y + ", " + point.z + ")"); + } + } - try (FileWriter writer = new FileWriter("fast.csv")) { - // 写入CSV头部 - writer.append("x,y,z\n"); + try (FileWriter writer = new FileWriter("fast.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); - // 写入每个点的数据 - for (int i = 0; i < results.size(); i++) { - Point point = results.get(i); - writer.append(point.x + "," + point.y + "," + point.z + "\n"); - } - System.out.println("Data has been written"); - } catch (IOException e) { - System.out.println("Error writing to CSV file: " + e.getMessage()); - } + // 写入每个点的数据 + for (int i = 0; i < results.size(); i++) { + Point point = results.get(i); + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println("Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); } + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test2.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test2.java index fb7de582f17..6cb0accefaa 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test2.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test2.java @@ -9,44 +9,52 @@ import java.util.Random; import static org.apache.iotdb.db.query.eBUG.eBUG.buildEffectiveArea; public class Test2 { - // 用于测试在线采样 - public static void main(String[] args) { - int n = 20; - int eParam = 1; - - int m = 2; // >2代表在线采样,<=2代表预计算 -// for (int m = 9900; m <= n; m++) { - - Polyline polyline = new Polyline(); - Random rand = new Random(2); - for (int i = 0; i < n; i++) { - double v = rand.nextInt(1000000); - polyline.addVertex(new Point(i, v)); - } - - long startTime = System.currentTimeMillis(); - List<Point> results = buildEffectiveArea(polyline, eParam, false, m); - long endTime = System.currentTimeMillis(); - System.out.println("n=" + n + ", e=" + eParam + ", Time taken to reduce points: " + (endTime - startTime) + "ms"); - - System.out.println(results.size()); - if (m > 2 && m < n) { - Assert.isTrue(results.size() == m); - } else { - Assert.isTrue(results.size() == n); - } - if (results.size() <= 100) { - System.out.println("+++++++++++++++++++"); - for (Point point : results) { - System.out.println("Point: (" + point.x + ", " + point.y + ", " + point.z + ")"); - } - // 注意现在返回的结果是按照Sig递增也就是bottom-up淘汰的顺序排列的,而不是按照时间戳递增排列 - // 按照时间戳递增排序整理: - results.sort(Comparator.comparingDouble(point -> point.x)); - System.out.println("+++++++++++++++++++"); - for (Point point : results) { - System.out.println("Point: (" + point.x + ", " + point.y + ", " + point.z + ")"); - } - } + // 用于测试在线采样 + public static void main(String[] args) { + int n = 20; + int eParam = 1; + + int m = 2; // >2代表在线采样,<=2代表预计算 + // for (int m = 9900; m <= n; m++) { + + Polyline polyline = new Polyline(); + Random rand = new Random(2); + for (int i = 0; i < n; i++) { + double v = rand.nextInt(1000000); + polyline.addVertex(new Point(i, v)); } + + long startTime = System.currentTimeMillis(); + List<Point> results = buildEffectiveArea(polyline, eParam, false, m); + long endTime = System.currentTimeMillis(); + System.out.println( + "n=" + + n + + ", e=" + + eParam + + ", Time taken to reduce points: " + + (endTime - startTime) + + "ms"); + + System.out.println(results.size()); + if (m > 2 && m < n) { + Assert.isTrue(results.size() == m); + } else { + Assert.isTrue(results.size() == n); + } + if (results.size() <= 100) { + System.out.println("+++++++++++++++++++"); + for (Point point : results) { + System.out.println("Point: (" + point.x + ", " + point.y + ", " + point.z + ")"); + } + // 注意现在返回的结果是按照Sig递增也就是bottom-up淘汰的顺序排列的,而不是按照时间戳递增排列 + // 按照时间戳递增排序整理: + System.out.println("make the bottom-up results sorted by time:"); + results.sort(Comparator.comparingDouble(point -> point.x)); + System.out.println("+++++++++++++++++++"); + for (Point point : results) { + System.out.println("Point: (" + point.x + ", " + point.y + ", " + point.z + ")"); + } + } + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test3.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test3.java new file mode 100644 index 00000000000..d17ea8fcc65 --- /dev/null +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test3.java @@ -0,0 +1,91 @@ +package org.apache.iotdb.db.query.eBUG; + +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Random; + +import static org.apache.iotdb.db.query.eBUG.eBUG.buildEffectiveArea; + +public class Test3 { + // 用于验证Java eBUG实现和python版本(e=0/1)结果的一致性 + public static void main(String[] args) { + Polyline polyline = new Polyline(); + List<Polyline> polylineList = new ArrayList<>(); + Random rand = new Random(10); + int n = 10000; + int eParam = 0; + + int p = 10; + for (int i = 0; i < n; i += p) { + Polyline polylineBatch = new Polyline(); + for (int j = i; j < Math.min(i + p, n); j++) { + double v = rand.nextInt(1000000); + + polyline.addVertex(new Point(j, v)); + + polylineBatch.addVertex(new Point(j, v)); + } + polylineList.add(polylineBatch); + } + + try (FileWriter writer = new FileWriter("raw.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); + + // 写入每个点的数据 + for (int i = 0; i < polyline.size(); i++) { + Point point = polyline.get(i); + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println("Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); + } + + System.out.println("---------------------------------"); + long startTime = System.currentTimeMillis(); + // 注意现在返回的结果是按照Sig递增也就是bottom-up淘汰的顺序排列的,而不是按照时间戳递增排列 + List<Point> results = buildEffectiveArea(polyline, eParam, false); + // 输出结果 + long endTime = System.currentTimeMillis(); + System.out.println( + "n=" + + n + + ", e=" + + eParam + + ", Time taken to reduce points: " + + (endTime - startTime) + + "ms"); + System.out.println(results.size()); + + // 注意现在返回的结果是按照Sig递增也就是bottom-up淘汰的顺序排列的,而不是按照时间戳递增排列 + // 按照时间戳递增排序整理: + System.out.println("make the bottom-up results sorted by time:"); + results.sort(Comparator.comparingDouble(point -> point.x)); + + if (results.size() <= 100) { + System.out.println("+++++++++++++++++++"); + for (int i = 0; i < results.size(); i++) { + Point point = results.get(i); + System.out.println("Point: (" + point.x + ", " + point.y + ", " + point.z + ")"); + } + } + + try (FileWriter writer = new FileWriter("fast.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); + + // 写入每个点的数据 + for (int i = 0; i < results.size(); i++) { + Point point = results.get(i); + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println("Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); + } + } +} diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tmp.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tmp.java index 9cfb8ac3d5f..270aca1dee7 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tmp.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tmp.java @@ -8,35 +8,37 @@ import java.util.PriorityQueue; import java.util.Random; public class Tmp { - // 用于debug为什么n>2kw的耗时增大明显 - public static void main(String[] args) { - int seed = 10; - Random rand = new Random(seed); - int eParam = 1; - try (PrintWriter writer = new PrintWriter(new File("tmp.csv"))) { - for (int n = 100_0000; n < 100000_0000; n += 5000_0000) { // 超过两千万就都变得慢多了,感觉可能是内存有限的原因,而不是算法复杂度 -// for (int n = 19000000; n < 3000_0000; n +=200_0000) { - PriorityQueue<Point> heap = new PriorityQueue<>(Comparator.comparingDouble(t -> t.y)); - for (int i = 0; i < n; i++) { - double v = rand.nextInt(1000000); - heap.add(new Point(i, v)); - } - - long startTime = System.currentTimeMillis(); - long sum = 0; - while (!heap.isEmpty()) { - Point p = heap.poll(); - sum += p.x; - } - long endTime = System.currentTimeMillis(); - System.out.println(n + ", Time taken to reduce points: " + (endTime - startTime) + "ms"); - writer.println(n + "," + "0" + "," + (endTime - startTime)); - System.out.println(sum); - } - } catch (FileNotFoundException e) { - throw new RuntimeException(e); + // 用于debug为什么n>2kw的耗时增大明显 + public static void main(String[] args) { + int seed = 10; + Random rand = new Random(seed); + int eParam = 1; + try (PrintWriter writer = new PrintWriter(new File("tmp.csv"))) { + for (int n = 100_0000; + n < 100000_0000; + n += 5000_0000) { // 超过两千万就都变得慢多了,感觉可能是内存有限的原因,而不是算法复杂度 + // for (int n = 19000000; n < 3000_0000; n +=200_0000) { + PriorityQueue<Point> heap = new PriorityQueue<>(Comparator.comparingDouble(t -> t.y)); + for (int i = 0; i < n; i++) { + double v = rand.nextInt(1000000); + heap.add(new Point(i, v)); } - System.out.println("finish"); + long startTime = System.currentTimeMillis(); + long sum = 0; + while (!heap.isEmpty()) { + Point p = heap.poll(); + sum += p.x; + } + long endTime = System.currentTimeMillis(); + System.out.println(n + ", Time taken to reduce points: " + (endTime - startTime) + "ms"); + writer.println(n + "," + "0" + "," + (endTime - startTime)); + System.out.println(sum); + } + } catch (FileNotFoundException e) { + throw new RuntimeException(e); } + + System.out.println("finish"); + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tool.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tool.java index f7c5bbfb668..86658e24385 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tool.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tool.java @@ -1,199 +1,204 @@ package org.apache.iotdb.db.query.eBUG; - import java.util.ArrayList; import java.util.Collections; import java.util.List; public class Tool { - // 计算三角形面积 - public static double triArea(Point d0, Point d1, Point d2) { - double dArea = ((d1.x - d0.x) * (d2.y - d0.y) - (d2.x - d0.x) * (d1.y - d0.y)) / 2.0; - return (dArea > 0.0) ? dArea : -dArea; // abs + // 计算三角形面积 + public static double triArea(Point d0, Point d1, Point d2) { + double dArea = ((d1.x - d0.x) * (d2.y - d0.y) - (d2.x - d0.x) * (d1.y - d0.y)) / 2.0; + return (dArea > 0.0) ? dArea : -dArea; // abs + } + + // 计算简单多边形(边不交叉)的面积(鞋带公式) + public static double calculatePolygonArea(List<Point> points) { + // points多边形顶点,要求按照逆时针或者顺时针的顺序枚举,否则鞋带公式无法给出正确结果 + int n = points.size(); + double area = 0; + for (int i = 0; i < n; i++) { + int next = (i + 1) % n; + area += points.get(i).x * points.get(next).y - points.get(next).x * points.get(i).y; } - - // 计算简单多边形(边不交叉)的面积(鞋带公式) - public static double calculatePolygonArea(List<Point> points) { - // points多边形顶点,要求按照逆时针或者顺时针的顺序枚举,否则鞋带公式无法给出正确结果 - int n = points.size(); - double area = 0; - for (int i = 0; i < n; i++) { - int next = (i + 1) % n; - area += points.get(i).x * points.get(next).y - points.get(next).x * points.get(i).y; - } - return Math.abs(area) / 2.0; + return Math.abs(area) / 2.0; + } + + // 计算两个向量的叉积 + public static double crossProduct(double x1, double y1, double x2, double y2) { + // >0: (x2,y2)在(x1,y1)的逆时针方向 + // <0: (x2,y2)在(x1,y1)的顺时针方向 + // =0: 平行或共线 + return x1 * y2 - y1 * x2; + } + + // 判断两条线段是否相交并计算交点 + // L1包含一个线段的两个端点,L2包含另一个线段的两个端点 + public static Object[] lineIntersection(Point[] L1, Point[] L2) { + double x1 = L1[0].x, y1 = L1[0].y, x2 = L1[1].x, y2 = L1[1].y; + double x3 = L2[0].x, y3 = L2[0].y, x4 = L2[1].x, y4 = L2[1].y; + + // 判断是否相交(叉积) + double d1 = crossProduct(x2 - x1, y2 - y1, x3 - x1, y3 - y1); + double d2 = crossProduct(x2 - x1, y2 - y1, x4 - x1, y4 - y1); + double d3 = crossProduct(x4 - x3, y4 - y3, x1 - x3, y1 - y3); + double d4 = crossProduct(x4 - x3, y4 - y3, x2 - x3, y2 - y3); + + // 如果叉积条件满足,表示有交点 + // d1*d2<0意味着P3、P4分别在L12的两边 + // d3*d4<0意味着P1、P2分别在L34的两边 + // 两个同时满足说明有交点 + if (d1 * d2 < 0 && d3 * d4 < 0) { + double denominator = (y4 - y3) * (x2 - x1) - (x4 - x3) * (y2 - y1); // 不可能为0(平行或共线),因为已经判断相交了 + double t1 = ((x4 - x3) * (y1 - y3) - (y4 - y3) * (x1 - x3)) / denominator; + double x = x1 + t1 * (x2 - x1); + double y = y1 + t1 * (y2 - y1); + return new Object[] {true, new Point(x, y)}; } - // 计算两个向量的叉积 - public static double crossProduct(double x1, double y1, double x2, double y2) { - // >0: (x2,y2)在(x1,y1)的逆时针方向 - // <0: (x2,y2)在(x1,y1)的顺时针方向 - // =0: 平行或共线 - return x1 * y2 - y1 * x2; + // 检查是否起点或终点重合 + if ((x1 == x3 && y1 == y3) || (x1 == x4 && y1 == y4)) { + return new Object[] {true, new Point(x1, y1)}; } - - // 判断两条线段是否相交并计算交点 - // L1包含一个线段的两个端点,L2包含另一个线段的两个端点 - public static Object[] lineIntersection(Point[] L1, Point[] L2) { - double x1 = L1[0].x, y1 = L1[0].y, x2 = L1[1].x, y2 = L1[1].y; - double x3 = L2[0].x, y3 = L2[0].y, x4 = L2[1].x, y4 = L2[1].y; - - // 判断是否相交(叉积) - double d1 = crossProduct(x2 - x1, y2 - y1, x3 - x1, y3 - y1); - double d2 = crossProduct(x2 - x1, y2 - y1, x4 - x1, y4 - y1); - double d3 = crossProduct(x4 - x3, y4 - y3, x1 - x3, y1 - y3); - double d4 = crossProduct(x4 - x3, y4 - y3, x2 - x3, y2 - y3); - - // 如果叉积条件满足,表示有交点 - // d1*d2<0意味着P3、P4分别在L12的两边 - // d3*d4<0意味着P1、P2分别在L34的两边 - // 两个同时满足说明有交点 - if (d1 * d2 < 0 && d3 * d4 < 0) { - double denominator = (y4 - y3) * (x2 - x1) - (x4 - x3) * (y2 - y1); // 不可能为0(平行或共线),因为已经判断相交了 - double t1 = ((x4 - x3) * (y1 - y3) - (y4 - y3) * (x1 - x3)) / denominator; - double x = x1 + t1 * (x2 - x1); - double y = y1 + t1 * (y2 - y1); - return new Object[]{true, new Point(x, y)}; - } - - // 检查是否起点或终点重合 - if ((x1 == x3 && y1 == y3) || (x1 == x4 && y1 == y4)) { - return new Object[]{true, new Point(x1, y1)}; - } - if ((x2 == x3 && y2 == y3) || (x2 == x4 && y2 == y4)) { - return new Object[]{true, new Point(x2, y2)}; - } - - return new Object[]{false, null}; + if ((x2 == x3 && y2 == y3) || (x2 == x4 && y2 == y4)) { + return new Object[] {true, new Point(x2, y2)}; } - // 计算总的多边形面积(通过时间序列扫描交点),默认要求点按照时间戳严格递增排列 - public static double total_areal_displacement(List<Point> points, List<Point> points2, boolean debug) { - double totalArea = 0; - int i = 0, j = 0; // i for points, j for points2 - Point prevIntersection = null; - int prevI = -1, prevJ = -1; - -// List<double[]> intersectionCoords = new ArrayList<>(); - List<Double> areaList = new ArrayList<>(); - - while (i < points.size() - 1 && j < points2.size() - 1) { - if (debug) { - System.out.println("--------- " + i + " " + j + " ------------"); - } - - // 当前线段 - Point[] L1 = {points.get(i), points.get(i + 1)}; - Point[] L2 = {points2.get(j), points2.get(j + 1)}; - - // 判断是否有交点 - Object[] result = lineIntersection(L1, L2); - boolean isIntersect = (boolean) result[0]; - Point intersection = (Point) result[1]; - - if (isIntersect) { -// intersectionCoords.add(intersection); - - if (prevIntersection != null) { - // 构造多边形点集 - List<Point> polygon = new ArrayList<>(); // 按照顺时针/逆时针几何连接顺序枚举多边形的顶点 - polygon.add(prevIntersection); - if (debug) { - System.out.println("- start intersection: " + prevIntersection); - } - polygon.addAll(points.subList(prevI, i + 1)); // 添加当前线段的点,左闭右开 -// polygon.addAll(Arrays.asList(Arrays.copyOfRange(points, prevI, i + 1))); // 添加当前线段的点,左闭右开 - if (debug) { - System.out.println("- one side: " + points.subList(prevI, i + 1)); - } - polygon.add(intersection); - if (debug) { - System.out.println("- end intersection: " + intersection); - } - List<Point> tempPoints2 = points2.subList(prevJ, j + 1); - Collections.reverse(tempPoints2); // 添加另一序列的点 - polygon.addAll(tempPoints2); - if (debug) { - System.out.println("- another side: " + tempPoints2); - } - -// double[][] polygonArray = new double[polygon.size()][2]; -// for (int k = 0; k < polygon.size(); k++) { -// polygonArray[k] = polygon.get(k); -// } - - // 计算多边形面积,注意polygon里的点一定是按照顺时针/逆时针几何连接顺序枚举的多边形顶点 - double area = calculatePolygonArea(polygon); - if (debug) { - System.out.println("Area = " + area); - } - totalArea += area; - areaList.add(area); - } - - prevIntersection = intersection; - prevI = i + 1; - prevJ = j + 1; - if (debug) { - System.out.println("This intersection = " + intersection - + ", next polygon: side1 = " + prevI + ", side2 = " + prevJ); - } - } - - - int currentI = i; // 临时存储i - int currentJ = j; // 临时存储j - if (points.get(currentI + 1).x <= points2.get(currentJ + 1).x) { - i += 1; // 基于时间戳严格递增的假设,Line不会回头或者垂直 - } - if (points.get(currentI + 1).x >= points2.get(currentJ + 1).x) { - j += 1; // 基于时间戳严格递增的假设,Line不会回头或者垂直 - } - } // end of while + return new Object[] {false, null}; + } + + // 计算总的多边形面积(通过时间序列扫描交点),默认要求点按照时间戳严格递增排列 + public static double total_areal_displacement( + List<Point> points, List<Point> points2, boolean debug) { + double totalArea = 0; + int i = 0, j = 0; // i for points, j for points2 + Point prevIntersection = null; + int prevI = -1, prevJ = -1; + + // List<double[]> intersectionCoords = new ArrayList<>(); + List<Double> areaList = new ArrayList<>(); + + while (i < points.size() - 1 && j < points2.size() - 1) { + if (debug) { + System.out.println("--------- " + i + " " + j + " ------------"); + } + + // 当前线段 + Point[] L1 = {points.get(i), points.get(i + 1)}; + Point[] L2 = {points2.get(j), points2.get(j + 1)}; + + // 判断是否有交点 + Object[] result = lineIntersection(L1, L2); + boolean isIntersect = (boolean) result[0]; + Point intersection = (Point) result[1]; + + if (isIntersect) { + // intersectionCoords.add(intersection); + + if (prevIntersection != null) { + // 构造多边形点集 + List<Point> polygon = new ArrayList<>(); // 按照顺时针/逆时针几何连接顺序枚举多边形的顶点 + polygon.add(prevIntersection); + if (debug) { + System.out.println("- start intersection: " + prevIntersection); + } + polygon.addAll(points.subList(prevI, i + 1)); // 添加当前线段的点,左闭右开 + // polygon.addAll(Arrays.asList(Arrays.copyOfRange(points, prevI, i + + // 1))); // 添加当前线段的点,左闭右开 + if (debug) { + System.out.println("- one side: " + points.subList(prevI, i + 1)); + } + polygon.add(intersection); + if (debug) { + System.out.println("- end intersection: " + intersection); + } + List<Point> tempPoints2 = points2.subList(prevJ, j + 1); + Collections.reverse(tempPoints2); // 添加另一序列的点 + polygon.addAll(tempPoints2); + if (debug) { + System.out.println("- another side: " + tempPoints2); + } + + // double[][] polygonArray = new double[polygon.size()][2]; + // for (int k = 0; k < polygon.size(); k++) { + // polygonArray[k] = polygon.get(k); + // } + + // 计算多边形面积,注意polygon里的点一定是按照顺时针/逆时针几何连接顺序枚举的多边形顶点 + double area = calculatePolygonArea(polygon); + if (debug) { + System.out.println("Area = " + area); + } + totalArea += area; + areaList.add(area); + } + prevIntersection = intersection; + prevI = i + 1; + prevJ = j + 1; if (debug) { - System.out.println(areaList); + System.out.println( + "This intersection = " + + intersection + + ", next polygon: side1 = " + + prevI + + ", side2 = " + + prevJ); } - - return totalArea; + } + + int currentI = i; // 临时存储i + int currentJ = j; // 临时存储j + if (points.get(currentI + 1).x <= points2.get(currentJ + 1).x) { + i += 1; // 基于时间戳严格递增的假设,Line不会回头或者垂直 + } + if (points.get(currentI + 1).x >= points2.get(currentJ + 1).x) { + j += 1; // 基于时间戳严格递增的假设,Line不会回头或者垂直 + } + } // end of while + + if (debug) { + System.out.println(areaList); } - // 测试方法 - public static void main(String[] args) { - // 示例数据 - List<Point> points = new ArrayList<>(); - points.add(new Point(0, 0)); - points.add(new Point(1, 2)); - points.add(new Point(2, -20)); - points.add(new Point(3, 0)); - points.add(new Point(4, -1)); - points.add(new Point(5, -1.5)); - points.add(new Point(6, 0)); - - List<Point> points2 = new ArrayList<>(); -// points2.add(points.get(0)); -// points2.add(points.get(3)); -// points2.add(points.get(6)); - points2.add(new Point(1,-10)); - points2.add(new Point(3,0)); - - double area = total_areal_displacement(points, points2, true); - System.out.println("Total area: " + area); - -// points = new ArrayList<>(); -// points.add(new Point(0, 0)); -// points.add(new Point(1, 2)); -// points.add(new Point(1.5, -10)); -// double area = calculatePolygonArea(points); -// System.out.println(area); -// -// Point[] L1 = new Point[2]; -// L1[0] = new Point(1, 2); -// L1[1] = new Point(2, -2); -// Point[] L2 = new Point[2]; -// L2[0] = new Point(0, 0); -// L2[1] = new Point(3, 0); -// Object[] res = lineIntersection(L1, L2); -// System.out.println(res[1]); - } + return totalArea; + } + + // 测试方法 + public static void main(String[] args) { + // 示例数据 + List<Point> points = new ArrayList<>(); + points.add(new Point(0, 0)); + points.add(new Point(1, 2)); + points.add(new Point(2, -20)); + points.add(new Point(3, 0)); + points.add(new Point(4, -1)); + points.add(new Point(5, -1.5)); + points.add(new Point(6, 0)); + + List<Point> points2 = new ArrayList<>(); + // points2.add(points.get(0)); + // points2.add(points.get(3)); + // points2.add(points.get(6)); + points2.add(new Point(1, -10)); + points2.add(new Point(3, 0)); + + double area = total_areal_displacement(points, points2, true); + System.out.println("Total area: " + area); + + // points = new ArrayList<>(); + // points.add(new Point(0, 0)); + // points.add(new Point(1, 2)); + // points.add(new Point(1.5, -10)); + // double area = calculatePolygonArea(points); + // System.out.println(area); + // + // Point[] L1 = new Point[2]; + // L1[0] = new Point(1, 2); + // L1[1] = new Point(2, -2); + // Point[] L2 = new Point[2]; + // L2[0] = new Point(0, 0); + // L2[1] = new Point(3, 0); + // Object[] res = lineIntersection(L1, L2); + // System.out.println(res[1]); + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Triangle.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Triangle.java index 79f7d440c99..b4c52ebfe3f 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Triangle.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Triangle.java @@ -4,46 +4,46 @@ package org.apache.iotdb.db.query.eBUG; // https://github.com/ofZach/Visvalingam-Whyatt/blob/master/src/testApp.cpp public class Triangle { - int[] indices = new int[3]; - double area; - Triangle prev; - Triangle next; - boolean isDeleted; - - public Triangle(int index1, int index2, int index3, double area) { - this.indices[0] = index1; - this.indices[1] = index2; - this.indices[2] = index3; - this.area = area; - this.isDeleted = false; // flag for removal. Avoid using heap.remove(x) as it is O(n) complexity + int[] indices = new int[3]; + double area; + Triangle prev; + Triangle next; + boolean isDeleted; + + public Triangle(int index1, int index2, int index3, double area) { + this.indices[0] = index1; + this.indices[1] = index2; + this.indices[2] = index3; + this.area = area; + this.isDeleted = false; // flag for removal. Avoid using heap.remove(x) as it is O(n) complexity + } + + public Triangle(Triangle oldTri) { + // deep copy and inherit connection + + this.indices[0] = oldTri.indices[0]; + this.indices[1] = oldTri.indices[1]; + this.indices[2] = oldTri.indices[2]; + this.area = oldTri.area; + this.prev = oldTri.prev; + this.next = oldTri.next; + + // TODO important! inherit connection relationship to this new point + if (this.prev != null) { // previous point to this new point + this.prev.next = this; } - - public Triangle(Triangle oldTri) { - // deep copy and inherit connection - - this.indices[0] = oldTri.indices[0]; - this.indices[1] = oldTri.indices[1]; - this.indices[2] = oldTri.indices[2]; - this.area = oldTri.area; - this.prev = oldTri.prev; - this.next = oldTri.next; - - // TODO important! inherit connection relationship to this new point - if (this.prev != null) { // previous point to this new point - this.prev.next = this; - } - if (this.next != null) { // next point to this new point - this.next.prev = this; - } - - this.isDeleted = false; // this new triangle is not deleted + if (this.next != null) { // next point to this new point + this.next.prev = this; } - public boolean isValid() { - return !isDeleted; - } + this.isDeleted = false; // this new triangle is not deleted + } - public void markDeleted() { - this.isDeleted = true; - } + public boolean isValid() { + return !isDeleted; + } + + public void markDeleted() { + this.isDeleted = true; + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java index e177b1dcf20..201f99d2313 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java @@ -27,341 +27,390 @@ import java.util.*; import static org.apache.iotdb.db.query.eBUG.Tool.total_areal_displacement; import static org.apache.iotdb.db.query.eBUG.Tool.triArea; - public class eBUG { - public static List<Point> findEliminated(Polyline lineToSimplify, int pa_idx, int pb_idx) { - // pa: left adjacent non-eliminated point of pi - // pb: right adjacent non-eliminated point of pi - // return a list of points, T'_max{0,k-e}[a:b] order by time in ascending order - - // 性质:最近淘汰的那一个点一定是位于pa~pb之间,因为正是这个点的淘汰才使得pi的significance要更新!至于其他全局更早淘汰的点则不一定位于a:b之间 - // pa,xxx,pi,xxx,pb - // when e=0,遍历点数就是3 - // when e>0,遍历点数大于等于4、小于等于e+3 - - List<Point> res = new ArrayList<>(); - Point start = lineToSimplify.get(pa_idx); - Point end = lineToSimplify.get(pb_idx); -// int cnt = 0; - while (start != end) { // Point类里增加prev&next指针,这样T'_max{0,k-e}里点的连接关系就有了,这样从Pa开始沿着指针,遍历点数一定不超过e+3 - res.add(start); - start = start.next; // when e=0, only traversing three points pa pi pb -// cnt += 1; - } - res.add(end); -// cnt += 1; -// System.out.println(cnt); // 3<=cnt<=e+3 + public static List<Point> findEliminated(Polyline lineToSimplify, int pa_idx, int pb_idx) { + // pa: left adjacent non-eliminated point of pi + // pb: right adjacent non-eliminated point of pi + // return a list of points, T'_max{0,k-e}[a:b] order by time in ascending order + + // 性质:最近淘汰的那一个点一定是位于pa~pb之间,因为正是这个点的淘汰才使得pi的significance要更新!至于其他全局更早淘汰的点则不一定位于a:b之间 + // pa,xxx,pi,xxx,pb + // when e=0,遍历点数就是3 + // when e>0,遍历点数大于等于4、小于等于e+3 + + List<Point> res = new ArrayList<>(); + Point start = lineToSimplify.get(pa_idx); + Point end = lineToSimplify.get(pb_idx); + // int cnt = 0; + while (start + != end) { // Point类里增加prev&next指针,这样T'_max{0,k-e}里点的连接关系就有了,这样从Pa开始沿着指针,遍历点数一定不超过e+3 + res.add(start); + start = start.next; // when e=0, only traversing three points pa pi pb + // cnt += 1; + } + res.add(end); + // cnt += 1; + // System.out.println(cnt); // 3<=cnt<=e+3 + + return res; + } + + public static List<Point> buildEffectiveArea(Polyline lineToSimplify, int e, boolean debug) { + // precomputation mode + return buildEffectiveArea(lineToSimplify, e, debug, 0); + } + + public static List<Point> buildEffectiveArea( + Polyline lineToSimplify, int e, boolean debug, int m) { + if (e < 0) { + throw new IllegalArgumentException("Parameter e must be non-negative."); + } - return res; + if (m > 2) { + System.out.println( + "online sampling mode, " + + "returning " + + m + + " sampled points sorted by time in ascending order"); + } else { + System.out.println( + "offline precomputation mode, " + + "returning each point sorted by dominated significance (DS) in ascending order"); } - public static List<Point> buildEffectiveArea(Polyline lineToSimplify, int e, boolean debug) { - // precomputation mode - return buildEffectiveArea(lineToSimplify, e, debug, 0); + // List<Point> results = lineToSimplify.getVertices(); // 浅复制 + // TODO 预计算结果改成按照bottom-up逐点淘汰顺序(DS递增)排列而不是按照时间戳,这样省去在线时对DS排序的过程 + // add的是Point引用,所以没有多用一倍的空间 + List<Point> resultsBottomUpEliminated = new ArrayList<>(); + + // 存储的是点的引用,这样可以修改原来序列里点的淘汰状态 + // 存储的是距离当前最新状态的滞后的尚未施加、待施加的e个淘汰点 + LinkedList<Point> laggedEliminatedPoints = new LinkedList<>(); + + int total = lineToSimplify.size(); + if (total < 3) { + return lineToSimplify.getVertices(); // 不足 3 个点无法形成三角形 } - public static List<Point> buildEffectiveArea(Polyline lineToSimplify, int e, boolean debug, int m) { - if (e < 0) { - throw new IllegalArgumentException("Parameter e must be non-negative."); - } + int nTriangles = total - 2; + Triangle[] triangles = new Triangle[nTriangles]; + + // 创建所有三角形并计算初始面积 + lineToSimplify.get(0).prev = null; + lineToSimplify.get(0).next = lineToSimplify.get(1); + lineToSimplify.get(total - 1).next = null; + lineToSimplify.get(total - 1).prev = lineToSimplify.get(total - 2); + for (int i = 1; i < total - 1; i++) { + int index1 = i - 1, index2 = i, index3 = i + 1; + double area = + triArea( + lineToSimplify.get(index1), lineToSimplify.get(index2), lineToSimplify.get(index3)); + triangles[i - 1] = new Triangle(index1, index2, index3, area); + + // 初始化点的状态 for eBUG usage + lineToSimplify.get(i).prev = lineToSimplify.get(i - 1); + lineToSimplify.get(i).next = lineToSimplify.get(i + 1); + } - if (m > 2) { - System.out.println("online sampling mode, " + - "returning " + m + " sampled points sorted by time in ascending order"); - } else { - System.out.println("offline precomputation mode, " + - "returning each point sorted by dominated significance (DS) in ascending order"); + // 设置三角形的前后关系 + for (int i = 0; i < nTriangles; i++) { // TODO 这个可以和上一个循环合并吗??好像不可以 + triangles[i].prev = (i == 0 ? null : triangles[i - 1]); + triangles[i].next = (i == nTriangles - 1 ? null : triangles[i + 1]); + } + + // 使用优先队列构建 minHeap + PriorityQueue<Triangle> triangleHeap = + new PriorityQueue<>(Comparator.comparingDouble(t -> t.area)); + Collections.addAll(triangleHeap, triangles); // complexity TODO O(n) or O(nlogn)? + + double previousEA = -1; + // while (!triangleHeap.isEmpty()) { // TODO 注意triangleHeap里装的是non-terminal point对应的三角形 + // TODO 在线采样m个点,也就是说最后留下m-2个non-terminal point + // 注意:triangleHeap里装的包括了标记删除的点!所以triangleHeap.size()不是真正的留下的未被淘汰点数! + // 因此需要用fakeCnt来统计heap里的非真实点数,这样triangleHeap.size()-fakeCnt就是真正的留下的未被淘汰点数 + int remainNonTerminalPoint = Math.max(0, m - 2); + int fakeCnt = 0; + while (triangleHeap.size() - fakeCnt > remainNonTerminalPoint) { + // 注意peek只需要直接访问该位置的元素,不涉及任何重排或堆化操作 + // 而poll是删除堆顶元素,需要重新堆化以维护堆的性质,复杂度是O(logk),k是当前堆的大小 + Triangle tri = triangleHeap.poll(); // O(logn) + + // 如果该三角形已经被删除,跳过. Avoid using heap.remove(x) as it is O(n) complexity + // 而且除了heap里,已经没有其他节点会和它关联了,所有的connections关系已经迁移到新的角色替代节点上 + if (tri.isDeleted) { + fakeCnt--; // 取出了一个被标记删除点 + if (debug) { + System.out.println( + ">>>bottom-up, remaining " + + triangleHeap.size() + + " triangles (including those marked for removal)"); + } + continue; + } + + // 真正的淘汰点 + // 记录最近淘汰的点,注意不要重复记录也就是在上面执行之后再确认淘汰 + if (debug) { + System.out.println("(1) eliminate " + lineToSimplify.get(tri.indices[1])); + } + if (e > 0) { + if (laggedEliminatedPoints.size() == e) { + // 已经有e个滞后淘汰点了,为了把最近淘汰点加入,得先把最老的淘汰点施加上去 + Point removedPoint = laggedEliminatedPoints.removeFirst(); // 取出最早加入的淘汰点 复杂度1 + removedPoint.markEliminated(); // 注意是引用,所以改的内容后面可以看到 + } + laggedEliminatedPoints.addLast(lineToSimplify.get(tri.indices[1])); // 加入最新的淘汰点,滞后在这里先不施加 + } else { // e=0 没有滞后 立即标记删除 T'_k + lineToSimplify.get(tri.indices[1]).markEliminated(); + } + if (debug) { + System.out.println( + "the e most recently eliminated points (lagged):" + laggedEliminatedPoints); + } + + // 更新当前点的重要性(z 轴存储effective area,这是一个单调增的指标) + if (tri.area > previousEA) { + previousEA = tri.area; + } + // results.get(tri.indices[1]).z = previousEA; // dominated significance + lineToSimplify.get(tri.indices[1]).z = previousEA; // dominated significance + resultsBottomUpEliminated.add( + lineToSimplify.get(tri.indices[1])); // TODO add的是Point引用,所以没有多用一倍的空间 + if (debug) { + System.out.println(Arrays.toString(tri.indices) + ", Dominated Sig=" + previousEA); + } + + // 更新相邻三角形 + if (tri.prev != null) { + // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 + + // 1. 处理旧的tri.prev被标记删除的事情(角色替代) + // triangleHeap.remove(tri.prev); // Avoid using heap.remove(x) as it is O(n) complexity! + tri.prev.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只要heap poll到它就跳过就可以 + + Triangle newPre = new Triangle(tri.prev); // deep copy and inherit connection + // tri.prev = newPre; // can omit, because already done by new Triangle(tri.prev) + + // 2. 处理pi被淘汰引起tri.prev被更新的事情 + // 前一个三角形连到后一个三角形 + tri.prev.next = + tri.next; // ATTENTION!!!: 这里的tri.next后面可能会因为处理旧的tri.next被标记删除的事情被换掉!到时候要重新赋值! + tri.prev.indices[2] = tri.indices[2]; + + // e parameter + List<Point> pointsForSig = + findEliminated(lineToSimplify, tri.prev.indices[0], tri.prev.indices[2]); + if (debug) { + System.out.println( + "(2) update point on the left " + lineToSimplify.get(tri.prev.indices[1])); + System.out.println("3<=cnt<=e+3. cnt=" + pointsForSig.size()); + for (Point point : pointsForSig) { + System.out.println("\t" + point); + } + } + List<Point> baseLine = new ArrayList<>(); + baseLine.add(pointsForSig.get(0)); + baseLine.add(pointsForSig.get(pointsForSig.size() - 1)); // 直接连接左右两边最近的未被淘汰的点 + double sig = total_areal_displacement(pointsForSig, baseLine, false); + tri.prev.area = sig; + + if (debug) { + System.out.println("sig=" + sig); + double tmpTri = + triArea( + lineToSimplify.get(tri.prev.indices[0]), + lineToSimplify.get(tri.prev.indices[1]), + lineToSimplify.get(tri.prev.indices[2])); + System.out.println( + "\t" + + "tri=" + + tmpTri + + ", " + + ((tmpTri > sig) ? "over-estimated" : "equal/less-estimated")); } -// List<Point> results = lineToSimplify.getVertices(); // 浅复制 - // TODO 预计算结果改成按照bottom-up逐点淘汰顺序(DS递增)排列而不是按照时间戳,这样省去在线时对DS排序的过程 - // add的是Point引用,所以没有多用一倍的空间 - List<Point> resultsBottomUpEliminated = new ArrayList<>(); + // 重新加入堆 + // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 + // 所以必须通过add来显式重新插入修改后的元素 + triangleHeap.add(tri.prev); // O(logn) 注意加入的是一个新的对象isDeleted=false + fakeCnt++; // 表示heap里多了一个被标记删除的假点 + } - // 存储的是点的引用,这样可以修改原来序列里点的淘汰状态 - // 存储的是距离当前最新状态的滞后的尚未施加、待施加的e个淘汰点 - LinkedList<Point> laggedEliminatedPoints = new LinkedList<>(); + if (tri.next != null) { + // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 - int total = lineToSimplify.size(); - if (total < 3) { - return lineToSimplify.getVertices(); // 不足 3 个点无法形成三角形 - } + // 1. 处理旧的tri.next被标记删除的事情(角色替代) + // triangleHeap.remove(tri.next); // Avoid using heap.remove(x) as it is O(n) complexity + tri.next.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只有poll到它就跳过就可以 - int nTriangles = total - 2; - Triangle[] triangles = new Triangle[nTriangles]; - - // 创建所有三角形并计算初始面积 - lineToSimplify.get(0).prev = null; - lineToSimplify.get(0).next = lineToSimplify.get(1); - lineToSimplify.get(total - 1).next = null; - lineToSimplify.get(total - 1).prev = lineToSimplify.get(total - 2); - for (int i = 1; i < total - 1; i++) { - int index1 = i - 1, index2 = i, index3 = i + 1; - double area = triArea(lineToSimplify.get(index1), lineToSimplify.get(index2), lineToSimplify.get(index3)); - triangles[i - 1] = new Triangle(index1, index2, index3, area); - - // 初始化点的状态 for eBUG usage - lineToSimplify.get(i).prev = lineToSimplify.get(i - 1); - lineToSimplify.get(i).next = lineToSimplify.get(i + 1); - } + Triangle newNext = new Triangle(tri.next); // deep copy and inherit connection + // tri.next = newNext; // omit, because already done by new Triangle(tri.prev) - // 设置三角形的前后关系 - for (int i = 0; i < nTriangles; i++) { // TODO 这个可以和上一个循环合并吗??好像不可以 - triangles[i].prev = (i == 0 ? null : triangles[i - 1]); - triangles[i].next = (i == nTriangles - 1 ? null : triangles[i + 1]); + if (tri.prev != null) { + tri.prev.next = tri.next; // ATTENTION!!!: 这里的tri.next已经被换掉!所以之前的要重新赋值! } - // 使用优先队列构建 minHeap - PriorityQueue<Triangle> triangleHeap = new PriorityQueue<>(Comparator.comparingDouble(t -> t.area)); - Collections.addAll(triangleHeap, triangles); // complexity TODO O(n) or O(nlogn)? - - double previousEA = -1; -// while (!triangleHeap.isEmpty()) { // TODO 注意triangleHeap里装的是non-terminal point对应的三角形 - // TODO 在线采样m个点,也就是说最后留下m-2个non-terminal point - // 注意:triangleHeap里装的包括了标记删除的点!所以triangleHeap.size()不是真正的留下的未被淘汰点数! - // 因此需要用fakeCnt来统计heap里的非真实点数,这样triangleHeap.size()-fakeCnt就是真正的留下的未被淘汰点数 - int remainNonTerminalPoint = Math.max(0, m - 2); - int fakeCnt = 0; - while (triangleHeap.size() - fakeCnt > remainNonTerminalPoint) { - // 注意peek只需要直接访问该位置的元素,不涉及任何重排或堆化操作 - // 而poll是删除堆顶元素,需要重新堆化以维护堆的性质,复杂度是O(logk),k是当前堆的大小 - Triangle tri = triangleHeap.poll(); // O(logn) - - // 如果该三角形已经被删除,跳过. Avoid using heap.remove(x) as it is O(n) complexity - // 而且除了heap里,已经没有其他节点会和它关联了,所有的connections关系已经迁移到新的角色替代节点上 - if (tri.isDeleted) { - fakeCnt--; // 取出了一个被标记删除点 - if (debug) { - System.out.println(">>>bottom-up, remaining " + triangleHeap.size() + " triangles (including those marked for removal)"); - } - continue; - } - - // 真正的淘汰点 - // 记录最近淘汰的点,注意不要重复记录也就是在上面执行之后再确认淘汰 - if (debug) { - System.out.println("(1) eliminate " + lineToSimplify.get(tri.indices[1])); - } - if (e > 0) { - if (laggedEliminatedPoints.size() == e) { - // 已经有e个滞后淘汰点了,为了把最近淘汰点加入,得先把最老的淘汰点施加上去 - Point removedPoint = laggedEliminatedPoints.removeFirst(); // 取出最早加入的淘汰点 复杂度1 - removedPoint.markEliminated(); // 注意是引用,所以改的内容后面可以看到 - } - laggedEliminatedPoints.addLast(lineToSimplify.get(tri.indices[1])); // 加入最新的淘汰点,滞后在这里先不施加 - } else { // e=0 没有滞后 立即标记删除 T'_k - lineToSimplify.get(tri.indices[1]).markEliminated(); - } - if (debug) { - System.out.println("the e most recently eliminated points (lagged):" + laggedEliminatedPoints); - } - - // 更新当前点的重要性(z 轴存储effective area,这是一个单调增的指标) - if (tri.area > previousEA) { - previousEA = tri.area; - } -// results.get(tri.indices[1]).z = previousEA; // dominated significance - lineToSimplify.get(tri.indices[1]).z = previousEA; // dominated significance - resultsBottomUpEliminated.add(lineToSimplify.get(tri.indices[1])); // TODO add的是Point引用,所以没有多用一倍的空间 - if (debug) { - System.out.println(Arrays.toString(tri.indices) + ", Dominated Sig=" + previousEA); - } - - // 更新相邻三角形 - if (tri.prev != null) { - // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 - - // 1. 处理旧的tri.prev被标记删除的事情(角色替代) - // triangleHeap.remove(tri.prev); // Avoid using heap.remove(x) as it is O(n) complexity! - tri.prev.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只要heap poll到它就跳过就可以 - - Triangle newPre = new Triangle(tri.prev); // deep copy and inherit connection - // tri.prev = newPre; // can omit, because already done by new Triangle(tri.prev) - - // 2. 处理pi被淘汰引起tri.prev被更新的事情 - // 前一个三角形连到后一个三角形 - tri.prev.next = tri.next; // ATTENTION!!!: 这里的tri.next后面可能会因为处理旧的tri.next被标记删除的事情被换掉!到时候要重新赋值! - tri.prev.indices[2] = tri.indices[2]; - - // e parameter - List<Point> pointsForSig = findEliminated(lineToSimplify, tri.prev.indices[0], tri.prev.indices[2]); - if (debug) { - System.out.println("(2) update point on the left " + lineToSimplify.get(tri.prev.indices[1])); - System.out.println("3<=cnt<=e+3. cnt=" + pointsForSig.size()); - for (Point point : pointsForSig) { - System.out.println("\t" + point); - } - } - List<Point> baseLine = new ArrayList<>(); - baseLine.add(pointsForSig.get(0)); - baseLine.add(pointsForSig.get(pointsForSig.size() - 1)); // 直接连接左右两边最近的未被淘汰的点 - double sig = total_areal_displacement(pointsForSig, baseLine, false); - tri.prev.area = sig; - - if (debug) { - System.out.println("sig=" + sig); - double tmpTri = triArea(lineToSimplify.get(tri.prev.indices[0]), - lineToSimplify.get(tri.prev.indices[1]), - lineToSimplify.get(tri.prev.indices[2])); - System.out.println("\t" + "tri=" + tmpTri + ", " + ((tmpTri > sig) ? "over-estimated" : "equal/less-estimated")); - } - - // 重新加入堆 - // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 - // 所以必须通过add来显式重新插入修改后的元素 - triangleHeap.add(tri.prev); // O(logn) 注意加入的是一个新的对象isDeleted=false - fakeCnt++; // 表示heap里多了一个被标记删除的假点 - } - - if (tri.next != null) { - // 标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序 - - // 1. 处理旧的tri.next被标记删除的事情(角色替代) - // triangleHeap.remove(tri.next); // Avoid using heap.remove(x) as it is O(n) complexity - tri.next.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只有poll到它就跳过就可以 - - Triangle newNext = new Triangle(tri.next); // deep copy and inherit connection - // tri.next = newNext; // omit, because already done by new Triangle(tri.prev) - - if (tri.prev != null) { - tri.prev.next = tri.next; // ATTENTION!!!: 这里的tri.next已经被换掉!所以之前的要重新赋值! - } - - // 2. 处理pi被淘汰引起tri.next被更新的事情 - tri.next.prev = tri.prev; // 注意此时tri.prev已经是替代后的节点,tri.next也是,从而被标记为废点的前后关联真正砍断 - tri.next.indices[0] = tri.indices[0]; - - // e parameter - List<Point> pointsForSig = findEliminated(lineToSimplify, tri.next.indices[0], tri.next.indices[2]); - if (debug) { - System.out.println("(2) updating point on the right " + lineToSimplify.get(tri.next.indices[1])); - System.out.println("3<=cnt<=e+3. cnt=" + pointsForSig.size()); - for (Point point : pointsForSig) { - System.out.println("\t" + point); - } - } - List<Point> baseLine = new ArrayList<>(); - baseLine.add(pointsForSig.get(0)); - baseLine.add(pointsForSig.get(pointsForSig.size() - 1)); // 直接连接左右两边最近的未被淘汰的点 - double sig = total_areal_displacement(pointsForSig, baseLine, false); - tri.next.area = sig; - - if (debug) { - System.out.println("sig=" + sig); - double tmpTri = triArea(lineToSimplify.get(tri.next.indices[0]), - lineToSimplify.get(tri.next.indices[1]), - lineToSimplify.get(tri.next.indices[2])); - System.out.println("\t" + "tri=" + tmpTri + ", " + ((tmpTri > sig) ? "over-estimated" : "equal/less-estimated")); - } - - // 重新加入堆 - // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 - // 所以必须通过add 来显式重新插入修改后的元素 - triangleHeap.add(tri.next); // 注意加入的是一个新的对象isDeleted=false - fakeCnt++; // 表示heap里多了一个被标记删除的假点 - } - if (debug) { - System.out.println(">>>bottom-up, remaining " + triangleHeap.size() + " triangles (including those marked for removal)"); - } + // 2. 处理pi被淘汰引起tri.next被更新的事情 + tri.next.prev = tri.prev; // 注意此时tri.prev已经是替代后的节点,tri.next也是,从而被标记为废点的前后关联真正砍断 + tri.next.indices[0] = tri.indices[0]; + + // e parameter + List<Point> pointsForSig = + findEliminated(lineToSimplify, tri.next.indices[0], tri.next.indices[2]); + if (debug) { + System.out.println( + "(2) updating point on the right " + lineToSimplify.get(tri.next.indices[1])); + System.out.println("3<=cnt<=e+3. cnt=" + pointsForSig.size()); + for (Point point : pointsForSig) { + System.out.println("\t" + point); + } } - - if (m > 2) { // online sampling mode - // 把滞后的淘汰点施加上去,然后返回在线采样结果(也就是返回剩余未被淘汰的点) - for (Point p : laggedEliminatedPoints) { - p.markEliminated(); - if (debug) { - System.out.println("apply lagged elimination of " + p); - } - } - List<Point> onlineSampled = new ArrayList<>(); - Point start = lineToSimplify.get(0); - Point end = lineToSimplify.get(lineToSimplify.size() - 1); - while (start != end) { // Point类里增加prev&next指针,这样T'_max{0,k-e}里点的连接关系就有了,这样从Pa开始沿着指针,遍历点数一定不超过e+3 - onlineSampled.add(start); // 注意未淘汰的点的Dominated significance尚未赋值,还都是infinity - start = start.next; // when e=0, only traversing three points pa pi pb - } - onlineSampled.add(end); - return onlineSampled; - } else { // offline precomputation mode, for precomputing the dominated significance of each point -// return results; // 注意这就是lineToSimplify.getVertices() - // TODO - resultsBottomUpEliminated.add(lineToSimplify.get(0)); // 全局首点 - resultsBottomUpEliminated.add(lineToSimplify.get(lineToSimplify.size() - 1)); // 全局尾点 - return resultsBottomUpEliminated; + List<Point> baseLine = new ArrayList<>(); + baseLine.add(pointsForSig.get(0)); + baseLine.add(pointsForSig.get(pointsForSig.size() - 1)); // 直接连接左右两边最近的未被淘汰的点 + double sig = total_areal_displacement(pointsForSig, baseLine, false); + tri.next.area = sig; + + if (debug) { + System.out.println("sig=" + sig); + double tmpTri = + triArea( + lineToSimplify.get(tri.next.indices[0]), + lineToSimplify.get(tri.next.indices[1]), + lineToSimplify.get(tri.next.indices[2])); + System.out.println( + "\t" + + "tri=" + + tmpTri + + ", " + + ((tmpTri > sig) ? "over-estimated" : "equal/less-estimated")); } + + // 重新加入堆 + // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序 + // 所以必须通过add 来显式重新插入修改后的元素 + triangleHeap.add(tri.next); // 注意加入的是一个新的对象isDeleted=false + fakeCnt++; // 表示heap里多了一个被标记删除的假点 + } + if (debug) { + System.out.println( + ">>>bottom-up, remaining " + + triangleHeap.size() + + " triangles (including those marked for removal)"); + } } - public static void main(String[] args) { - -// int eParam = 0; -// int[] eParamList = {0, 0, 1, 2, 3, 4, 5, 10, 14, 15, 16, 20, 30, 40, 50, 100, 200, 500, 1000, 2000, 5000, -// 10000, 50000, 10_0000, 50_0000, 100_0000, 200_0000, 300_0000, -// 500_0000, 800_0000, 1000_0000, -// 1500_0000, 2000_0000, 2500_0000, 3000_0000}; - int[] eParamList = {100_0000}; - for (int eParam : eParamList) { - try (PrintWriter writer = new PrintWriter(new File("exp_varyN_e" + eParam + ".csv"))) { - for (int n = 100_0000; n < 2000_0000; n += 200_0000) { // 超过两千万就都变得慢多了,感觉可能是内存有限的原因,而不是算法复杂度 - // TODO 注意 要有一个点是n=300w - - Polyline polyline = new Polyline(); - int seed = 1; - Random rand = new Random(seed); // TODO 注意seed在每轮里重设 - for (int i = 0; i < n; i++) { - double v = rand.nextInt(1000000); - polyline.addVertex(new Point(i, v)); - } - - long startTime = System.currentTimeMillis(); - List<Point> results = buildEffectiveArea(polyline, eParam, false, 0); - long endTime = System.currentTimeMillis(); - - System.out.println("n=" + n + ", e=" + eParam + ", Time taken to reduce points: " + (endTime - startTime) + "ms"); - writer.println(n + "," + eParam + "," + (endTime - startTime)); - System.out.println(results.size()); - -// // clear -// polyline.clear(); -// results.clear(); -// polyline = null; -// results = null; -// System.gc(); - } - } catch (FileNotFoundException e) { - throw new RuntimeException(e); - } + if (m > 2) { // online sampling mode + // 把滞后的淘汰点施加上去,然后返回在线采样结果(也就是返回剩余未被淘汰的点) + for (Point p : laggedEliminatedPoints) { + p.markEliminated(); + if (debug) { + System.out.println("apply lagged elimination of " + p); } - -// int n = 300_0000; -// -// int seed = 1; -// Random rand = new Random(seed); // TODO 注意seed在每轮里重设 -// Polyline polyline = new Polyline(); // 注意:如果是固定n变化e实验,这个数据要一次性生成,不然每次随机不一样 -// for (int i = 0; i < n; i++) { -// double v = rand.nextInt(1000000); -// polyline.addVertex(new Point(i, v)); // point的状态会在buildEffectiveArea里重置的,所以在下面的e遍历里可以复用这一份数据 -// } -// -// try (PrintWriter writer = new PrintWriter(new File("exp_varyE_n" + n + ".csv"))) { -// int[] eParamList = {0, 1, 2, 10, 1000, 10000, 10_0000, 30_0000, 50_0000, 60_0000, 80_0000, 100_0000, 150_0000, 200_0000}; -// for (int eParam : eParamList) { -// eParam *= 3; // TODO 注意 因为n=300w而不是100w -// -// long startTime = System.currentTimeMillis(); -// List<Point> results = buildEffectiveArea(polyline, eParam, false); -// long endTime = System.currentTimeMillis(); -// -// System.out.println("n=" + n + ", e=" + eParam + ", Time taken to reduce points: " + (endTime - startTime) + "ms"); -// writer.println(n + "," + eParam + "," + (endTime - startTime)); -// System.out.println(results.size()); -// -// // clear 注意不能把原始数据清除,还要接着用 -//// System.gc(); -// } -// } catch (FileNotFoundException e) { -// throw new RuntimeException(e); -// } - - System.out.println("finish"); + } + List<Point> onlineSampled = new ArrayList<>(); + Point start = lineToSimplify.get(0); + Point end = lineToSimplify.get(lineToSimplify.size() - 1); + while (start + != end) { // Point类里增加prev&next指针,这样T'_max{0,k-e}里点的连接关系就有了,这样从Pa开始沿着指针,遍历点数一定不超过e+3 + onlineSampled.add(start); // 注意未淘汰的点的Dominated significance尚未赋值,还都是infinity + start = start.next; // when e=0, only traversing three points pa pi pb + } + onlineSampled.add(end); + return onlineSampled; + } else { // offline precomputation mode, for precomputing the dominated significance of each + // point + // return results; // 注意这就是lineToSimplify.getVertices() + // TODO + resultsBottomUpEliminated.add(lineToSimplify.get(0)); // 全局首点 + resultsBottomUpEliminated.add(lineToSimplify.get(lineToSimplify.size() - 1)); // 全局尾点 + return resultsBottomUpEliminated; } + } + + public static void main(String[] args) { + // 实验:预计算耗时关于两个参数e、n的变化规律,是否符合复杂度理论建模 + + // int eParam = 0; + // int[] eParamList = {0, 0, 1, 2, 3, 4, 5, 10, 14, 15, 16, 20, 30, 40, 50, 100, 200, + // 500, 1000, 2000, 5000, + // 10000, 50000, 10_0000, 50_0000, 100_0000, 200_0000, 300_0000, + // 500_0000, 800_0000, 1000_0000, + // 1500_0000, 2000_0000, 2500_0000, 3000_0000}; + int[] eParamList = {100_0000}; + for (int eParam : eParamList) { + try (PrintWriter writer = new PrintWriter(new File("exp_varyN_e" + eParam + ".csv"))) { + for (int n = 100_0000; n < 2000_0000; n += 200_0000) { // 超过两千万就都变得慢多了,感觉可能是内存有限的原因,而不是算法复杂度 + // TODO 注意 要有一个点是n=300w + + Polyline polyline = new Polyline(); + int seed = 1; + Random rand = new Random(seed); // TODO 注意seed在每轮里重设 + for (int i = 0; i < n; i++) { + double v = rand.nextInt(1000000); + polyline.addVertex(new Point(i, v)); + } + + long startTime = System.currentTimeMillis(); + List<Point> results = buildEffectiveArea(polyline, eParam, false, 0); + long endTime = System.currentTimeMillis(); + + System.out.println( + "n=" + + n + + ", e=" + + eParam + + ", Time taken to reduce points: " + + (endTime - startTime) + + "ms"); + writer.println(n + "," + eParam + "," + (endTime - startTime)); + System.out.println(results.size()); + + // // clear + // polyline.clear(); + // results.clear(); + // polyline = null; + // results = null; + // System.gc(); + } + } catch (FileNotFoundException e) { + throw new RuntimeException(e); + } + } + + // int n = 300_0000; + // + // int seed = 1; + // Random rand = new Random(seed); // TODO 注意seed在每轮里重设 + // Polyline polyline = new Polyline(); // 注意:如果是固定n变化e实验,这个数据要一次性生成,不然每次随机不一样 + // for (int i = 0; i < n; i++) { + // double v = rand.nextInt(1000000); + // polyline.addVertex(new Point(i, v)); // + // point的状态会在buildEffectiveArea里重置的,所以在下面的e遍历里可以复用这一份数据 + // } + // + // try (PrintWriter writer = new PrintWriter(new File("exp_varyE_n" + n + ".csv"))) { + // int[] eParamList = {0, 1, 2, 10, 1000, 10000, 10_0000, 30_0000, 50_0000, 60_0000, + // 80_0000, 100_0000, 150_0000, 200_0000}; + // for (int eParam : eParamList) { + // eParam *= 3; // TODO 注意 因为n=300w而不是100w + // + // long startTime = System.currentTimeMillis(); + // List<Point> results = buildEffectiveArea(polyline, eParam, false); + // long endTime = System.currentTimeMillis(); + // + // System.out.println("n=" + n + ", e=" + eParam + ", Time taken to reduce + // points: " + (endTime - startTime) + "ms"); + // writer.println(n + "," + eParam + "," + (endTime - startTime)); + // System.out.println(results.size()); + // + // // clear 注意不能把原始数据清除,还要接着用 + //// System.gc(); + // } + // } catch (FileNotFoundException e) { + // throw new RuntimeException(e); + // } + + System.out.println("finish"); + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG_Build.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG_Build.java new file mode 100644 index 00000000000..94713f71ec8 --- /dev/null +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG_Build.java @@ -0,0 +1,139 @@ +package org.apache.iotdb.db.query.eBUG; + +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Iterator; +import java.util.List; +import java.util.stream.Stream; + +import static org.apache.iotdb.db.query.eBUG.eBUG.buildEffectiveArea; + +public class eBUG_Build { + // 输入一条时间序列 t,v + // 输出按照bottom-up淘汰顺序排列的dominated significance,t,v。 + // 用于后期在线采样时选取倒数m个点(也就是DS最大的m个点,或者最晚淘汰的m个点)作为采样结果(选出之后要自行把这m个点重新按照时间戳x递增排列) + public static void main(String[] args) { + if (args.length < 5) { + System.out.println( + "Usage: Please provide arguments: inputFilePath,hasHeader,timeIdx,valueIdx,eParam,(outDir)"); + } + String input = args[0]; + boolean hasHeader = Boolean.parseBoolean(args[1]); + int timeIdx = Integer.parseInt(args[2]); + int valueIdx = Integer.parseInt(args[3]); + int eParam = Integer.parseInt(args[4]); + String outDir; + if (args.length > 5) { + outDir = args[5]; // 表示输出文件保存到指定的文件夹 + } else { + outDir = null; // 表示输出文件保存到input文件所在的文件夹 + } + String outputFile = generateOutputFileName(input, eParam, outDir); + + // 打印信息供用户确认 + System.out.println("Input file: " + input); + System.out.println("Has header: " + hasHeader); + System.out.println("Time index: " + timeIdx); + System.out.println("Value index: " + valueIdx); + System.out.println("eParam: " + eParam); + System.out.println("outDir: " + outDir); + System.out.println("Output file: " + outputFile); + + // 开始运算 + Polyline polyline = new Polyline(); + + // Using Files.lines() for memory-efficient line-by-line reading + try (Stream<String> lines = Files.lines(Paths.get(input))) { + Iterator<String> iterator = lines.iterator(); + int lineNumber = 0; + + // Skip the header line if necessary + if (hasHeader && iterator.hasNext()) { + iterator.next(); // Skip the header + } + + // Process each line + while (iterator.hasNext()) { + lineNumber++; + String line = iterator.next(); + String[] columns = line.split(","); + + if (columns.length > Math.max(timeIdx, valueIdx)) { + double time; + if (timeIdx < 0) { + time = lineNumber; + } else { + time = Double.parseDouble(columns[timeIdx]); + } + double value = Double.parseDouble(columns[valueIdx]); + polyline.addVertex(new Point(time, value)); + // System.out.println("Line " + lineNumber + " - Time: " + time + ", + // Value: " + value); + } else { + System.out.println("Line " + lineNumber + " is malformed (not enough columns)."); + } + } + } catch (IOException e) { + e.printStackTrace(); + } + + long startTime = System.currentTimeMillis(); + List<Point> results = buildEffectiveArea(polyline, eParam, false); // precomputation mode + long endTime = System.currentTimeMillis(); + + // System.out.println("result point number: " + results.size()); // precomputation + // mode所以自然是等于n个点 + System.out.println( + "n=" + + polyline.getVertices().size() + + ", e=" + + eParam + + ", Time taken to reduce points: " + + (endTime - startTime) + + "ms"); + + // 输出结果到csv,按照z,x,y三列,因为results结果已经按照z(即DS)递增排序,对应bottom-up的淘汰顺序,越小代表越早被淘汰 + try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) { + // 写入表头 + writer.write("z,x,y"); + writer.newLine(); + + // 写入数据行,按顺序 z, x, y + for (Point point : results) { + writer.write(point.z + "," + point.x + "," + point.y); + writer.newLine(); + } + + System.out.println("CSV file written successfully to " + outputFile); + } catch (IOException e) { + e.printStackTrace(); + } + } + + // Method to generate the output file name based on input path + private static String generateOutputFileName(String inputFilePath, int e, String outDir) { + // Get the file name without extension + Path path = Paths.get(inputFilePath); + String fileNameWithoutExtension = path.getFileName().toString(); + String name = fileNameWithoutExtension.substring(0, fileNameWithoutExtension.lastIndexOf('.')); + + // Get the file extension + String extension = + path.getFileName().toString().substring(fileNameWithoutExtension.lastIndexOf('.')); + + // Create output file path by appending '-ds' before the extension + String outputFile = name + "-ds-e" + e + extension; + + if (outDir == null) { // 表示使用input文件所在的文件夹 + // Handle path compatibility for different operating systems (Linux/Windows) + return path.getParent().resolve(outputFile).toString(); + } else { + Path out = Paths.get(outDir); + return out.resolve(outputFile).toString(); + } + } +}
