This is an automated email from the ASF dual-hosted git repository. leirui pushed a commit to branch research/area-visualization in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 1f328cbe744f4c10751adafd38c67d68cd8a8875 Author: Lei Rui <[email protected]> AuthorDate: Wed Jan 22 21:59:37 2025 +0800 before --- .../org/apache/iotdb/db/query/eBUG/Polyline.java | 4 + .../java/org/apache/iotdb/db/query/eBUG/Tmp.java | 41 +++++ .../java/org/apache/iotdb/db/query/eBUG/eBUG.java | 185 ++++++--------------- 3 files changed, 97 insertions(+), 133 deletions(-) diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Polyline.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Polyline.java index 9fa11b192cb..bed14731832 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Polyline.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Polyline.java @@ -24,4 +24,8 @@ public class Polyline { public Point get(int index) { return vertices.get(index); } + + public void clear() { + vertices.clear(); + } } diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tmp.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tmp.java new file mode 100644 index 00000000000..ad95ee6ec51 --- /dev/null +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Tmp.java @@ -0,0 +1,41 @@ +package org.apache.iotdb.db.query.eBUG; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintWriter; +import java.util.Comparator; +import java.util.PriorityQueue; +import java.util.Random; + +public class Tmp { + public static void main(String[] args) { + int seed = 10; + Random rand = new Random(seed); + int eParam = 1; + try (PrintWriter writer = new PrintWriter(new File("tmp.csv"))) { + for (int n = 100_0000; n < 100000_0000; n += 5000_0000) { // 超过两千万就都变得慢多了,感觉可能是内存有限的原因,而不是算法复杂度 +// for (int n = 19000000; n < 3000_0000; n +=200_0000) { + PriorityQueue<Point> heap = new PriorityQueue<>(Comparator.comparingDouble(t -> t.y)); + for (int i = 0; i < n; i++) { + double v = rand.nextInt(1000000); + heap.add(new Point(i, v)); + } + + long startTime = System.currentTimeMillis(); + long sum = 0; + while (!heap.isEmpty()) { + Point p = heap.poll(); + sum += p.x; + } + long endTime = System.currentTimeMillis(); + System.out.println(n + ", Time taken to reduce points: " + (endTime - startTime) + "ms"); + writer.println(n + "," + "0" + "," + (endTime - startTime)); + System.out.println(sum); + } + } catch (FileNotFoundException e) { + throw new RuntimeException(e); + } + + System.out.println("finish"); + } +} diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java index 8c4d41f37f0..77e21a27067 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java @@ -49,23 +49,17 @@ public class eBUG { List<Point> res = new ArrayList<>(); -// if (recentEliminated.length == 0) { // e=0 -// System.out.println("e=0"); -// res.add(pa); -// res.add(pi); -// res.add(pb); -// return res; // pa pi pb已经按照时间戳递增 -// } - + // TODO note: here may happen because e=c=0 or just 0<c<=e if (recentEliminated.length >= (pb_idx - pa_idx - 2)) { // e >= the total number of eliminated points in this region // System.out.println("[c=(b-a-2)]<=e, use T directly"); // worst case下, k=c res = lineToSimplify.getVertices().subList(pa_idx, pb_idx + 1); // 左闭右开 return res; // 已经按照时间戳递增 } + // TODO note: here may happen because c>e=0 or c>e>0. the latter needs search among recentEliminated and sort by timestamp // 从最近淘汰的e个点里寻找位于pa~pb之间的点,找到的有可能小于e个点 // System.out.println("k>=[c=(b-a-2)]>e, search among e"); - res.add(pa); + res.add(pa); // 注意pa pi pb已经按照时间戳递增 res.add(pi); res.add(pb); @@ -81,7 +75,9 @@ public class eBUG { } // 按照时间戳递增排序,这是为了后面计算total areal displacement需要 - res.sort(Comparator.comparingDouble(p -> p.x)); // 按时间戳排序 这样的话时间复杂度变成e*log(e) + if (recentEliminated.length > 0) { // 否则e=0,不需要排序pa pi pb三个点 + res.sort(Comparator.comparingDouble(p -> p.x)); // 按时间戳排序 这样的话时间复杂度变成e*log(e) + } return res; } @@ -168,7 +164,7 @@ public class eBUG { tri.prev.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只要heap poll到它就跳过就可以 Triangle newPre = new Triangle(tri.prev); // deep copy and inherit connection -// tri.prev = newPre; // can omit, because already done by new Triangle(tri.prev) + // tri.prev = newPre; // can omit, because already done by new Triangle(tri.prev) // 2. 处理pi被淘汰引起tri.prev被更新的事情 // 前一个三角形连到后一个三角形 @@ -214,7 +210,7 @@ public class eBUG { tri.next.markDeleted(); // O(1) 这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只有poll到它就跳过就可以 Triangle newNext = new Triangle(tri.next); // deep copy and inherit connection -// tri.next = newNext; // omit, because already done by new Triangle(tri.prev) + // tri.next = newNext; // omit, because already done by new Triangle(tri.prev) if (tri.prev != null) { tri.prev.next = tri.next; // ATTENTION!!!: 这里的tri.next已经被换掉!所以之前的要重新赋值! @@ -262,145 +258,68 @@ public class eBUG { } public static void main(String[] args) { - Polyline polyline = new Polyline(); - List<Polyline> polylineList = new ArrayList<>(); - Random rand = new Random(1); - int n = 1000_0000; - - int p = 10; - for (int i = 0; i < n; i += p) { - Polyline polylineBatch = new Polyline(); - for (int j = i; j < Math.min(i + p, n); j++) { - double v = rand.nextInt(1000000); - - polyline.addVertex(new Point(j, v)); - - polylineBatch.addVertex(new Point(j, v)); - } - polylineList.add(polylineBatch); - } -// try (FileWriter writer = new FileWriter("raw.csv")) { -// // 写入CSV头部 -// writer.append("x,y,z\n"); -// -// // 写入每个点的数据 -// for (int i = 0; i < polyline.size(); i++) { -// Point point = polyline.get(i); -// writer.append(point.x + "," + point.y + "," + point.z + "\n"); -// } -// System.out.println("Data has been written"); -// } catch (IOException e) { -// System.out.println("Error writing to CSV file: " + e.getMessage()); -// } + int eParam = 2000_0000; + try (PrintWriter writer = new PrintWriter(new File("exp_varyN_e" + eParam + ".csv"))) { + for (int n = 100_0000; n < 2000_0000; n += 200_0000) { // 超过两千万就都变得慢多了,感觉可能是内存有限的原因,而不是算法复杂度 + // TODO 注意 要有一个点是n=300w + + Polyline polyline = new Polyline(); + int seed = 1; + Random rand = new Random(seed); // TODO 注意seed在每轮里重设 + for (int i = 0; i < n; i++) { + double v = rand.nextInt(1000000); + polyline.addVertex(new Point(i, v)); + } - System.out.println("---------------------------------"); -// List<Point> results = new ArrayList<>(); - // 计算运行时间 -// int eParam = 10; - try (PrintWriter writer = new PrintWriter(new File("exp.csv"))) { -// int[] eParamList = {0, 1, 100, 500, 1000, 5000, 10000, 50000, 10_0000, 30_0000, 50_0000, 60_0000, -// 70_0000, 80_0000, 90_0000, 100_0000, 150_0000, 200_0000, 250_0000, 300_0000}; - for (int eParam = 0; eParam < n*1.5; eParam += 2_0000) { -// for (int eParam : eParamList) { long startTime = System.currentTimeMillis(); List<Point> results = buildEffectiveArea(polyline, eParam, false); - // 输出结果 long endTime = System.currentTimeMillis(); + System.out.println("n=" + n + ", e=" + eParam + ", Time taken to reduce points: " + (endTime - startTime) + "ms"); - System.out.println(results.size()); writer.println(n + "," + eParam + "," + (endTime - startTime)); + System.out.println(results.size()); + + // clear + polyline.clear(); + results.clear(); + polyline = null; + results = null; + System.gc(); } } catch (FileNotFoundException e) { throw new RuntimeException(e); } -// if (results.size() <= 100) { -// System.out.println("+++++++++++++++++++"); -// for (int i = 0; i < results.size(); i++) { -// Point point = results.get(i); -// System.out.println("Point: (" + point.x + ", " + point.y + ", " + point.z + ")"); -// } +// int n = 300_0000; +// int seed = 1; +// Random rand = new Random(seed); // TODO 注意seed在每轮里重设 +// Polyline polyline = new Polyline(); // 注意:如果是固定n变化e实验,这个数据要一次性生成,不然每次随机不一样 +// for (int i = 0; i < n; i++) { +// double v = rand.nextInt(1000000); +// polyline.addVertex(new Point(i, v)); // } - -// try (FileWriter writer = new FileWriter("fast.csv")) { -// // 写入CSV头部 -// writer.append("x,y,z\n"); // -// // 写入每个点的数据 -// for (int i = 0; i < results.size(); i++) { -// Point point = results.get(i); -// writer.append(point.x + "," + point.y + "," + point.z + "\n"); -// } -// System.out.println("Data has been written"); -// } catch (IOException e) { -// System.out.println("Error writing to CSV file: " + e.getMessage()); -// } - -// System.out.println("---------------------------------"); -// List<List<Point>> resultsBatchList = new ArrayList<>(); -// // 计算运行时间 -// int cnt = 0; -// startTime = System.currentTimeMillis(); -// for (Polyline polylineBatch : polylineList) { -// List<Point> resultsBatch = new ArrayList<>(); -// buildEffectiveArea(polylineBatch, resultsBatch); -// cnt += resultsBatch.size(); -// resultsBatchList.add(resultsBatch); -// } -// // 输出结果 -// endTime = System.currentTimeMillis(); -// System.out.println("Time taken to reduce points: " + (endTime - startTime) + "ms"); -// System.out.println(cnt); +// try (PrintWriter writer = new PrintWriter(new File("exp_varyE_n" + n + ".csv"))) { +// int[] eParamList = {0, 1, 2, 10, 1000, 10000, 10_0000, 30_0000, 50_0000, 60_0000, 80_0000, 100_0000, 150_0000, 200_0000}; +// for (int eParam : eParamList) { +// eParam *= 3; // TODO 注意 因为n=300w而不是100w // -// System.out.println("---------------------------------"); -// // 使用 Stream API 合并所有列表 -// List<Point> mergedList = resultsBatchList.stream().flatMap(List::stream).collect(Collectors.toList()); -// int sameCnt = 0; -// for (int i = 0; i < mergedList.size(); i++) { -// if (mergedList.get(i).z == results.get(i).z) { -// sameCnt += 1; -// } -// } -// System.out.println("sameCnt=" + sameCnt + ", percent=" + sameCnt * 1.0 / mergedList.size()); +// long startTime = System.currentTimeMillis(); +// List<Point> results = buildEffectiveArea(polyline, eParam, false); +// long endTime = System.currentTimeMillis(); // -// try (FileWriter writer = new FileWriter("batch.csv")) { -// // 写入CSV头部 -// writer.append("x,y,z\n"); +// System.out.println("n=" + n + ", e=" + eParam + ", Time taken to reduce points: " + (endTime - startTime) + "ms"); +// writer.println(n + "," + eParam + "," + (endTime - startTime)); +// System.out.println(results.size()); // -// // 写入每个点的数据 -// for (int i = 0; i < mergedList.size(); i++) { -// Point point = mergedList.get(i); -// writer.append(point.x + "," + point.y + "," + point.z + "\n"); +// // clear 注意不能把原始数据清除,还要接着用 +// System.gc(); // } -// System.out.println("Data has been written"); -// } catch (IOException e) { -// System.out.println("Error writing to CSV file: " + e.getMessage()); +// } catch (FileNotFoundException e) { +// throw new RuntimeException(e); // } - } - // float[] vlist = new float[]{11346, 33839, 35469, 23108, 22812, 5519, 5526, 4865, 5842, - // 23089}; - // for (int i = 0; i < vlist.length; i++) { - // polyline.addVertex(new vPoint(i, vlist[i])); - // } - - // ArrayList<Double> v = new ArrayList<>(); - // String filePath = "D://desktop//数据集//New York Stock Exchange//merged_prices.csv"; - // try (BufferedReader br = new BufferedReader(new FileReader(filePath))) { - // String line; - // int count = 0; - // while ((line = br.readLine()) != null && count < 1000) { - // String[] columns = line.split(","); // 假设 CSV 文件以逗号分隔 - // v.add(Double.parseDouble(columns[0])); // 读取第一列数据 - // count++; - // } - // } catch (Exception e) { - // e.printStackTrace(); - // } - // // 打印数据长度 - // System.out.println("Data length: " + v.size()); - // for (int i = 0; i < v.size(); i++) { - // polyline.addVertex(new vPoint(i, v.get(i))); - // } + System.out.println("finish"); + } }
