This is an automated email from the ASF dual-hosted git repository. hui pushed a commit to branch research/encoding-reorder in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 95c9f4e1f853266b06a54152c50fa483787d6d98 Author: FrankHWD <[email protected]> AuthorDate: Wed Nov 22 14:31:08 2023 +0800 Add setting. --- .../apache/iotdb/tsfile/encoding/EncodeTest.java | 8 +++ .../tsfile/encoding/KernelDensityEstimation.java | 82 ++++++++++++++++++++++ .../iotdb/tsfile/encoding/REGERDoubleTest.java | 25 ++++++- .../iotdb/tsfile/encoding/REGERFloatTest.java | 30 ++++++++ .../apache/iotdb/tsfile/encoding/REGERTest.java | 18 +++-- 5 files changed, 158 insertions(+), 5 deletions(-) diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java index 0b849cc63de..b0cfe011499 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java @@ -37,6 +37,10 @@ public class EncodeTest { String output_parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/sota_ratio"; String input_parent_dir = parent_dir + "trans_data/"; +// String parent_dir = "E:\\encoding-reorder-icde\\vldb\\iotdb_datasets_lists\\"; +// String output_parent_dir = "E:\\encoding-reorder-icde\\compression_ratio\\sota_ratio"; +// String input_parent_dir = parent_dir; + ArrayList<String> input_path_list = new ArrayList<>(); ArrayList<String> output_path_list = new ArrayList<>(); ArrayList<String> dataset_name = new ArrayList<>(); @@ -54,6 +58,8 @@ public class EncodeTest { dataset_name.add("TH-Climate"); dataset_name.add("TY-Transport"); dataset_name.add("EPM-Education"); + dataset_name.add("FANYP-Sensors"); + dataset_name.add("TRAJET-Transport"); for (int i = 0; i < dataset_name.size(); i++) { input_path_list.add(input_parent_dir + dataset_name.get(i)); @@ -82,6 +88,8 @@ public class EncodeTest { // dataset_block_size.add(64); output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 // dataset_block_size.add(256); + output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 + output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 // for(int file_i=3;file_i<4;file_i++){ for (int file_i = 0; file_i < input_path_list.size(); file_i++) { diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java index f17e224258b..429b95ccf8a 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java @@ -1,5 +1,16 @@ package org.apache.iotdb.tsfile.encoding; +import com.csvreader.CsvReader; +import com.csvreader.CsvWriter; +import org.apache.iotdb.tsfile.read.filter.operator.In; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -183,5 +194,76 @@ public class KernelDensityEstimation { // // 高斯核函数 // return Math.exp(-0.5 * Math.pow((x - xi) / bandwidth, 2)) / Math.sqrt(2 * Math.PI); // } + + public static void calculate(int[] data, int block_size) { + Map<Integer, Integer> data_map = new HashMap<>(); + int[] ts_block; + int[] third_value; + ts_block = new int[block_size]; + int i = 0; + int min_value = Integer.MAX_VALUE; + for (int j = 0; j < block_size; j++) { + ts_block[j] = data[j + i * block_size]; + if(ts_block[j]<min_value){ + min_value = ts_block[j]; + } + if(data_map.containsKey(ts_block[j])){ + int tmp = data_map.get(ts_block[j]); + tmp++; + data_map.put(ts_block[j],tmp); + }else{ + data_map.put(ts_block[j],1); + } + } + double[] kernelDensity = calculateKernelDensity(data_map); + third_value= findMinIndex(kernelDensity); +// for(int j=0;j<third_value.length;j++){ +// third_value[j] += min_value; +// } + System.out.println("Minimum point: x=" + (Arrays.toString(third_value))); + } + + @Test + public void CalParameter() throws IOException { + String input_parent_dir = "E:\\encoding-reorder-icde\\vldb\\iotdb_datasets_lists\\"; + ArrayList<String> input_path_list = new ArrayList<>(); + ArrayList<String> dataset_name = new ArrayList<>(); + + //dataset_name.add("FANYP-Sensors"); + dataset_name.add("TRAJET-Transport"); + for (String value : dataset_name) { + input_path_list.add(input_parent_dir + value); + } + + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { + String inputPath = input_path_list.get(file_i); + File file = new File(inputPath); + File[] tempList = file.listFiles(); + assert tempList != null; + + for (File f : tempList) { + System.out.println(f); + InputStream inputStream = Files.newInputStream(f.toPath()); + CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); + ArrayList<ArrayList<Integer>> data = new ArrayList<>(); + + loader.readHeaders(); + while (loader.readRecord()) { + ArrayList<Integer> tmp = new ArrayList<>(); + //tmp.add(Integer.valueOf(loader.getValues()[0])); + tmp.add(0); + tmp.add(Integer.valueOf(loader.getValues()[1])); + data.add(tmp); + } + inputStream.close(); + + int[] data_arr = new int[data.size()]; + for (int i = 0; i < data.size(); i++) { + data_arr[i] = data.get(i).get(1); + } + calculate(data_arr, 128); + } + } + } } diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERDoubleTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERDoubleTest.java index fb51b459eb1..818e0c8c3a1 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERDoubleTest.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERDoubleTest.java @@ -2892,6 +2892,8 @@ public class REGERDoubleTest { dataset_name.add("TH-Climate"); dataset_name.add("TY-Transport"); dataset_name.add("EPM-Education"); + dataset_name.add("FANYP-Sensors"); + dataset_name.add("TRAJET-Transport"); int[] dataset_0 = {547, 2816}; int[] dataset_1 = {1719, 3731}; @@ -2905,6 +2907,8 @@ public class REGERDoubleTest { int[] dataset_9 = {474, 678}; int[] dataset_10 = {4, 30, 38, 49, 58}; int[] dataset_11 = {5182, 8206}; + int[] dataset_12 = {0}; + int[] dataset_13 = {0}; dataset_third.add(dataset_0); dataset_third.add(dataset_1); @@ -2918,6 +2922,8 @@ public class REGERDoubleTest { dataset_third.add(dataset_9); dataset_third.add(dataset_10); dataset_third.add(dataset_11); + dataset_third.add(dataset_12); + dataset_third.add(dataset_13); for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); @@ -2948,7 +2954,8 @@ public class REGERDoubleTest { // dataset_block_size.add(64); output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 // dataset_block_size.add(256); - + output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 + output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 for (int file_i = 0; file_i < input_path_list.size(); file_i++) { // for (int file_i = 2; file_i < 3; file_i++) { @@ -3090,6 +3097,8 @@ public class REGERDoubleTest { dataset_name.add("TH-Climate"); dataset_name.add("TY-Transport"); dataset_name.add("EPM-Education"); + dataset_name.add("FANYP-Sensors"); + dataset_name.add("TRAJET-Transport"); int[] dataset_0 = {547, 2816}; int[] dataset_1 = {1719, 3731}; @@ -3103,6 +3112,8 @@ public class REGERDoubleTest { int[] dataset_9 = {474, 678}; int[] dataset_10 = {4, 30, 38, 49, 58}; int[] dataset_11 = {5182, 8206}; + int[] dataset_12 = {0}; + int[] dataset_13 = {0}; dataset_third.add(dataset_0); dataset_third.add(dataset_1); @@ -3116,6 +3127,8 @@ public class REGERDoubleTest { dataset_third.add(dataset_9); dataset_third.add(dataset_10); dataset_third.add(dataset_11); + dataset_third.add(dataset_12); + dataset_third.add(dataset_13); for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); @@ -3147,6 +3160,8 @@ public class REGERDoubleTest { // dataset_block_size.add(512); output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv"); // 11 // dataset_block_size.add(512); + output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 + output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 // int[] file_lists = {0,2,11}; // for (int file_i : file_lists) { @@ -3289,6 +3304,8 @@ public class REGERDoubleTest { dataset_name.add("TH-Climate"); dataset_name.add("TY-Transport"); dataset_name.add("EPM-Education"); + dataset_name.add("FANYP-Sensors"); + dataset_name.add("TRAJET-Transport"); int[] dataset_0 = {547, 2816}; int[] dataset_1 = {1719, 3731}; @@ -3302,6 +3319,8 @@ public class REGERDoubleTest { int[] dataset_9 = {474, 678}; int[] dataset_10 = {4, 30, 38, 49, 58}; int[] dataset_11 = {5182, 8206}; + int[] dataset_12 = {0}; + int[] dataset_13 = {0}; dataset_third.add(dataset_0); dataset_third.add(dataset_1); @@ -3315,6 +3334,8 @@ public class REGERDoubleTest { dataset_third.add(dataset_9); dataset_third.add(dataset_10); dataset_third.add(dataset_11); + dataset_third.add(dataset_12); + dataset_third.add(dataset_13); for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); @@ -3347,6 +3368,8 @@ public class REGERDoubleTest { // dataset_block_size.add(512); output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv"); // 11 // dataset_block_size.add(512); + output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 + output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 for (int file_i = 0; file_i < input_path_list.size(); file_i++) { // for (int file_i = 0; file_i < 1; file_i++) { diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERFloatTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERFloatTest.java index 79d05f84d9a..caca32fccd4 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERFloatTest.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERFloatTest.java @@ -3076,6 +3076,8 @@ public class REGERFloatTest { dataset_name.add("TH-Climate"); dataset_name.add("TY-Transport"); dataset_name.add("EPM-Education"); + dataset_name.add("FANYP-Sensors"); + dataset_name.add("TRAJET-Transport"); int[] dataset_0 = {547, 2816}; int[] dataset_1 = {1719, 3731}; @@ -3089,6 +3091,8 @@ public class REGERFloatTest { int[] dataset_9 = {474, 678}; int[] dataset_10 = {4, 30, 38, 49, 58}; int[] dataset_11 = {5182, 8206}; + int[] dataset_12 = {0}; + int[] dataset_13 = {0}; dataset_third.add(dataset_0); dataset_third.add(dataset_1); @@ -3102,6 +3106,8 @@ public class REGERFloatTest { dataset_third.add(dataset_9); dataset_third.add(dataset_10); dataset_third.add(dataset_11); + dataset_third.add(dataset_12); + dataset_third.add(dataset_13); for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); @@ -3132,6 +3138,8 @@ public class REGERFloatTest { // dataset_block_size.add(64); output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 // dataset_block_size.add(256); + output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 + output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 for (int file_i = 0; file_i < input_path_list.size(); file_i++) { @@ -3275,6 +3283,8 @@ public class REGERFloatTest { dataset_name.add("TH-Climate"); dataset_name.add("TY-Transport"); dataset_name.add("EPM-Education"); + dataset_name.add("FANYP-Sensors"); + dataset_name.add("TRAJET-Transport"); int[] dataset_0 = {547, 2816}; int[] dataset_1 = {1719, 3731}; @@ -3288,6 +3298,8 @@ public class REGERFloatTest { int[] dataset_9 = {474, 678}; int[] dataset_10 = {4, 30, 38, 49, 58}; int[] dataset_11 = {5182, 8206}; + int[] dataset_12 = {0}; + int[] dataset_13 = {0}; dataset_third.add(dataset_0); dataset_third.add(dataset_1); @@ -3301,6 +3313,8 @@ public class REGERFloatTest { dataset_third.add(dataset_9); dataset_third.add(dataset_10); dataset_third.add(dataset_11); + dataset_third.add(dataset_12); + dataset_third.add(dataset_13); for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); @@ -3318,6 +3332,8 @@ public class REGERFloatTest { output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv"); // 9 output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv"); // 10 output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv"); // 11 + output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 + output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 // int[] file_lists = {0,2,11}; // for (int file_i : file_lists) { @@ -3446,6 +3462,8 @@ public class REGERFloatTest { dataset_name.add("TH-Climate"); dataset_name.add("TY-Transport"); dataset_name.add("EPM-Education"); + dataset_name.add("FANYP-Sensors"); + dataset_name.add("TRAJET-Transport"); int[] dataset_0 = {547, 2816}; int[] dataset_1 = {1719, 3731}; @@ -3459,6 +3477,8 @@ public class REGERFloatTest { int[] dataset_9 = {474, 678}; int[] dataset_10 = {4, 30, 38, 49, 58}; int[] dataset_11 = {5182, 8206}; + int[] dataset_12 = {0}; + int[] dataset_13 = {0}; dataset_third.add(dataset_0); dataset_third.add(dataset_1); @@ -3472,6 +3492,8 @@ public class REGERFloatTest { dataset_third.add(dataset_9); dataset_third.add(dataset_10); dataset_third.add(dataset_11); + dataset_third.add(dataset_12); + dataset_third.add(dataset_13); for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); @@ -3504,6 +3526,8 @@ public class REGERFloatTest { // dataset_block_size.add(512); output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv"); // 11 // dataset_block_size.add(512); + output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 + output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 for (int file_i = 0; file_i < input_path_list.size(); file_i++) { // for (int file_i = 0; file_i < 1; file_i++) { @@ -3625,6 +3649,8 @@ public class REGERFloatTest { dataset_name.add("TH-Climate"); dataset_name.add("TY-Transport"); dataset_name.add("EPM-Education"); + dataset_name.add("FANYP-Sensors"); + dataset_name.add("TRAJET-Transport"); int[] dataset_0 = {547, 2816}; int[] dataset_1 = {1719, 3731}; @@ -3638,6 +3664,8 @@ public class REGERFloatTest { int[] dataset_9 = {474, 678}; int[] dataset_10 = {4, 30, 38, 49, 58}; int[] dataset_11 = {5182, 8206}; + int[] dataset_12 = {0}; + int[] dataset_13 = {0}; dataset_third.add(dataset_0); dataset_third.add(dataset_1); @@ -3651,6 +3679,8 @@ public class REGERFloatTest { dataset_third.add(dataset_9); dataset_third.add(dataset_10); dataset_third.add(dataset_11); + dataset_third.add(dataset_12); + dataset_third.add(dataset_13); for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERTest.java index 57ad43e2f80..9288a7bb56f 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERTest.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERTest.java @@ -9,9 +9,7 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Objects; +import java.util.*; import static java.lang.Math.abs; @@ -2988,7 +2986,11 @@ public class REGERTest { String output_parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/reger"; +// String parent_dir = "E:\\encoding-reorder-icde\\vldb\\iotdb_datasets_lists\\"; +// String output_parent_dir = "E:\\encoding-reorder-icde\\compression_ratio\\block_size"; + String input_parent_dir = parent_dir + "trans_data/"; + //String input_parent_dir = parent_dir; ArrayList<String> input_path_list = new ArrayList<>(); ArrayList<String> output_path_list = new ArrayList<>(); ArrayList<String> dataset_name = new ArrayList<>(); @@ -3007,6 +3009,8 @@ public class REGERTest { dataset_name.add("TH-Climate"); dataset_name.add("TY-Transport"); dataset_name.add("EPM-Education"); + dataset_name.add("FANYP-Sensors"); + dataset_name.add("TRAJET-Transport"); int[] dataset_0 = {547, 2816}; int[] dataset_1 = {1719, 3731}; @@ -3020,6 +3024,8 @@ public class REGERTest { int[] dataset_9 = {474, 678}; int[] dataset_10 = {4, 30, 38, 49, 58}; int[] dataset_11 = {5182, 8206}; + int[] dataset_12 = {0}; + int[] dataset_13 = {0}; dataset_third.add(dataset_0); dataset_third.add(dataset_1); @@ -3033,6 +3039,8 @@ public class REGERTest { dataset_third.add(dataset_9); dataset_third.add(dataset_10); dataset_third.add(dataset_11); + dataset_third.add(dataset_12); + dataset_third.add(dataset_13); for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); @@ -3063,6 +3071,8 @@ public class REGERTest { // dataset_block_size.add(64); output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv"); // 11 // dataset_block_size.add(256); + output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 + output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 for (int file_i = 0; file_i < input_path_list.size(); file_i++) { // for (int file_i = 0; file_i < 1; file_i++) { @@ -3118,7 +3128,7 @@ public class REGERTest { long decodeTime = 0; double ratio = 0; double compressed_size = 0; - int repeatTime2 = 100; + int repeatTime2 = 1; long s = System.nanoTime(); int length = 0; for (int repeat = 0; repeat < repeatTime2; repeat++)
