This is an automated email from the ASF dual-hosted git repository.

hui pushed a commit to branch research/encoding-reorder
in repository https://gitbox.apache.org/repos/asf/iotdb.git

commit f3c12667bed347c19057e687b5ebfb93f2356368
Author: xjz17 <[email protected]>
AuthorDate: Tue Nov 14 23:43:04 2023 +0800

    update
---
 .../apache/iotdb/tsfile/encoding/EncodeTest.java   |  68 +++---
 .../tsfile/encoding/KernelDensityEstimation.java   | 240 ++++++++++++---------
 .../tsfile/encoding/REGERCompress1ArrayTest.java   |   2 +-
 3 files changed, 172 insertions(+), 138 deletions(-)

diff --git 
a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java
 
b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java
index 8a9b1588729..ba564045dd9 100644
--- 
a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java
+++ 
b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java
@@ -30,9 +30,13 @@ public class EncodeTest {
     // 
"C:\\Users\\xiaoj\\Documents\\GitHub\\encoding-reorder\\vldb\\compression_ratio\\sota_ratio";
     //
     // String parent_dir = "C:\\Users\\Jinnsjao 
Shawl\\Documents\\GitHub\\encoding-reorder\\";
-    String parent_dir = "E:\\vldb-reorder\\encoding-reorder\\";
-    String output_parent_dir = parent_dir + 
"vldb\\compression_ratio\\sota_ratio";
-    String input_parent_dir = parent_dir + "reorder\\iotdb_test_small\\";
+//    String parent_dir = "E:\\vldb-reorder\\encoding-reorder\\";
+//    String output_parent_dir = parent_dir + 
"vldb\\compression_ratio\\sota_ratio";
+
+    String parent_dir = 
"/Users/xiaojinzhao/Documents/GitHub/iotdb/iotdb-core/tsfile/src/test/resources/";
+    String output_parent_dir = 
"/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/reger_remove_value";
+    String input_parent_dir = parent_dir + "trans_data/";
+
     ArrayList<String> input_path_list = new ArrayList<>();
     ArrayList<String> output_path_list = new ArrayList<>();
     ArrayList<String> dataset_name = new ArrayList<>();
@@ -55,41 +59,29 @@ public class EncodeTest {
       input_path_list.add(input_parent_dir + dataset_name.get(i));
     }
 
-    output_path_list.add(output_parent_dir + "\\CS-Sensors_ratio.csv"); // 0
-    dataset_block_size.add(1024);
-    //    dataset_k.add(5);
-    output_path_list.add(output_parent_dir + "\\Metro-Traffic_ratio.csv"); // 1
-    dataset_block_size.add(512);
-    //    dataset_k.add(7);
-    output_path_list.add(output_parent_dir + "\\USGS-Earthquakes_ratio.csv"); 
// 2
-    dataset_block_size.add(512);
-    //    dataset_k.add(7);
-    output_path_list.add(output_parent_dir + "\\YZ-Electricity_ratio.csv"); // 
3
-    dataset_block_size.add(512);
-    //    dataset_k.add(1);
-    output_path_list.add(output_parent_dir + "\\GW-Magnetic_ratio.csv"); // 4
-    dataset_block_size.add(128);
-    //    dataset_k.add(6);
-    output_path_list.add(output_parent_dir + "\\TY-Fuel_ratio.csv"); // 5
-    dataset_block_size.add(64);
-    //    dataset_k.add(5);
-    output_path_list.add(output_parent_dir + "\\Cyber-Vehicle_ratio.csv"); // 6
-    dataset_block_size.add(128);
-    //    dataset_k.add(4);
-    output_path_list.add(output_parent_dir + "\\Vehicle-Charge_ratio.csv"); // 
7
-    dataset_block_size.add(512);
-    //    dataset_k.add(8);
-    output_path_list.add(output_parent_dir + "\\Nifty-Stocks_ratio.csv"); // 8
-    dataset_block_size.add(256);
-    //    dataset_k.add(1);
-    output_path_list.add(output_parent_dir + "\\TH-Climate_ratio.csv"); // 9
-    dataset_block_size.add(512);
-    //    dataset_k.add(2);
-    output_path_list.add(output_parent_dir + "\\TY-Transport_ratio.csv"); // 10
-    dataset_block_size.add(512);
-    //    dataset_k.add(9);
-    output_path_list.add(output_parent_dir + "\\EPM-Education_ratio.csv"); // 
11
-    dataset_block_size.add(512);
+    output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0
+//        dataset_block_size.add(128);
+
+    output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1
+//        dataset_block_size.add(4096);
+    output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 
2
+//        dataset_block_size.add(8192);
+    output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3
+    output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4
+    output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5
+//        dataset_block_size.add(8192);
+    output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6
+//        dataset_block_size.add(2048);
+    output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7
+//        dataset_block_size.add(2048);
+    output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8
+//        dataset_block_size.add(128);
+    output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9
+//        dataset_block_size.add(64);
+    output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10
+//        dataset_block_size.add(64);
+    output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11
+//        dataset_block_size.add(256);
 
     //        for(int file_i=3;file_i<4;file_i++){
     for (int file_i = 0; file_i < input_path_list.size(); file_i++) {
diff --git 
a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java
 
b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java
index 5fde4ef7aa1..bcd5b0621bf 100644
--- 
a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java
+++ 
b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java
@@ -43,103 +43,145 @@ public class KernelDensityEstimation {
 //            int[] minIndex = findMinIndex(kernelDensity);
 //            System.out.println("Minimum point: x=" + 
(Arrays.toString(minIndex)));
         }
-      }
-    }
-    int[] final_minIndex = new int[final_min_count];
-    //            if(final_min_count>0){
-    //                final_minIndex[0] = minIndex[0];
-    //                int pre_value = minIndex[0];
-    //                for(int mv = 1; mv<final_min_count;mv++){
-    //                    if(minIndex[mv]-pre_value>16){
-    //                        pre_value = minIndex[mv];
-    //
-    //                    }
-    //                }
-    System.arraycopy(minIndex, 0, final_minIndex, 0, final_min_count);
-    //            }
-    return final_minIndex;
-  }
-  //    public static void main(String[] args) {
-  //        // 数据分布
-  //        Map<Integer, Integer> data = new HashMap<>();
-  //        data.put(1, 3);
-  //        data.put(2, 10);
-  //        data.put(3, 100);
-  //        data.put(4, 12);
-  //       if( data.containsKey(10)){
-  //           System.out.println("contain");
-  //       }
-  //        if( data.containsKey(1)){
-  //            System.out.println("contain 1");
-  //        }
-  //        // 选择带宽
-  //        double bandwidth = 1.0;
-  //
-  //        // 计算核密度曲线的极小值点
-  //        findMinima(data, bandwidth);
-  //    }
-  //
-  //    static void findMinima(Map<Integer, Integer> data, double bandwidth) {
-  //        // 计算核密度估计
-  //        Map<Integer, Double> kernelDensityEstimate = 
calculateKernelDensity(data, bandwidth);
-  //
-  //        // 计算导数
-  //        Map<Integer, Double> derivative = 
calculateDerivative(kernelDensityEstimate);
-  //
-  //        System.out.println(derivative);
-  //
-  //        // 打印导数为零的点
-  //        System.out.println("Minima Points:");
-  //        for (Map.Entry<Integer, Double> entry : derivative.entrySet()) {
-  //            if (entry.getValue() == 0.0) {
-  //                System.out.println("Point " + entry.getKey());
-  //            }
-  //        }
-  //    }
-  //
-  //    private static Map<Integer, Double> 
calculateKernelDensity(Map<Integer, Integer> data,
-  // double bandwidth) {
-  //        // 计算核密度估计
-  //        Map<Integer, Double> kernelDensityEstimate = new HashMap<>();
-  //
-  //        for (Map.Entry<Integer, Integer> entry : data.entrySet()) {
-  //            int point = entry.getKey();
-  //            double sum = 0.0;
-  //
-  //            for (Map.Entry<Integer, Integer> dataEntry : data.entrySet()) {
-  //                double x = dataEntry.getKey();
-  //                double kernel = gaussianKernel(x, point, bandwidth);
-  //                sum += kernel;
-  //            }
-  //
-  //            kernelDensityEstimate.put(point, sum / (data.size() * 
bandwidth));
-  //        }
-  //
-  //        return kernelDensityEstimate;
-  //    }
-  //
-  //    private static Map<Integer, Double> calculateDerivative(Map<Integer, 
Double> function) {
-  //        // 计算导数
-  //        Map<Integer, Double> derivative = new HashMap<>();
-  //
-  //        for (Map.Entry<Integer, Double> entry : function.entrySet()) {
-  //            int point = entry.getKey();
-  //
-  //            if (point > 1 && point < 4) {
-  //                double derivativeValue = (function.get(point + 1) - 
function.get(point - 1)) /
-  // 2.0;
-  //                derivative.put(point, derivativeValue);
-  //            } else {
-  //                // 边缘点处理
-  //                derivative.put(point, 0.0);
-  //            }
-  //        }
-  //
-  //        return derivative;
-  //    }
-  //
-  //    private static double gaussianKernel(double x, double xi, double 
bandwidth) {
-  //        // 高斯核函数
-  //        return Math.exp(-0.5 * Math.pow((x - xi) / bandwidth, 2)) / 
Math.sqrt(2 * Math.PI);
-  //    }
+
+        // Compute the kernel density estimate: result[x - 1] is the weight-scaled sum of Gaussian kernels over all map entries (key = data point, value = weight) evaluated at x = 1..maxKey, where maxKey is the largest key (0 for an empty map).
+        static double[] calculateKernelDensity(Map<Integer, Integer> 
discreteDistribution) {
+            int maxKey = 
discreteDistribution.keySet().stream().max(Integer::compare).orElse(0);
+            double[] kernelDensity = new double[maxKey];
+
+            for (int x = 1; x <= maxKey; x++) {
+                for (Map.Entry<Integer, Integer> entry : 
discreteDistribution.entrySet()) {
+                    int dataPoint = entry.getKey();
+                    int weight = entry.getValue();
+                    kernelDensity[x - 1] += gaussianKernel(x, dataPoint) * 
weight;
+                }
+            }
+
+            return kernelDensity;
+        }
+
+        // Gaussian kernel function: exp(-0.5 * ((x - dataPoint) / bandwidth)^2) / (sqrt(2 * pi) * bandwidth), using the fixed bandwidth declared below.
+        private static double gaussianKernel(int x, int dataPoint) {
+            double bandwidth = 1.0; // adjustable bandwidth parameter
+            return Math.exp(-0.5 * Math.pow((x - dataPoint) / bandwidth, 2)) / 
(Math.sqrt(2 * Math.PI) * bandwidth);
+        }
+
+        // Find the indices of strict local minima (array[i] smaller than both neighbours; first and last elements are never candidates). The first minimum is always kept; each later one is kept only if its index is more than 32 past the last kept index. Returns the kept indices in ascending order, trimmed to the number found.
+        static int[] findMinIndex(double[] array) {
+            int[] minIndex = new int[array.length];
+            int final_min_count = 0;
+            int pre_value = 0;
+//            double preValue = array[0];
+
+            for (int i = 1; i < array.length-1; i++) {
+                if (array[i] < array[i-1] && array[i] < array[i+1]) {
+                    if(final_min_count != 0){
+                        if(i>pre_value+32){
+                            minIndex[final_min_count] = i;
+                            final_min_count ++;
+                            pre_value = i;
+                        }
+                    }else{
+                        minIndex[final_min_count] = i;
+                        final_min_count ++;
+                        pre_value = i;
+                    }
+                }
+            }
+            int[] final_minIndex = new int[final_min_count];
+//            if(final_min_count>0){
+//                final_minIndex[0] = minIndex[0];
+//                int pre_value = minIndex[0];
+//                for(int mv = 1; mv<final_min_count;mv++){
+//                    if(minIndex[mv]-pre_value>16){
+//                        pre_value = minIndex[mv];
+//
+//                    }
+//                }
+            System.arraycopy(minIndex, 0, final_minIndex, 0, final_min_count);
+//            }
+            return final_minIndex;
+        }
+//    public static void main(String[] args) {
+//        // 数据分布
+//        Map<Integer, Integer> data = new HashMap<>();
+//        data.put(1, 3);
+//        data.put(2, 10);
+//        data.put(3, 100);
+//        data.put(4, 12);
+//       if( data.containsKey(10)){
+//           System.out.println("contain");
+//       }
+//        if( data.containsKey(1)){
+//            System.out.println("contain 1");
+//        }
+//        // 选择带宽
+//        double bandwidth = 1.0;
+//
+//        // 计算核密度曲线的极小值点
+//        findMinima(data, bandwidth);
+//    }
+//
+//    static void findMinima(Map<Integer, Integer> data, double bandwidth) {
+//        // 计算核密度估计
+//        Map<Integer, Double> kernelDensityEstimate = 
calculateKernelDensity(data, bandwidth);
+//
+//        // 计算导数
+//        Map<Integer, Double> derivative = 
calculateDerivative(kernelDensityEstimate);
+//
+//        System.out.println(derivative);
+//
+//        // 打印导数为零的点
+//        System.out.println("Minima Points:");
+//        for (Map.Entry<Integer, Double> entry : derivative.entrySet()) {
+//            if (entry.getValue() == 0.0) {
+//                System.out.println("Point " + entry.getKey());
+//            }
+//        }
+//    }
+//
+//    private static Map<Integer, Double> calculateKernelDensity(Map<Integer, 
Integer> data, double bandwidth) {
+//        // 计算核密度估计
+//        Map<Integer, Double> kernelDensityEstimate = new HashMap<>();
+//
+//        for (Map.Entry<Integer, Integer> entry : data.entrySet()) {
+//            int point = entry.getKey();
+//            double sum = 0.0;
+//
+//            for (Map.Entry<Integer, Integer> dataEntry : data.entrySet()) {
+//                double x = dataEntry.getKey();
+//                double kernel = gaussianKernel(x, point, bandwidth);
+//                sum += kernel;
+//            }
+//
+//            kernelDensityEstimate.put(point, sum / (data.size() * 
bandwidth));
+//        }
+//
+//        return kernelDensityEstimate;
+//    }
+//
+//    private static Map<Integer, Double> calculateDerivative(Map<Integer, 
Double> function) {
+//        // 计算导数
+//        Map<Integer, Double> derivative = new HashMap<>();
+//
+//        for (Map.Entry<Integer, Double> entry : function.entrySet()) {
+//            int point = entry.getKey();
+//
+//            if (point > 1 && point < 4) {
+//                double derivativeValue = (function.get(point + 1) - 
function.get(point - 1)) / 2.0;
+//                derivative.put(point, derivativeValue);
+//            } else {
+//                // 边缘点处理
+//                derivative.put(point, 0.0);
+//            }
+//        }
+//
+//        return derivative;
+//    }
+//
+//    private static double gaussianKernel(double x, double xi, double 
bandwidth) {
+//        // 高斯核函数
+//        return Math.exp(-0.5 * Math.pow((x - xi) / bandwidth, 2)) / 
Math.sqrt(2 * Math.PI);
+//    }
 }
+
diff --git 
a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERCompress1ArrayTest.java
 
b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERCompress1ArrayTest.java
index 4bd952f8665..1c5f025f569 100644
--- 
a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERCompress1ArrayTest.java
+++ 
b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERCompress1ArrayTest.java
@@ -2859,7 +2859,7 @@ public class REGERCompress1ArrayTest {
         for (String value : dataset_name) {
             input_path_list.add(input_parent_dir + value);
             dataset_k.add(1);
-            dataset_block_size.add(128);
+            dataset_block_size.add(1024);
         }
 
         output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0

Reply via email to