This is an automated email from the ASF dual-hosted git repository. hui pushed a commit to branch research/encoding-reorder in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 7ca4c7b4de9b395f48c4a3cf0e4e74ba8eb32665 Author: xjz17 <[email protected]> AuthorDate: Sun Dec 3 15:31:20 2023 +0800 update --- .../encoding/encoder/DeltaBinaryEncoder.java | 2 +- .../apache/iotdb/tsfile/encoding/EncodeTest.java | 7 +- .../iotdb/tsfile/encoding/REGERDoubleTest.java | 41 +- .../tsfile/encoding/REGERFloatDecreaseTest.java | 797 ++++++++++----------- .../iotdb/tsfile/encoding/REGERFloatTest.java | 204 +++++- .../iotdb/tsfile/encoding/RegerPDoubleTest.java | 7 +- .../iotdb/tsfile/encoding/RegerPFloatTest.java | 2 +- 7 files changed, 601 insertions(+), 459 deletions(-) diff --git a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/DeltaBinaryEncoder.java b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/DeltaBinaryEncoder.java index 38fc8f6013f..3ff33b948c4 100644 --- a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/DeltaBinaryEncoder.java +++ b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/DeltaBinaryEncoder.java @@ -45,7 +45,7 @@ import java.io.IOException; */ public abstract class DeltaBinaryEncoder extends Encoder { - protected static final int BLOCK_DEFAULT_SIZE = 1024; + protected static final int BLOCK_DEFAULT_SIZE = 512; private static final Logger logger = LoggerFactory.getLogger(DeltaBinaryEncoder.class); protected ByteArrayOutputStream out; protected int blockSize; diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java index b0cfe011499..ef0bf35793e 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/EncodeTest.java @@ -91,8 +91,8 @@ public class EncodeTest { output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 - // for(int file_i=3;file_i<4;file_i++){ - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { + for(int file_i=4;file_i<5;file_i++){ +// for (int file_i = 0; file_i < input_path_list.size(); file_i++) { String inputPath = input_path_list.get(file_i); String Output = output_path_list.get(file_i); // String Output = "C:\\Users\\xiaoj\\Desktop\\test_ratio_ts_2diff.csv"; @@ -148,7 +148,10 @@ public class EncodeTest { for (int i = 0; i < 2; i++) { columnIndexes.add(i, i); } + int count_csv =0; for (File f : tempList) { + System.out.println(count_csv); + count_csv ++; System.out.println(f); fileRepeat += 1; InputStream inputStream = Files.newInputStream(f.toPath()); diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERDoubleTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERDoubleTest.java index c4b5f3565e8..b5c4f78c435 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERDoubleTest.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERDoubleTest.java @@ -3015,12 +3015,13 @@ public class REGERDoubleTest { } } - @Test - public void REGER() throws IOException { +// @Test + public static void REGER() throws IOException { // String parent_dir = "C:/Users/xiaoj/Desktop/test"; String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/iotdb/iotdb-core/tsfile/src/test/resources/"; String output_parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/reger_double"; - + int pack_size = 16; + int block_size = 512; String input_parent_dir = parent_dir + "trans_data/"; ArrayList<String> input_path_list = new ArrayList<>(); @@ -3077,7 +3078,7 @@ public class REGERDoubleTest { for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); dataset_k.add(1); - dataset_block_size.add(1024); + dataset_block_size.add(block_size); } output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 @@ -3106,9 +3107,9 @@ public class REGERDoubleTest { output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 - int[] file_lists = {5,6,8,10}; - for (int file_i : file_lists) { -// for (int file_i = 0; file_i < input_path_list.size(); file_i++) { +// int[] file_lists = {5,6,8,10}; +// for (int file_i : file_lists) { + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { // for (int file_i = 12; file_i < 14; file_i++) { String inputPath = input_path_list.get(file_i); String Output = output_path_list.get(file_i); @@ -3170,12 +3171,12 @@ public class REGERDoubleTest { long decodeTime = 0; double ratio = 0; double compressed_size = 0; - int repeatTime2 = 200; + int repeatTime2 = 100; long s = System.nanoTime(); int[] best_order = new int[3]; int length = 0; for (int repeat = 0; repeat < repeatTime2; repeat++) - length = ReorderingRegressionEncoder(data2_arr, dataset_block_size.get(file_i), dataset_third.get(file_i), 8, encoded_result); + length = ReorderingRegressionEncoder(data2_arr, dataset_block_size.get(file_i), dataset_third.get(file_i), pack_size, encoded_result); long e = System.nanoTime(); encodeTime += ((e - s) / repeatTime2); compressed_size += length; @@ -3215,6 +3216,8 @@ public class REGERDoubleTest { // String parent_dir = "C:/Users/xiaoj/Desktop/test"; String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/iotdb/iotdb-core/tsfile/src/test/resources/"; String output_parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/block_size_double"; + int pack_size = 16; + String input_parent_dir = parent_dir + "trans_data/"; ArrayList<String> input_path_list = new ArrayList<>(); @@ -3300,9 +3303,9 @@ public class REGERDoubleTest { output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 - int[] file_lists = {5,6,8,10}; - for (int file_i : file_lists) { -// for (int file_i = 0; file_i < input_path_list.size(); file_i++) { +// int[] file_lists = {5,6,8,10}; +// for (int file_i : file_lists) { + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { // for (int file_i = 12; file_i < 14; file_i++) { String inputPath = input_path_list.get(file_i); String Output = output_path_list.get(file_i); @@ -3387,7 +3390,7 @@ public class REGERDoubleTest { data2_arr, block_size, dataset_third.get(file_i), - 8, + pack_size, encoded_result); long e = System.nanoTime(); encodeTime += ((e - s) / repeatTime2); @@ -3424,6 +3427,8 @@ public class REGERDoubleTest { "/Users/xiaojinzhao/Documents/GitHub/iotdb/iotdb-core/tsfile/src/test/resources/"; String output_parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/pack_size_double"; + int pack_size = 16; + int block_size = 512; String input_parent_dir = parent_dir + "trans_data/"; ArrayList<String> input_path_list = new ArrayList<>(); @@ -3480,7 +3485,7 @@ public class REGERDoubleTest { for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); dataset_k.add(1); - dataset_block_size.add(1024); + dataset_block_size.add(block_size); } output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 @@ -3511,9 +3516,9 @@ public class REGERDoubleTest { output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 - int[] file_lists = {5,6,8,10}; - for (int file_i : file_lists) { -// for (int file_i = 0; file_i < input_path_list.size(); file_i++) { +// int[] file_lists = {5,6,8,10}; +// for (int file_i : file_lists) { + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { // for (int file_i = 12; file_i < 14; file_i++) { String inputPath = input_path_list.get(file_i); String Output = output_path_list.get(file_i); @@ -3566,7 +3571,7 @@ public class REGERDoubleTest { for (int i = 0; i < data.size(); i++) { data2_arr[i] = data.get(i) ; } - for (int segment_size_exp = 6; segment_size_exp > 2; segment_size_exp--) { + for (int segment_size_exp = 8; segment_size_exp > 2; segment_size_exp--) { int segment_size = (int) Math.pow(2, segment_size_exp); System.out.println(segment_size); diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERFloatDecreaseTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERFloatDecreaseTest.java index 2bbbb522e25..1e17fb891cc 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERFloatDecreaseTest.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERFloatDecreaseTest.java @@ -2507,8 +2507,7 @@ public class REGERFloatDecreaseTest { // return encode_pos; } - - private static int REGERBlockEncoder(long[] data, int i, int block_size, int supply_length, int[] third_value, int segment_size, int encode_pos, byte[] cur_byte, int[] block_sort) { + private static int REGERBlockEncoderIncrease(long[] data, int i, int block_size, int supply_length, int[] third_value, int segment_size, int encode_pos, byte[] cur_byte, int[] block_sort) { long min_time = (long) getTime(data[i * block_size]) << 32; @@ -2526,7 +2525,7 @@ public class REGERFloatDecreaseTest { // System.out.println((data[i * block_size+1])); // System.out.println(getTime(data[i * block_size+1])); for (int j = 0; j < block_size; j++) { - long tmp_j = data[j + i * block_size] ;//- min_time; + long tmp_j = data[j + i * block_size]- min_time; // System.out.println(getTime(data[j + i * block_size])); // System.out.println(getTime(data[i * block_size])); ts_block[j] = tmp_j; @@ -2577,7 +2576,7 @@ public class REGERFloatDecreaseTest { // int min_value = Integer.MAX_VALUE; for (int j = 0; j < end; j++) { - long tmp_j = data[j + i * block_size] ;//- min_time; + long tmp_j = data[j + i * block_size] - min_time; ts_block[j] = tmp_j; ts_block_value[j] = combine2Int(getValue(tmp_j), getTime(tmp_j)); @@ -2610,7 +2609,8 @@ public class REGERFloatDecreaseTest { // third_value= findMinIndex(kernelDensity); block_size = supply_length; } - + int2Bytes(getTime(min_time),encode_pos,cur_byte); + encode_pos += 4; // printTSBlock(ts_block); // printTSBlock(ts_block_value); @@ -2702,6 +2702,241 @@ public class REGERFloatDecreaseTest { } + return encode_pos; + } + + private static int REGERBlockEncoderDecrease(long[] data, int i, int block_size, int supply_length, int[] third_value, int segment_size, int encode_pos, byte[] cur_byte, int[] block_sort) { + + + long min_time = (long) getTime(data[i * block_size]) << 32; + long[] ts_block; + long[] ts_block_value; + long[] ts_block_partition; + + if (supply_length == 0) { + ts_block = new long[block_size]; + ts_block_value = new long[block_size]; + ts_block_partition = new long[block_size]; +// Map<Integer, Integer> data_map = new HashMap<>(); +// int min_value = Integer.MAX_VALUE; + +// System.out.println((data[i * block_size+1])); +// System.out.println(getTime(data[i * block_size+1])); + for (int j = 0; j < block_size; j++) { + long tmp_j = data[j + i * block_size] - min_time; +// System.out.println(getTime(data[j + i * block_size])); +// System.out.println(getTime(data[i * block_size])); + ts_block[j] = tmp_j; + ts_block_value[j] = combine2Int(getValue(tmp_j), getTime(tmp_j)); + +// if(ts_block[j][1]<min_value){ +// min_value = ts_block[j][1]; +// } +// if(data_map.containsKey(ts_block[j][1])){ +// int tmp = data_map.get(ts_block[j][1]); +// tmp++; +// data_map.put(ts_block[j][1],tmp); +// }else{ +// data_map.put(ts_block[j][1],1); +// } +// ts_block_value[j] = data[j + i * block_size] - (min_time); + } +// for (int j = 0; j < block_size; j++) { +// ts_block[j][0] = (data[j + i * block_size][0] - min_time); +// ts_block[j][1] = data[j + i * block_size][1]; +// if(ts_block[j][1]<min_value){ +// min_value = ts_block[j][1]; +// } +// int tmp_value = ts_block[j][1]-min_value; +// if(data_map.containsKey(tmp_value)){ +// int tmp = data_map.get(tmp_value); +// tmp++; +// data_map.put(tmp_value,tmp); +// }else{ +// data_map.put(tmp_value,1); +// } +// ts_block_value[j][0] =ts_block[j][0]; +// ts_block_value[j][1] =ts_block[j][1]; +// } +// double[] kernelDensity = calculateKernelDensity(data_map); +// +// third_value= findMinIndex(kernelDensity); +// for(int j=0;j<third_value.length;j++){ +// third_value[j] += min_value; +// } +// System.out.println("Minimum point: x=" + (Arrays.toString(third_value))); + } else { + ts_block = new long[supply_length]; + ts_block_value = new long[supply_length]; + ts_block_partition = new long[supply_length]; + int end = data.length - i * block_size; +// Map<Integer, Integer> data_map = new HashMap<>(); +// int min_value = Integer.MAX_VALUE; + + for (int j = 0; j < end; j++) { + long tmp_j = data[j + i * block_size] - min_time; + ts_block[j] = tmp_j; + ts_block_value[j] = combine2Int(getValue(tmp_j), getTime(tmp_j)); + +// if(ts_block[j][1]<min_value){ +// min_value = ts_block[j][1]; +// } +// if(data_map.containsKey(ts_block[j][1])){ +// int tmp = data_map.get(ts_block[j][1]); +// tmp++; +// data_map.put(ts_block[j][1],tmp); +// }else{ +// data_map.put(ts_block[j][1],1); +// } +// ts_block_value[j] = data[j + i * block_size] - (min_time); + } + for (int j = end; j < supply_length; j++) { + ts_block[j] = 0; + ts_block_value[j] = 0; +// if(data_map.containsKey(ts_block[j][1])){ +// int tmp = data_map.get(ts_block[j][1]); +// tmp++; +// data_map.put(ts_block[j][1],tmp); +// }else{ +// data_map.put(ts_block[j][1],1); +// } +// ts_block_value[j] =0; + } +// double[] kernelDensity = calculateKernelDensity(data_map); + +// third_value= findMinIndex(kernelDensity); + block_size = supply_length; + } + +// printTSBlock(ts_block); +// printTSBlock(ts_block_value); + + int[] reorder_length = new int[5]; + float[] theta_reorder = new float[4]; + int[] time_length = new int[5];// length,max_bit_width_interval,max_bit_width_value,max_bit_width_deviation + float[] theta_time = new float[4]; + int[] partition_length = new int[5]; // length,max_bit_width_interval,max_bit_width_value,max_bit_width_deviation + float[] theta_partition = new float[4]; + + trainParameter(ts_block, block_size, theta_time); + + long[] ts_block_delta_time = getEncodeBitsRegressionNoTrain(ts_block, block_size, time_length, theta_time, segment_size); + + + int pos_ts_block_partition = 0; + if (third_value.length > 0) { + for(int j=block_size-1;j>=0;j--){ + long datum = ts_block[j]; + if (getValue(datum) <= third_value[0]) { + ts_block_partition[pos_ts_block_partition] = datum; + pos_ts_block_partition++; + } + } + for (int third_i = 1; third_i <= third_value.length - 1; third_i++) { + for (int j = block_size - 1; j >= 0; j--) { + long datum = ts_block[j]; + if (getValue(datum) <= third_value[third_i] && getValue(datum) > third_value[third_i - 1]) { + ts_block_partition[pos_ts_block_partition] = datum; + pos_ts_block_partition++; + } + } + } + for (int j = block_size - 1; j >= 0; j--) { + long datum = ts_block[j]; + if (getValue(datum) > third_value[third_value.length - 1]) { + ts_block_partition[pos_ts_block_partition] = datum; + pos_ts_block_partition++; + } + } +// System.out.println(pos_ts_block_partition); +// for (long datum : ts_block) { +// if (getValue(datum) <= third_value[0]) { +// ts_block_partition[pos_ts_block_partition] = datum; +// pos_ts_block_partition++; +// } +// } +// for (int third_i = 1; third_i < third_value.length - 1; third_i++) { +// for (long datum : ts_block) { +// if (getValue(datum) <= third_value[third_i] && getValue(datum) > third_value[third_i - 1]) { +// ts_block_partition[pos_ts_block_partition] = datum; +// pos_ts_block_partition++; +// } +// } +// } +// for (long datum : ts_block) { +// if (getValue(datum) > third_value[third_value.length - 1]) { +// ts_block_partition[pos_ts_block_partition] = datum; +// pos_ts_block_partition++; +// } +// } + } + + + trainParameter(ts_block_partition, block_size, theta_partition); + long[] ts_block_delta_partition = getEncodeBitsRegressionNoTrain(ts_block_partition, block_size, partition_length, theta_partition, segment_size); + + Arrays.sort(ts_block_value); + trainParameter(ts_block_value, block_size, theta_reorder); + long[] ts_block_delta_reorder = getEncodeBitsRegressionNoTrain(ts_block_value, block_size, reorder_length, theta_reorder, segment_size); + + + ReorderingTimeSeries(ts_block_value, reorder_length, theta_reorder, segment_size); + + int segment_n = (block_size - 1) / segment_size; + long[] bit_width_segments = new long[segment_n]; + + + int choose = min3(time_length[0], partition_length[0], reorder_length[0]); + if (choose == 0) { +// System.out.println("time"); +// System.out.println(Arrays.toString(time_length)); +// intByte2Bytes(0, encode_pos, cur_byte); +// encode_pos ++; + block_sort[i] = 0; + ts_block_delta_time = ReorderingTimeSeries(ts_block, time_length, theta_time, segment_size); + bit_width_segments = segmentBitPacking(ts_block_delta_time, block_size, segment_size); + int2Bytes(getTime(min_time),encode_pos,cur_byte); + encode_pos += 4; +// ts_block_delta_time[0] += min_time; + + encode_pos = encodeSegment2Bytes(ts_block_delta_time, bit_width_segments, time_length, segment_size, theta_time, encode_pos, cur_byte); +// System.out.println(Arrays.toString(time_length)); + } else if (choose == 1) { +// System.out.println(i); +// System.out.println("partition"); +// printTSBlock(ts_block_partition); +// System.out.println(Arrays.toString(partition_length)); +// intByte2Bytes(0, encode_pos, cur_byte); +// encode_pos ++; + block_sort[i] = 0; + ts_block_delta_partition = ReorderingTimeSeries(ts_block_partition, partition_length, theta_partition, segment_size); + bit_width_segments = segmentBitPacking(ts_block_delta_partition, block_size, segment_size); +// ts_block_delta_partition[0] += min_time; + int2Bytes(getTime(min_time),encode_pos,cur_byte); + encode_pos += 4; + encode_pos = encodeSegment2Bytes(ts_block_delta_partition, bit_width_segments, partition_length, segment_size, theta_partition, encode_pos, cur_byte); +// System.out.println(Arrays.toString(partition_length)); + } else if (choose == 2) { + int2Bytes(getTime(min_time),encode_pos,cur_byte); + encode_pos += 4; +// System.out.println("value"); +// printTSBlock(ts_block_value); +// System.out.println(Arrays.toString(reorder_length)); +// intByte2Bytes(1, encode_pos, cur_byte); +// encode_pos ++; + block_sort[i] = 1; + ts_block_delta_reorder = ReorderingTimeSeries(ts_block_value, reorder_length, theta_reorder, segment_size); + + bit_width_segments = segmentBitPacking(ts_block_delta_reorder, block_size, segment_size); + encode_pos = encodeSegment2Bytes(ts_block_delta_reorder, bit_width_segments, reorder_length, segment_size, theta_reorder, encode_pos, cur_byte); + } + +// block_sort[i] = 0; +// ts_block_delta_partition = ReorderingTimeSeries(ts_block_partition, partition_length, theta_partition, segment_size); +// bit_width_segments = segmentBitPacking(ts_block_delta_partition, block_size, segment_size); +//// ts_block_delta_partition[0] += min_time; +// encode_pos = encodeSegment2Bytes(ts_block_delta_partition, bit_width_segments, partition_length, segment_size, theta_partition, encode_pos, cur_byte); + // int segment_n = (block_size - 1) / segment_size; // long[] bit_width_segments = new long[segment_n]; // @@ -2725,7 +2960,7 @@ public class REGERFloatDecreaseTest { return encode_pos; } - public static int ReorderingRegressionEncoder(long[] data, int block_size, int[] third_value, int segment_size, byte[] encoded_result) { + public static int ReorderingRegressionEncoderIncrease(long[] data, int block_size, int[] third_value, int segment_size, byte[] encoded_result) { block_size++; // ArrayList<Byte> encoded_result = new ArrayList<Byte>(); int length_all = data.length; @@ -2746,10 +2981,59 @@ public class REGERFloatDecreaseTest { int length_block_sort = (int) Math.ceil((double)(block_num+1)/(double) 8); encode_pos += length_block_sort; -// for (int i = 44; i < 45; i++) { +// for (int i = 0; i < 1; i++) { for (int i = 0; i < block_num; i++) { // System.out.println(i); - encode_pos = REGERBlockEncoder(data, i, block_size, 0, third_value, segment_size, encode_pos, encoded_result, block_sort); + encode_pos = REGERBlockEncoderIncrease(data, i, block_size, 0, third_value, segment_size, encode_pos, encoded_result, block_sort); + } + + int remaining_length = length_all - block_num * block_size; + if (remaining_length == 1) { + long2Bytes(data[data.length - 1], encode_pos, encoded_result); + encode_pos += 8; + } + if (remaining_length != 0 && remaining_length != 1) { + int supple_length; + if (remaining_length % segment_size == 0) { + supple_length = 1; + } else if (remaining_length % segment_size == 1) { + supple_length = 0; + } else { + supple_length = segment_size + 1 - remaining_length % segment_size; + } + encode_pos = REGERBlockEncoderIncrease(data, block_num, block_size, supple_length + remaining_length, third_value, segment_size, encode_pos, encoded_result, block_sort); + + } + encodeSort(block_sort,encode_pos_block_sort,encoded_result); + + return encode_pos; + } + + public static int ReorderingRegressionEncoderDecrease(long[] data, int block_size, int[] third_value, int segment_size, byte[] encoded_result) { + block_size++; +// ArrayList<Byte> encoded_result = new ArrayList<Byte>(); + int length_all = data.length; +// System.out.println(length_all); + int encode_pos = 0; + int2Bytes(length_all, encode_pos, encoded_result); + encode_pos += 4; + + int block_num = length_all / block_size; + int2Bytes(block_size, encode_pos, encoded_result); + encode_pos += 4; + + int2Bytes(segment_size, encode_pos, encoded_result); + encode_pos += 4; + + int[] block_sort = new int[block_num+1]; + int encode_pos_block_sort = encode_pos; + int length_block_sort = (int) Math.ceil((double)(block_num+1)/(double) 8); + encode_pos += length_block_sort; + +// for (int i = 0; i < 1; i++) { + for (int i = 0; i < block_num; i++) { + + encode_pos = REGERBlockEncoderDecrease(data, i, block_size, 0, third_value, segment_size, encode_pos, encoded_result, block_sort); } int remaining_length = length_all - block_num * block_size; @@ -2766,7 +3050,7 @@ public class REGERFloatDecreaseTest { } else { supple_length = segment_size + 1 - remaining_length % segment_size; } - encode_pos = REGERBlockEncoder(data, block_num, block_size, supple_length + remaining_length, third_value, segment_size, encode_pos, encoded_result, block_sort); + encode_pos = REGERBlockEncoderDecrease(data, block_num, block_size, supple_length + remaining_length, third_value, segment_size, encode_pos, encoded_result, block_sort); } encodeSort(block_sort,encode_pos_block_sort,encoded_result); @@ -2793,11 +3077,12 @@ public class REGERFloatDecreaseTest { public static int REGERBlockDecoder(byte[] encoded, int decode_pos, int[][] value_list, int block_size, int segment_size, int[] value_pos_arr) { - + int min_time_0 = bytes2Integer(encoded, decode_pos, 4); + decode_pos += 4; int time0 = bytes2Integer(encoded, decode_pos, 4); decode_pos += 4; - value_list[value_pos_arr[0]][0] = time0; + value_list[value_pos_arr[0]][0] = time0+min_time_0; int value0 = bytes2Integer(encoded, decode_pos, 4); decode_pos += 4; value_list[value_pos_arr[0]][1] = value0; @@ -2871,7 +3156,7 @@ public class REGERFloatDecreaseTest { for(;pos_decode_time_result<length_decode_time_result;pos_decode_time_result++){ pre_time = (int) (theta_time0 + theta_time1 * (float)pre_time ) + decode_time_result[pos_decode_time_result] + min_time; pre_value = (int) (theta_value0 + theta_value1 * (float)pre_value ) + decode_value_result[pos_decode_time_result] + min_value; - value_list[value_pos_arr[0]][0] = pre_time; + value_list[value_pos_arr[0]][0] = pre_time+min_time_0; value_list[value_pos_arr[0]][1] = pre_value; value_pos_arr[0] ++; } @@ -2882,14 +3167,15 @@ public class REGERFloatDecreaseTest { public static int REGERBlockDecoderValue(byte[] encoded, int decode_pos, int[][] value_list, int block_size, int segment_size, int[] value_pos_arr) { - + int min_time_0 = bytes2Integer(encoded, decode_pos, 4); + decode_pos += 4; int time0 = bytes2Integer(encoded, decode_pos, 4); decode_pos += 4; - value_list[value_pos_arr[0]][1] = time0; + value_list[value_pos_arr[0]][1] = time0 ; int value0 = bytes2Integer(encoded, decode_pos, 4); decode_pos += 4; - value_list[value_pos_arr[0]][0] = value0; + value_list[value_pos_arr[0]][0] = value0+min_time_0; @@ -2963,7 +3249,7 @@ public class REGERFloatDecreaseTest { pre_time = (int) (theta_time0 + theta_time1 * (float)pre_time ) + decode_time_result[pos_decode_time_result] + min_time; pre_value = (int) (theta_value0 + theta_value1 * (float)pre_value ) + decode_value_result[pos_decode_time_result] + min_value; value_list[value_pos_arr[0]][1] = pre_time; - value_list[value_pos_arr[0]][0] = pre_value; + value_list[value_pos_arr[0]][0] = pre_value+min_time_0; value_pos_arr[0] ++; } } @@ -3017,7 +3303,7 @@ public class REGERFloatDecreaseTest { int[] value_pos_arr = new int[1]; -// for (int k = 0; k < 2; k++) { +// for (int k = 0; k < 1; k++) { for (int k = 0; k < block_num; k++) { int cur_block_sort = block_sort[k]; if(cur_block_sort==0) @@ -3050,10 +3336,10 @@ public class REGERFloatDecreaseTest { } @Test - public void REGER() throws IOException { + public void REGERIncrease() throws IOException { // String parent_dir = "C:/Users/xiaoj/Desktop/test"; String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/iotdb/iotdb-core/tsfile/src/test/resources/"; - String output_parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/reger_float"; + String output_parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/reger_partition_increase"; String input_parent_dir = parent_dir + "trans_data/"; @@ -3111,7 +3397,7 @@ public class REGERFloatDecreaseTest { for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); dataset_k.add(1); - dataset_block_size.add(1024); + dataset_block_size.add(512); } output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 @@ -3140,10 +3426,10 @@ public class REGERFloatDecreaseTest { output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 - int[] file_lists = {0}; - for (int file_i : file_lists) { +// int[] file_lists = {0,1,2,3}; +// for (int file_i : file_lists) { // for (int file_i = 0; file_i < input_path_list.size(); file_i++) { -// for (int file_i = 12; file_i < 14; file_i++) { + for (int file_i = 0; file_i < 12; file_i++) { String inputPath = input_path_list.get(file_i); String Output = output_path_list.get(file_i); @@ -3204,27 +3490,27 @@ public class REGERFloatDecreaseTest { long decodeTime = 0; double ratio = 0; double compressed_size = 0; - int repeatTime2 = 200; + int repeatTime2 = 1; long s = System.nanoTime(); int[] best_order = new int[3]; int length = 0; for (int repeat = 0; repeat < repeatTime2; repeat++) - length = ReorderingRegressionEncoder(data2_arr, dataset_block_size.get(file_i), dataset_third.get(file_i), 8, encoded_result); + length = ReorderingRegressionEncoderIncrease(data2_arr, dataset_block_size.get(file_i), dataset_third.get(file_i), 16, encoded_result); long e = System.nanoTime(); encodeTime += ((e - s) / repeatTime2); compressed_size += length; double ratioTmp = compressed_size / (double) (data.size() * Integer.BYTES * 2); ratio += ratioTmp; s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) - REGERDecoder(encoded_result); +// for (int repeat = 0; repeat < repeatTime2; repeat++) +// REGERDecoder(encoded_result); e = System.nanoTime(); decodeTime += ((e - s) / repeatTime2); String[] record = { f.toString(), - "REGER", + "REGER-Increase", String.valueOf(encodeTime), String.valueOf(decodeTime), String.valueOf(data.size()), @@ -3244,201 +3530,20 @@ public class REGERFloatDecreaseTest { } } -// @Test - public static void REGERVaryBlockSize() throws IOException { - // String parent_dir = "C:/Users/xiaoj/Desktop/test"; - String parent_dir = - "/Users/xiaojinzhao/Documents/GitHub/iotdb/iotdb-core/tsfile/src/test/resources/"; - String output_parent_dir = - "/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/block_size_float"; + @Test + public void REGERDecrease() throws IOException { +// String parent_dir = "C:/Users/xiaoj/Desktop/test"; + String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/iotdb/iotdb-core/tsfile/src/test/resources/"; + String output_parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/reger_partition_decrease"; + String input_parent_dir = parent_dir + "trans_data/"; ArrayList<String> input_path_list = new ArrayList<>(); ArrayList<String> output_path_list = new ArrayList<>(); ArrayList<String> dataset_name = new ArrayList<>(); + ArrayList<Integer> dataset_block_size = new ArrayList<>(); ArrayList<int[]> dataset_third = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - dataset_name.add("FANYP-Sensors"); - dataset_name.add("TRAJET-Transport"); - - int[] dataset_0 = {547, 2816}; - int[] dataset_1 = {1719, 3731}; - int[] dataset_2 = {-48, -11, 6, 25, 52}; - int[] dataset_3 = {8681, 13584}; - int[] dataset_4 = {79, 184, 274}; - int[] dataset_5 = {17, 68}; - int[] dataset_6 = {677}; - int[] dataset_7 = {1047, 1725}; - int[] dataset_8 = {227, 499, 614, 1013}; - int[] dataset_9 = {474, 678}; - int[] dataset_10 = {4, 30, 38, 49, 58}; - int[] dataset_11 = {5182, 8206}; - int[] dataset_12 = {652477}; - int[] dataset_13 = {581388}; - - dataset_third.add(dataset_0); - dataset_third.add(dataset_1); - dataset_third.add(dataset_2); - dataset_third.add(dataset_3); - dataset_third.add(dataset_4); - dataset_third.add(dataset_5); - dataset_third.add(dataset_6); - dataset_third.add(dataset_7); - dataset_third.add(dataset_8); - dataset_third.add(dataset_9); - dataset_third.add(dataset_10); - dataset_third.add(dataset_11); - dataset_third.add(dataset_12); - dataset_third.add(dataset_13); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - } - - output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv"); // 1 - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv"); // 2 - output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); // 4 - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv"); // 5 - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); // 6 - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv"); // 7 - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv"); // 8 - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv"); // 9 - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv"); // 10 - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv"); // 11 - output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 - output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 - - int[] file_lists = {5,6,8,10}; - for (int file_i : file_lists) { -// for (int file_i = 0; file_i < input_path_list.size(); file_i++) { -// for (int file_i = 12; file_i < 14; file_i++) { - String inputPath = input_path_list.get(file_i); - String Output = output_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8); - - String[] head = { - "Input Direction", - "Encoding Algorithm", - "Encoding Time", - "Decoding Time", - "Points", - "Compressed Size", - "Block Size", - "Compression Ratio" - }; - writer.writeRecord(head); // write header to output file - - assert tempList != null; - - int count_csv =0; - for (File f : tempList) { - System.out.println(count_csv); - count_csv ++; - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); - ArrayList<Long> data = new ArrayList<>(); - - // add a column to "data" - loader.readHeaders(); - loader.readRecord(); - int time0 = Integer.parseInt(loader.getValues()[0]); - int value0 = Integer.parseInt(loader.getValues()[1]); - data.add(combine2Int(0, value0)); - - while (loader.readRecord()) { - int time_tmp = Integer.parseInt(loader.getValues()[0]) - time0; - int value_tmp = Integer.parseInt(loader.getValues()[1]); - data.add(combine2Int(time_tmp, value_tmp)); - } - ArrayList<Integer> result2 = new ArrayList<>(); - splitTimeStamp3(data, result2); - - - long[] data2_arr = new long[data.size()]; - for (int i = 0; i < data.size(); i++) { - data2_arr[i] = data.get(i); - } - for (int block_size_exp = 13; block_size_exp >= 4; block_size_exp--) { - int block_size = (int) Math.pow(2, block_size_exp); - System.out.println(block_size); - - byte[] encoded_result = new byte[data2_arr.length * 12]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - int repeatTime2 = 1; - long s = System.nanoTime(); - int length = 0; - for (int repeat = 0; repeat < repeatTime2; repeat++) - length = - ReorderingRegressionEncoder( - data2_arr, - block_size, - dataset_third.get(file_i), - 8, - encoded_result); - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data.size() * Integer.BYTES * 2); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) REGERDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - String[] record = { - f.toString(), - "REGER-32-FLOAT", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data.size()), - String.valueOf(compressed_size), - String.valueOf(block_size_exp), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - } - writer.close(); - } - } - -// @Test - public static void REGERVaryPackSize() throws IOException { - // String parent_dir = "C:/Users/xiaoj/Desktop/test"; - String parent_dir = - "/Users/xiaojinzhao/Documents/GitHub/iotdb/iotdb-core/tsfile/src/test/resources/"; - String output_parent_dir = - "/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/pack_size_float"; - - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> output_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - ArrayList<int[]> dataset_third = new ArrayList<>(); - ArrayList<Integer> dataset_k = new ArrayList<>(); + ArrayList<Integer> dataset_k = new ArrayList<>(); dataset_name.add("CS-Sensors"); dataset_name.add("Metro-Traffic"); dataset_name.add("USGS-Earthquakes"); @@ -3487,41 +3592,39 @@ public class REGERFloatDecreaseTest { for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); dataset_k.add(1); - dataset_block_size.add(1024); + dataset_block_size.add(512); } output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 - // dataset_block_size.add(1024); +// dataset_block_size.add(128); - output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv"); // 1 - // dataset_block_size.add(512); - output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv"); // 2 - // dataset_block_size.add(512); + output_path_list.add(output_parent_dir + "/Metro-Traffic_ratio.csv");// 1 +// dataset_block_size.add(4096); + output_path_list.add(output_parent_dir + "/USGS-Earthquakes_ratio.csv");// 2 +// dataset_block_size.add(8192); output_path_list.add(output_parent_dir + "/YZ-Electricity_ratio.csv"); // 3 - // dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); // 4 - // dataset_block_size.add(128); - output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv"); // 5 - // dataset_block_size.add(64); - output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); // 6 - // dataset_block_size.add(128); - output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv"); // 7 - // dataset_block_size.add(512); - output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv"); // 8 - // dataset_block_size.add(256); - output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv"); // 9 - // dataset_block_size.add(512); - output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv"); // 10 - // dataset_block_size.add(512); - output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv"); // 11 - // dataset_block_size.add(512); + output_path_list.add(output_parent_dir + "/GW-Magnetic_ratio.csv"); //4 + output_path_list.add(output_parent_dir + "/TY-Fuel_ratio.csv");//5 +// dataset_block_size.add(8192); + output_path_list.add(output_parent_dir + "/Cyber-Vehicle_ratio.csv"); //6 +// dataset_block_size.add(2048); + output_path_list.add(output_parent_dir + "/Vehicle-Charge_ratio.csv");//7 +// dataset_block_size.add(2048); + output_path_list.add(output_parent_dir + "/Nifty-Stocks_ratio.csv");//8 +// dataset_block_size.add(128); + output_path_list.add(output_parent_dir + "/TH-Climate_ratio.csv");//9 +// dataset_block_size.add(64); + output_path_list.add(output_parent_dir + "/TY-Transport_ratio.csv");//10 +// dataset_block_size.add(64); + output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 +// dataset_block_size.add(256); output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 - int[] file_lists = {5,6,8,10}; - for (int file_i : file_lists) { +// int[] file_lists = {0,1,2,3}; +// for (int file_i : file_lists) { // for (int file_i = 0; file_i < input_path_list.size(); file_i++) { -// for (int file_i = 12; file_i < 14; file_i++) { + for (int file_i = 0; file_i < 12; file_i++) { String inputPath = input_path_list.get(file_i); String Output = output_path_list.get(file_i); @@ -3537,8 +3640,7 @@ public class REGERFloatDecreaseTest { "Decoding Time", "Points", "Compressed Size", - "Block Size", - "Compression Ratio" + "Compression Ratio", }; writer.writeRecord(head); // write header to output file @@ -3553,7 +3655,6 @@ public class REGERFloatDecreaseTest { CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); ArrayList<Long> data = new ArrayList<>(); - // add a column to "data" loader.readHeaders(); loader.readRecord(); int time0 = Integer.parseInt(loader.getValues()[0]); @@ -3561,200 +3662,80 @@ public class REGERFloatDecreaseTest { data.add(combine2Int(0, value0)); while (loader.readRecord()) { - int time_tmp = Integer.parseInt(loader.getValues()[0]) - time0; - int value_tmp = Integer.parseInt(loader.getValues()[1]); - data.add(combine2Int(time_tmp, value_tmp)); - } - ArrayList<Integer> result2 = new ArrayList<>(); - splitTimeStamp3(data, result2); - - - long[] data2_arr = new long[data.size()]; - for (int i = 0; i < data.size(); i++) { - data2_arr[i] = data.get(i); - } - for (int segment_size_exp = 6; segment_size_exp > 2; segment_size_exp--) { - int segment_size = (int) Math.pow(2, segment_size_exp); - System.out.println(segment_size); - - byte[] encoded_result = new byte[data2_arr.length * 12]; - long encodeTime = 0; - long decodeTime = 0; - double ratio = 0; - double compressed_size = 0; - int repeatTime2 = 1; - long s = System.nanoTime(); - int length = 0; - for (int repeat = 0; repeat < repeatTime2; repeat++) - length = - ReorderingRegressionEncoder( - data2_arr, - dataset_block_size.get(file_i), - dataset_third.get(file_i), - segment_size, - encoded_result); - long e = System.nanoTime(); - encodeTime += ((e - s) / repeatTime2); - compressed_size += length; - double ratioTmp = compressed_size / (double) (data.size() * Integer.BYTES * 2); - ratio += ratioTmp; - s = System.nanoTime(); - for (int repeat = 0; repeat < repeatTime2; repeat++) REGERDecoder(encoded_result); - e = System.nanoTime(); - decodeTime += ((e - s) / repeatTime2); - - String[] record = { - f.toString(), - "REGER-32-FLOAT", - String.valueOf(encodeTime), - String.valueOf(decodeTime), - String.valueOf(data.size()), - String.valueOf(compressed_size), - String.valueOf(segment_size_exp), - String.valueOf(ratio) - }; - writer.writeRecord(record); - System.out.println(ratio); - } - } - writer.close(); - } - } - - - public static void REGERCorrect() throws IOException { - - String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/iotdb/iotdb-core/tsfile/src/test/resources/"; - String input_parent_dir = parent_dir + "trans_data/"; - ArrayList<String> input_path_list = new ArrayList<>(); - ArrayList<String> dataset_name = new ArrayList<>(); - ArrayList<Integer> dataset_block_size = new ArrayList<>(); - ArrayList<int[]> dataset_third = new ArrayList<>(); - dataset_name.add("CS-Sensors"); - dataset_name.add("Metro-Traffic"); - dataset_name.add("USGS-Earthquakes"); - dataset_name.add("YZ-Electricity"); - dataset_name.add("GW-Magnetic"); - dataset_name.add("TY-Fuel"); - dataset_name.add("Cyber-Vehicle"); - dataset_name.add("Vehicle-Charge"); - dataset_name.add("Nifty-Stocks"); - dataset_name.add("TH-Climate"); - dataset_name.add("TY-Transport"); - dataset_name.add("EPM-Education"); - dataset_name.add("FANYP-Sensors"); - dataset_name.add("TRAJET-Transport"); - - int[] dataset_0 = {547, 2816}; - int[] dataset_1 = {1719, 3731}; - int[] dataset_2 = {-48, -11, 6, 25, 52}; - int[] dataset_3 = {8681, 13584}; - int[] dataset_4 = {79, 184, 274}; - int[] dataset_5 = {17, 68}; - int[] dataset_6 = {677}; - int[] dataset_7 = {1047, 1725}; - int[] dataset_8 = {227, 499, 614, 1013}; - int[] dataset_9 = {474, 678}; - int[] dataset_10 = {4, 30, 38, 49, 58}; - int[] dataset_11 = {5182, 8206}; - int[] dataset_12 = {0}; - int[] dataset_13 = {0}; - - dataset_third.add(dataset_0); - dataset_third.add(dataset_1); - dataset_third.add(dataset_2); - dataset_third.add(dataset_3); - dataset_third.add(dataset_4); - dataset_third.add(dataset_5); - dataset_third.add(dataset_6); - dataset_third.add(dataset_7); - dataset_third.add(dataset_8); - dataset_third.add(dataset_9); - dataset_third.add(dataset_10); - dataset_third.add(dataset_11); - dataset_third.add(dataset_12); - dataset_third.add(dataset_13); - - for (String value : dataset_name) { - input_path_list.add(input_parent_dir + value); - dataset_block_size.add(128); - } - - -// for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - for (int file_i = 3; file_i < 4; file_i++) { - String inputPath = input_path_list.get(file_i); - - File file = new File(inputPath); - File[] tempList = file.listFiles(); - - - assert tempList != null; - - for (File f : tempList) { -// f = tempList[2]; - - System.out.println(f); - InputStream inputStream = Files.newInputStream(f.toPath()); - CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8); -// ArrayList<ArrayList<Integer>> data = new ArrayList<>(); - ArrayList<Long> data = new ArrayList<>(); - - // add a column to "data" - loader.readHeaders(); - loader.readRecord(); - int time0 = Integer.parseInt(loader.getValues()[0]); - int value0 = Integer.parseInt(loader.getValues()[1]); - data.add(combine2Int(0, value0)); - while (loader.readRecord()) { int time_tmp = Integer.parseInt(loader.getValues()[0]) - time0; int value_tmp = Integer.parseInt(loader.getValues()[1]); data.add(combine2Int(time_tmp, value_tmp)); } + inputStream.close(); ArrayList<Integer> result2 = new ArrayList<>(); splitTimeStamp3(data, result2); - int num_of_points = data.size(); - int[][] correctness_test_data = new int[num_of_points][2]; - long[] data2_arr = new long[num_of_points]; + int number_test = data.size(); + long[] data2_arr = new long[number_test]; + int[][] correctness_test_data = new int[number_test][2]; + - for (int i = 0; i < num_of_points; i++) { + for (int i = 0; i < number_test; i++) { data2_arr[i] = data.get(i); - correctness_test_data[i][0] = getTime(data2_arr[i]); - correctness_test_data[i][1] = getValue(data2_arr[i]); -// System.out.println(getValue(data2_arr[i])); +// correctness_test_data[i][0] = getTime(data2_arr[i]); +// correctness_test_data[i][1] = getValue(data2_arr[i]); } - +// System.out.println(Arrays.deepToString(correctness_test_data)); byte[] encoded_result = new byte[data2_arr.length * 8]; + long encodeTime = 0; + long decodeTime = 0; double ratio = 0; double compressed_size = 0; int repeatTime2 = 1; + long s = System.nanoTime(); + int[] best_order = new int[3]; int length = 0; -// System.out.println(Arrays.deepToString(correctness_test_data)); for (int repeat = 0; repeat < repeatTime2; repeat++) - length = ReorderingRegressionEncoder(data2_arr, dataset_block_size.get(file_i), dataset_third.get(file_i), 8, encoded_result); + length = ReorderingRegressionEncoderDecrease(data2_arr, dataset_block_size.get(file_i), dataset_third.get(file_i), 16, encoded_result); + long e = System.nanoTime(); + encodeTime += ((e - s) / repeatTime2); compressed_size += length; double ratioTmp = compressed_size / (double) (data.size() * Integer.BYTES * 2); ratio += ratioTmp; - int[][] decode_result = new int[num_of_points][2]; + s = System.nanoTime(); + int[][] decode_result = new int[number_test][2]; for (int repeat = 0; repeat < repeatTime2; repeat++) decode_result = REGERDecoder(encoded_result); - Arrays.sort(decode_result, (a, b) -> { - if (a[0] == b[0]) return Integer.compare(a[1], b[1]); - return Integer.compare(a[0], b[0]); - }); +// Arrays.sort(decode_result, (a, b) -> { +// if (a[0] == b[0]) return Integer.compare(a[1], b[1]); +// return Integer.compare(a[0], b[0]); +// }); // System.out.println(Arrays.deepToString(decode_result)); + e = System.nanoTime(); + decodeTime += ((e - s) / repeatTime2); + + + String[] record = { + f.toString(), + "REGER-Decrease", + String.valueOf(encodeTime), + String.valueOf(decodeTime), + String.valueOf(data.size()), + String.valueOf(compressed_size), + String.valueOf(ratio), +// String.valueOf(best_order[0]), +// String.valueOf(best_order[1]), +// String.valueOf(best_order[2]) + }; + writer.writeRecord(record); +// System.out.println(Arrays.toString(best_order)); System.out.println(ratio); // break; } - + writer.close(); } } + } diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERFloatTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERFloatTest.java index 1114b24d36f..fbd52cf1034 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERFloatTest.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERFloatTest.java @@ -21,6 +21,11 @@ public class REGERFloatTest { else return 32 - Integer.numberOfLeadingZeros(num); } + public static int zigzag(int num){ + if(num < 0) return 2*(-num) - 1; + else return 2*num; + } + public static boolean containsValue(int[] array, int targetValue) { for (int value : array) { if (value == targetValue) { @@ -1646,7 +1651,7 @@ public class REGERFloatTest { int max_interval_segment = Integer.MIN_VALUE; int max_value_segment = Integer.MIN_VALUE; int length = 0; - long delta_time = combine2Int(timestamp_delta_min, value_delta_min); +// long delta_time = combine2Int(timestamp_delta_min, value_delta_min); // for (int i = block_size - 1; i > 0; i--) { for (int i = 1; i < block_size; i++) { tmp_j = ts_block_delta[i]; @@ -1725,6 +1730,153 @@ public class REGERFloatTest { // } + } + public static long[] getEncodeBitsRegressionNoTrain1( + long[] ts_block, + int block_size, + int[] raw_length, + float[] theta, + int segment_size) { + + long[] ts_block_delta = new long[ts_block.length]; + + float theta0_r = theta[0]; + float theta1_r = theta[1]; + float theta0_v = theta[2]; + float theta1_v = theta[3]; + + ts_block_delta[0] = ts_block[0]; + int timestamp_delta_min = Integer.MAX_VALUE; + int value_delta_min = Integer.MAX_VALUE; + int max_interval = Integer.MIN_VALUE; + int max_value = Integer.MIN_VALUE; + int max_interval_segment = Integer.MIN_VALUE; + int max_value_segment = Integer.MIN_VALUE; + int length = 0; + + int j = 1; + long tmp_j_1 = ts_block[0]; + long tmp_j; + while (j < block_size) { + tmp_j = ts_block[j]; + + int epsilon_r_j = + zigzag( getTime(tmp_j) + - (int) (theta0_r + theta1_r * (float) getTime(tmp_j_1))); + int epsilon_v_j = + zigzag( getValue(tmp_j) + - (int) (theta0_v + theta1_v * (float) getValue(tmp_j_1))); + if (epsilon_r_j > max_interval) { + max_interval = epsilon_r_j; + } + if (epsilon_v_j > max_value) { + max_value = epsilon_v_j; + } + + if (epsilon_r_j > max_interval_segment) { + max_interval_segment = epsilon_r_j; + } + if (epsilon_v_j > max_value_segment) { + max_value_segment = epsilon_v_j; + } +// if (epsilon_r_j < timestamp_delta_min) { +// timestamp_delta_min = epsilon_r_j; +// } +// if (epsilon_v_j < value_delta_min) { +// value_delta_min = epsilon_v_j; +// } + ts_block_delta[j] = combine2Int(epsilon_r_j, epsilon_v_j); + + if (j % segment_size == 0) { + length += getBitWith(max_interval_segment) * segment_size; + length += getBitWith(max_value_segment) * segment_size; + max_interval_segment = Integer.MIN_VALUE; + max_value_segment = Integer.MIN_VALUE; + } + + tmp_j_1 = tmp_j; + j++; + } + + + +// for (int i = 1; i < block_size; i++) { +// tmp_j = ts_block_delta[i]; +// int epsilon_r = getTime(tmp_j) - timestamp_delta_min; +// int epsilon_v = getValue(tmp_j) - value_delta_min; +// +// ts_block_delta[i] = combine2Int(epsilon_r, epsilon_v); +// +// if (epsilon_r > max_interval) { +// max_interval = epsilon_r; +// } +// if (epsilon_v > max_value) { +// max_value = epsilon_v; +// } +// +// if (epsilon_r > max_interval_segment) { +// max_interval_segment = epsilon_r; +// } +// if (epsilon_v > max_value_segment) { +// max_value_segment = epsilon_v; +// } +// if (i % segment_size == 0) { +// length += getBitWith(max_interval_segment) * segment_size; +// length += getBitWith(max_value_segment) * segment_size; +// max_interval_segment = Integer.MIN_VALUE; +// max_value_segment = Integer.MIN_VALUE; +// } +// } + + int max_bit_width_interval = getBitWith(max_interval); + int max_bit_width_value = getBitWith(max_value); + +//System.out.println("--------------------------------------------------"); + + raw_length[0] = length; + raw_length[1] = max_bit_width_interval; + raw_length[2] = max_bit_width_value; + raw_length[3] = timestamp_delta_min; + raw_length[4] = value_delta_min; + +// printTSBlock(ts_block_delta); + return ts_block_delta; + +// int[][] ts_block_delta_segment = new int[block_size][2]; +// int pos_ts_block_delta_segment = 0; +// int[] tmp_segment = new int[2]; +// int max_interval_segment = Integer.MIN_VALUE; +// int max_value_segment = Integer.MIN_VALUE; +// tmp_segment[0] = max_interval_segment; +// tmp_segment[1] = max_value_segment; +// +// +// if (epsilon_r > max_interval_segment) { +// max_interval_segment = epsilon_r; +// tmp_segment[0] = max_interval_segment; +// } +// if (epsilon_v > max_value_segment) { +// max_value_segment = epsilon_v; +// tmp_segment[1] = max_value_segment; +// } +// if (j % segment_size == 0) { +// ts_block_delta_segment[pos_ts_block_delta_segment][0] = tmp_segment[0]; +// ts_block_delta_segment[pos_ts_block_delta_segment][1] = tmp_segment[1]; +// pos_ts_block_delta_segment++; +// tmp_segment = new int[2]; +// max_interval_segment = Integer.MIN_VALUE; +// max_value_segment = Integer.MIN_VALUE; +// tmp_segment[0] = max_interval_segment; +// tmp_segment[1] = max_value_segment; +// } +// +// +// for (int j = 0; j < pos_ts_block_delta_segment; j++) { +// length += getBitWith(ts_block_delta_segment[j][0] - timestamp_delta_min); +// length += getBitWith(ts_block_delta_segment[j][1] - value_delta_min); +// } + + } public static int getBeta( @@ -2247,8 +2399,8 @@ public class REGERFloatTest { pos_encode++; intByte2Bytes(bit_width_time[1], pos_encode, encoded_result); pos_encode++; - } +// pos_encode += Math.ceil((double) (pos_time*5)/8); for (int i = 0; i < pos_value; i++) { int[] bit_width_value = run_length_value[i]; intByte2Bytes(bit_width_value[0], pos_encode, encoded_result); @@ -2257,6 +2409,7 @@ public class REGERFloatTest { pos_encode++; } +// pos_encode += Math.ceil((double) (pos_value*5)/8); return pos_encode; } @@ -2337,15 +2490,12 @@ public class REGERFloatTest { float2bytes(theta[3], pos_encode, encoded_result); pos_encode += 4; - int2Bytes( raw_length[3], pos_encode, encoded_result); + int2Bytes(raw_length[3], pos_encode, encoded_result); pos_encode += 4; int2Bytes(raw_length[4], pos_encode, encoded_result); pos_encode += 4; -// System.out.println(Arrays.toString(theta)); - - pos_encode = encodeRLEBitWidth2Bytes(bit_width_segments, pos_encode, encoded_result); // printTSBlock(bit_width_segments); @@ -2511,7 +2661,7 @@ public class REGERFloatTest { private static int REGERBlockEncoder(long[] data, int i, int block_size, int supply_length, int[] third_value, int segment_size, int encode_pos, byte[] cur_byte, int[] block_sort) { - long min_time = (long) getTime(data[i * block_size]) << 32; +// long min_time = (long) getTime(data[i * block_size]) << 32; long[] ts_block; long[] ts_block_value; long[] ts_block_partition; @@ -2526,7 +2676,7 @@ public class REGERFloatTest { // System.out.println((data[i * block_size+1])); // System.out.println(getTime(data[i * block_size+1])); for (int j = 0; j < block_size; j++) { - long tmp_j = data[j + i * block_size] ;//- min_time; + long tmp_j = data[j + i * block_size];//- min_time; // System.out.println(getTime(data[j + i * block_size])); // System.out.println(getTime(data[i * block_size])); ts_block[j] = tmp_j; @@ -2577,7 +2727,7 @@ public class REGERFloatTest { // int min_value = Integer.MAX_VALUE; for (int j = 0; j < end; j++) { - long tmp_j = data[j + i * block_size] ;//- min_time; + long tmp_j = data[j + i * block_size];// - min_time; ts_block[j] = tmp_j; ts_block_value[j] = combine2Int(getValue(tmp_j), getTime(tmp_j)); @@ -3054,7 +3204,8 @@ public class REGERFloatTest { // String parent_dir = "C:/Users/xiaoj/Desktop/test"; String parent_dir = "/Users/xiaojinzhao/Documents/GitHub/iotdb/iotdb-core/tsfile/src/test/resources/"; String output_parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/reger_float"; - + int pack_size = 16; + int block_size = 512; String input_parent_dir = parent_dir + "trans_data/"; ArrayList<String> input_path_list = new ArrayList<>(); @@ -3111,7 +3262,7 @@ public class REGERFloatTest { for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); dataset_k.add(1); - dataset_block_size.add(1024); + dataset_block_size.add(block_size); } output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 @@ -3140,9 +3291,9 @@ public class REGERFloatTest { output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 - int[] file_lists = {5,6,8,10}; - for (int file_i : file_lists) { -// for (int file_i = 0; file_i < input_path_list.size(); file_i++) { +// int[] file_lists = {4}; +// for (int file_i : file_lists) { + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { // for (int file_i = 12; file_i < 14; file_i++) { String inputPath = input_path_list.get(file_i); String Output = output_path_list.get(file_i); @@ -3204,12 +3355,12 @@ public class REGERFloatTest { long decodeTime = 0; double ratio = 0; double compressed_size = 0; - int repeatTime2 = 200; + int repeatTime2 = 100; long s = System.nanoTime(); int[] best_order = new int[3]; int length = 0; for (int repeat = 0; repeat < repeatTime2; repeat++) - length = ReorderingRegressionEncoder(data2_arr, dataset_block_size.get(file_i), dataset_third.get(file_i), 8, encoded_result); + length = ReorderingRegressionEncoder(data2_arr, dataset_block_size.get(file_i), dataset_third.get(file_i), pack_size, encoded_result); long e = System.nanoTime(); encodeTime += ((e - s) / repeatTime2); compressed_size += length; @@ -3251,6 +3402,7 @@ public class REGERFloatTest { "/Users/xiaojinzhao/Documents/GitHub/iotdb/iotdb-core/tsfile/src/test/resources/"; String output_parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/block_size_float"; + int pack_size = 16; String input_parent_dir = parent_dir + "trans_data/"; ArrayList<String> input_path_list = new ArrayList<>(); @@ -3321,9 +3473,9 @@ public class REGERFloatTest { output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 - int[] file_lists = {5,6,8,10}; - for (int file_i : file_lists) { -// for (int file_i = 0; file_i < input_path_list.size(); file_i++) { +// int[] file_lists = {5,6,8,10}; +// for (int file_i : file_lists) { + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { // for (int file_i = 12; file_i < 14; file_i++) { String inputPath = input_path_list.get(file_i); String Output = output_path_list.get(file_i); @@ -3394,7 +3546,7 @@ public class REGERFloatTest { data2_arr, block_size, dataset_third.get(file_i), - 8, + pack_size, encoded_result); long e = System.nanoTime(); encodeTime += ((e - s) / repeatTime2); @@ -3432,6 +3584,8 @@ public class REGERFloatTest { String output_parent_dir = "/Users/xiaojinzhao/Documents/GitHub/encoding-reorder/compression_ratio/pack_size_float"; + int block_size = 512; + String input_parent_dir = parent_dir + "trans_data/"; ArrayList<String> input_path_list = new ArrayList<>(); ArrayList<String> output_path_list = new ArrayList<>(); @@ -3487,7 +3641,7 @@ public class REGERFloatTest { for (String value : dataset_name) { input_path_list.add(input_parent_dir + value); dataset_k.add(1); - dataset_block_size.add(1024); + dataset_block_size.add(block_size); } output_path_list.add(output_parent_dir + "/CS-Sensors_ratio.csv"); // 0 @@ -3518,9 +3672,9 @@ public class REGERFloatTest { output_path_list.add(output_parent_dir + "/FANYP-Sensors_ratio.csv"); // 12 output_path_list.add(output_parent_dir + "/TRAJET-Transport_ratio.csv"); // 13 - int[] file_lists = {5,6,8,10}; - for (int file_i : file_lists) { -// for (int file_i = 0; file_i < input_path_list.size(); file_i++) { +// int[] file_lists = {5,6,8,10}; +// for (int file_i : file_lists) { + for (int file_i = 0; file_i < input_path_list.size(); file_i++) { // for (int file_i = 12; file_i < 14; file_i++) { String inputPath = input_path_list.get(file_i); String Output = output_path_list.get(file_i); @@ -3573,7 +3727,7 @@ public class REGERFloatTest { for (int i = 0; i < data.size(); i++) { data2_arr[i] = data.get(i); } - for (int segment_size_exp = 6; segment_size_exp > 2; segment_size_exp--) { + for (int segment_size_exp = 8; segment_size_exp > 2; segment_size_exp--) { int segment_size = (int) Math.pow(2, segment_size_exp); System.out.println(segment_size); diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RegerPDoubleTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RegerPDoubleTest.java index 9731f616cb9..f827565a802 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RegerPDoubleTest.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RegerPDoubleTest.java @@ -1340,7 +1340,6 @@ public class RegerPDoubleTest { // pos_encode +=Math.ceil((double)(12+precision[pre])/(double)(4)); - for (int i = 2; i < theta.length; i++) { double2bytes(theta[i], pos_encode, encoded_result); pos_encode += 8; @@ -2166,10 +2165,10 @@ public class RegerPDoubleTest { // dataset_block_size.add(512); -// int[] file_lists = {0,2,6,7}; // -// for (int file_i : file_lists) { + int[] file_lists = {1,9}; // + for (int file_i : file_lists) { // for (int file_i = 0; file_i < input_path_list.size(); file_i++) { - for (int file_i = 13; file_i < 14; file_i++) { +// for (int file_i = 13; file_i < 14; file_i++) { String inputPath = input_path_list.get(file_i); String Output = output_path_list.get(file_i); diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RegerPFloatTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RegerPFloatTest.java index a9f762da8f9..bae409913f0 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RegerPFloatTest.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/RegerPFloatTest.java @@ -2513,7 +2513,7 @@ public class RegerPFloatTest { // 0 2 6 7 - int[] file_lists = {13}; + int[] file_lists = {1,9}; for (int file_i : file_lists) { // for (int file_i = 0; file_i < input_path_list.size(); file_i++) { // for (int file_i = 12; file_i < 14; file_i++) {
