jack870131 commented on a change in pull request #231: [IOTDB-73]Add new 
encoding method for regular timestamp column
URL: https://github.com/apache/incubator-iotdb/pull/231#discussion_r303744315
 
 

 ##########
 File path: 
tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/RegularDataEncoder.java
 ##########
 @@ -0,0 +1,438 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.encoder;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.BitSet;
+
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+import org.apache.iotdb.tsfile.utils.BytesUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * <p>RegularDataEncoder is an encoder for compressing data of type integer and long. We adopt
+ * the hypothesis that the difference between consecutive data points is the same, which means
+ * the data is regular.</p>
+ * <p>To encode the regular data, we first create an array as a block to store the data loaded
+ * into the encoder. Once it reaches the default block size, we start calculating the delta
+ * between consecutive data points in this block in order to check whether any points are
+ * missing from the data. If there are, we create a bitmap for this block to denote the positions
+ * of the missing points. Next, we store the data info (the data size, the minimum delta value
+ * and the first data point of this block) and the bitmap with its info into the result byte
+ * array output stream.</p>
+ *
+ * @author tsunghantsai
+ */
+public abstract class RegularDataEncoder extends Encoder {
+
+  protected static final int BLOCK_DEFAULT_SIZE = 128;
+  private static final Logger LOGGER = 
LoggerFactory.getLogger(RegularDataEncoder.class);
+  protected ByteArrayOutputStream out;
+  protected int blockSize;
+
+  protected int writeIndex = -1;
+
+  /**
+   * constructor of RegularDataEncoder.
+   *
+   * @param size - the number of values to be packed into a block.
+   */
+  public RegularDataEncoder(int size) {
+    super(TSEncoding.REGULAR);
+    blockSize = size;
+  }
+
+  protected abstract void writeHeader() throws IOException;
+
+  protected abstract void reset();
+
+  protected abstract void flushBlockBuffer(ByteArrayOutputStream out) throws 
IOException;
+
+  protected void writeHeaderToBytes() throws IOException {
+    out.write(BytesUtils.intToBytes(writeIndex));
+    writeHeader();
+  }
+
+  /**
+   * Call this method to flush all values that have not yet been encoded to the result byte
+   * array.
+   */
+  @Override
+  public void flush(ByteArrayOutputStream out) {
+    try {
+      flushBlockBuffer(out);
+    } catch (IOException e) {
+      LOGGER.error("flush data to stream failed!", e);
+    }
+  }
+
+  public static class IntRegularEncoder extends RegularDataEncoder {
+
+    private int[] data;
+    private int[] missingPointData;
+    private int[] regularData;
+    private int firstValue;
+    private int previousValue;
+    private int minDeltaBase;
+    private boolean isMissingPoint;
+    private boolean isLastPack;
+    private BitSet bitmap;
+
+    public IntRegularEncoder() {
+      this(BLOCK_DEFAULT_SIZE);
+    }
+
+    /**
+     * constructor of IntRegularEncoder.
+     *
+     * @param size - the number of values to be packed into a block.
+     */
+    public IntRegularEncoder(int size) {
+      super(size);
+      reset();
+    }
+
+    @Override
+    protected void flushBlockBuffer(ByteArrayOutputStream out) throws 
IOException {
+      if (writeIndex == -1) {
+        return;
+      }
+
+      this.out = out;
+      // write last pack
+      if (writeIndex < blockSize) {
+        isLastPack = true;
+        checkMissingPoint(out);
+      }
+      // write identifier
+      out.write(BytesUtils.boolToBytes(isMissingPoint));
+      // write bitmap if missing points exist
+      if (isMissingPoint) {
+        writeBitmap(out);
+      }
+      // write header
+      writeHeaderToBytes();
+
+      reset();
+      writeIndex = -1;
+    }
+
+    @Override
+    protected void reset() {
+      blockSize = BLOCK_DEFAULT_SIZE;
+      minDeltaBase = Integer.MAX_VALUE;
+      isMissingPoint = false;
+      isLastPack = false;
+      firstValue = 0;
+      previousValue = 0;
+    }
+
+    @Override
+    protected void writeHeader() throws IOException {
+      out.write(BytesUtils.intToBytes(minDeltaBase));
+      out.write(BytesUtils.intToBytes(firstValue));
+    }
+
+    @Override
+    public void encode(int value, ByteArrayOutputStream out) {
+      try {
+        encodeValue(value, out);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    @Override
+    public int getOneItemMaxSize() {
+      return 4;
+    }
+
+    @Override
+    public long getMaxByteSize() {
+      // The meaning of 20 is:
+      // identifier(4)+bitmapLength(4)+index(4)+minDeltaBase(4)+firstValue(4)
+      return (long) 20 + (writeIndex * 2 / 8) + (writeIndex * 4);
+    }
+
+    /**
+     * input an integer value.
+     *
+     * @param value value to encode
+     * @param out   - the ByteArrayOutputStream which data encode into
+     */
+    public void encodeValue(int value, ByteArrayOutputStream out) throws 
IOException {
+      if (writeIndex == -1) {
+        data = new int[blockSize];
+        writeIndex = 0;
+      }
+      data[writeIndex++] = value;
+      if (writeIndex == blockSize) {
+        checkMissingPoint(out);
+      }
+    }
+
+    private void checkMissingPoint(ByteArrayOutputStream out) throws 
IOException {
+      // get the new regular data if a missing point exists in the original data
+      data = getRegularData(data);
+      firstValue = data[0];
+      if (isMissingPoint) {
+        writeIndex = data.length;
+      }
+      if (!isLastPack) {
+        flush(out);
+      }
+    }
+
+    private void writeBitmap(ByteArrayOutputStream out) throws IOException {
+      // generate bitmap
+      data2Diff(missingPointData);
+      byte[] bsArr = bitmap.toByteArray();
+      out.write(BytesUtils.intToBytes(bsArr.length));
+      out.write(bsArr);
+    }
+
+    private int[] getRegularData(int[] data) {
+      if (writeIndex > 1) {
+        previousValue = data[0];
+        minDeltaBase = data[1] - data[0];
+        // calculate the minimum delta of the data and check whether a missing point exists
+        for (int i = 1; i < writeIndex; i++) {
+          calcDelta(data[i]);
+          previousValue = data[i];
+        }
+      }
+      // generate the continuous data when a missing point exists
+      if (isMissingPoint) {
+        generateRegularData(data);
+        return regularData;
+      }
+      return data;
+    }
+
+    private void generateRegularData(int[] data) {
+      missingPointData = data;
+      blockSize = (int) (((data[writeIndex - 1] - data[0]) / minDeltaBase) + 
1);
+      regularData = new int[blockSize];
+      for (int i = 0; i < blockSize; i++) {
+        regularData[i] = (data[0] + minDeltaBase * i);
 
 Review comment:
   Done. Thx.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to