kr11 commented on a change in pull request #231: [IOTDB-73]Add new encoding method for regular timestamp column URL: https://github.com/apache/incubator-iotdb/pull/231#discussion_r303724241
########## File path: tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/RegularDataEncoder.java ########## @@ -0,0 +1,438 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iotdb.tsfile.encoding.encoder; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.BitSet; + +import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding; +import org.apache.iotdb.tsfile.utils.BytesUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * <p> RegularDataEncoder is an encoder for compressing data in type of integer and long. We adapt a + * hypothesis that the difference between each data point is the same, which it means the data is + * regular. </p> <p>To encode the regular data, we first create an array as a block to store the data + * loaded into the encoder. While it reach the default block size, start calculating the delta between + * each data point in this block in order to checkout whether there are missing points exist in the data. + * If there is, create a bitmap for this block to denote the position of missing points. 
Next, store + * the data info (the data size, the minimum delta value and the first data point of this block) and the + * bitmap with its info into the result byte array output stream.</p> + * + * @author tsunghantsai + */ +public abstract class RegularDataEncoder extends Encoder { + + protected static final int BLOCK_DEFAULT_SIZE = 128; + private static final Logger LOGGER = LoggerFactory.getLogger(RegularDataEncoder.class); + protected ByteArrayOutputStream out; + protected int blockSize; + + protected int writeIndex = -1; + + /** + * constructor of RegularDataEncoder. + * + * @param size - the number how many numbers to be packed into a block. + */ + public RegularDataEncoder(int size) { + super(TSEncoding.REGULAR); + blockSize = size; + } + + protected abstract void writeHeader() throws IOException; + + protected abstract void reset(); + + protected abstract void flushBlockBuffer(ByteArrayOutputStream out) throws IOException; + + protected void writeHeaderToBytes() throws IOException { + out.write(BytesUtils.intToBytes(writeIndex)); + writeHeader(); + } + + /** + * calling this method to flush all values which haven't encoded to result byte array. + */ + @Override + public void flush(ByteArrayOutputStream out) { + try { + flushBlockBuffer(out); + } catch (IOException e) { + LOGGER.error("flush data to stream failed!", e); + } + } + + public static class IntRegularEncoder extends RegularDataEncoder { + + private int[] data; + private int[] missingPointData; + private int[] regularData; + private int firstValue; + private int previousValue; + private int minDeltaBase; + private boolean isMissingPoint; + private boolean isLastPack; + private BitSet bitmap; + + public IntRegularEncoder() { + this(BLOCK_DEFAULT_SIZE); + } + + /** + * constructor of RegularDataEncoder. + * + * @param size - the number how many numbers to be packed into a block. 
+ */ + public IntRegularEncoder(int size) { + super(size); + reset(); + } + + @Override + protected void flushBlockBuffer(ByteArrayOutputStream out) throws IOException { + if (writeIndex == -1) { + return; + } + + this.out = out; + // write last pack + if (writeIndex < blockSize) { + isLastPack = true; + checkMissingPoint(out); + } + // write identifier + out.write(BytesUtils.boolToBytes(isMissingPoint)); + // write bitmap if missing points exist + if (isMissingPoint) { + writeBitmap(out); + } + // write header + writeHeaderToBytes(); + + reset(); + writeIndex = -1; + } + + @Override + protected void reset() { + blockSize = BLOCK_DEFAULT_SIZE; + minDeltaBase = Integer.MAX_VALUE; + isMissingPoint = false; + isLastPack = false; Review comment: `isLastPack` is a little confusing. In the current code, `flush` might be invoked by either `checkMissingPoint` or `flushBlockBuffer`, depending on `isLastPack`, and `checkMissingPoint` may itself be called from `flushBlockBuffer`, so the control flow is somewhat nested. An alternative design, for reference: 1. Remove the nesting, so that `checkMissingPoint` is only called via `flush -> flushBlockBuffer`. 2. Whenever `writeIndex == blockSize` or `flush` is invoked directly (the "last pack" case), enter `flushBlockBuffer` and run `checkMissingPoint` there. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
