http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java index 46d6787..83b1ca7 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java @@ -76,13 +76,8 @@ public abstract class VarLengthColumnPageBase extends ColumnPage { throw new UnsupportedOperationException("invalid data type: " + dataType); } - @Override - public void setByteArrayPage(byte[][] byteArray) { - throw new UnsupportedOperationException("invalid data type: " + dataType); - } - /** - * Create a new column page based on the LV (Length Value) encoded bytes + * Create a new column page for decimal data from the LV (Length Value) encoded bytes */ static ColumnPage newDecimalColumnPage(byte[] lvEncodedBytes, int scale, int precision) throws MemoryException { @@ -90,7 +85,7 @@ public abstract class VarLengthColumnPageBase extends ColumnPage { DecimalConverterFactory.INSTANCE.getDecimalConverter(precision, scale); int size = decimalConverter.getSize(); if (size < 0) { - return getLVBytesColumnPage(lvEncodedBytes, scale, precision, DataType.DECIMAL); + return getLVBytesColumnPage(lvEncodedBytes, DataType.DECIMAL); } else { // Here the size is always fixed. 
return getDecimalColumnPage(lvEncodedBytes, scale, precision, size); @@ -100,9 +95,9 @@ public abstract class VarLengthColumnPageBase extends ColumnPage { /** * Create a new column page based on the LV (Length Value) encoded bytes */ - static ColumnPage newVarLengthColumnPage(byte[] lvEncodedBytes, int scale, int precision) + static ColumnPage newLVBytesColumnPage(byte[] lvEncodedBytes) throws MemoryException { - return getLVBytesColumnPage(lvEncodedBytes, scale, precision, DataType.BYTE_ARRAY); + return getLVBytesColumnPage(lvEncodedBytes, DataType.BYTE_ARRAY); } private static ColumnPage getDecimalColumnPage(byte[] lvEncodedBytes, int scale, int precision, @@ -135,8 +130,8 @@ public abstract class VarLengthColumnPageBase extends ColumnPage { return page; } - private static ColumnPage getLVBytesColumnPage(byte[] lvEncodedBytes, int scale, - int precision, DataType dataType) throws MemoryException { + private static ColumnPage getLVBytesColumnPage(byte[] lvEncodedBytes, DataType dataType) + throws MemoryException { // extract length and data, set them to rowOffset and unsafe memory correspondingly int rowId = 0; List<Integer> rowOffset = new ArrayList<>(); @@ -160,9 +155,9 @@ public abstract class VarLengthColumnPageBase extends ColumnPage { VarLengthColumnPageBase page; int inputDataLength = offset; if (unsafe) { - page = new UnsafeVarLengthColumnPage(DECIMAL, numRows, inputDataLength, scale, precision); + page = new UnsafeVarLengthColumnPage(dataType, numRows, inputDataLength, -1, -1); } else { - page = new SafeVarLengthColumnPage(dataType, numRows, scale, precision); + page = new SafeVarLengthColumnPage(dataType, numRows, -1, -1); } // set total length and rowOffset in page @@ -328,7 +323,7 @@ public abstract class VarLengthColumnPageBase extends ColumnPage { } @Override - public void encode(PrimitiveCodec codec) { + public void convertValue(ColumnPageValueConverter codec) { throw new UnsupportedOperationException("invalid data type: " + dataType); } }
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/AdaptiveCompressionCodec.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/AdaptiveCompressionCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/AdaptiveCompressionCodec.java deleted file mode 100644 index 7ae606f..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/AdaptiveCompressionCodec.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.carbondata.core.datastore.page.encoding; - -import java.io.IOException; - -import org.apache.carbondata.core.datastore.compression.Compressor; -import org.apache.carbondata.core.datastore.page.ColumnPage; -import org.apache.carbondata.core.datastore.page.ComplexColumnPage; -import org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult; -import org.apache.carbondata.core.memory.MemoryException; -import org.apache.carbondata.core.metadata.datatype.DataType; - -/** - * Subclass of this codec depends on statistics of the column page (adaptive) to perform apply - * and decode, it also employs compressor to compress the encoded data - */ -public abstract class AdaptiveCompressionCodec implements ColumnPageCodec { - - // TODO: cache and reuse the same encoder since snappy is thread-safe - - // compressor that can be used by subclass - protected final Compressor compressor; - - // statistics of this page, can be used by subclass - protected final SimpleStatsResult stats; - - // the data type used for storage - protected final DataType targetDataType; - - // the data type specified in schema - protected final DataType srcDataType; - - protected AdaptiveCompressionCodec(DataType srcDataType, DataType targetDataType, - SimpleStatsResult stats, Compressor compressor) { - this.stats = stats; - this.srcDataType = srcDataType; - this.targetDataType = targetDataType; - this.compressor = compressor; - } - - public abstract String getName(); - - public abstract EncodedColumnPage encode(ColumnPage input) throws MemoryException, IOException; - - public abstract ColumnPage decode(byte[] input, int offset, int length) throws MemoryException; - - @Override - public EncodedColumnPage[] encodeComplexColumn(ComplexColumnPage input) { - throw new UnsupportedOperationException("internal error"); - } - - @Override - public String toString() { - return String.format("%s[src type: %s, target type: %s, stats(%s)]", - getClass().getName(), srcDataType, 
targetDataType, stats); - } - - protected String debugInfo() { - return this.toString(); - } -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/AdaptiveIntegralCodec.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/AdaptiveIntegralCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/AdaptiveIntegralCodec.java deleted file mode 100644 index 6480bcd..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/AdaptiveIntegralCodec.java +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.carbondata.core.datastore.page.encoding; - -import java.io.IOException; - -import org.apache.carbondata.core.datastore.compression.Compressor; -import org.apache.carbondata.core.datastore.page.ColumnPage; -import org.apache.carbondata.core.datastore.page.LazyColumnPage; -import org.apache.carbondata.core.datastore.page.PrimitiveCodec; -import org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult; -import org.apache.carbondata.core.memory.MemoryException; -import org.apache.carbondata.core.metadata.CodecMetaFactory; -import org.apache.carbondata.core.metadata.datatype.DataType; - -/** - * Codec for integer (byte, short, int, long) data type page. - * This codec will do type casting on page data to make storage minimum. - */ -class AdaptiveIntegralCodec extends AdaptiveCompressionCodec { - - private ColumnPage encodedPage; - - public static ColumnPageCodec newInstance(DataType srcDataType, DataType targetDataType, - SimpleStatsResult stats, Compressor compressor) { - return new AdaptiveIntegralCodec(srcDataType, targetDataType, stats, compressor); - } - - private AdaptiveIntegralCodec(DataType srcDataType, DataType targetDataType, - SimpleStatsResult stats, Compressor compressor) { - super(srcDataType, targetDataType, stats, compressor); - } - - @Override - public String getName() { - return "AdaptiveIntegralCodec"; - } - - @Override - public EncodedColumnPage encode(ColumnPage input) throws MemoryException, IOException { - encodedPage = ColumnPage - .newPage(targetDataType, input.getPageSize(), stats.getScale(), stats.getPrecision()); - input.encode(codec); - byte[] result = encodedPage.compress(compressor); - encodedPage.freeMemory(); - return new EncodedMeasurePage(input.getPageSize(), result, - CodecMetaFactory.createMeta(stats, targetDataType), - ((SimpleStatsResult)input.getStatistics()).getNullBits()); - } - - @Override - public ColumnPage decode(byte[] input, int offset, int length) throws MemoryException { - 
ColumnPage page = ColumnPage.decompress(compressor, targetDataType, input, offset, length, - stats.getScale(), stats.getPrecision()); - return LazyColumnPage.newPage(page, codec); - } - - // encoded value = (type cast page value to target data type) - private PrimitiveCodec codec = new PrimitiveCodec() { - @Override - public void encode(int rowId, byte value) { - switch (targetDataType) { - default: - throw new RuntimeException("internal error: " + debugInfo()); - } - } - - @Override - public void encode(int rowId, short value) { - switch (targetDataType) { - case BYTE: - encodedPage.putByte(rowId, (byte) value); - break; - default: - throw new RuntimeException("internal error: " + debugInfo()); - } - } - - @Override - public void encode(int rowId, int value) { - switch (targetDataType) { - case BYTE: - encodedPage.putByte(rowId, (byte) value); - break; - case SHORT: - encodedPage.putShort(rowId, (short) value); - break; - case SHORT_INT: - encodedPage.putShortInt(rowId, value); - break; - default: - throw new RuntimeException("internal error: " + debugInfo()); - } - } - - @Override - public void encode(int rowId, long value) { - switch (targetDataType) { - case BYTE: - encodedPage.putByte(rowId, (byte) value); - break; - case SHORT: - encodedPage.putShort(rowId, (short) value); - break; - case SHORT_INT: - encodedPage.putShortInt(rowId, (int) value); - break; - case INT: - encodedPage.putInt(rowId, (int) value); - break; - default: - throw new RuntimeException("internal error: " + debugInfo()); - } - } - - @Override - public void encode(int rowId, float value) { - switch (targetDataType) { - case BYTE: - encodedPage.putByte(rowId, (byte) value); - break; - case SHORT: - encodedPage.putShort(rowId, (short) value); - break; - case SHORT_INT: - encodedPage.putShortInt(rowId, (int) value); - break; - case INT: - encodedPage.putInt(rowId, (int) value); - break; - default: - throw new RuntimeException("internal error: " + debugInfo()); - } - } - - @Override - public 
void encode(int rowId, double value) { - switch (targetDataType) { - case BYTE: - encodedPage.putByte(rowId, (byte) value); - break; - case SHORT: - encodedPage.putShort(rowId, (short) value); - break; - case SHORT_INT: - encodedPage.putShortInt(rowId, (int) value); - break; - case INT: - encodedPage.putInt(rowId, (int) value); - break; - case LONG: - encodedPage.putLong(rowId, (long) value); - break; - default: - throw new RuntimeException("internal error: " + debugInfo()); - } - } - - @Override - public long decodeLong(byte value) { - return value; - } - - @Override - public long decodeLong(short value) { - return value; - } - - @Override - public long decodeLong(int value) { - return value; - } - - @Override - public double decodeDouble(byte value) { - return value; - } - - @Override - public double decodeDouble(short value) { - return value; - } - - @Override - public double decodeDouble(int value) { - return value; - } - - @Override - public double decodeDouble(long value) { - return value; - } - - @Override - public double decodeDouble(float value) { - throw new RuntimeException("internal error: " + debugInfo()); - } - - @Override - public double decodeDouble(double value) { - throw new RuntimeException("internal error: " + debugInfo()); - } - }; - -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageCodec.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageCodec.java index 62c6cd5..40a85d3 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageCodec.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageCodec.java @@ -17,41 +17,26 @@ package 
org.apache.carbondata.core.datastore.page.encoding; -import java.io.IOException; - -import org.apache.carbondata.core.datastore.page.ColumnPage; -import org.apache.carbondata.core.datastore.page.ComplexColumnPage; -import org.apache.carbondata.core.memory.MemoryException; +import java.util.Map; /** - * Codec for a column page data, implementation should not keep state across pages, - * caller may use the same object to apply multiple pages. + * Codec for a column page data. */ public interface ColumnPageCodec { - /** - * Codec name will be stored in BlockletHeader (DataChunk3) + * Return the codec name */ String getName(); /** - * encode a column page and return the encoded data - */ - EncodedColumnPage encode(ColumnPage input) throws MemoryException, IOException; - - /** - * encode complex column page and return the coded data - * TODO: remove this interface after complex column page is unified with column page + * Return a new Encoder which will be used to encode one column page. + * This will be called for every column page */ - EncodedColumnPage[] encodeComplexColumn(ComplexColumnPage input); + ColumnPageEncoder createEncoder(Map<String, String> parameter); /** - * decode byte array from offset to a column page - * @param input encoded byte array - * @param offset startoffset of the input to decode - * @param length length of data to decode - * @return decoded data + * Return a new Decoder with specified metadata. 
+ * This will be called for every column page */ - ColumnPage decode(byte[] input, int offset, int length) throws MemoryException, IOException; - + ColumnPageDecoder createDecoder(ColumnPageEncoderMeta meta); } http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java new file mode 100644 index 0000000..4cdd819 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.datastore.page.encoding; + +import java.io.IOException; + +import org.apache.carbondata.core.datastore.page.ColumnPage; +import org.apache.carbondata.core.memory.MemoryException; + +public interface ColumnPageDecoder { + + /** + * Apply decoding algorithm on input byte array and return decoded column page + */ + ColumnPage decode(byte[] input, int offset, int length) throws MemoryException, IOException; + +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java new file mode 100644 index 0000000..7a48785 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.datastore.page.encoding; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.carbondata.core.datastore.compression.Compressor; +import org.apache.carbondata.core.datastore.compression.CompressorFactory; +import org.apache.carbondata.core.datastore.page.ColumnPage; +import org.apache.carbondata.core.datastore.page.ComplexColumnPage; +import org.apache.carbondata.core.datastore.page.encoding.dimension.legacy.ComplexDimensionIndexCodec; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.util.CarbonMetadataUtil; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.format.BlockletMinMaxIndex; +import org.apache.carbondata.format.DataChunk2; +import org.apache.carbondata.format.Encoding; +import org.apache.carbondata.format.PresenceMeta; + +public abstract class ColumnPageEncoder { + + protected abstract byte[] encodeData(ColumnPage input) throws MemoryException, IOException; + + protected abstract List<Encoding> getEncodingList(); + + protected abstract ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage); + + /** + * Return a encoded column page by encoding the input page + * The encoded binary data and metadata are wrapped in encoding column page + */ + public EncodedColumnPage encode(ColumnPage inputPage) throws IOException, MemoryException { + byte[] encodedBytes = encodeData(inputPage); + DataChunk2 pageMetadata = buildPageMetadata(inputPage, encodedBytes); + return new EncodedColumnPage(pageMetadata, encodedBytes, inputPage.getStatistics()); + } + + private DataChunk2 buildPageMetadata(ColumnPage inputPage, byte[] encodedBytes) + throws IOException { + DataChunk2 dataChunk = new DataChunk2(); + dataChunk.setData_page_length(encodedBytes.length); + 
fillBasicFields(inputPage, dataChunk); + fillNullBitSet(inputPage, dataChunk); + fillEncoding(inputPage, dataChunk); + fillMinMaxIndex(inputPage, dataChunk); + fillLegacyFields(dataChunk); + return dataChunk; + } + + private void fillBasicFields(ColumnPage inputPage, DataChunk2 dataChunk) { + dataChunk.setChunk_meta(CarbonMetadataUtil.getSnappyChunkCompressionMeta()); + dataChunk.setNumberOfRowsInpage(inputPage.getPageSize()); + dataChunk.setRowMajor(false); + } + + private void fillNullBitSet(ColumnPage inputPage, DataChunk2 dataChunk) { + PresenceMeta presenceMeta = new PresenceMeta(); + presenceMeta.setPresent_bit_streamIsSet(true); + Compressor compressor = CompressorFactory.getInstance().getCompressor(); + presenceMeta.setPresent_bit_stream( + compressor.compressByte(inputPage.getNullBits().toByteArray())); + dataChunk.setPresence(presenceMeta); + } + + private void fillEncoding(ColumnPage inputPage, DataChunk2 dataChunk) throws IOException { + dataChunk.setEncoders(getEncodingList()); + dataChunk.setEncoder_meta(buildEncoderMeta(inputPage)); + } + + private List<ByteBuffer> buildEncoderMeta(ColumnPage inputPage) throws IOException { + ColumnPageEncoderMeta meta = getEncoderMeta(inputPage); + List<ByteBuffer> metaDatas = new ArrayList<>(); + if (meta != null) { + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(stream); + meta.write(out); + metaDatas.add(ByteBuffer.wrap(stream.toByteArray())); + } + return metaDatas; + } + + private void fillMinMaxIndex(ColumnPage inputPage, DataChunk2 dataChunk) { + dataChunk.setMin_max(buildMinMaxIndex(inputPage)); + } + + private BlockletMinMaxIndex buildMinMaxIndex(ColumnPage inputPage) { + BlockletMinMaxIndex index = new BlockletMinMaxIndex(); + byte[] bytes = CarbonUtil.getValueAsBytes( + inputPage.getDataType(), inputPage.getStatistics().getMax()); + ByteBuffer max = ByteBuffer.wrap( + bytes); + ByteBuffer min = ByteBuffer.wrap( + 
CarbonUtil.getValueAsBytes(inputPage.getDataType(), inputPage.getStatistics().getMin())); + index.addToMax_values(max); + index.addToMin_values(min); + return index; + } + + /** + * `buildPageMetadata` will call this for backward compatibility + */ + protected void fillLegacyFields(DataChunk2 dataChunk) + throws IOException { + // Subclasses should override this to update DataChunk2 if backward compatibility is required. + // For example, when using IndexStorageCodec, rle_page_length and rowid_page_length need to be + // updated + } + + /** + * Apply encoding algorithm for complex column page and return the coded data + * TODO: remove this interface after complex column page is unified with column page + */ + public static EncodedColumnPage[] encodeComplexColumn(ComplexColumnPage input) + throws IOException, MemoryException { + EncodedColumnPage[] encodedPages = new EncodedColumnPage[input.getDepth()]; + int index = 0; + Iterator<byte[][]> iterator = input.iterator(); + while (iterator.hasNext()) { + byte[][] subColumnPage = iterator.next(); + encodedPages[index++] = encodeChildColumn(subColumnPage); + } + return encodedPages; + } + + private static EncodedColumnPage encodeChildColumn(byte[][] data) + throws IOException, MemoryException { + Compressor compressor = CompressorFactory.getInstance().getCompressor(); + ComplexDimensionIndexCodec codec = new ComplexDimensionIndexCodec(false, false, compressor); + ColumnPageEncoder encoder = codec.createEncoder(null); + return encoder.encode(ColumnPage.wrapByteArrayPage(data)); + } + +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoderMeta.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoderMeta.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoderMeta.java 
new file mode 100644 index 0000000..d30117e --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoderMeta.java @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.page.encoding; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.ByteBuffer; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult; +import org.apache.carbondata.core.metadata.ValueEncoderMeta; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.schema.table.Writable; +import org.apache.carbondata.core.util.DataTypeUtil; + +/** + * It holds metadata for one column page + */ +public class ColumnPageEncoderMeta extends ValueEncoderMeta implements Writable { + + // data type of this column + private DataType dataType; + + private int scale; + private int precision; + + public static final char BYTE_VALUE_MEASURE = 'c'; + public static final char SHORT_VALUE_MEASURE = 'j'; + public static final char INT_VALUE_MEASURE = 
'k'; + public static final char BIG_INT_MEASURE = 'd'; + public static final char DOUBLE_MEASURE = 'n'; + public static final char BIG_DECIMAL_MEASURE = 'b'; + public static final char STRING = 's'; + public static final char TIMESTAMP = 't'; + public static final char DATE = 'x'; + public static final char BYTE_ARRAY = 'y'; + + public ColumnPageEncoderMeta() { + } + + public ColumnPageEncoderMeta(DataType dataType, SimpleStatsResult stats) { + if (dataType == null) { + throw new IllegalArgumentException("data type must not be null"); + } + this.dataType = dataType; + setType(convertType(dataType)); + if (stats != null) { + assert (stats.getDataType() == dataType); + setDecimal(stats.getDecimalPoint()); + setMaxValue(stats.getMax()); + setMinValue(stats.getMin()); + this.scale = stats.getScale(); + this.precision = stats.getPrecision(); + } + } + + private char convertType(DataType type) { + switch (type) { + case BYTE: + case SHORT: + case INT: + case LONG: + return CarbonCommonConstants.BIG_INT_MEASURE; + case DOUBLE: + return CarbonCommonConstants.DOUBLE_MEASURE; + case DECIMAL: + return CarbonCommonConstants.BIG_DECIMAL_MEASURE; + case STRING: + return STRING; + case TIMESTAMP: + return TIMESTAMP; + case DATE: + return DATE; + case BYTE_ARRAY: + return BYTE_ARRAY; + default: + throw new RuntimeException("Unexpected type: " + type); + } + } + + public DataType getDataType() { + return dataType; + } + + @Override + public void write(DataOutput out) throws IOException { + out.writeByte(dataType.ordinal()); + out.writeInt(getDecimal()); + out.writeByte(getDataTypeSelected()); + writeMinMax(out); + } + + @Override + public void readFields(DataInput in) throws IOException { + dataType = DataType.valueOf(in.readByte()); + setDecimal(in.readInt()); + setDataTypeSelected(in.readByte()); + readMinMax(in); + } + + private void writeMinMax(DataOutput out) throws IOException { + switch (dataType) { + case BYTE: + out.writeByte((byte) getMaxValue()); + out.writeByte((byte) 
getMinValue()); + out.writeLong(0L); // unique value is obsoleted, maintain for compatibility + break; + case SHORT: + out.writeShort((short) getMaxValue()); + out.writeShort((short) getMinValue()); + out.writeLong(0L); // unique value is obsoleted, maintain for compatibility + break; + case INT: + out.writeInt((int) getMaxValue()); + out.writeInt((int) getMinValue()); + out.writeLong(0L); // unique value is obsoleted, maintain for compatibility + break; + case LONG: + out.writeLong((Long) getMaxValue()); + out.writeLong((Long) getMinValue()); + out.writeLong(0L); // unique value is obsoleted, maintain for compatibility + break; + case DOUBLE: + out.writeDouble((Double) getMaxValue()); + out.writeDouble((Double) getMinValue()); + out.writeDouble(0d); // unique value is obsoleted, maintain for compatibility + break; + case DECIMAL: + byte[] maxAsBytes = getMaxAsBytes(); + byte[] minAsBytes = getMinAsBytes(); + byte[] unique = DataTypeUtil.bigDecimalToByte(BigDecimal.ZERO); + out.writeShort((short) maxAsBytes.length); + out.write(maxAsBytes); + out.writeShort((short) minAsBytes.length); + out.write(minAsBytes); + // unique value is obsoleted, maintain for compatibility + out.writeShort((short) unique.length); + out.write(unique); + out.writeInt(scale); + out.writeInt(precision); + break; + case BYTE_ARRAY: + // for complex type, it will come here, ignoring stats for complex type + // TODO: support stats for complex type + break; + default: + throw new IllegalArgumentException("invalid data type: " + dataType); + } + } + + private void readMinMax(DataInput in) throws IOException { + switch (dataType) { + case BYTE: + this.setMaxValue(in.readByte()); + this.setMinValue(in.readByte()); + in.readLong(); // for non exist value which is obsoleted, it is backward compatibility; + break; + case SHORT: + this.setMaxValue(in.readShort()); + this.setMinValue(in.readShort()); + in.readLong(); // for non exist value which is obsoleted, it is backward compatibility; + break; + 
case INT: + this.setMaxValue(in.readInt()); + this.setMinValue(in.readInt()); + in.readLong(); // for non exist value which is obsoleted, it is backward compatibility; + break; + case LONG: + this.setMaxValue(in.readLong()); + this.setMinValue(in.readLong()); + in.readLong(); // for non exist value which is obsoleted, it is backward compatibility; + break; + case DOUBLE: + this.setMaxValue(in.readDouble()); + this.setMinValue(in.readDouble()); + in.readDouble(); // for non exist value which is obsoleted, it is backward compatibility; + break; + case DECIMAL: + byte[] max = new byte[in.readShort()]; + in.readFully(max); + this.setMaxValue(DataTypeUtil.byteToBigDecimal(max)); + byte[] min = new byte[in.readShort()]; + in.readFully(min); + this.setMinValue(DataTypeUtil.byteToBigDecimal(min)); + // unique value is obsoleted, maintain for compatibility + short uniqueLength = in.readShort(); + in.readFully(new byte[uniqueLength]); + this.scale = in.readInt(); + this.precision = in.readInt(); + break; + case BYTE_ARRAY: + // for complex type, it will come here, ignoring stats for complex type + // TODO: support stats for complex type + break; + default: + throw new IllegalArgumentException("invalid data type: " + dataType); + } + } + + public byte[] getMaxAsBytes() { + return getValueAsBytes(getMaxValue()); + } + + public byte[] getMinAsBytes() { + return getValueAsBytes(getMinValue()); + } + + /** + * convert value to byte array + */ + private byte[] getValueAsBytes(Object value) { + ByteBuffer b; + switch (dataType) { + case BYTE: + b = ByteBuffer.allocate(8); + b.putLong((byte) value); + b.flip(); + return b.array(); + case SHORT: + b = ByteBuffer.allocate(8); + b.putLong((short) value); + b.flip(); + return b.array(); + case INT: + b = ByteBuffer.allocate(8); + b.putLong((int) value); + b.flip(); + return b.array(); + case LONG: + b = ByteBuffer.allocate(8); + b.putLong((long) value); + b.flip(); + return b.array(); + case DOUBLE: + b = ByteBuffer.allocate(8); + 
b.putDouble((double) value); + b.flip(); + return b.array(); + case DECIMAL: + return DataTypeUtil.bigDecimalToByte((BigDecimal)value); + case STRING: + case TIMESTAMP: + case DATE: + return (byte[]) value; + default: + throw new IllegalArgumentException("Invalid data type: " + dataType); + } + } + + public int getScale() { + return scale; + } + + public int getPrecision() { + return precision; + } +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ComplexDimensionIndexCodec.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ComplexDimensionIndexCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ComplexDimensionIndexCodec.java deleted file mode 100644 index 12efba4..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ComplexDimensionIndexCodec.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.carbondata.core.datastore.page.encoding; - -import java.util.Iterator; - -import org.apache.carbondata.core.datastore.DimensionType; -import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForInt; -import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForShort; -import org.apache.carbondata.core.datastore.columnar.IndexStorage; -import org.apache.carbondata.core.datastore.compression.Compressor; -import org.apache.carbondata.core.datastore.page.ColumnPage; -import org.apache.carbondata.core.datastore.page.ComplexColumnPage; -import org.apache.carbondata.core.memory.MemoryException; -import org.apache.carbondata.core.metadata.ColumnarFormatVersion; -import org.apache.carbondata.core.util.ByteUtil; - -public class ComplexDimensionIndexCodec extends IndexStorageCodec { - - ComplexDimensionIndexCodec(boolean isSort, boolean isInvertedIndex, Compressor compressor) { - super(isSort, isInvertedIndex, compressor); - } - - @Override - public String getName() { - return "ComplexDimensionIndexCodec"; - } - - @Override - public EncodedColumnPage encode(ColumnPage input) throws MemoryException { - throw new UnsupportedOperationException("internal error"); - } - - @Override - public EncodedColumnPage[] encodeComplexColumn(ComplexColumnPage input) { - EncodedColumnPage[] encodedPages = new EncodedColumnPage[input.getDepth()]; - int index = 0; - Iterator<byte[][]> iterator = input.iterator(); - while (iterator.hasNext()) { - byte[][] data = iterator.next(); - encodedPages[index++] = encodeChildColumn(input.getPageSize(), data); - } - return encodedPages; - } - - private EncodedColumnPage encodeChildColumn(int pageSize, byte[][] data) { - IndexStorage indexStorage; - if (version == ColumnarFormatVersion.V3) { - indexStorage = new BlockIndexerStorageForShort(data, false, false, false); - } else { - indexStorage = new BlockIndexerStorageForInt(data, false, false, false); - } - byte[] flattened = 
ByteUtil.flatten(indexStorage.getDataPage()); - byte[] compressed = compressor.compressByte(flattened); - return new EncodedDimensionPage(pageSize, compressed, indexStorage, - DimensionType.COMPLEX); - } - -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingStrategy.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingStrategy.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingStrategy.java index b9aac73..8f36de7 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingStrategy.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingStrategy.java @@ -20,6 +20,14 @@ package org.apache.carbondata.core.datastore.page.encoding; import org.apache.carbondata.core.datastore.TableSpec; import org.apache.carbondata.core.datastore.compression.Compressor; import org.apache.carbondata.core.datastore.compression.CompressorFactory; +import org.apache.carbondata.core.datastore.page.ColumnPage; +import org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveDeltaIntegralCodec; +import org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveIntegralCodec; +import org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec; +import org.apache.carbondata.core.datastore.page.encoding.dimension.legacy.ComplexDimensionIndexCodec; +import org.apache.carbondata.core.datastore.page.encoding.dimension.legacy.DictDimensionIndexCodec; +import org.apache.carbondata.core.datastore.page.encoding.dimension.legacy.DirectDictDimensionIndexCodec; +import org.apache.carbondata.core.datastore.page.encoding.dimension.legacy.HighCardDictDimensionIndexCodec; import 
org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult; import org.apache.carbondata.core.metadata.datatype.DataType; @@ -28,12 +36,85 @@ import org.apache.carbondata.core.metadata.datatype.DataType; */ public class DefaultEncodingStrategy extends EncodingStrategy { - private static final Compressor compressor = CompressorFactory.getInstance().getCompressor(); - private static final int THREE_BYTES_MAX = (int) Math.pow(2, 23) - 1; private static final int THREE_BYTES_MIN = - THREE_BYTES_MAX - 1; - private DataType fitLongMinMax(long max, long min) { + private static final boolean newWay = false; + + @Override + public ColumnPageEncoder createEncoder(TableSpec.ColumnSpec columnSpec, ColumnPage inputPage) { + // TODO: add log + if (columnSpec instanceof TableSpec.MeasureSpec) { + return createEncoderForMeasure(inputPage); + } else { + if (newWay) { + return createEncoderForDimension((TableSpec.DimensionSpec) columnSpec, inputPage); + } else { + return createEncoderForDimensionLegacy((TableSpec.DimensionSpec) columnSpec); + } + } + } + + private ColumnPageEncoder createEncoderForDimension(TableSpec.DimensionSpec columnSpec, + ColumnPage inputPage) { + Compressor compressor = CompressorFactory.getInstance().getCompressor(); + switch (columnSpec.getDimensionType()) { + case GLOBAL_DICTIONARY: + case DIRECT_DICTIONARY: + case PLAIN_VALUE: + return new DirectCompressCodec(inputPage.getDataType()).createEncoder(null); + case COMPLEX: + return new ComplexDimensionIndexCodec(false, false, compressor).createEncoder(null); + default: + throw new RuntimeException("unsupported dimension type: " + + columnSpec.getDimensionType()); + } + } + + private ColumnPageEncoder createEncoderForDimensionLegacy(TableSpec.DimensionSpec columnSpec) { + TableSpec.DimensionSpec dimensionSpec = columnSpec; + Compressor compressor = CompressorFactory.getInstance().getCompressor(); + switch (dimensionSpec.getDimensionType()) { + case GLOBAL_DICTIONARY: + return new 
DictDimensionIndexCodec( + dimensionSpec.isInSortColumns(), + dimensionSpec.isInSortColumns() && dimensionSpec.isDoInvertedIndex(), + compressor).createEncoder(null); + case DIRECT_DICTIONARY: + return new DirectDictDimensionIndexCodec( + dimensionSpec.isInSortColumns(), + dimensionSpec.isInSortColumns() && dimensionSpec.isDoInvertedIndex(), + compressor).createEncoder(null); + case PLAIN_VALUE: + return new HighCardDictDimensionIndexCodec( + dimensionSpec.isInSortColumns(), + dimensionSpec.isInSortColumns() && dimensionSpec.isDoInvertedIndex(), + compressor).createEncoder(null); + default: + throw new RuntimeException("unsupported dimension type: " + + dimensionSpec.getDimensionType()); + } + } + + private ColumnPageEncoder createEncoderForMeasure(ColumnPage columnPage) { + SimpleStatsResult stats = columnPage.getStatistics(); + switch (stats.getDataType()) { + case BYTE: + case SHORT: + case INT: + case LONG: + return selectCodecByAlgorithm(stats).createEncoder(null); + case FLOAT: + case DOUBLE: + case DECIMAL: + case BYTE_ARRAY: + return new DirectCompressCodec(columnPage.getDataType()).createEncoder(null); + default: + throw new RuntimeException("unsupported data type: " + stats.getDataType()); + } + } + + private static DataType fitLongMinMax(long max, long min) { if (max <= Byte.MAX_VALUE && min >= Byte.MIN_VALUE) { return DataType.BYTE; } else if (max <= Short.MAX_VALUE && min >= Short.MIN_VALUE) { @@ -47,7 +128,7 @@ public class DefaultEncodingStrategy extends EncodingStrategy { } } - private DataType fitMinMax(DataType dataType, Object max, Object min) { + private static DataType fitMinMax(DataType dataType, Object max, Object min) { switch (dataType) { case BYTE: return fitLongMinMax((byte) max, (byte) min); @@ -65,7 +146,7 @@ public class DefaultEncodingStrategy extends EncodingStrategy { } // fit the long input value into minimum data type - private DataType fitDelta(DataType dataType, Object max, Object min) { + private static DataType 
fitDelta(DataType dataType, Object max, Object min) { // use long data type to calculate delta to avoid overflow long value; switch (dataType) { @@ -97,15 +178,15 @@ public class DefaultEncodingStrategy extends EncodingStrategy { } } - // choose between adaptive encoder or delta adaptive encoder, based on whose target data type - // size is smaller - @Override ColumnPageCodec newCodecForIntegralType(SimpleStatsResult stats) { + /** + * choose between adaptive encoder or delta adaptive encoder, based on whose target data type + * size is smaller + */ + static ColumnPageCodec selectCodecByAlgorithm(SimpleStatsResult stats) { DataType srcDataType = stats.getDataType(); DataType adaptiveDataType = fitMinMax(stats.getDataType(), stats.getMax(), stats.getMin()); DataType deltaDataType; - // TODO: this handling is for data compatibility, change to Override check when implementing - // encoding override feature if (adaptiveDataType == DataType.LONG) { deltaDataType = DataType.LONG; } else { @@ -114,58 +195,15 @@ public class DefaultEncodingStrategy extends EncodingStrategy { if (Math.min(adaptiveDataType.getSizeInBytes(), deltaDataType.getSizeInBytes()) == srcDataType.getSizeInBytes()) { // no effect to use adaptive or delta, use compression only - return DirectCompressCodec.newInstance(stats, compressor); + return new DirectCompressCodec(stats.getDataType()); } if (adaptiveDataType.getSizeInBytes() <= deltaDataType.getSizeInBytes()) { // choose adaptive encoding - return AdaptiveIntegralCodec.newInstance( - stats.getDataType(), adaptiveDataType, stats, compressor); + return new AdaptiveIntegralCodec(stats.getDataType(), adaptiveDataType, stats); } else { // choose delta adaptive encoding - return DeltaIntegralCodec.newInstance(stats.getDataType(), deltaDataType, stats, compressor); + return new AdaptiveDeltaIntegralCodec(stats.getDataType(), deltaDataType, stats); } } - @Override - ColumnPageCodec newCodecForFloatingType(SimpleStatsResult stats) { - return 
DirectCompressCodec.newInstance(stats, compressor); - } - - // for decimal, currently it is a very basic implementation - @Override - ColumnPageCodec newCodecForDecimalType(SimpleStatsResult stats) { - return DirectCompressCodec.newInstance(stats, compressor); - } - - @Override - ColumnPageCodec newCodecForByteArrayType(SimpleStatsResult stats) { - return DirectCompressCodec.newInstance(stats, compressor); - } - - @Override - public ColumnPageCodec newCodec(TableSpec.DimensionSpec dimensionSpec) { - Compressor compressor = CompressorFactory.getInstance().getCompressor(); - switch (dimensionSpec.getDimensionType()) { - case GLOBAL_DICTIONARY: - return new DictDimensionIndexCodec( - dimensionSpec.isInSortColumns(), - dimensionSpec.isInSortColumns() && dimensionSpec.isDoInvertedIndex(), - compressor); - case DIRECT_DICTIONARY: - return new DirectDictDimensionIndexCodec( - dimensionSpec.isInSortColumns(), - dimensionSpec.isInSortColumns() && dimensionSpec.isDoInvertedIndex(), - compressor); - case PLAIN_VALUE: - return new HighCardDictDimensionIndexCodec( - dimensionSpec.isInSortColumns(), - dimensionSpec.isInSortColumns() && dimensionSpec.isDoInvertedIndex(), - compressor); - case COMPLEX: - return new ComplexDimensionIndexCodec(false, false, compressor); - default: - throw new RuntimeException("unsupported dimension type: " + - dimensionSpec.getDimensionType()); - } - } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DeltaIntegralCodec.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DeltaIntegralCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DeltaIntegralCodec.java deleted file mode 100644 index 80545b9..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DeltaIntegralCodec.java +++ /dev/null @@ -1,261 
+0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.carbondata.core.datastore.page.encoding; - -import java.io.IOException; - -import org.apache.carbondata.core.datastore.compression.Compressor; -import org.apache.carbondata.core.datastore.page.ColumnPage; -import org.apache.carbondata.core.datastore.page.LazyColumnPage; -import org.apache.carbondata.core.datastore.page.PrimitiveCodec; -import org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult; -import org.apache.carbondata.core.memory.MemoryException; -import org.apache.carbondata.core.metadata.CodecMetaFactory; -import org.apache.carbondata.core.metadata.datatype.DataType; - -/** - * Codec for integer (byte, short, int, long) data type page. - * This codec will calculate delta of page max value and page value, - * and do type casting of the diff to make storage minimum. 
- */ -public class DeltaIntegralCodec extends AdaptiveCompressionCodec { - - private ColumnPage encodedPage; - - private long max; - - public static DeltaIntegralCodec newInstance(DataType srcDataType, DataType targetDataType, - SimpleStatsResult stats, Compressor compressor) { - return new DeltaIntegralCodec(srcDataType, targetDataType, stats, compressor); - } - - private DeltaIntegralCodec(DataType srcDataType, DataType targetDataType, SimpleStatsResult stats, - Compressor compressor) { - super(srcDataType, targetDataType, stats, compressor); - switch (srcDataType) { - case BYTE: - max = (byte) stats.getMax(); - break; - case SHORT: - max = (short) stats.getMax(); - break; - case INT: - max = (int) stats.getMax(); - break; - case LONG: - max = (long) stats.getMax(); - break; - case FLOAT: - case DOUBLE: - max = (long) ((double) stats.getMax()); - break; - default: - throw new UnsupportedOperationException( - "unsupported data type for Delta compress: " + srcDataType); - } - } - - @Override - public String getName() { - return "DeltaIntegralCodec"; - } - - @Override - public EncodedColumnPage encode(ColumnPage input) throws MemoryException, IOException { - encodedPage = ColumnPage - .newPage(targetDataType, input.getPageSize(), stats.getScale(), stats.getPrecision()); - input.encode(codec); - byte[] result = encodedPage.compress(compressor); - encodedPage.freeMemory(); - return new EncodedMeasurePage(input.getPageSize(), - result, - CodecMetaFactory.createMeta(stats, targetDataType), - ((SimpleStatsResult)input.getStatistics()).getNullBits()); - } - - @Override - public ColumnPage decode(byte[] input, int offset, int length) throws MemoryException { - ColumnPage page = ColumnPage.decompress(compressor, targetDataType, input, offset, length, - stats.getScale(), stats.getPrecision()); - return LazyColumnPage.newPage(page, codec); - } - - // encoded value = (max value of page) - (page value) - private PrimitiveCodec codec = new PrimitiveCodec() { - @Override - public 
void encode(int rowId, byte value) { - switch (targetDataType) { - case BYTE: - encodedPage.putByte(rowId, (byte)(max - value)); - break; - default: - throw new RuntimeException("internal error: " + debugInfo()); - } - } - - @Override - public void encode(int rowId, short value) { - switch (targetDataType) { - case BYTE: - encodedPage.putByte(rowId, (byte)(max - value)); - break; - case SHORT: - encodedPage.putShort(rowId, (short)(max - value)); - break; - default: - throw new RuntimeException("internal error: " + debugInfo()); - } - } - - @Override - public void encode(int rowId, int value) { - switch (targetDataType) { - case BYTE: - encodedPage.putByte(rowId, (byte)(max - value)); - break; - case SHORT: - encodedPage.putShort(rowId, (short)(max - value)); - break; - case SHORT_INT: - encodedPage.putShortInt(rowId, (int)(max - value)); - break; - case INT: - encodedPage.putInt(rowId, (int)(max - value)); - break; - default: - throw new RuntimeException("internal error: " + debugInfo()); - } - } - - @Override - public void encode(int rowId, long value) { - switch (targetDataType) { - case BYTE: - encodedPage.putByte(rowId, (byte)(max - value)); - break; - case SHORT: - encodedPage.putShort(rowId, (short)(max - value)); - break; - case SHORT_INT: - encodedPage.putShortInt(rowId, (int)(max - value)); - break; - case INT: - encodedPage.putInt(rowId, (int)(max - value)); - break; - case LONG: - encodedPage.putLong(rowId, max - value); - break; - default: - throw new RuntimeException("internal error: " + debugInfo()); - } - } - - @Override - public void encode(int rowId, float value) { - switch (targetDataType) { - case BYTE: - encodedPage.putByte(rowId, (byte)(max - value)); - break; - case SHORT: - encodedPage.putShort(rowId, (short)(max - value)); - break; - case SHORT_INT: - encodedPage.putShortInt(rowId, (int)(max - value)); - break; - case INT: - encodedPage.putInt(rowId, (int)(max - value)); - break; - case LONG: - encodedPage.putLong(rowId, (long)(max - 
value)); - break; - default: - throw new RuntimeException("internal error: " + debugInfo()); - } - } - - @Override - public void encode(int rowId, double value) { - switch (targetDataType) { - case BYTE: - encodedPage.putByte(rowId, (byte)(max - value)); - break; - case SHORT: - encodedPage.putShort(rowId, (short)(max - value)); - break; - case SHORT_INT: - encodedPage.putShortInt(rowId, (int)(max - value)); - break; - case INT: - encodedPage.putInt(rowId, (int)(max - value)); - break; - case LONG: - encodedPage.putLong(rowId, (long)(max - value)); - break; - default: - throw new RuntimeException("internal error: " + debugInfo()); - } - } - - @Override - public long decodeLong(byte value) { - return max - value; - } - - @Override - public long decodeLong(short value) { - return max - value; - } - - @Override - public long decodeLong(int value) { - return max - value; - } - - @Override - public double decodeDouble(byte value) { - return max - value; - } - - @Override - public double decodeDouble(short value) { - return max - value; - } - - @Override - public double decodeDouble(int value) { - return max - value; - } - - @Override - public double decodeDouble(long value) { - return max - value; - } - - @Override - public double decodeDouble(float value) { - // this codec is for integer type only - throw new RuntimeException("internal error: " + debugInfo()); - } - - @Override - public double decodeDouble(double value) { - // this codec is for integer type only - throw new RuntimeException("internal error: " + debugInfo()); - } - }; -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DictDimensionIndexCodec.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DictDimensionIndexCodec.java 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DictDimensionIndexCodec.java deleted file mode 100644 index 20b63ba..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DictDimensionIndexCodec.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.carbondata.core.datastore.page.encoding; - -import org.apache.carbondata.core.datastore.DimensionType; -import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForInt; -import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForNoInvertedIndexForInt; -import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForNoInvertedIndexForShort; -import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForShort; -import org.apache.carbondata.core.datastore.columnar.IndexStorage; -import org.apache.carbondata.core.datastore.compression.Compressor; -import org.apache.carbondata.core.datastore.page.ColumnPage; -import org.apache.carbondata.core.metadata.ColumnarFormatVersion; -import org.apache.carbondata.core.util.ByteUtil; - -public class DictDimensionIndexCodec extends IndexStorageCodec { - - DictDimensionIndexCodec(boolean isSort, boolean isInvertedIndex, Compressor compressor) { - super(isSort, isInvertedIndex, compressor); - } - - @Override - public String getName() { - return "DictDimensionIndexCodec"; - } - - @Override - public EncodedColumnPage encode(ColumnPage input) { - IndexStorage indexStorage; - byte[][] data = input.getByteArrayPage(); - if (isInvertedIndex) { - if (version == ColumnarFormatVersion.V3) { - indexStorage = new BlockIndexerStorageForShort(data, true, false, isSort); - } else { - indexStorage = new BlockIndexerStorageForInt(data, true, false, isSort); - } - } else { - if (version == ColumnarFormatVersion.V3) { - indexStorage = new BlockIndexerStorageForNoInvertedIndexForShort(data, false); - } else { - indexStorage = new BlockIndexerStorageForNoInvertedIndexForInt(data); - } - } - byte[] flattened = ByteUtil.flatten(indexStorage.getDataPage()); - byte[] compressed = compressor.compressByte(flattened); - return new EncodedDimensionPage(input.getPageSize(), compressed, indexStorage, - DimensionType.GLOBAL_DICTIONARY); - } - -} 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DirectCompressCodec.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DirectCompressCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DirectCompressCodec.java deleted file mode 100644 index 664926c..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DirectCompressCodec.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.carbondata.core.datastore.page.encoding; - -import java.io.IOException; - -import org.apache.carbondata.core.datastore.compression.Compressor; -import org.apache.carbondata.core.datastore.page.ColumnPage; -import org.apache.carbondata.core.datastore.page.ComplexColumnPage; -import org.apache.carbondata.core.datastore.page.LazyColumnPage; -import org.apache.carbondata.core.datastore.page.PrimitiveCodec; -import org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult; -import org.apache.carbondata.core.memory.MemoryException; -import org.apache.carbondata.core.metadata.CodecMetaFactory; - -/** - * This codec directly apply compression on the input data - */ -public class DirectCompressCodec implements ColumnPageCodec { - - private Compressor compressor; - private SimpleStatsResult stats; - - private DirectCompressCodec(SimpleStatsResult stats, Compressor compressor) { - this.compressor = compressor; - this.stats = stats; - } - - public static DirectCompressCodec newInstance(SimpleStatsResult stats, Compressor compressor) { - return new DirectCompressCodec(stats, compressor); - } - - @Override - public String getName() { - return "DirectCompressCodec"; - } - - @Override - public EncodedColumnPage encode(ColumnPage input) throws IOException, MemoryException { - byte[] result = input.compress(compressor); - return new EncodedMeasurePage(input.getPageSize(), result, - CodecMetaFactory.createMeta(stats, stats.getDataType()), - ((SimpleStatsResult)input.getStatistics()).getNullBits()); - } - - @Override - public EncodedColumnPage[] encodeComplexColumn(ComplexColumnPage input) { - throw new UnsupportedOperationException("internal error"); - } - - @Override - public ColumnPage decode(byte[] input, int offset, int length) throws MemoryException { - ColumnPage page = ColumnPage - .decompress(compressor, stats.getDataType(), input, offset, length, stats.getScale(), - stats.getPrecision()); - return LazyColumnPage.newPage(page, codec); - 
} - - private PrimitiveCodec codec = new PrimitiveCodec() { - @Override - public void encode(int rowId, byte value) { - } - - @Override - public void encode(int rowId, short value) { - } - - @Override - public void encode(int rowId, int value) { - } - - @Override - public void encode(int rowId, long value) { - } - - @Override - public void encode(int rowId, float value) { - } - - @Override - public void encode(int rowId, double value) { - } - - @Override - public long decodeLong(byte value) { - return value; - } - - @Override - public long decodeLong(short value) { - return value; - } - - @Override - public long decodeLong(int value) { - return value; - } - - @Override - public double decodeDouble(byte value) { - return value; - } - - @Override - public double decodeDouble(short value) { - return value; - } - - @Override - public double decodeDouble(int value) { - return value; - } - - @Override - public double decodeDouble(long value) { - return value; - } - - @Override - public double decodeDouble(float value) { - return value; - } - - @Override - public double decodeDouble(double value) { - return value; - } - }; -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DirectDictDimensionIndexCodec.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DirectDictDimensionIndexCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DirectDictDimensionIndexCodec.java deleted file mode 100644 index d3e5e66..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DirectDictDimensionIndexCodec.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.carbondata.core.datastore.page.encoding; - -import org.apache.carbondata.core.datastore.DimensionType; -import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForInt; -import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForNoInvertedIndexForInt; -import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForNoInvertedIndexForShort; -import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForShort; -import org.apache.carbondata.core.datastore.columnar.IndexStorage; -import org.apache.carbondata.core.datastore.compression.Compressor; -import org.apache.carbondata.core.datastore.page.ColumnPage; -import org.apache.carbondata.core.memory.MemoryException; -import org.apache.carbondata.core.metadata.ColumnarFormatVersion; -import org.apache.carbondata.core.util.ByteUtil; - -public class DirectDictDimensionIndexCodec extends IndexStorageCodec { - - DirectDictDimensionIndexCodec(boolean isSort, boolean isInvertedIndex, Compressor compressor) { - super(isSort, isInvertedIndex, compressor); - } - - @Override - public String getName() { - return "DirectDictDimensionIndexCodec"; - } - - @Override - public EncodedColumnPage encode(ColumnPage input) throws MemoryException { - IndexStorage indexStorage; - 
byte[][] data = input.getByteArrayPage(); - if (isInvertedIndex) { - if (version == ColumnarFormatVersion.V3) { - indexStorage = new BlockIndexerStorageForShort(data, false, false, isSort); - } else { - indexStorage = new BlockIndexerStorageForInt(data, false, false, isSort); - } - } else { - if (version == ColumnarFormatVersion.V3) { - indexStorage = new BlockIndexerStorageForNoInvertedIndexForShort(data, false); - } else { - indexStorage = new BlockIndexerStorageForNoInvertedIndexForInt(data); - } - } - byte[] flattened = ByteUtil.flatten(indexStorage.getDataPage()); - byte[] compressed = compressor.compressByte(flattened); - return new EncodedDimensionPage(input.getPageSize(), compressed, indexStorage, - DimensionType.GLOBAL_DICTIONARY); - } - -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodedColumnPage.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodedColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodedColumnPage.java index 1630e06..43d6fc6 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodedColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodedColumnPage.java @@ -17,62 +17,65 @@ package org.apache.carbondata.core.datastore.page.encoding; -import java.io.IOException; import java.nio.ByteBuffer; -import java.util.BitSet; +import org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult; +import org.apache.carbondata.core.util.CarbonUtil; import org.apache.carbondata.format.DataChunk2; /** - * An column page after encoding and compression. + * An column page after encoding. 
*/ -public abstract class EncodedColumnPage { - - // number of row of this page - protected int pageSize; +public class EncodedColumnPage { // encoded and compressed column page data - protected byte[] encodedData; - - protected BitSet nullBitSet; + protected final byte[] encodedData; // metadata of this page - protected DataChunk2 dataChunk2; - - EncodedColumnPage(int pageSize, byte[] encodedData) { - this.pageSize = pageSize; - this.encodedData = encodedData; - } + private DataChunk2 pageMetadata; - public abstract DataChunk2 buildDataChunk2() throws IOException; + // stats of this page + private SimpleStatsResult stats; /** - * return the encoded and compressed data page + * Constructor + * @param pageMetadata metadata of the encoded page + * @param encodedData encoded data for this page */ - public byte[] getEncodedData() { - return encodedData; + public EncodedColumnPage(DataChunk2 pageMetadata, byte[] encodedData, + SimpleStatsResult stats) { + if (pageMetadata == null) { + throw new IllegalArgumentException("data chunk2 must not be null"); + } + if (encodedData == null) { + throw new IllegalArgumentException("encoded data must not be null"); + } + this.pageMetadata = pageMetadata; + this.encodedData = encodedData; + this.stats = stats; } /** - * return the size of the s + * return the encoded data as ByteBuffer */ - public int getSerializedSize() { - return encodedData.length; - } - - public ByteBuffer serialize() { + public ByteBuffer getEncodedData() { return ByteBuffer.wrap(encodedData); } - public DataChunk2 getDataChunk2() { - return dataChunk2; + public DataChunk2 getPageMetadata() { + return pageMetadata; } - public void setNullBitSet(BitSet nullBitSet) { - this.nullBitSet = nullBitSet; + /** + * Return the total size of serialized data and metadata + */ + public int getTotalSerializedSize() { + int metadataSize = CarbonUtil.getByteArray(pageMetadata).length; + int dataSize = encodedData.length; + return metadataSize + dataSize; } - public BitSet 
getNullBitSet() { - return nullBitSet; + public SimpleStatsResult getStats() { + return stats; } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodedDimensionPage.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodedDimensionPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodedDimensionPage.java deleted file mode 100644 index 30d58cf..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodedDimensionPage.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.carbondata.core.datastore.page.encoding; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; - -import org.apache.carbondata.core.constants.CarbonCommonConstants; -import org.apache.carbondata.core.datastore.DimensionType; -import org.apache.carbondata.core.datastore.columnar.IndexStorage; -import org.apache.carbondata.core.datastore.page.statistics.TablePageStatistics; -import org.apache.carbondata.core.util.CarbonMetadataUtil; -import org.apache.carbondata.format.BlockletMinMaxIndex; -import org.apache.carbondata.format.DataChunk2; -import org.apache.carbondata.format.Encoding; -import org.apache.carbondata.format.SortState; - -/** - * Encoded dimension page that include data and inverted index - */ -public class EncodedDimensionPage extends EncodedColumnPage { - private IndexStorage indexStorage; - private DimensionType dimensionType; - - public EncodedDimensionPage(int pageSize, byte[] encodedData, IndexStorage indexStorage, - DimensionType dimensionType) { - super(pageSize, encodedData); - this.indexStorage = indexStorage; - this.dimensionType = dimensionType; - this.dataChunk2 = buildDataChunk2(); - } - - private int getTotalRowIdPageLengthInBytes() { - return CarbonCommonConstants.INT_SIZE_IN_BYTE + - indexStorage.getRowIdPageLengthInBytes() + indexStorage.getRowIdRlePageLengthInBytes(); - } - - @Override - public int getSerializedSize() { - int size = encodedData.length; - if (indexStorage.getRowIdPageLengthInBytes() > 0) { - size += getTotalRowIdPageLengthInBytes(); - } - if (indexStorage.getDataRlePageLengthInBytes() > 0) { - size += indexStorage.getDataRlePageLengthInBytes(); - } - return size; - } - - @Override - public ByteBuffer serialize() { - ByteBuffer buffer = ByteBuffer.allocate(getSerializedSize()); - buffer.put(encodedData); - if (indexStorage.getRowIdPageLengthInBytes() > 0) { - buffer.putInt(indexStorage.getRowIdPageLengthInBytes()); - short[] rowIdPage = 
(short[])indexStorage.getRowIdPage(); - for (short rowId : rowIdPage) { - buffer.putShort(rowId); - } - if (indexStorage.getRowIdRlePageLengthInBytes() > 0) { - short[] rowIdRlePage = (short[])indexStorage.getRowIdRlePage(); - for (short rowIdRle : rowIdRlePage) { - buffer.putShort(rowIdRle); - } - } - } - if (indexStorage.getDataRlePageLengthInBytes() > 0) { - short[] dataRlePage = (short[])indexStorage.getDataRlePage(); - for (short dataRle : dataRlePage) { - buffer.putShort(dataRle); - } - } - buffer.flip(); - return buffer; - } - - @Override - public DataChunk2 buildDataChunk2() { - DataChunk2 dataChunk = new DataChunk2(); - dataChunk.min_max = new BlockletMinMaxIndex(); - dataChunk.setChunk_meta(CarbonMetadataUtil.getSnappyChunkCompressionMeta()); - dataChunk.setNumberOfRowsInpage(pageSize); - List<Encoding> encodings = new ArrayList<Encoding>(); - dataChunk.setData_page_length(encodedData.length); - if (dimensionType == DimensionType.GLOBAL_DICTIONARY || - dimensionType == DimensionType.DIRECT_DICTIONARY || - dimensionType == DimensionType.COMPLEX) { - encodings.add(Encoding.DICTIONARY); - } - if (dimensionType == DimensionType.DIRECT_DICTIONARY) { - encodings.add(Encoding.DIRECT_DICTIONARY); - } - if (indexStorage.getDataRlePageLengthInBytes() > 0 || - dimensionType == DimensionType.GLOBAL_DICTIONARY) { - dataChunk.setRle_page_length(indexStorage.getDataRlePageLengthInBytes()); - encodings.add(Encoding.RLE); - } - SortState sort = (indexStorage.getRowIdPageLengthInBytes() > 0) ? 
- SortState.SORT_EXPLICIT : SortState.SORT_NATIVE; - dataChunk.setSort_state(sort); - if (indexStorage.getRowIdPageLengthInBytes() > 0) { - dataChunk.setRowid_page_length(getTotalRowIdPageLengthInBytes()); - encodings.add(Encoding.INVERTED_INDEX); - } - if (dimensionType == DimensionType.PLAIN_VALUE) { - dataChunk.min_max.addToMax_values(ByteBuffer.wrap( - TablePageStatistics.updateMinMaxForNoDictionary(indexStorage.getMax()))); - dataChunk.min_max.addToMin_values(ByteBuffer.wrap( - TablePageStatistics.updateMinMaxForNoDictionary(indexStorage.getMin()))); - } else { - dataChunk.min_max.addToMax_values(ByteBuffer.wrap(indexStorage.getMax())); - dataChunk.min_max.addToMin_values(ByteBuffer.wrap(indexStorage.getMin())); - } - dataChunk.setEncoders(encodings); - return dataChunk; - } - - public IndexStorage getIndexStorage() { - return indexStorage; - } - - public DimensionType getDimensionType() { - return dimensionType; - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/e6a4f641/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodedMeasurePage.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodedMeasurePage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodedMeasurePage.java deleted file mode 100644 index 0ef48c6..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodedMeasurePage.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.carbondata.core.datastore.page.encoding; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.List; - -import org.apache.carbondata.core.datastore.compression.Compressor; -import org.apache.carbondata.core.datastore.compression.CompressorFactory; -import org.apache.carbondata.core.metadata.ColumnPageCodecMeta; -import org.apache.carbondata.core.metadata.ValueEncoderMeta; -import org.apache.carbondata.core.util.CarbonMetadataUtil; -import org.apache.carbondata.core.util.CarbonUtil; -import org.apache.carbondata.format.BlockletMinMaxIndex; -import org.apache.carbondata.format.DataChunk2; -import org.apache.carbondata.format.Encoding; -import org.apache.carbondata.format.PresenceMeta; - -/** - * Encoded measure page that include data and statistics - */ -public class EncodedMeasurePage extends EncodedColumnPage { - - private ValueEncoderMeta metaData; - - public EncodedMeasurePage(int pageSize, byte[] encodedData, ValueEncoderMeta metaData, - BitSet nullBitSet) throws IOException { - super(pageSize, encodedData); - this.metaData = metaData; - this.nullBitSet = nullBitSet; - this.dataChunk2 = buildDataChunk2(); - } - - @Override - public DataChunk2 buildDataChunk2() throws IOException { - DataChunk2 dataChunk = new DataChunk2(); - dataChunk.min_max = new BlockletMinMaxIndex(); - dataChunk.setChunk_meta(CarbonMetadataUtil.getSnappyChunkCompressionMeta()); - dataChunk.setNumberOfRowsInpage(pageSize); - dataChunk.setData_page_length(encodedData.length); 
- dataChunk.setRowMajor(false); - // TODO : Change as per this encoders. - List<Encoding> encodings = new ArrayList<Encoding>(); - encodings.add(Encoding.DELTA); - dataChunk.setEncoders(encodings); - PresenceMeta presenceMeta = new PresenceMeta(); - presenceMeta.setPresent_bit_streamIsSet(true); - Compressor compressor = CompressorFactory.getInstance().getCompressor(); - presenceMeta.setPresent_bit_stream(compressor.compressByte(nullBitSet.toByteArray())); - dataChunk.setPresence(presenceMeta); - List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>(); - if (metaData instanceof ColumnPageCodecMeta) { - ColumnPageCodecMeta meta = (ColumnPageCodecMeta) metaData; - encoderMetaList.add(ByteBuffer.wrap(meta.serialize())); - dataChunk.min_max.addToMax_values(ByteBuffer.wrap(meta.getMaxAsBytes())); - dataChunk.min_max.addToMin_values(ByteBuffer.wrap(meta.getMinAsBytes())); - } else { - encoderMetaList.add(ByteBuffer.wrap(CarbonUtil.serializeEncodeMetaUsingByteBuffer(metaData))); - dataChunk.min_max.addToMax_values(ByteBuffer.wrap(CarbonUtil.getMaxValueAsBytes(metaData))); - dataChunk.min_max.addToMin_values(ByteBuffer.wrap(CarbonUtil.getMinValueAsBytes(metaData))); - } - dataChunk.setEncoder_meta(encoderMetaList); - return dataChunk; - } - - public ValueEncoderMeta getMetaData() { - return metaData; - } -} \ No newline at end of file
