http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/MeasurePageStatsVO.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/MeasurePageStatsVO.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/MeasurePageStatsVO.java deleted file mode 100644 index 865ae04..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/MeasurePageStatsVO.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.carbondata.core.datastore.page.statistics; - -import org.apache.carbondata.core.datastore.page.ColumnPage; -import org.apache.carbondata.core.metadata.ValueEncoderMeta; -import org.apache.carbondata.core.metadata.datatype.DataType; - -public class MeasurePageStatsVO { - // statistics of each measure column - private Object[] min, max, nonExistValue; - private int[] decimal; - - private DataType[] dataType; - private byte[] selectedDataType; - - private MeasurePageStatsVO() { - } - - public MeasurePageStatsVO(ColumnPage[] measurePages) { - min = new Object[measurePages.length]; - max = new Object[measurePages.length]; - nonExistValue = new Object[measurePages.length]; - decimal = new int[measurePages.length]; - dataType = new DataType[measurePages.length]; - selectedDataType = new byte[measurePages.length]; - for (int i = 0; i < measurePages.length; i++) { - ColumnPageStatsVO stats = measurePages[i].getStatistics(); - min[i] = stats.getMin(); - max[i] = stats.getMax(); - nonExistValue[i] = stats.nonExistValue(); - decimal[i] = stats.getDecimal(); - dataType[i] = measurePages[i].getDataType(); - } - } - - public static MeasurePageStatsVO build(ValueEncoderMeta[] encoderMetas) { - Object[] max = new Object[encoderMetas.length]; - Object[] min = new Object[encoderMetas.length]; - int[] decimal = new int[encoderMetas.length]; - Object[] nonExistValue = new Object[encoderMetas.length]; - DataType[] dataType = new DataType[encoderMetas.length]; - byte[] selectedDataType = new byte[encoderMetas.length]; - for (int i = 0; i < encoderMetas.length; i++) { - max[i] = encoderMetas[i].getMaxValue(); - min[i] = encoderMetas[i].getMinValue(); - decimal[i] = encoderMetas[i].getDecimal(); - nonExistValue[i] = encoderMetas[i].getUniqueValue(); - dataType[i] = encoderMetas[i].getType(); - selectedDataType[i] = encoderMetas[i].getDataTypeSelected(); - } - - MeasurePageStatsVO stats = new MeasurePageStatsVO(); - stats.dataType = dataType; - stats.selectedDataType = selectedDataType; - stats.min = min; - stats.max = max; - stats.nonExistValue = nonExistValue; - stats.decimal = decimal; - return stats; - } - - public DataType getDataType(int measureIndex) { - return dataType[measureIndex]; - } - - public Object getMin(int measureIndex) { - return min[measureIndex]; - } - - public Object getMax(int measureIndex) { - return max[measureIndex]; - } - - public int getDecimal(int measureIndex) { - return decimal[measureIndex]; - } - - public Object getNonExistValue(int measureIndex) { - return nonExistValue[measureIndex]; - } - - public byte getDataTypeSelected(int measureIndex) { - return selectedDataType[measureIndex]; - } - - -}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java new file mode 100644 index 0000000..73ada4b --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.page.statistics; + +import java.math.BigDecimal; +import java.util.BitSet; + +import org.apache.carbondata.core.metadata.ColumnPageCodecMeta; +import org.apache.carbondata.core.metadata.ValueEncoderMeta; +import org.apache.carbondata.core.metadata.datatype.DataType; + +/** statics for primitive column page */ +public class PrimitivePageStatsCollector implements ColumnPageStatsCollector, SimpleStatsResult { + private DataType dataType; + private byte minByte, maxByte; + private short minShort, maxShort; + private int minInt, maxInt; + private long minLong, maxLong; + private double minDouble, maxDouble; + + // scale of the double value + private int decimal; + + // The index of the rowId whose value is null, will be set to 1 + private BitSet nullBitSet; + + // this is for encode flow + public static PrimitivePageStatsCollector newInstance(DataType dataType, int pageSize) { + switch (dataType) { + default: + return new PrimitivePageStatsCollector(dataType, pageSize); + } + } + + // this is for decode flow, we do not need to create nullBits, so passing 0 as pageSize + public static PrimitivePageStatsCollector newInstance(ColumnPageCodecMeta meta) { + PrimitivePageStatsCollector instance = + new PrimitivePageStatsCollector(meta.getSrcDataType(), 0); + // set min max from meta + switch (meta.getSrcDataType()) { + case BYTE: + instance.minByte = (byte) meta.getMinValue(); + instance.maxByte = (byte) meta.getMaxValue(); + break; + case SHORT: + instance.minShort = (short) meta.getMinValue(); + instance.maxShort = (short) meta.getMaxValue(); + break; + case INT: + instance.minInt = (int) meta.getMinValue(); + instance.maxInt = (int) meta.getMaxValue(); + break; + case LONG: + instance.minLong = (long) meta.getMinValue(); + instance.maxLong = (long) meta.getMaxValue(); + break; + case DOUBLE: + instance.minDouble = (double) meta.getMinValue(); + instance.maxDouble = (double) meta.getMaxValue(); + instance.decimal = meta.getDecimal(); + break; + } + return instance; + } + + public static PrimitivePageStatsCollector newInstance(ValueEncoderMeta meta) { + PrimitivePageStatsCollector instance = + new PrimitivePageStatsCollector(meta.getType(), 0); + // set min max from meta + switch (meta.getType()) { + case BYTE: + instance.minByte = (byte) meta.getMinValue(); + instance.maxByte = (byte) meta.getMaxValue(); + break; + case SHORT: + instance.minShort = (short) meta.getMinValue(); + instance.maxShort = (short) meta.getMaxValue(); + break; + case INT: + instance.minInt = (int) meta.getMinValue(); + instance.maxInt = (int) meta.getMaxValue(); + break; + case LONG: + instance.minLong = (long) meta.getMinValue(); + instance.maxLong = (long) meta.getMaxValue(); + break; + case DOUBLE: + instance.minDouble = (double) meta.getMinValue(); + instance.maxDouble = (double) meta.getMaxValue(); + instance.decimal = meta.getDecimal(); + break; + } + return instance; + } + + private PrimitivePageStatsCollector(DataType dataType, int pageSize) { + this.dataType = dataType; + this.nullBitSet = new BitSet(pageSize); + switch (dataType) { + case BYTE: + minByte = Byte.MAX_VALUE; + maxByte = Byte.MIN_VALUE; + break; + case SHORT: + minShort = Short.MAX_VALUE; + maxShort = Short.MIN_VALUE; + break; + case INT: + minInt = Integer.MAX_VALUE; + maxInt = Integer.MIN_VALUE; + break; + case LONG: + minLong = Long.MAX_VALUE; + maxLong = Long.MIN_VALUE; + break; + case DOUBLE: + minDouble = Double.MAX_VALUE; + maxDouble = Double.MIN_VALUE; + decimal = 0; + break; + case DECIMAL: + } + } + + @Override + public void updateNull(int rowId) { + nullBitSet.set(rowId); + long value = 0; + switch (dataType) { + case BYTE: + update((byte) value); + break; + case SHORT: + update((short) value); + break; + case INT: + update((int) value); + break; + case LONG: + update(value); + break; + case DOUBLE: + update(0d); + break; + } + } + + @Override + public void update(byte value) { + if (minByte > value) { + minByte = value; + } + if (maxByte < value) { + maxByte = value; + } + } + + @Override + public void update(short value) { + if (minShort > value) { + minShort = value; + } + if (maxShort < value) { + maxShort = value; + } + } + + @Override + public void update(int value) { + if (minInt > value) { + minInt = value; + } + if (maxInt < value) { + maxInt = value; + } + } + + @Override + public void update(long value) { + if (minLong > value) { + minLong = value; + } + if (maxLong < value) { + maxLong = value; + } + } + + @Override + public void update(double value) { + if (minDouble > value) { + minDouble = value; + } + if (maxDouble < value) { + maxDouble = value; + } + int scale = BigDecimal.valueOf(value).scale(); + if (scale < 0) { + decimal = scale; + } else { + decimal = Math.max(decimal, scale); + } + } + + @Override + public void update(byte[] value) { + } + + @Override + public Object getPageStats() { + return this; + } + + @Override + public String toString() { + switch (dataType) { + case BYTE: + return String.format("min: %s, max: %s, decimal: %s ", minByte, maxByte, decimal); + case SHORT: + return String.format("min: %s, max: %s, decimal: %s ", minShort, maxShort, decimal); + case INT: + return String.format("min: %s, max: %s, decimal: %s ", minInt, maxInt, decimal); + case LONG: + return String.format("min: %s, max: %s, decimal: %s ", minLong, maxLong, decimal); + case DOUBLE: + return String.format("min: %s, max: %s, decimal: %s ", minDouble, maxDouble, decimal); + } + return super.toString(); + } + + @Override + public Object getMin() { + switch (dataType) { + case BYTE: + return minByte; + case SHORT: + return minShort; + case INT: + return minInt; + case LONG: + return minLong; + case DOUBLE: + return minDouble; + } + return null; + } + + @Override + public Object getMax() { + switch (dataType) { + case BYTE: + return maxByte; + case SHORT: + return maxShort; + case INT: + return maxInt; + case LONG: + return maxLong; + case DOUBLE: + return maxDouble; + } + return null; + } + + @Override + public BitSet getNullBits() { + return nullBitSet; + } + + @Override + public int getDecimalPoint() { + return decimal; + } + + @Override + public DataType getDataType() { + return dataType; + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/SimpleStatsResult.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/SimpleStatsResult.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/SimpleStatsResult.java new file mode 100644 index 0000000..1db86ff --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/SimpleStatsResult.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.page.statistics; + +import java.util.BitSet; + +import org.apache.carbondata.core.metadata.datatype.DataType; + +public interface SimpleStatsResult { + + Object getMin(); + + Object getMax(); + + BitSet getNullBits(); + + int getDecimalPoint(); + + DataType getDataType(); +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/TablePageStatistics.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/TablePageStatistics.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/TablePageStatistics.java new file mode 100644 index 0000000..07de9c0 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/TablePageStatistics.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.page.statistics; + +import java.nio.ByteBuffer; +import java.util.BitSet; + +import org.apache.carbondata.core.datastore.columnar.IndexStorage; +import org.apache.carbondata.core.datastore.page.encoding.EncodedDimensionPage; +import org.apache.carbondata.core.datastore.page.encoding.EncodedMeasurePage; +import org.apache.carbondata.core.metadata.ColumnPageCodecMeta; +import org.apache.carbondata.core.metadata.ValueEncoderMeta; +import org.apache.carbondata.core.util.CarbonUtil; + +// Statistics of dimension and measure column in a TablePage +public class TablePageStatistics { + + // number of dimension after complex column expanded + private int numDimensionsExpanded; + + // min of each dimension column + private byte[][] dimensionMinValue; + + // max of each dimension column + private byte[][] dimensionMaxValue; + + // min of each measure column + private byte[][] measureMinValue; + + // max os each measure column + private byte[][] measureMaxValue; + + // null bit set for each measure column + private BitSet[] nullBitSet; + + public TablePageStatistics(EncodedDimensionPage[] dimensions, + EncodedMeasurePage[] measures) { + this.numDimensionsExpanded = dimensions.length; + int numMeasures = measures.length; + this.dimensionMinValue = new byte[numDimensionsExpanded][]; + this.dimensionMaxValue = new byte[numDimensionsExpanded][]; + this.measureMinValue = new byte[numMeasures][]; + this.measureMaxValue = new byte[numMeasures][]; + this.nullBitSet = new BitSet[numMeasures]; + updateDimensionMinMax(dimensions); + updateMeasureMinMax(measures); + } + + private void updateDimensionMinMax(EncodedDimensionPage[] dimensions) { + for (int i = 0; i < dimensions.length; i++) { + IndexStorage keyStorageArray = dimensions[i].getIndexStorage(); + switch (dimensions[i].getDimensionType()) { + case GLOBAL_DICTIONARY: + case DIRECT_DICTIONARY: + case COLUMN_GROUP: + case COMPLEX: + dimensionMinValue[i] = keyStorageArray.getMin(); + dimensionMaxValue[i] = keyStorageArray.getMax(); + break; + case PLAIN_VALUE: + dimensionMinValue[i] = updateMinMaxForNoDictionary(keyStorageArray.getMin()); + dimensionMaxValue[i] = updateMinMaxForNoDictionary(keyStorageArray.getMax()); + break; + } + } + } + + private void updateMeasureMinMax(EncodedMeasurePage[] measures) { + for (int i = 0; i < measures.length; i++) { + ValueEncoderMeta meta = measures[i].getMetaData(); + if (meta instanceof ColumnPageCodecMeta) { + ColumnPageCodecMeta metadata = (ColumnPageCodecMeta) meta; + measureMaxValue[i] = metadata.getMaxAsBytes(); + measureMinValue[i] = metadata.getMinAsBytes(); + } else { + measureMaxValue[i] = CarbonUtil.getMaxValueAsBytes(meta); + measureMinValue[i] = CarbonUtil.getMinValueAsBytes(meta); + } + nullBitSet[i] = measures[i].getNullBitSet(); + } + } + + /** + * Below method will be used to update the min or max value + * by removing the length from it + * + * @return min max value without length + */ + public static byte[] updateMinMaxForNoDictionary(byte[] valueWithLength) { + ByteBuffer buffer = ByteBuffer.wrap(valueWithLength); + byte[] actualValue = new byte[buffer.getShort()]; + buffer.get(actualValue); + return actualValue; + } + + public byte[][] getDimensionMinValue() { + return dimensionMinValue; + } + + public byte[][] getDimensionMaxValue() { + return dimensionMaxValue; + } + + public byte[][] getMeasureMinValue() { + return measureMinValue; + } + + public byte[][] getMeasureMaxValue() { + return measureMaxValue; + } + + public BitSet[] getNullBitSet() { + return nullBitSet; + } +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/VarLengthPageStatsCollector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/VarLengthPageStatsCollector.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/VarLengthPageStatsCollector.java new file mode 100644 index 0000000..e985f90 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/VarLengthPageStatsCollector.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.page.statistics; + +import java.util.BitSet; + +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.util.ByteUtil; + +public class VarLengthPageStatsCollector implements ColumnPageStatsCollector { + + private byte[] min, max; + + public static VarLengthPageStatsCollector newInstance() { + return new VarLengthPageStatsCollector(); + } + + private VarLengthPageStatsCollector() { + } + + @Override + public void updateNull(int rowId) { + + } + + @Override + public void update(byte value) { + + } + + @Override + public void update(short value) { + + } + + @Override + public void update(int value) { + + } + + @Override + public void update(long value) { + + } + + @Override + public void update(double value) { + + } + + @Override + public void update(byte[] value) { + if (min == null && max == null) { + min = value; + max = value; + } else { + if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(min, value) > 0) { + min = value; + } + if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(max, value) < 0) { + max = value; + } + } + } + + @Override + public Object getPageStats() { + // for binary type, we do not collect its stats + return new SimpleStatsResult() { + + @Override public Object getMin() { + return min; + } + + @Override public Object getMax() { + return max; + } + + @Override public BitSet getNullBits() { + return null; + } + + @Override public int getDecimalPoint() { + return 0; + } + + @Override public DataType getDataType() { + return null; + } + }; + } +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/metadata/BlockletInfoColumnar.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/BlockletInfoColumnar.java b/core/src/main/java/org/apache/carbondata/core/metadata/BlockletInfoColumnar.java index 47df6a5..da8a33d 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/BlockletInfoColumnar.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/BlockletInfoColumnar.java @@ -19,10 +19,13 @@ package org.apache.carbondata.core.metadata; import java.util.BitSet; -import org.apache.carbondata.core.datastore.page.statistics.MeasurePageStatsVO; +import org.apache.carbondata.core.datastore.page.EncodedTablePage; +// It is used for V1 and V2 format only public class BlockletInfoColumnar { + private EncodedTablePage encodedTablePage; + /** * measureOffset. */ @@ -85,8 +88,6 @@ public class BlockletInfoColumnar { private boolean[] aggKeyBlock; - private MeasurePageStatsVO stats; - /** * column min array */ @@ -98,11 +99,6 @@ public class BlockletInfoColumnar { private byte[][] columnMinData; /** - * true if given index is colgroup block - */ - private boolean[] colGrpBlock; - - /** * bit set which will holds the measure * indexes which are null */ @@ -317,20 +313,6 @@ public class BlockletInfoColumnar { } /** - * @return - */ - public boolean[] getColGrpBlocks() { - return this.colGrpBlock; - } - - /** - * @param colGrpBlock - */ - public void setColGrpBlocks(boolean[] colGrpBlock) { - this.colGrpBlock = colGrpBlock; - } - - /** * @return the measureNullValueIndex */ public BitSet[] getMeasureNullValueIndex() { @@ -344,11 +326,11 @@ public class BlockletInfoColumnar { this.measureNullValueIndex = measureNullValueIndex; } - public MeasurePageStatsVO getStats() { - return stats; + public void setEncodedTablePage(EncodedTablePage encodedData) { + this.encodedTablePage = encodedData; } - public void setStats(MeasurePageStatsVO stats) { - this.stats = stats; + public EncodedTablePage getEncodedTablePage() { + return encodedTablePage; } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/metadata/CodecMetaFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/CodecMetaFactory.java b/core/src/main/java/org/apache/carbondata/core/metadata/CodecMetaFactory.java new file mode 100644 index 0000000..ac83333 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/metadata/CodecMetaFactory.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.metadata; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.util.CarbonProperties; + +import static org.apache.carbondata.core.metadata.datatype.DataType.*; +import static org.apache.carbondata.core.metadata.datatype.DataType.LONG; + +public class CodecMetaFactory { + + private static final ColumnarFormatVersion version = + CarbonProperties.getInstance().getFormatVersion(); + + public static ValueEncoderMeta createMeta() { + switch (version) { + case V1: + case V2: + return new ValueEncoderMeta(); + case V3: + return ColumnPageCodecMeta.newInstance(); + default: + throw new UnsupportedOperationException("unsupported version: " + version); + } + } + + public static ValueEncoderMeta createMeta(SimpleStatsResult stats, DataType targetDataType) { + switch (version) { + case V1: + case V2: + ValueEncoderMeta meta = new ValueEncoderMeta(); + switch (stats.getDataType()) { + case SHORT: + meta.setMaxValue((long)(short) stats.getMax()); + meta.setMinValue((long)(short) stats.getMin()); + break; + case INT: + meta.setMaxValue((long)(int) stats.getMax()); + meta.setMinValue((long)(int) stats.getMin()); + break; + default: + meta.setMaxValue(stats.getMax()); + meta.setMinValue(stats.getMin()); + break; + } + meta.setDecimal(stats.getDecimalPoint()); + meta.setType(converType(stats.getDataType())); + return meta; + case V3: + return ColumnPageCodecMeta.newInstance(stats, targetDataType); + default: + throw new UnsupportedOperationException("unsupported version: " + version); + } + } + + public static char converType(DataType type) { + switch (type) { + case BYTE: + case SHORT: + case INT: + case LONG: + return CarbonCommonConstants.BIG_INT_MEASURE; + case DOUBLE: + return CarbonCommonConstants.DOUBLE_MEASURE; + case DECIMAL: + return CarbonCommonConstants.BIG_DECIMAL_MEASURE; + default: + throw new RuntimeException("Unexpected type: " + type); + } + } + +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/metadata/ColumnPageCodecMeta.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/ColumnPageCodecMeta.java b/core/src/main/java/org/apache/carbondata/core/metadata/ColumnPageCodecMeta.java new file mode 100644 index 0000000..20a7568 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/metadata/ColumnPageCodecMeta.java @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.metadata; + +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult; +import org.apache.carbondata.core.metadata.datatype.DataType; + +/** + * It holds metadata for one column page + */ +public class ColumnPageCodecMeta extends ValueEncoderMeta implements Serializable { + + private BitSet nullBitSet; + + private DataType srcDataType; + + private DataType targetDataType; + + public static final char BYTE_VALUE_MEASURE = 'c'; + public static final char SHORT_VALUE_MEASURE = 'j'; + public static final char INT_VALUE_MEASURE = 'k'; + public static final char BIG_INT_MEASURE = 'd'; + public static final char DOUBLE_MEASURE = 'n'; + public static final char BIG_DECIMAL_MEASURE = 'b'; + + static ColumnPageCodecMeta newInstance() { + return new ColumnPageCodecMeta(); + } + + static ColumnPageCodecMeta newInstance( + SimpleStatsResult stats, DataType targetDataType) { + ColumnPageCodecMeta meta = new ColumnPageCodecMeta(); + meta.srcDataType = stats.getDataType(); + meta.targetDataType = targetDataType; + meta.nullBitSet = stats.getNullBits(); + meta.setType(CodecMetaFactory.converType(stats.getDataType())); + meta.setMaxValue(stats.getMax()); + meta.setMinValue(stats.getMin()); + meta.setDecimal(stats.getDecimalPoint()); + return meta; + } + + public DataType getTargetDataType() { + return targetDataType; + } + + public void setSrcDataType(char type) { + switch (type) { + case BYTE_VALUE_MEASURE: + srcDataType = DataType.BYTE; + break; + case SHORT_VALUE_MEASURE: + srcDataType = DataType.SHORT; + break; + case INT_VALUE_MEASURE: + srcDataType = DataType.INT; + break; + case BIG_INT_MEASURE: + srcDataType = DataType.LONG; + break; + case DOUBLE_MEASURE: + srcDataType = DataType.DOUBLE; + break; + case BIG_DECIMAL_MEASURE: + srcDataType = DataType.DECIMAL; + break; + default: + throw new RuntimeException("Unexpected type: " + type); + } + } + + private char getSrcDataTypeInChar() { + switch (srcDataType) { + case BYTE: + return BYTE_VALUE_MEASURE; + case SHORT: + return SHORT_VALUE_MEASURE; + case INT: + return INT_VALUE_MEASURE; + case LONG: + return BIG_INT_MEASURE; + case DOUBLE: + return DOUBLE_MEASURE; + case DECIMAL: + return BIG_DECIMAL_MEASURE; + default: + throw new RuntimeException("Unexpected type: " + targetDataType); + } + } + + public BitSet getNullBitSet() { + return nullBitSet; + } + + public void setNullBitSet(BitSet nullBitSet) { + this.nullBitSet = nullBitSet; + } + + public DataType getSrcDataType() { + return srcDataType; + } + + public byte[] serialize() { + ByteBuffer buffer = null; + switch (srcDataType) { + case BYTE: + buffer = ByteBuffer.allocate( + (CarbonCommonConstants.LONG_SIZE_IN_BYTE * 3) + CarbonCommonConstants.INT_SIZE_IN_BYTE + + 3); + buffer.putChar(getSrcDataTypeInChar()); + buffer.put((byte) getMaxValue()); + buffer.put((byte) getMinValue()); + buffer.putLong((Long) 0L); // unique value is obsoleted, maintain for compatibility + break; + case SHORT: + buffer = ByteBuffer.allocate( + (CarbonCommonConstants.LONG_SIZE_IN_BYTE * 3) + CarbonCommonConstants.INT_SIZE_IN_BYTE + + 3); + buffer.putChar(getSrcDataTypeInChar()); + buffer.putShort((short) getMaxValue()); + buffer.putShort((short) getMinValue()); + buffer.putLong((Long) 0L); // unique value is obsoleted, maintain for compatibility + break; + case INT: + buffer = ByteBuffer.allocate( + (CarbonCommonConstants.LONG_SIZE_IN_BYTE * 3) + CarbonCommonConstants.INT_SIZE_IN_BYTE + + 3); + buffer.putChar(getSrcDataTypeInChar()); + buffer.putInt((int) getMaxValue()); + buffer.putInt((int) getMinValue()); + buffer.putLong((Long) 0L); // unique value is obsoleted, maintain for compatibility + break; + case LONG: + buffer = ByteBuffer.allocate( + (CarbonCommonConstants.LONG_SIZE_IN_BYTE * 3) + CarbonCommonConstants.INT_SIZE_IN_BYTE + + 3); + buffer.putChar(getSrcDataTypeInChar()); + buffer.putLong((Long) getMaxValue()); + buffer.putLong((Long) getMinValue()); + buffer.putLong((Long) 0L); // unique value is obsoleted, maintain for compatibility + break; + case DOUBLE: + buffer = ByteBuffer.allocate( + (CarbonCommonConstants.DOUBLE_SIZE_IN_BYTE * 3) + CarbonCommonConstants.INT_SIZE_IN_BYTE + + 3); + buffer.putChar(getSrcDataTypeInChar()); + buffer.putDouble((Double) getMaxValue()); + buffer.putDouble((Double) getMinValue()); + buffer.putDouble((Double) 0d); // unique value is obsoleted, maintain for compatibility + break; + case DECIMAL: + buffer = ByteBuffer.allocate(CarbonCommonConstants.INT_SIZE_IN_BYTE + 3); + buffer.putChar(getSrcDataTypeInChar()); + break; + } + buffer.putInt(getDecimal()); + buffer.put(getDataTypeSelected()); + buffer.flip(); + return buffer.array(); + } + + public void deserialize(byte[] encodeMeta) { + ByteBuffer buffer = ByteBuffer.wrap(encodeMeta); + char srcDataType = buffer.getChar(); + this.setSrcDataType(srcDataType); + switch (srcDataType) { + case DOUBLE_MEASURE: + this.setMaxValue(buffer.getDouble()); + this.setMinValue(buffer.getDouble()); + buffer.getDouble(); // for non exist value which is obsoleted, it is backward compatibility; + break; + case BIG_DECIMAL_MEASURE: + this.setMaxValue(0.0); + this.setMinValue(0.0); + break; + case BYTE_VALUE_MEASURE: + this.setMaxValue(buffer.get()); + this.setMinValue(buffer.get()); + buffer.getLong(); // for non exist value which is obsoleted, it is backward compatibility; + break; + case SHORT_VALUE_MEASURE: + this.setMaxValue(buffer.getShort()); + this.setMinValue(buffer.getShort()); + buffer.getLong(); // for non exist value which is obsoleted, it is backward compatibility; + break; + case INT_VALUE_MEASURE: + this.setMaxValue(buffer.getInt()); + this.setMinValue(buffer.getInt()); + buffer.getLong(); // for non exist value which is obsoleted, it is backward compatibility; + break; + case BIG_INT_MEASURE: + this.setMaxValue(buffer.getLong()); + this.setMinValue(buffer.getLong()); + buffer.getLong(); // for non exist value which is obsoleted, it is backward compatibility; + break; + default: + throw new IllegalArgumentException("invalid measure type"); + } + this.setDecimal(buffer.getInt()); + buffer.get(); // for selectedDataType, obsoleted + } + + public byte[] getMaxAsBytes() { + return getValueAsBytes(getMaxValue()); + } + + public byte[] getMinAsBytes() { + return getValueAsBytes(getMinValue()); + } + + /** + * convert value to byte array + */ + private byte[] getValueAsBytes(Object value) { + ByteBuffer b; + switch (srcDataType) { + case BYTE: + b = ByteBuffer.allocate(8); + b.putLong((byte) value); + b.flip(); + return b.array(); + case SHORT: + b = ByteBuffer.allocate(8); + b.putLong((short) value); + b.flip(); + return b.array(); + case INT: + b = ByteBuffer.allocate(8); + b.putLong((int) value); + b.flip(); + return b.array(); + case LONG: + b = ByteBuffer.allocate(8); + b.putLong((long) value); + b.flip(); + return b.array(); + case DOUBLE: + b = ByteBuffer.allocate(8); + b.putDouble((double) value); + b.flip(); + return b.array(); + case DECIMAL: + case BYTE_ARRAY: + return new byte[8]; + default: + throw new IllegalArgumentException("Invalid data type: " + targetDataType); + } + } + +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/metadata/ValueEncoderMeta.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/ValueEncoderMeta.java b/core/src/main/java/org/apache/carbondata/core/metadata/ValueEncoderMeta.java index 741b999..971359d 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/ValueEncoderMeta.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/ValueEncoderMeta.java @@ -109,4 +109,4 @@ public class ValueEncoderMeta implements Serializable { public void setDataTypeSelected(byte dataTypeSelected) { this.dataTypeSelected = dataTypeSelected; } -} +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/AbstractScannedResultCollector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/AbstractScannedResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/AbstractScannedResultCollector.java index ad17240..4dadcc2 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/AbstractScannedResultCollector.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/AbstractScannedResultCollector.java @@ -27,7 +27,6 @@ import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure; import org.apache.carbondata.core.scan.collector.ScannedResultCollector; import org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo; import org.apache.carbondata.core.scan.executor.infos.DimensionInfo; -import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo; import org.apache.carbondata.core.scan.executor.infos.MeasureInfo; import org.apache.carbondata.core.scan.model.QueryMeasure; import org.apache.carbondata.core.scan.result.AbstractScannedResult; @@ -42,11 +41,6 @@ public abstract class AbstractScannedResultCollector implements ScannedResultCol LogServiceFactory.getLogService(AbstractScannedResultCollector.class.getName()); /** - * restructuring info - */ - private KeyStructureInfo restructureInfos; - - /** * table block execution infos */ protected BlockExecutionInfo tableBlockExecutionInfos; http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java index e89ce12..db5010f 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java @@ -16,10 +16,8 @@ */ package org.apache.carbondata.core.util; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -28,10 +26,10 @@ import java.util.Set; import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; -import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.compression.CompressorFactory; -import org.apache.carbondata.core.datastore.page.statistics.MeasurePageStatsVO; +import org.apache.carbondata.core.datastore.page.EncodedTablePage; +import org.apache.carbondata.core.datastore.page.statistics.TablePageStatistics; import org.apache.carbondata.core.metadata.BlockletInfoColumnar; import org.apache.carbondata.core.metadata.ColumnarFormatVersion; import org.apache.carbondata.core.metadata.ValueEncoderMeta; @@ -75,11 +73,10 @@ public class CarbonMetadataUtil { * It converts list of BlockletInfoColumnar to FileFooter thrift objects * * @param infoList - * @param numCols * @param cardinalities * @return FileFooter */ - public static FileFooter convertFileFooter(List<BlockletInfoColumnar> infoList, int numCols, + public static FileFooter convertFileFooter(List<BlockletInfoColumnar> infoList, int[] cardinalities, List<ColumnSchema> columnSchemaList, SegmentProperties segmentProperties) throws IOException { FileFooter footer = getFileFooter(infoList, cardinalities, columnSchemaList); @@ -243,15 +240,19 @@ public class CarbonMetadataUtil { return blockletIndex; } - public static BlockletIndex getBlockletIndex(List<NodeHolder> nodeHolderList, + public static BlockletIndex getBlockletIndex(List<EncodedTablePage> encodedTablePageList, List<CarbonMeasure> carbonMeasureList) { BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex(); // Calculating min/max for every each column. - byte[][] minCol = nodeHolderList.get(0).getDimensionColumnMinData().clone(); - byte[][] maxCol = nodeHolderList.get(0).getDimensionColumnMaxData().clone(); - for (NodeHolder nodeHolder : nodeHolderList) { - byte[][] columnMaxData = nodeHolder.getDimensionColumnMaxData(); - byte[][] columnMinData = nodeHolder.getDimensionColumnMinData(); + TablePageStatistics stats = new TablePageStatistics(encodedTablePageList.get(0).getDimensions(), + encodedTablePageList.get(0).getMeasures()); + byte[][] minCol = stats.getDimensionMinValue().clone(); + byte[][] maxCol = stats.getDimensionMaxValue().clone(); + for (EncodedTablePage encodedTablePage : encodedTablePageList) { + stats = new TablePageStatistics(encodedTablePage.getDimensions(), + encodedTablePage.getMeasures()); + byte[][] columnMaxData = stats.getDimensionMaxValue(); + byte[][] columnMinData = stats.getDimensionMinValue(); for (int i = 0; i < maxCol.length; i++) { if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(columnMaxData[i], maxCol[i]) > 0) { maxCol[i] = columnMaxData[i]; @@ -269,14 +270,18 @@ public class CarbonMetadataUtil { blockletMinMaxIndex.addToMin_values(ByteBuffer.wrap(min)); } - byte[][] measureMaxValue = nodeHolderList.get(0).getMeasureColumnMaxData().clone(); - byte[][] measureMinValue = nodeHolderList.get(0).getMeasureColumnMinData().clone(); + stats = new TablePageStatistics(encodedTablePageList.get(0).getDimensions(), + encodedTablePageList.get(0).getMeasures()); + byte[][] measureMaxValue = stats.getMeasureMaxValue().clone(); + byte[][] measureMinValue = stats.getMeasureMinValue().clone(); byte[] minVal = null; byte[] maxVal = null; - for (int i = 1; i < nodeHolderList.size(); i++) { + for (int i = 1; i < encodedTablePageList.size(); i++) { for (int j = 0; j < measureMinValue.length; j++) { - minVal = nodeHolderList.get(i).getMeasureColumnMinData()[j]; - maxVal = nodeHolderList.get(i).getMeasureColumnMaxData()[j]; + stats = new TablePageStatistics( + encodedTablePageList.get(i).getDimensions(), encodedTablePageList.get(i).getMeasures()); + minVal = stats.getMeasureMinValue()[j]; + maxVal = stats.getMeasureMaxValue()[j]; if (compareMeasureData(measureMaxValue[j], maxVal, carbonMeasureList.get(j).getDataType()) < 0) { measureMaxValue[j] = maxVal.clone(); @@ -295,8 +300,11 @@ public class CarbonMetadataUtil { blockletMinMaxIndex.addToMin_values(ByteBuffer.wrap(min)); } BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex(); - blockletBTreeIndex.setStart_key(nodeHolderList.get(0).getStartKey()); - blockletBTreeIndex.setEnd_key(nodeHolderList.get(nodeHolderList.size() - 1).getEndKey()); + byte[] startKey = encodedTablePageList.get(0).getPageKey().serializeStartKey(); + blockletBTreeIndex.setStart_key(startKey); + byte[] endKey = encodedTablePageList.get( + encodedTablePageList.size() - 1).getPageKey().serializeEndKey(); + blockletBTreeIndex.setEnd_key(endKey); BlockletIndex blockletIndex = new BlockletIndex(); blockletIndex.setMin_max_index(blockletMinMaxIndex); blockletIndex.setB_tree_index(blockletBTreeIndex); @@ -332,10 +340,9 @@ public class CarbonMetadataUtil { int aggregateIndex = 0; boolean[] isSortedKeyColumn = blockletInfoColumnar.getIsSortedKeyColumn(); boolean[] aggKeyBlock = blockletInfoColumnar.getAggKeyBlock(); - boolean[] colGrpblock = blockletInfoColumnar.getColGrpBlocks(); for (int i = 0; i < blockletInfoColumnar.getKeyLengths().length; i++) { DataChunk dataChunk = new DataChunk(); - dataChunk.setChunk_meta(getChunkCompressionMeta()); + dataChunk.setChunk_meta(getSnappyChunkCompressionMeta()); List<Encoding> encodings = new ArrayList<Encoding>(); if (containsEncoding(i, Encoding.DICTIONARY, columnSchema, segmentProperties)) { encodings.add(Encoding.DICTIONARY); @@ -343,7 +350,6 @@ public class CarbonMetadataUtil { if (containsEncoding(i, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) { encodings.add(Encoding.DIRECT_DICTIONARY); } - dataChunk.setRowMajor(colGrpblock[i]); // TODO : Once schema PR is merged and information needs to be passed // here. dataChunk.setColumn_ids(new ArrayList<Integer>()); @@ -376,7 +382,7 @@ public class CarbonMetadataUtil { for (int i = 0; i < blockletInfoColumnar.getMeasureLength().length; i++) { DataChunk dataChunk = new DataChunk(); - dataChunk.setChunk_meta(getChunkCompressionMeta()); + dataChunk.setChunk_meta(getSnappyChunkCompressionMeta()); dataChunk.setRowMajor(false); // TODO : Once schema PR is merged and information needs to be passed // here. @@ -399,8 +405,10 @@ public class CarbonMetadataUtil { // dataChunk.setPresence(new PresenceMeta()); // TODO : Need to write ValueCompression meta here. List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>(); - encoderMetaList.add(ByteBuffer.wrap(serializeEncoderMeta( - createValueEncoderMeta(blockletInfoColumnar.getStats(), i)))); + encoderMetaList.add( + ByteBuffer.wrap( + serializeEncoderMeta( + blockletInfoColumnar.getEncodedTablePage().getMeasure(i).getMetaData()))); dataChunk.setEncoder_meta(encoderMetaList); colDataChunks.add(dataChunk); } @@ -440,37 +448,10 @@ public class CarbonMetadataUtil { return aos.toByteArray(); } - private static ValueEncoderMeta createValueEncoderMeta(MeasurePageStatsVO stats, - int index) { - ValueEncoderMeta encoderMeta = new ValueEncoderMeta(); - encoderMeta.setMaxValue(stats.getMax(index)); - encoderMeta.setMinValue(stats.getMin(index)); - encoderMeta.setDataTypeSelected(stats.getDataTypeSelected(index)); - encoderMeta.setDecimal(stats.getDecimal(index)); - encoderMeta.setType(getTypeInChar(stats.getDataType(index))); - encoderMeta.setUniqueValue(stats.getNonExistValue(index)); - return encoderMeta; - } - - private static char getTypeInChar(DataType type) { - switch (type) { - case SHORT: - case INT: - case LONG: - return CarbonCommonConstants.BIG_INT_MEASURE; - case DOUBLE: - return CarbonCommonConstants.DOUBLE_MEASURE; - case DECIMAL: - return CarbonCommonConstants.BIG_DECIMAL_MEASURE; - default: - throw new RuntimeException("unsupported type: " + type); - } - } - /** * Right now it is set to default values. We may use this in future */ - private static ChunkCompressionMeta getChunkCompressionMeta() { + public static ChunkCompressionMeta getSnappyChunkCompressionMeta() { ChunkCompressionMeta chunkCompressionMeta = new ChunkCompressionMeta(); chunkCompressionMeta.setCompression_codec(CompressionCodec.SNAPPY); chunkCompressionMeta.setTotal_compressed_size(0); @@ -478,111 +459,16 @@ public class CarbonMetadataUtil { return chunkCompressionMeta; } + /** - * It converts FileFooter thrift object to list of BlockletInfoColumnar - * objects - * - * @param footer - * @return + * Right now it is set to default values. We may use this in future */ - public static List<BlockletInfoColumnar> convertBlockletInfo(FileFooter footer) - throws IOException { - List<BlockletInfoColumnar> listOfNodeInfo = - new ArrayList<BlockletInfoColumnar>(CarbonCommonConstants.CONSTANT_SIZE_TEN); - for (BlockletInfo blockletInfo : footer.getBlocklet_info_list()) { - BlockletInfoColumnar blockletInfoColumnar = new BlockletInfoColumnar(); - blockletInfoColumnar.setNumberOfKeys(blockletInfo.getNum_rows()); - List<DataChunk> columnChunks = blockletInfo.getColumn_data_chunks(); - List<DataChunk> dictChunks = new ArrayList<DataChunk>(); - List<DataChunk> nonDictColChunks = new ArrayList<DataChunk>(); - for (DataChunk dataChunk : columnChunks) { - if (dataChunk.getEncoders().get(0).equals(Encoding.DICTIONARY)) { - dictChunks.add(dataChunk); - } else { - nonDictColChunks.add(dataChunk); - } - } - int[] keyLengths = new int[dictChunks.size()]; - long[] keyOffSets = new long[dictChunks.size()]; - long[] keyBlockIndexOffsets = new long[dictChunks.size()]; - int[] keyBlockIndexLens = new int[dictChunks.size()]; - long[] indexMapOffsets = new long[dictChunks.size()]; - int[] indexMapLens = new int[dictChunks.size()]; - boolean[] sortState = new boolean[dictChunks.size()]; - int i = 0; - for (DataChunk dataChunk : dictChunks) { - keyLengths[i] = dataChunk.getData_page_length(); - keyOffSets[i] = dataChunk.getData_page_offset(); - keyBlockIndexOffsets[i] = dataChunk.getRowid_page_offset(); - keyBlockIndexLens[i] = dataChunk.getRowid_page_length(); - indexMapOffsets[i] = dataChunk.getRle_page_offset(); - indexMapLens[i] = dataChunk.getRle_page_length(); - sortState[i] = dataChunk.getSort_state().equals(SortState.SORT_EXPLICIT); - i++; - } - blockletInfoColumnar.setKeyLengths(keyLengths); - blockletInfoColumnar.setKeyOffSets(keyOffSets); - blockletInfoColumnar.setKeyBlockIndexOffSets(keyBlockIndexOffsets); - blockletInfoColumnar.setKeyBlockIndexLength(keyBlockIndexLens); - blockletInfoColumnar.setDataIndexMapOffsets(indexMapOffsets); - blockletInfoColumnar.setDataIndexMapLength(indexMapLens); - blockletInfoColumnar.setIsSortedKeyColumn(sortState); - - int[] msrLens = new int[nonDictColChunks.size()]; - long[] msrOffsets = new long[nonDictColChunks.size()]; - ValueEncoderMeta[] encoderMetas = new ValueEncoderMeta[nonDictColChunks.size()]; - i = 0; - for (DataChunk msrChunk : nonDictColChunks) { - msrLens[i] = msrChunk.getData_page_length(); - msrOffsets[i] = msrChunk.getData_page_offset(); - encoderMetas[i] = deserializeValueEncoderMeta(msrChunk.getEncoder_meta().get(0)); - i++; - } - blockletInfoColumnar.setMeasureLength(msrLens); - blockletInfoColumnar.setMeasureOffset(msrOffsets); - blockletInfoColumnar.setStats(getMeasurePageStats(encoderMetas)); - listOfNodeInfo.add(blockletInfoColumnar); - } - - setBlockletIndex(footer, listOfNodeInfo); - return listOfNodeInfo; - } - - private static ValueEncoderMeta deserializeValueEncoderMeta(ByteBuffer byteBuffer) - throws IOException { - ByteArrayInputStream bis = new ByteArrayInputStream(byteBuffer.array()); - ObjectInputStream objStream = new ObjectInputStream(bis); - ValueEncoderMeta encoderMeta = null; - try { - encoderMeta = (ValueEncoderMeta) objStream.readObject(); - } catch (ClassNotFoundException e) { - LOGGER.error("Error while reading ValueEncoderMeta"); - } - return encoderMeta; - - } - - private static MeasurePageStatsVO getMeasurePageStats(ValueEncoderMeta[] encoderMetas) { - return MeasurePageStatsVO.build(encoderMetas); - } - - private static void setBlockletIndex(FileFooter footer, - List<BlockletInfoColumnar> listOfNodeInfo) { - List<BlockletIndex> blockletIndexList = footer.getBlocklet_index_list(); - for (int i = 0; i < blockletIndexList.size(); i++) { - BlockletBTreeIndex bTreeIndexList = blockletIndexList.get(i).getB_tree_index(); - BlockletMinMaxIndex minMaxIndexList = blockletIndexList.get(i).getMin_max_index(); - - listOfNodeInfo.get(i).setStartKey(bTreeIndexList.getStart_key()); - listOfNodeInfo.get(i).setEndKey(bTreeIndexList.getEnd_key()); - byte[][] min = new byte[minMaxIndexList.getMin_values().size()][]; - byte[][] max = new byte[minMaxIndexList.getMax_values().size()][]; - for (int j = 0; j < minMaxIndexList.getMax_valuesSize(); j++) { - min[j] = minMaxIndexList.getMin_values().get(j).array(); - max[j] = minMaxIndexList.getMax_values().get(j).array(); - } - listOfNodeInfo.get(i).setColumnMaxData(max); - } + private static ChunkCompressionMeta getChunkCompressionMeta() { + ChunkCompressionMeta chunkCompressionMeta = new ChunkCompressionMeta(); + chunkCompressionMeta.setCompression_codec(CompressionCodec.SNAPPY); + chunkCompressionMeta.setTotal_compressed_size(0); + chunkCompressionMeta.setTotal_uncompressed_size(0); + return chunkCompressionMeta; } /** @@ -651,7 +537,6 @@ public class CarbonMetadataUtil { int aggregateIndex = 0; boolean[] isSortedKeyColumn = blockletInfoColumnar.getIsSortedKeyColumn(); boolean[] aggKeyBlock = blockletInfoColumnar.getAggKeyBlock(); - boolean[] colGrpblock = blockletInfoColumnar.getColGrpBlocks(); for (int i = 0; i < blockletInfoColumnar.getKeyLengths().length; i++) { DataChunk2 dataChunk = new DataChunk2(); dataChunk.setChunk_meta(getChunkCompressionMeta()); @@ -662,7 +547,6 @@ public class CarbonMetadataUtil { if (containsEncoding(i, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) { encodings.add(Encoding.DIRECT_DICTIONARY); } - dataChunk.setRowMajor(colGrpblock[i]); // TODO : Once schema PR is merged and information needs to be passed // here. dataChunk.setData_page_length(blockletInfoColumnar.getKeyLengths()[i]); @@ -710,8 +594,10 @@ public class CarbonMetadataUtil { // dataChunk.setPresence(new PresenceMeta()); // TODO : Need to write ValueCompression meta here. List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>(); - encoderMetaList.add(ByteBuffer.wrap(serializeEncoderMeta( - createValueEncoderMeta(blockletInfoColumnar.getStats(), i)))); + encoderMetaList.add( + ByteBuffer.wrap( + serializeEncoderMeta( + blockletInfoColumnar.getEncodedTablePage().getMeasure(i).getMetaData()))); dataChunk.setEncoder_meta(encoderMetaList); colDataChunks.add(dataChunk); } @@ -719,102 +605,18 @@ public class CarbonMetadataUtil { } /** - * Below method will be used to get the data chunk object for all the columns - * - * @param nodeHolderList blocklet info - * @param columnSchema list of columns - * @param segmentProperties segment properties - * @return list of data chunks - * @throws IOException + * return DataChunk3 that contains the input DataChunk2 list */ - private static List<DataChunk2> getDatachunk2(List<NodeHolder> nodeHolderList, - List<ColumnSchema> columnSchema, SegmentProperties segmentProperties, int index, - boolean isDimensionColumn) throws IOException { - List<DataChunk2> colDataChunks = new ArrayList<DataChunk2>(); - DataChunk2 dataChunk = null; - NodeHolder nodeHolder = null; - for (int i = 0; i < nodeHolderList.size(); i++) { - nodeHolder = nodeHolderList.get(i); - dataChunk = new DataChunk2(); - dataChunk.min_max = new BlockletMinMaxIndex(); - dataChunk.setChunk_meta(getChunkCompressionMeta()); - dataChunk.setNumberOfRowsInpage(nodeHolder.getEntryCount()); - List<Encoding> encodings = new ArrayList<Encoding>(); - if (isDimensionColumn) { - dataChunk.setData_page_length(nodeHolder.getKeyLengths()[index]); - if (containsEncoding(index, Encoding.DICTIONARY, columnSchema, segmentProperties)) { - encodings.add(Encoding.DICTIONARY); - } - if (containsEncoding(index, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) { - encodings.add(Encoding.DIRECT_DICTIONARY); - } - dataChunk.setRowMajor(nodeHolder.getColGrpBlocks()[index]); - // TODO : Once schema PR is merged and information needs to be passed - // here. - if (nodeHolder.getRleEncodingForDictDim()[index]) { - dataChunk.setRle_page_length(nodeHolder.getDataIndexMapLength()[index]); - encodings.add(Encoding.RLE); - } - dataChunk.setSort_state(nodeHolder.getIsSortedKeyBlock()[index] ? - SortState.SORT_EXPLICIT : - SortState.SORT_NATIVE); - - if (!nodeHolder.getIsSortedKeyBlock()[index]) { - dataChunk.setRowid_page_length(nodeHolder.getKeyBlockIndexLength()[index]); - encodings.add(Encoding.INVERTED_INDEX); - } - dataChunk.min_max.addToMax_values( - ByteBuffer.wrap(nodeHolder.getDimensionColumnMaxData()[index])); - dataChunk.min_max.addToMin_values( - ByteBuffer.wrap(nodeHolder.getDimensionColumnMinData()[index])); - } else { - dataChunk.setData_page_length(nodeHolder.getDataArray()[index].length); - // TODO : Right now the encodings are happening at runtime. change as - // per this encoders. - dataChunk.setEncoders(encodings); - - dataChunk.setRowMajor(false); - // TODO : Right now the encodings are happening at runtime. change as - // per this encoders. - encodings.add(Encoding.DELTA); - dataChunk.setEncoders(encodings); - // TODO writing dummy presence meta need to set actual presence - // meta - PresenceMeta presenceMeta = new PresenceMeta(); - presenceMeta.setPresent_bit_streamIsSet(true); - presenceMeta.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor() - .compressByte(nodeHolder.getMeasureNullValueIndex()[index].toByteArray())); - dataChunk.setPresence(presenceMeta); - List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>(); - encoderMetaList.add(ByteBuffer.wrap(serializeEncodeMetaUsingByteBuffer( - createValueEncoderMeta(nodeHolder.getStats(), index)))); - dataChunk.setEncoder_meta(encoderMetaList); - dataChunk.min_max - .addToMax_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMaxData()[index])); - dataChunk.min_max - .addToMin_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMinData()[index])); - } - dataChunk.setEncoders(encodings); - colDataChunks.add(dataChunk); - } - return colDataChunks; - } - - public static DataChunk3 getDataChunk3(List<NodeHolder> nodeHolderList, - List<ColumnSchema> columnSchema, SegmentProperties segmentProperties, int index, - boolean isDimensionColumn) throws IOException { - List<DataChunk2> dataChunksList = - getDatachunk2(nodeHolderList, columnSchema, segmentProperties, index, isDimensionColumn); + public static DataChunk3 getDataChunk3(List<DataChunk2> dataChunksList) { int offset = 0; DataChunk3 dataChunk = new DataChunk3(); List<Integer> pageOffsets = new ArrayList<>(); List<Integer> pageLengths = new ArrayList<>(); int length = 0; - for (int i = 0; i < dataChunksList.size(); i++) { + for (DataChunk2 dataChunk2 : dataChunksList) { pageOffsets.add(offset); - length = - dataChunksList.get(i).getData_page_length() + dataChunksList.get(i).getRle_page_length() - + dataChunksList.get(i).getRowid_page_length(); + length = dataChunk2.getData_page_length() + dataChunk2.getRle_page_length() + + dataChunk2.getRowid_page_length(); pageLengths.add(length); offset += length; } @@ -824,36 +626,30 @@ public class CarbonMetadataUtil { return dataChunk; } - public static byte[] serializeEncodeMetaUsingByteBuffer(ValueEncoderMeta valueEncoderMeta) { - ByteBuffer buffer = null; - switch (valueEncoderMeta.getType()) { - case LONG: - buffer = ByteBuffer.allocate( - (CarbonCommonConstants.LONG_SIZE_IN_BYTE * 3) + CarbonCommonConstants.INT_SIZE_IN_BYTE - + 3); - buffer.putChar(valueEncoderMeta.getTypeInChar()); - buffer.putLong((Long) valueEncoderMeta.getMaxValue()); - buffer.putLong((Long) valueEncoderMeta.getMinValue()); - buffer.putLong((Long) valueEncoderMeta.getUniqueValue()); - break; - case DOUBLE: - buffer = ByteBuffer.allocate( - (CarbonCommonConstants.DOUBLE_SIZE_IN_BYTE * 3) + CarbonCommonConstants.INT_SIZE_IN_BYTE - + 3); - buffer.putChar(valueEncoderMeta.getTypeInChar()); - buffer.putDouble((Double) valueEncoderMeta.getMaxValue()); - buffer.putDouble((Double) valueEncoderMeta.getMinValue()); - buffer.putDouble((Double) valueEncoderMeta.getUniqueValue()); - break; - case DECIMAL: - buffer = ByteBuffer.allocate(CarbonCommonConstants.INT_SIZE_IN_BYTE + 3); - buffer.putChar(valueEncoderMeta.getTypeInChar()); - break; + /** + * return DataChunk3 for the dimension column (specifed by `columnIndex`) + * in `encodedTablePageList` + */ + public static DataChunk3 getDimensionDataChunk3(List<EncodedTablePage> encodedTablePageList, + int columnIndex) throws IOException { + List<DataChunk2> dataChunksList = new ArrayList<>(encodedTablePageList.size()); + for (EncodedTablePage encodedTablePage : encodedTablePageList) { + dataChunksList.add(encodedTablePage.getDimension(columnIndex).getDataChunk2()); } - buffer.putInt(valueEncoderMeta.getDecimal()); - buffer.put(valueEncoderMeta.getDataTypeSelected()); - buffer.flip(); - return buffer.array(); + return CarbonMetadataUtil.getDataChunk3(dataChunksList); + } + + /** + * return DataChunk3 for the measure column (specifed by `columnIndex`) + * in `encodedTablePageList` + */ + public static DataChunk3 getMeasureDataChunk3(List<EncodedTablePage> encodedTablePageList, + int columnIndex) throws IOException { + List<DataChunk2> dataChunksList = new ArrayList<>(encodedTablePageList.size()); + for (EncodedTablePage encodedTablePage : encodedTablePageList) { + dataChunksList.add(encodedTablePage.getMeasure(columnIndex).getDataChunk2()); + } + return CarbonMetadataUtil.getDataChunk3(dataChunksList); } public static int compareMeasureData(byte[] first, byte[] second, DataType dataType) { @@ -905,83 +701,4 @@ public class CarbonMetadataUtil { return fileHeader; } - /** - * Below method will be used to get the data chunk2 serialize object list - * - * @param nodeHolder node holder - * @param columnSchema table columns - * @param segmentProperties segment properties - * @param isDimensionColumn to get the list of dimension column or measure column - * @return list of data chunk2 - * @throws IOException - */ - public static List<byte[]> getDataChunk2(NodeHolder nodeHolder, List<ColumnSchema> columnSchema, - SegmentProperties segmentProperties, boolean isDimensionColumn) throws IOException { - List<byte[]> dataChunkBuffer = new ArrayList<>(); - if (isDimensionColumn) { - for (int i = 0; i < nodeHolder.getKeyArray().length; i++) { - DataChunk2 dataChunk = new DataChunk2(); - dataChunk.min_max = new BlockletMinMaxIndex(); - dataChunk.setChunk_meta(getChunkCompressionMeta()); - dataChunk.setNumberOfRowsInpage(nodeHolder.getEntryCount()); - List<Encoding> encodings = new ArrayList<Encoding>(); - dataChunk.setData_page_length(nodeHolder.getKeyLengths()[i]); - if (containsEncoding(i, Encoding.DICTIONARY, columnSchema, segmentProperties)) { - encodings.add(Encoding.DICTIONARY); - } - if (containsEncoding(i, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) { - encodings.add(Encoding.DIRECT_DICTIONARY); - } - dataChunk.setRowMajor(nodeHolder.getColGrpBlocks()[i]); - if (nodeHolder.getRleEncodingForDictDim()[i]) { - dataChunk.setRle_page_length(nodeHolder.getDataIndexMapLength()[i]); - encodings.add(Encoding.RLE); - } - dataChunk.setSort_state( - nodeHolder.getIsSortedKeyBlock()[i] ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE); - if (!nodeHolder.getIsSortedKeyBlock()[i]) { - dataChunk.setRowid_page_length(nodeHolder.getKeyBlockIndexLength()[i]); - encodings.add(Encoding.INVERTED_INDEX); - } - dataChunk.min_max.addToMax_values( - ByteBuffer.wrap(nodeHolder.getDimensionColumnMaxData()[i])); - dataChunk.min_max.addToMin_values( - ByteBuffer.wrap(nodeHolder.getDimensionColumnMinData()[i])); - dataChunk.setEncoders(encodings); - dataChunkBuffer.add(CarbonUtil.getByteArray(dataChunk)); - } - } else { - for (int i = 0; i < nodeHolder.getDataArray().length; i++) { - DataChunk2 dataChunk = new DataChunk2(); - dataChunk.min_max = new BlockletMinMaxIndex(); - dataChunk.setChunk_meta(getChunkCompressionMeta()); - dataChunk.setNumberOfRowsInpage(nodeHolder.getEntryCount()); - dataChunk.setData_page_length(nodeHolder.getDataArray()[i].length); - List<Encoding> encodings = new ArrayList<Encoding>(); - // TODO : Right now the encodings are happening at runtime. change as - // per this encoders. - dataChunk.setEncoders(encodings); - dataChunk.setRowMajor(false); - // TODO : Right now the encodings are happening at runtime. change as - // per this encoders. - encodings.add(Encoding.DELTA); - dataChunk.setEncoders(encodings); - // TODO writing dummy presence meta need to set actual presence - // meta - PresenceMeta presenceMeta = new PresenceMeta(); - presenceMeta.setPresent_bit_streamIsSet(true); - presenceMeta.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor() - .compressByte(nodeHolder.getMeasureNullValueIndex()[i].toByteArray())); - dataChunk.setPresence(presenceMeta); - List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>(); - encoderMetaList.add(ByteBuffer.wrap(serializeEncodeMetaUsingByteBuffer( - createValueEncoderMeta(nodeHolder.getStats(), i)))); - dataChunk.setEncoder_meta(encoderMetaList); - dataChunk.min_max.addToMax_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMaxData()[i])); - dataChunk.min_max.addToMin_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMinData()[i])); - dataChunkBuffer.add(CarbonUtil.getByteArray(dataChunk)); - } - } - return dataChunkBuffer; - } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index 1b08263..68a78dd 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java @@ -51,7 +51,6 @@ import org.apache.carbondata.core.datastore.columnar.ColumnGroupModel; import org.apache.carbondata.core.datastore.columnar.UnBlockIndexer; import org.apache.carbondata.core.datastore.filesystem.CarbonFile; import org.apache.carbondata.core.datastore.impl.FileFactory; -import org.apache.carbondata.core.datastore.page.statistics.MeasurePageStatsVO; import org.apache.carbondata.core.keygenerator.mdkey.NumberCompressor; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.metadata.ValueEncoderMeta; @@ -827,15 +826,6 @@ public final class CarbonUtil { } /** - * Below method will be used to get the stats of the measure data page - */ - public static MeasurePageStatsVO getMeasurePageStats( - List<ValueEncoderMeta> encodeMetaList) { - return MeasurePageStatsVO.build( - encodeMetaList.toArray(new ValueEncoderMeta[encodeMetaList.size()])); - } - - /** * Below method will be used to check whether particular encoding is present * in the dimension or not * @@ -1335,20 +1325,6 @@ public final class CarbonUtil { return thriftByteArray; } - /** - * Below method will be used to convert the bytearray to data chunk object - * - * @param dataChunkBytes datachunk thrift object in bytes - * @return data chunk thrift object - */ - public static DataChunk2 readDataChunk(byte[] dataChunkBytes, int offset, int length) - throws IOException { - return (DataChunk2) read(dataChunkBytes, new ThriftReader.TBaseCreator() { - @Override public TBase create() { - return new DataChunk2(); - } - }, offset, length); - } public static DataChunk3 readDataChunk3(ByteBuffer dataChunkBuffer, int offset, int length) throws IOException { @@ -1447,6 +1423,38 @@ public final class CarbonUtil { return valueEncoderMeta; } + public static byte[] serializeEncodeMetaUsingByteBuffer(ValueEncoderMeta valueEncoderMeta) { + ByteBuffer buffer = null; + switch (valueEncoderMeta.getType()) { + case LONG: + buffer = ByteBuffer.allocate( + (CarbonCommonConstants.LONG_SIZE_IN_BYTE * 3) + CarbonCommonConstants.INT_SIZE_IN_BYTE + + 3); + buffer.putChar(valueEncoderMeta.getTypeInChar()); + buffer.putLong((Long) valueEncoderMeta.getMaxValue()); + buffer.putLong((Long) valueEncoderMeta.getMinValue()); + buffer.putLong(0L); // unique value, not used + break; + case DOUBLE: + buffer = ByteBuffer.allocate( + (CarbonCommonConstants.DOUBLE_SIZE_IN_BYTE * 3) + CarbonCommonConstants.INT_SIZE_IN_BYTE + + 3); + buffer.putChar(valueEncoderMeta.getTypeInChar()); + buffer.putDouble((Double) valueEncoderMeta.getMaxValue()); + buffer.putDouble((Double) valueEncoderMeta.getMinValue()); + buffer.putDouble(0d); // unique value, not used + break; + case DECIMAL: + buffer = ByteBuffer.allocate(CarbonCommonConstants.INT_SIZE_IN_BYTE + 3); + buffer.putChar(valueEncoderMeta.getTypeInChar()); + break; + } + buffer.putInt(0); // decimal point, not used + buffer.put(valueEncoderMeta.getDataTypeSelected()); + buffer.flip(); + return buffer.array(); + } + /** * Below method will be used to convert indexes in range * Indexes=[0,1,2,3,4,5,6,7,8,9] @@ -1717,5 +1725,47 @@ public final class CarbonUtil { public static boolean isValidBadStorePath(String badRecordsLocation) { return !(null == badRecordsLocation || badRecordsLocation.length() == 0); } + + public static byte[] getMaxValueAsBytes(ValueEncoderMeta meta) { + ByteBuffer b; + switch (meta.getType()) { + case LONG: + b = ByteBuffer.allocate(8); + b.putLong((long) meta.getMaxValue()); + b.flip(); + return b.array(); + case DOUBLE: + b = ByteBuffer.allocate(8); + b.putDouble((double) meta.getMaxValue()); + b.flip(); + return b.array(); + case DECIMAL: + case BYTE_ARRAY: + return new byte[8]; + default: + throw new IllegalArgumentException("Invalid data type: " + meta.getType()); + } + } + + public static byte[] getMinValueAsBytes(ValueEncoderMeta meta) { + ByteBuffer b; + switch (meta.getType()) { + case LONG: + b = ByteBuffer.allocate(8); + b.putLong((long) meta.getMinValue()); + b.flip(); + return b.array(); + case DOUBLE: + b = ByteBuffer.allocate(8); + b.putDouble((double) meta.getMinValue()); + b.flip(); + return b.array(); + case DECIMAL: + case BYTE_ARRAY: + return new byte[8]; + default: + throw new IllegalArgumentException("Invalid data type: " + meta.getType()); + } + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java index 37ae5bb..39b8b3c 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java @@ -136,25 +136,6 @@ public final class DataTypeUtil { } /** - * This method will return the type of measure based on its data type - * - * @param dataType - * @return - */ - public static char getAggType(DataType dataType) { - switch (dataType) { - case DECIMAL: - return CarbonCommonConstants.BIG_DECIMAL_MEASURE; - case SHORT: - case INT: - case LONG: - return CarbonCommonConstants.BIG_INT_MEASURE; - default: - return CarbonCommonConstants.DOUBLE_MEASURE; - } - } - - /** * This method will convert a big decimal value to bytes * * @param num http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/util/NodeHolder.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/NodeHolder.java b/core/src/main/java/org/apache/carbondata/core/util/NodeHolder.java index 95037b2..4afa9b6 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/NodeHolder.java +++ b/core/src/main/java/org/apache/carbondata/core/util/NodeHolder.java @@ -19,9 +19,12 @@ package org.apache.carbondata.core.util; import java.util.BitSet; -import org.apache.carbondata.core.datastore.page.statistics.MeasurePageStatsVO; +import org.apache.carbondata.core.datastore.page.EncodedTablePage; +import org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult; public class NodeHolder { + private EncodedTablePage encodedData; + /** * keyArray */ @@ -94,7 +97,7 @@ public class NodeHolder { private byte[][] measureColumnMinData; - private MeasurePageStatsVO stats; + private SimpleStatsResult stats; /** * array of rleEncodingForDictDim flag to identify the rleEncodingForDictDim @@ -418,11 +421,37 @@ public class NodeHolder { return this.writeAll; } - public MeasurePageStatsVO getStats() { + public SimpleStatsResult getStats() { return stats; } - public void setMeasureStats(MeasurePageStatsVO stats) { + public void setMeasureStats(SimpleStatsResult stats) { this.stats = stats; } + + public static byte[][] getKeyArray(EncodedTablePage encodedTablePage) { + int numDimensions = encodedTablePage.getNumDimensions(); + byte[][] keyArray = new byte[numDimensions][]; + for (int i = 0; i < numDimensions; i++) { + keyArray[i] = encodedTablePage.getDimension(i).getEncodedData(); + } + return keyArray; + } + + public static byte[][] getDataArray(EncodedTablePage encodedTablePage) { + int numMeasures = encodedTablePage.getNumMeasures(); + byte[][] dataArray = new byte[numMeasures][]; + for (int i = 0; i < numMeasures; i++) { + dataArray[i] = encodedTablePage.getMeasure(i).getEncodedData(); + } + return dataArray; + } + + public void setEncodedData(EncodedTablePage encodedData) { + this.encodedData = encodedData; + } + + public EncodedTablePage getEncodedData() { + return encodedData; + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/fecafde8/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java new file mode 100644 index 0000000..d6ecfbc --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/util/NonDictionaryUtil.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.util; + +import java.nio.ByteBuffer; + +import org.apache.carbondata.core.datastore.row.WriteStepRowUtil; + +/** + * This is the utility class for No Dictionary changes. + */ +public class NonDictionaryUtil { + + /** + * This method will form one single byte [] for all the high card dims. + * For example if you need to pack 2 columns c1 and c2 , it stores in following way + * <total_len(short)><offsetLen(short)><offsetLen+c1_len(short)><c1(byte[])><c2(byte[])> + * @param byteBufferArr + * @return + */ + public static byte[] packByteBufferIntoSingleByteArray(byte[][] byteBufferArr) { + // for empty array means there is no data to remove dictionary. + if (null == byteBufferArr || byteBufferArr.length == 0) { + return null; + } + int noOfCol = byteBufferArr.length; + short toDetermineLengthOfByteArr = 2; + short offsetLen = (short) (noOfCol * 2 + toDetermineLengthOfByteArr); + int totalBytes = calculateTotalBytes(byteBufferArr) + offsetLen; + + ByteBuffer buffer = ByteBuffer.allocate(totalBytes); + + // write the length of the byte [] as first short + buffer.putShort((short) (totalBytes - toDetermineLengthOfByteArr)); + // writing the offset of the first element. + buffer.putShort(offsetLen); + + // prepare index for byte [] + for (int index = 0; index < byteBufferArr.length - 1; index++) { + int noOfBytes = byteBufferArr[index].length; + + buffer.putShort((short) (offsetLen + noOfBytes)); + offsetLen += noOfBytes; + } + + // put actual data. + for (int index = 0; index < byteBufferArr.length; index++) { + buffer.put(byteBufferArr[index]); + } + buffer.rewind(); + return buffer.array(); + + } + + /** + * To calculate the total bytes in byte Buffer[]. + * + * @param byteBufferArr + * @return + */ + private static int calculateTotalBytes(byte[][] byteBufferArr) { + int total = 0; + for (int index = 0; index < byteBufferArr.length; index++) { + total += byteBufferArr[index].length; + } + return total; + } + + /** + * Method to get the required Dimension from obj [] + * + * @param index + * @param row + * @return + */ + public static Integer getDimension(int index, Object[] row) { + + Integer[] dimensions = (Integer[]) row[WriteStepRowUtil.DICTIONARY_DIMENSION]; + + return dimensions[index]; + + } + + /** + * Method to get the required measure from obj [] + * + * @param index + * @param row + * @return + */ + public static Object getMeasure(int index, Object[] row) { + Object[] measures = (Object[]) row[WriteStepRowUtil.MEASURE]; + return measures[index]; + } + + public static byte[] getByteArrayForNoDictionaryCols(Object[] row) { + + return (byte[]) row[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX]; + } + + public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] byteBufferArr, + Object[] measureArray) { + + out[WriteStepRowUtil.DICTIONARY_DIMENSION] = dimArray; + out[WriteStepRowUtil.NO_DICTIONARY_AND_COMPLEX] = byteBufferArr; + out[WriteStepRowUtil.MEASURE] = measureArray; + + } + + /** + * This method will extract the single dimension from the complete high card dims byte[].+ * + * The format of the byte [] will be, Totallength,CompleteStartOffsets,Dat + * + * @param highCardArr + * @param index + * @param highCardinalityCount + * @param outBuffer + */ + public static void extractSingleHighCardDims(byte[] highCardArr, int index, + int highCardinalityCount, ByteBuffer outBuffer) { + ByteBuffer buff = null; + short secIndex = 0; + short firstIndex = 0; + int length; + // if the requested index is a last one then we need to calculate length + // based on byte[] length. + if (index == highCardinalityCount - 1) { + // need to read 2 bytes(1 short) to determine starting offset and + // length can be calculated by array length. + buff = ByteBuffer.wrap(highCardArr, (index * 2) + 2, 2); + } else { + // need to read 4 bytes(2 short) to determine starting offset and + // length. + buff = ByteBuffer.wrap(highCardArr, (index * 2) + 2, 4); + } + + firstIndex = buff.getShort(); + // if it is a last dimension in high card then this will be last + // offset.so calculate length from total length + if (index == highCardinalityCount - 1) { + secIndex = (short) highCardArr.length; + } else { + secIndex = buff.getShort(); + } + + length = secIndex - firstIndex; + + outBuffer.position(firstIndex); + outBuffer.limit(outBuffer.position() + length); + + } +}
