GitHub user kumarvishal09 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2729#discussion_r225135585
  
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/HighCardDictDimensionIndexCodec.java ---
    @@ -17,71 +17,117 @@
     
     package 
org.apache.carbondata.core.datastore.page.encoding.dimension.legacy;
     
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
     import java.util.ArrayList;
     import java.util.List;
     import java.util.Map;
     
    -import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorage;
    -import 
org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForNoInvertedIndexForShort;
    -import 
org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForShort;
    +import 
org.apache.carbondata.core.datastore.columnar.BinaryPageIndexGenerator;
    +import org.apache.carbondata.core.datastore.columnar.PageIndexGenerator;
     import org.apache.carbondata.core.datastore.compression.Compressor;
     import org.apache.carbondata.core.datastore.compression.CompressorFactory;
     import org.apache.carbondata.core.datastore.page.ColumnPage;
     import 
org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder;
    +import org.apache.carbondata.core.memory.MemoryException;
    +import org.apache.carbondata.core.metadata.datatype.DataType;
    +import org.apache.carbondata.core.metadata.datatype.DataTypes;
     import org.apache.carbondata.core.util.ByteUtil;
     import org.apache.carbondata.format.Encoding;
     
    +/**
    + * Codec class for binary/String data type columns
    + */
     public class HighCardDictDimensionIndexCodec extends IndexStorageCodec {
    -  /**
    -   * whether this column is varchar data type(long string)
    -   */
    -  private boolean isVarcharType;
     
    -  public HighCardDictDimensionIndexCodec(boolean isSort, boolean 
isInvertedIndex,
    -      boolean isVarcharType) {
    -    super(isSort, isInvertedIndex);
    -    this.isVarcharType = isVarcharType;
    +  private final List<Encoding> encodingList;
    +
    +  public HighCardDictDimensionIndexCodec(boolean isSort) {
    +    super(isSort);
    +    encodingList = new ArrayList<>();
    +    encodingList.add(Encoding.DIRECT_STRING);
       }
     
       @Override
       public String getName() {
         return "HighCardDictDimensionIndexCodec";
       }
     
    -  @Override
    -  public ColumnPageEncoder createEncoder(Map<String, String> parameter) {
    -    return new IndexStorageEncoder() {
    -
    +  @Override public ColumnPageEncoder createEncoder(Map<String, Object> 
parameter) {
    +    return new IndexStorageEncoder(true, null, encodingList) {
    +      private final int THREE_BYTES_MAX = (int) Math.pow(2, 23) - 1;
    +      private final int THREE_BYTES_MIN = - THREE_BYTES_MAX - 1;
           @Override
    -      protected void encodeIndexStorage(ColumnPage input) {
    -        BlockIndexerStorage<byte[][]> indexStorage;
    +      protected void encodeIndexStorage(ColumnPage input) throws 
MemoryException, IOException {
    +        PageIndexGenerator<byte[][]> pageIndexGenerator;
    +        // get actual data
             byte[][] data = input.getByteArrayPage();
    -        boolean isDictionary = input.isLocalDictGeneratedPage();
    -        if (isInvertedIndex) {
    -          indexStorage = new BlockIndexerStorageForShort(data, 
isDictionary, !isDictionary, isSort);
    -        } else {
    -          indexStorage =
    -              new BlockIndexerStorageForNoInvertedIndexForShort(data, 
isDictionary);
    +        // fill length array
    +        int[] lengthArray = new int[data.length];
    +        int max = Integer.MIN_VALUE;
    +        int min = Integer.MAX_VALUE;
    +        int currentDataLength;
    +        int size = 0;
    +        for (int i = 0; i < lengthArray.length; i++) {
    +          currentDataLength = data[i].length;
    +          lengthArray[i] = currentDataLength;
    +          size += currentDataLength;
    +          if (max < currentDataLength) {
    +            max = currentDataLength;
    +          }
    +          if (min > currentDataLength) {
    +            min = currentDataLength;
    +          }
             }
    -        byte[] flattened = ByteUtil.flatten(indexStorage.getDataPage());
             Compressor compressor = 
CompressorFactory.getInstance().getCompressor(
                 input.getColumnCompressorName());
    +        pageIndexGenerator =
    +              new BinaryPageIndexGenerator(data, isSort, lengthArray);
    +        // free memory
    +        selectedDataType = fitLongMinMax(max, min);
    +        byte[][] dataPage = pageIndexGenerator.getDataPage();
    +        ByteBuffer byteBuffer;
    +        if (DataTypes.BYTE == selectedDataType) {
    +          byteBuffer = ByteBuffer.allocate(lengthArray.length + size);
    +          for (int i = 0; i < lengthArray.length; i++) {
    +            byteBuffer.put((byte) lengthArray[i]);
    +            byteBuffer.put(dataPage[i]);
    +          }
    +        } else if (DataTypes.SHORT == selectedDataType) {
    +          byteBuffer = ByteBuffer.allocate((lengthArray.length * 2) + 
size);
    +          for (int i = 0; i < lengthArray.length; i++) {
    +            byteBuffer.putShort((short) lengthArray[i]);
    +            byteBuffer.put(dataPage[i]);
    +          }
    +        } else if (DataTypes.SHORT_INT == selectedDataType) {
    +          byteBuffer = ByteBuffer.allocate((lengthArray.length * 3) + 
size);
    +          for (int i = 0; i < lengthArray.length; i++) {
    +            byteBuffer.put(ByteUtil.to3Bytes(lengthArray[i]));
    +            byteBuffer.put(dataPage[i]);
    +          }
    +        } else {
    +          byteBuffer = ByteBuffer.allocate((lengthArray.length * 4) + 
size);
    +          for (int i = 0; i < lengthArray.length; i++) {
    +            byteBuffer.putInt(lengthArray[i]);
    +            byteBuffer.put(dataPage[i]);
    +          }
    +        }
    +        byteBuffer.rewind();
    +        byte[] flattened = byteBuffer.array();
             super.compressedDataPage = compressor.compressByte(flattened);
    -        super.indexStorage = indexStorage;
    +        super.pageIndexGenerator = pageIndexGenerator;
           }
     
    -      @Override
    -      protected List<Encoding> getEncodingList() {
    -        List<Encoding> encodings = new ArrayList<>();
    -        if (isVarcharType) {
    -          encodings.add(Encoding.DIRECT_COMPRESS_VARCHAR);
    -        } else if (indexStorage.getRowIdPageLengthInBytes() > 0) {
    -          encodings.add(Encoding.INVERTED_INDEX);
    -        }
    -        if (indexStorage.getDataRlePageLengthInBytes() > 0) {
    -          encodings.add(Encoding.RLE);
    +      private DataType fitLongMinMax(int max, int min) {
    --- End diff --
    
    ok


---

Reply via email to