[GitHub] drill pull request #1232: DRILL-6094: Decimal data type enhancements

vvysotskyi Wed, 25 Apr 2018 08:27:51 -0700

Github user vvysotskyi commented on a diff in the pull request:

    https://github.com/apache/drill/pull/1232#discussion_r184062425
  
    --- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
 ---
    @@ -248,27 +227,61 @@ protected void readField(long 
recordsToReadInThisPass) {
         }
       }
     
    -  static class DictionaryDecimal18Reader extends 
FixedByteAlignedReader<Decimal18Vector> {
    -    DictionaryDecimal18Reader(ParquetRecordReader parentReader, int 
allocateSize, ColumnDescriptor descriptor,
    -                           ColumnChunkMetaData columnChunkMetaData, 
boolean fixedLength, Decimal18Vector v,
    -                           SchemaElement schemaElement) throws 
ExecutionSetupException {
    +  static class DictionaryVarDecimalReader extends 
FixedByteAlignedReader<VarDecimalVector> {
    +
    +    DictionaryVarDecimalReader(ParquetRecordReader parentReader, int 
allocateSize, ColumnDescriptor descriptor,
    +        ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, 
VarDecimalVector v,
    +        SchemaElement schemaElement) throws ExecutionSetupException {
           super(parentReader, allocateSize, descriptor, columnChunkMetaData, 
fixedLength, v, schemaElement);
         }
     
         // this method is called by its superclass during a read loop
         @Override
         protected void readField(long recordsToReadInThisPass) {
    +      recordsReadInThisIteration =
    +          Math.min(pageReader.currentPageCount - pageReader.valuesRead,
    +              recordsToReadInThisPass - valuesReadInCurrentPass);
    +
    +      switch (columnDescriptor.getType()) {
    +        case INT32:
    +          if (usingDictionary) {
    +            for (int i = 0; i < recordsReadInThisIteration; i++) {
    +              byte[] bytes = 
Ints.toByteArray(pageReader.dictionaryValueReader.readInteger());
    +              setValueBytes(i, bytes);
    +            }
    +            setWriteIndex();
    +          } else {
    +            super.readField(recordsToReadInThisPass);
    +          }
    +          break;
    +        case INT64:
    +          if (usingDictionary) {
    +            for (int i = 0; i < recordsReadInThisIteration; i++) {
    +              byte[] bytes = 
Longs.toByteArray(pageReader.dictionaryValueReader.readLong());
    +              setValueBytes(i, bytes);
    +            }
    +            setWriteIndex();
    +          } else {
    +            super.readField(recordsToReadInThisPass);
    +          }
    +          break;
    +      }
    +    }
     
    -      recordsReadInThisIteration = Math.min(pageReader.currentPageCount
    -        - pageReader.valuesRead, recordsToReadInThisPass - 
valuesReadInCurrentPass);
    +    /**
    +     * Set the write Index. The next page that gets read might be a page 
that does not use dictionary encoding
    +     * and we will go into the else condition below. The readField method 
of the parent class requires the
    +     * writer index to be set correctly.
    +     */
    +    private void setWriteIndex() {
    +      readLengthInBits = recordsReadInThisIteration * dataTypeLengthInBits;
    +      readLength = (int) Math.ceil(readLengthInBits / 8.0);
    --- End diff --
    
    The 8.0 literal is the number of bits in a byte; it is written as a double 
to avoid integer division. Thanks for pointing this out, I have replaced it with a constant.

---

[GitHub] drill pull request #1232: DRILL-6094: Decimal data type enhancements

Reply via email to