[GitHub] carbondata pull request #2191: [CARBONDATA-2381] Improve compaction performance...

jackylk Mon, 23 Apr 2018 01:04:34 -0700

Github user jackylk commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2191#discussion_r183305545
  
    --- Diff: 
core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedRawResultCollector.java
 ---
    @@ -150,105 +155,110 @@ private void initCurrentBlockKeyGenerator() {
        * it will keep track of how many record is processed, to handle limit 
scenario
        */
       @Override public List<Object[]> collectData(AbstractScannedResult 
scannedResult, int batchSize) {
    +    long startTime = System.currentTimeMillis();
         List<Object[]> listBasedResult = new ArrayList<>(batchSize);
         QueryMeasure[] queryMeasures = 
tableBlockExecutionInfos.getActualQueryMeasures();
         // scan the record and add to list
    -    int rowCounter = 0;
    -    while (scannedResult.hasNext() && rowCounter < batchSize) {
    -      scanResultAndGetData(scannedResult);
    -      if 
(scannedResult.containsDeletedRow(scannedResult.getCurrentRowId())) {
    -        continue;
    -      }
    -      // re-fill dictionary and no dictionary key arrays for the newly 
added columns
    -      if (dimensionInfo.isDictionaryColumnAdded()) {
    -        dictionaryKeyArray = 
fillDictionaryKeyArrayWithLatestSchema(dictionaryKeyArray);
    -      }
    -      if (dimensionInfo.isNoDictionaryColumnAdded()) {
    -        noDictionaryKeyArray = 
fillNoDictionaryKeyArrayWithLatestSchema(noDictionaryKeyArray);
    -      }
    -      prepareRow(scannedResult, listBasedResult, queryMeasures);
    -      rowCounter++;
    +    scanAndFillData(scannedResult, batchSize, listBasedResult, 
queryMeasures);
    +    // re-fill dictionary and no dictionary key arrays for the newly added 
columns
    +    if (dimensionInfo.isDictionaryColumnAdded()) {
    +      fillDictionaryKeyArrayBatchWithLatestSchema(listBasedResult);
    +    }
    +    if (dimensionInfo.isNoDictionaryColumnAdded()) {
    +      fillNoDictionaryKeyArrayBatchWithLatestSchema(listBasedResult);
         }
    +    QueryStatistic resultPrepTime = 
queryStatisticsModel.getStatisticsTypeAndObjMap()
    +        .get(QueryStatisticsConstants.RESULT_PREP_TIME);
    +    
resultPrepTime.addCountStatistic(QueryStatisticsConstants.RESULT_PREP_TIME,
    +        resultPrepTime.getCount() + (System.currentTimeMillis() - 
startTime));
         return listBasedResult;
       }
     
       /**
        * This method will fill the dictionary key array with newly added 
dictionary columns if any
        *
    -   * @param dictionaryKeyArray
    +   * @param rows
        * @return
        */
    -  private byte[] fillDictionaryKeyArrayWithLatestSchema(byte[] 
dictionaryKeyArray) {
    -    QueryDimension[] actualQueryDimensions = 
tableBlockExecutionInfos.getActualQueryDimensions();
    -    int newKeyArrayLength = dimensionInfo.getNewDictionaryColumnCount();
    -    long[] keyArray = null;
    -    if (null != updatedCurrentBlockKeyGenerator) {
    -      keyArray = 
updatedCurrentBlockKeyGenerator.getKeyArray(dictionaryKeyArray);
    -      newKeyArrayLength += keyArray.length;
    -    }
    -    long[] keyArrayWithNewAddedColumns = new long[newKeyArrayLength];
    -    int existingColumnKeyArrayIndex = 0;
    -    int newKeyArrayIndex = 0;
    -    for (int i = 0; i < dimensionInfo.getDimensionExists().length; i++) {
    -      if (CarbonUtil
    -          
.hasEncoding(actualQueryDimensions[i].getDimension().getEncoder(), 
Encoding.DICTIONARY)) {
    -        // if dimension exists then add the key array value else add the 
default value
    -        if (dimensionInfo.getDimensionExists()[i]) {
    -          keyArrayWithNewAddedColumns[newKeyArrayIndex++] = 
keyArray[existingColumnKeyArrayIndex++];
    -        } else {
    -          long defaultValueAsLong;
    -          Object defaultValue = dimensionInfo.getDefaultValues()[i];
    -          if (null != defaultValue) {
    -            defaultValueAsLong = ((Integer) defaultValue).longValue();
    +  private void fillDictionaryKeyArrayBatchWithLatestSchema(List<Object[]> 
rows) {
    +    for (Object[] row : rows) {
    +      ByteArrayWrapper byteArrayWrapper = (ByteArrayWrapper) row[0];
    +      byte[] dictKeyArray = byteArrayWrapper.getDictionaryKey();
    +      QueryDimension[] actualQueryDimensions = 
tableBlockExecutionInfos.getActualQueryDimensions();
    +      int newKeyArrayLength = dimensionInfo.getNewDictionaryColumnCount();
    +      long[] keyArray = null;
    +      if (null != updatedCurrentBlockKeyGenerator) {
    +        keyArray = 
updatedCurrentBlockKeyGenerator.getKeyArray(dictKeyArray);
    +        newKeyArrayLength += keyArray.length;
    +      }
    +      long[] keyArrayWithNewAddedColumns = new long[newKeyArrayLength];
    +      int existingColumnKeyArrayIndex = 0;
    +      int newKeyArrayIndex = 0;
    +      for (int i = 0; i < dimensionInfo.getDimensionExists().length; i++) {
    +        if 
(CarbonUtil.hasEncoding(actualQueryDimensions[i].getDimension().getEncoder(),
    +            Encoding.DICTIONARY)) {
    +          // if dimension exists then add the key array value else add the 
default value
    +          if (dimensionInfo.getDimensionExists()[i]) {
    +            keyArrayWithNewAddedColumns[newKeyArrayIndex++] =
    +                keyArray[existingColumnKeyArrayIndex++];
               } else {
    -            defaultValueAsLong = 
(long)CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY;
    +            long defaultValueAsLong;
    +            Object defaultValue = dimensionInfo.getDefaultValues()[i];
    +            if (null != defaultValue) {
    +              defaultValueAsLong = ((Integer) defaultValue).longValue();
    +            } else {
    +              defaultValueAsLong = (long) 
CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY;
    +            }
    +            keyArrayWithNewAddedColumns[newKeyArrayIndex++] = 
defaultValueAsLong;
               }
    -          keyArrayWithNewAddedColumns[newKeyArrayIndex++] = 
defaultValueAsLong;
             }
           }
    +      try {
    +        dictKeyArray = 
restructuredKeyGenerator.generateKey(keyArrayWithNewAddedColumns);
    +      } catch (KeyGenException e) {
    +        LOGGER.error(e, e.getMessage());
    --- End diff --
    
    Is there no need to rethrow this KeyGenException after logging it?

---

[GitHub] carbondata pull request #2191: [CARBONDATA-2381] Improve compaction performance...

Reply via email to