Github user jackylk commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2191#discussion_r183305545 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedRawResultCollector.java --- @@ -150,105 +155,110 @@ private void initCurrentBlockKeyGenerator() { * it will keep track of how many record is processed, to handle limit scenario */ @Override public List<Object[]> collectData(AbstractScannedResult scannedResult, int batchSize) { + long startTime = System.currentTimeMillis(); List<Object[]> listBasedResult = new ArrayList<>(batchSize); QueryMeasure[] queryMeasures = tableBlockExecutionInfos.getActualQueryMeasures(); // scan the record and add to list - int rowCounter = 0; - while (scannedResult.hasNext() && rowCounter < batchSize) { - scanResultAndGetData(scannedResult); - if (scannedResult.containsDeletedRow(scannedResult.getCurrentRowId())) { - continue; - } - // re-fill dictionary and no dictionary key arrays for the newly added columns - if (dimensionInfo.isDictionaryColumnAdded()) { - dictionaryKeyArray = fillDictionaryKeyArrayWithLatestSchema(dictionaryKeyArray); - } - if (dimensionInfo.isNoDictionaryColumnAdded()) { - noDictionaryKeyArray = fillNoDictionaryKeyArrayWithLatestSchema(noDictionaryKeyArray); - } - prepareRow(scannedResult, listBasedResult, queryMeasures); - rowCounter++; + scanAndFillData(scannedResult, batchSize, listBasedResult, queryMeasures); + // re-fill dictionary and no dictionary key arrays for the newly added columns + if (dimensionInfo.isDictionaryColumnAdded()) { + fillDictionaryKeyArrayBatchWithLatestSchema(listBasedResult); + } + if (dimensionInfo.isNoDictionaryColumnAdded()) { + fillNoDictionaryKeyArrayBatchWithLatestSchema(listBasedResult); } + QueryStatistic resultPrepTime = queryStatisticsModel.getStatisticsTypeAndObjMap() + .get(QueryStatisticsConstants.RESULT_PREP_TIME); + resultPrepTime.addCountStatistic(QueryStatisticsConstants.RESULT_PREP_TIME, + resultPrepTime.getCount() + (System.currentTimeMillis() - startTime)); return listBasedResult; } /** * This method will fill the dictionary key array with newly added dictionary columns if any * - * @param dictionaryKeyArray + * @param rows * @return */ - private byte[] fillDictionaryKeyArrayWithLatestSchema(byte[] dictionaryKeyArray) { - QueryDimension[] actualQueryDimensions = tableBlockExecutionInfos.getActualQueryDimensions(); - int newKeyArrayLength = dimensionInfo.getNewDictionaryColumnCount(); - long[] keyArray = null; - if (null != updatedCurrentBlockKeyGenerator) { - keyArray = updatedCurrentBlockKeyGenerator.getKeyArray(dictionaryKeyArray); - newKeyArrayLength += keyArray.length; - } - long[] keyArrayWithNewAddedColumns = new long[newKeyArrayLength]; - int existingColumnKeyArrayIndex = 0; - int newKeyArrayIndex = 0; - for (int i = 0; i < dimensionInfo.getDimensionExists().length; i++) { - if (CarbonUtil - .hasEncoding(actualQueryDimensions[i].getDimension().getEncoder(), Encoding.DICTIONARY)) { - // if dimension exists then add the key array value else add the default value - if (dimensionInfo.getDimensionExists()[i]) { - keyArrayWithNewAddedColumns[newKeyArrayIndex++] = keyArray[existingColumnKeyArrayIndex++]; - } else { - long defaultValueAsLong; - Object defaultValue = dimensionInfo.getDefaultValues()[i]; - if (null != defaultValue) { - defaultValueAsLong = ((Integer) defaultValue).longValue(); + private void fillDictionaryKeyArrayBatchWithLatestSchema(List<Object[]> rows) { + for (Object[] row : rows) { + ByteArrayWrapper byteArrayWrapper = (ByteArrayWrapper) row[0]; + byte[] dictKeyArray = byteArrayWrapper.getDictionaryKey(); + QueryDimension[] actualQueryDimensions = tableBlockExecutionInfos.getActualQueryDimensions(); + int newKeyArrayLength = dimensionInfo.getNewDictionaryColumnCount(); + long[] keyArray = null; + if (null != updatedCurrentBlockKeyGenerator) { + keyArray = updatedCurrentBlockKeyGenerator.getKeyArray(dictKeyArray); + newKeyArrayLength += keyArray.length; + } + long[] keyArrayWithNewAddedColumns = new long[newKeyArrayLength]; + int existingColumnKeyArrayIndex = 0; + int newKeyArrayIndex = 0; + for (int i = 0; i < dimensionInfo.getDimensionExists().length; i++) { + if (CarbonUtil.hasEncoding(actualQueryDimensions[i].getDimension().getEncoder(), + Encoding.DICTIONARY)) { + // if dimension exists then add the key array value else add the default value + if (dimensionInfo.getDimensionExists()[i]) { + keyArrayWithNewAddedColumns[newKeyArrayIndex++] = + keyArray[existingColumnKeyArrayIndex++]; } else { - defaultValueAsLong = (long)CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY; + long defaultValueAsLong; + Object defaultValue = dimensionInfo.getDefaultValues()[i]; + if (null != defaultValue) { + defaultValueAsLong = ((Integer) defaultValue).longValue(); + } else { + defaultValueAsLong = (long) CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY; + } + keyArrayWithNewAddedColumns[newKeyArrayIndex++] = defaultValueAsLong; } - keyArrayWithNewAddedColumns[newKeyArrayIndex++] = defaultValueAsLong; } } + try { + dictKeyArray = restructuredKeyGenerator.generateKey(keyArrayWithNewAddedColumns); + } catch (KeyGenException e) { + LOGGER.error(e, e.getMessage()); --- End diff -- No need to throw it?
---