Github user jackylk commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2275#discussion_r189420105 --- Diff: datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapWriter.java --- @@ -175,52 +206,39 @@ public void onBlockletEnd(int blockletId) throws IOException { */ public void onPageAdded(int blockletId, int pageId, int pageSize, ColumnPage[] pages) throws IOException { + // save index data into ram, write into disk after one page finished + int columnsCount = pages.length; + if (columnsCount <= 0) { + LOGGER.warn("No data in the page " + pageId + "with blockletid " + blockletId + + " to write lucene datamap"); + return; + } for (int rowId = 0; rowId < pageSize; rowId++) { - // create a new document - Document doc = new Document(); - // add blocklet Id - doc.add(new IntPoint(BLOCKLETID_NAME, blockletId)); - doc.add(new StoredField(BLOCKLETID_NAME, blockletId)); - //doc.add(new NumericDocValuesField(BLOCKLETID_NAME,blockletId)); - - // add page id and row id in Fine Grain data map - if (isFineGrain) { - // add page Id - doc.add(new IntPoint(PAGEID_NAME, pageId)); - doc.add(new StoredField(PAGEID_NAME, pageId)); - //doc.add(new NumericDocValuesField(PAGEID_NAME,pageId)); - - // add row id - doc.add(new IntPoint(ROWID_NAME, rowId)); - doc.add(new StoredField(ROWID_NAME, rowId)); - //doc.add(new NumericDocValuesField(ROWID_NAME,rowId)); - } - // add indexed columns value into the document - List<CarbonColumn> indexColumns = getIndexColumns(); - for (int i = 0; i < pages.length; i++) { - // add to lucene only if value is not null - if (!pages[i].getNullBits().get(rowId)) { - addField(doc, pages[i].getData(rowId), indexColumns.get(i), Field.Store.NO); + LuceneColumnKeys columns = new LuceneColumnKeys(getIndexColumns().size()); + int i = 0; + for (ColumnPage page : pages) { + if (!page.getNullBits().get(rowId)) { + columns.colValues[i++] = getValue(page, rowId); } } - - // add this document - 
ramIndexWriter.addDocument(doc); + if (cacheSize > 0) { + addToCache(columns, rowId, pageId, blockletId, cache, intBuffer, storeBlockletWise); + } else { + addData(columns, rowId, pageId, blockletId, intBuffer, ramIndexWriter, getIndexColumns(), + storeBlockletWise); + } + } + if (cacheSize > 0) { + flushCacheIfPossible(); } - } - private boolean addField(Document doc, Object data, CarbonColumn column, Field.Store store) { + private static boolean addField(Document doc, Object key, String fieldName, Field.Store store) { --- End diff -- This method always returns true, and the return value is not required by the caller.
---