Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2275#discussion_r189420105
--- Diff:
datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapWriter.java
---
@@ -175,52 +206,39 @@ public void onBlockletEnd(int blockletId) throws
IOException {
*/
public void onPageAdded(int blockletId, int pageId, int pageSize,
ColumnPage[] pages)
throws IOException {
+ // save index data into ram, write into disk after one page finished
+ int columnsCount = pages.length;
+ if (columnsCount <= 0) {
+ LOGGER.warn("No data in the page " + pageId + "with blockletid " +
blockletId
+ + " to write lucene datamap");
+ return;
+ }
for (int rowId = 0; rowId < pageSize; rowId++) {
- // create a new document
- Document doc = new Document();
- // add blocklet Id
- doc.add(new IntPoint(BLOCKLETID_NAME, blockletId));
- doc.add(new StoredField(BLOCKLETID_NAME, blockletId));
- //doc.add(new NumericDocValuesField(BLOCKLETID_NAME,blockletId));
-
- // add page id and row id in Fine Grain data map
- if (isFineGrain) {
- // add page Id
- doc.add(new IntPoint(PAGEID_NAME, pageId));
- doc.add(new StoredField(PAGEID_NAME, pageId));
- //doc.add(new NumericDocValuesField(PAGEID_NAME,pageId));
-
- // add row id
- doc.add(new IntPoint(ROWID_NAME, rowId));
- doc.add(new StoredField(ROWID_NAME, rowId));
- //doc.add(new NumericDocValuesField(ROWID_NAME,rowId));
- }
-
// add indexed columns value into the document
- List<CarbonColumn> indexColumns = getIndexColumns();
- for (int i = 0; i < pages.length; i++) {
- // add to lucene only if value is not null
- if (!pages[i].getNullBits().get(rowId)) {
- addField(doc, pages[i].getData(rowId), indexColumns.get(i),
Field.Store.NO);
+ LuceneColumnKeys columns = new
LuceneColumnKeys(getIndexColumns().size());
+ int i = 0;
+ for (ColumnPage page : pages) {
+ if (!page.getNullBits().get(rowId)) {
+ columns.colValues[i++] = getValue(page, rowId);
}
}
-
- // add this document
- ramIndexWriter.addDocument(doc);
+ if (cacheSize > 0) {
+ addToCache(columns, rowId, pageId, blockletId, cache, intBuffer,
storeBlockletWise);
+ } else {
+ addData(columns, rowId, pageId, blockletId, intBuffer,
ramIndexWriter, getIndexColumns(),
+ storeBlockletWise);
+ }
+ }
+ if (cacheSize > 0) {
+ flushCacheIfPossible();
}
-
}
- private boolean addField(Document doc, Object data, CarbonColumn column,
Field.Store store) {
+ private static boolean addField(Document doc, Object key, String
fieldName, Field.Store store) {
--- End diff --
This method always returns true, and the return value is not needed by the
caller.
---