ajantha-bhat commented on a change in pull request #3196: [CARBONDATA-3366] Support SDK reader to read blocklet level split
URL: https://github.com/apache/carbondata/pull/3196#discussion_r284547805
 
 

 ##########
 File path: 
store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
 ##########
 @@ -184,48 +175,116 @@ public CarbonReaderBuilder withRowRecordReader() {
       }
       format.setColumnProjection(job.getConfiguration(), projectionColumns);
     }
+    if ((disableLoadBlockDataMap) && (filterExpression == null)) {
+      job.getConfiguration().set("filter_blocks", "false");
+    }
+    return format;
+  }
 
+  private <T> RecordReader getRecordReader(Job job, CarbonFileInputFormat format,
+      List<RecordReader<Void, T>> readers, InputSplit split)
+      throws IOException, InterruptedException {
+    TaskAttemptContextImpl attempt =
+        new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
+    RecordReader reader;
+    QueryModel queryModel = format.createQueryModel(split, attempt);
+    boolean hasComplex = false;
+    for (ProjectionDimension projectionDimension : queryModel.getProjectionDimensions()) {
+      if (projectionDimension.getDimension().isComplex()) {
+        hasComplex = true;
+        break;
+      }
+    }
+    if (useVectorReader && !hasComplex) {
+      queryModel.setDirectVectorFill(filterExpression == null);
+      reader = new CarbonVectorizedRecordReader(queryModel);
+    } else {
+      reader = format.createRecordReader(split, attempt);
+    }
     try {
+      reader.initialize(split, attempt);
+    } catch (Exception e) {
+      CarbonUtil.closeStreams(readers.toArray(new RecordReader[0]));
+      throw e;
+    }
+    return reader;
+  }
 
-      if (filterExpression == null) {
-        job.getConfiguration().set("filter_blocks", "false");
-      }
+  /**
+   * Build CarbonReader
+   *
+   * @param <T>
+   * @return CarbonReader
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  public <T> CarbonReader<T> build()
+      throws IOException, InterruptedException {
+    if (hadoopConf == null) {
+      hadoopConf = FileFactory.getConfiguration();
+    }
+    final Job job = new Job(new JobConf(hadoopConf));
+    CarbonFileInputFormat format = prepareFileInputFormat(job, false, true);
+    try {
       List<InputSplit> splits =
          format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
       List<RecordReader<Void, T>> readers = new ArrayList<>(splits.size());
       for (InputSplit split : splits) {
-        TaskAttemptContextImpl attempt =
-            new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
-        RecordReader reader;
-        QueryModel queryModel = format.createQueryModel(split, attempt);
-        boolean hasComplex = false;
-        for (ProjectionDimension projectionDimension : queryModel.getProjectionDimensions()) {
-          if (projectionDimension.getDimension().isComplex()) {
-            hasComplex = true;
-            break;
-          }
-        }
-        if (useVectorReader && !hasComplex) {
-          queryModel.setDirectVectorFill(filterExpression == null);
-          reader = new CarbonVectorizedRecordReader(queryModel);
-        } else {
-          reader = format.createRecordReader(split, attempt);
-        }
-        try {
-          reader.initialize(split, attempt);
-          readers.add(reader);
-        } catch (Exception e) {
-          CarbonUtil.closeStreams(readers.toArray(new RecordReader[0]));
-          throw e;
-        }
+        RecordReader reader = getRecordReader(job, format, readers, split);
+        readers.add(reader);
       }
       return new CarbonReader<>(readers);
     } catch (Exception ex) {
       // Clear the datamap cache as it can get added in getSplits() method
       DataMapStoreManager.getInstance()
-          .clearDataMaps(table.getAbsoluteTableIdentifier());
+          .clearDataMaps(format.getAbsoluteTableIdentifier(hadoopConf));
       throw ex;
     }
   }
 
+  /**
+   * Build blocklet level CarbonReaders
+   *
+   * @param <T>
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  public <T> CarbonReader<T>[] buildBlockletReaders() throws IOException, InterruptedException {
 
 Review comment:
   This interface is removed now; build() is used instead.
   
   But we now have a getSplits() interface; it takes a boolean to set the cache level to block or blocklet.
   
   `public InputSplit[] getSplits(boolean enableBlockletDistribution) throws IOException`
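   
   For illustration, a minimal usage sketch of the suggested interface. Only `getSplits(boolean)` and `build()` come from this discussion; the store path, table name, and the per-split `build(InputSplit)` overload are assumptions for the example, not confirmed SDK API.

```java
import org.apache.hadoop.mapreduce.InputSplit;

import org.apache.carbondata.sdk.file.CarbonReader;
import org.apache.carbondata.sdk.file.CarbonReaderBuilder;

public class BlockletSplitReadSketch {
  public static void main(String[] args) throws Exception {
    String path = "/tmp/carbon_store";   // placeholder store path

    // true asks for blocklet-level splits, false for block-level splits
    // (per the getSplits(boolean enableBlockletDistribution) signature above)
    CarbonReaderBuilder builder = CarbonReader.builder(path, "_temp");
    InputSplit[] splits = builder.getSplits(true);

    for (InputSplit split : splits) {
      // Assumed per-split build overload: each split could then be read
      // independently, e.g. by separate worker threads or tasks
      CarbonReader reader = CarbonReader.builder(path, "_temp").build(split);
      while (reader.hasNext()) {
        Object[] row = (Object[]) reader.readNextRow();
        // process the row ...
      }
      reader.close();
    }
  }
}
```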

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services
