This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new c5f464f  [CARBONDATA-4050]Avoid redundant RPC calls to get file status 
when CarbonFile is instantiated with fileStatus constructor
c5f464f is described below

commit c5f464fab7117000aa76150c30a616ca02b5d443
Author: Venu Reddy <[email protected]>
AuthorDate: Thu Nov 19 00:33:20 2020 +0530

    [CARBONDATA-4050]Avoid redundant RPC calls to get file status when 
CarbonFile is
    instantiated with fileStatus constructor
    
    Why is this PR needed?
    In createCarbonDataFileBlockMetaInfoMapping method, we get list of 
carbondata
    files in the segment, loop through all the carbon files and make a map of
    fileNameToMetaInfoMapping<path-string, BlockMetaInfo>
    
    In that carbon files loop, if the file is of AbstractDFSCarbonFile type, we 
get
    the org.apache.hadoop.fs.FileStatus thrice for each file. And the method to 
get
    file status is an RPC call(fileSystem.getFileStatus(path)). It takes ~2ms 
in the
    cluster for each call. Thus, incur an overhead of ~6ms per file. So overall 
driver
    side query processing time has increased significantly when there are more 
carbon
    files. Hence caused TPC-DS queries performance degradation.
    
    What changes were proposed in this PR?
    Avoided redundant RPC calls to get file status in getAbsolutePath(), 
getSize() and
    getLocations() methods when CarbonFile is instantiated with FileStatus 
constructor
    
    This closes #4010
---
 .../core/datastore/filesystem/AbstractDFSCarbonFile.java      | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
index 338076e..25337c7 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
@@ -107,6 +107,9 @@ public abstract class AbstractDFSCarbonFile implements 
CarbonFile {
   @Override
   public String getAbsolutePath() {
     try {
+      if (fileStatus != null) {
+        return fileStatus.getPath().toString();
+      }
       return fileSystem.getFileStatus(path).getPath().toString();
     } catch (IOException e) {
       throw new CarbonFileException("Unable to get file status: ", e);
@@ -155,6 +158,9 @@ public abstract class AbstractDFSCarbonFile implements 
CarbonFile {
   @Override
   public long getSize() {
     try {
+      if (fileStatus != null) {
+        return fileStatus.getLen();
+      }
       return fileSystem.getFileStatus(path).getLen();
     } catch (IOException e) {
       throw new CarbonFileException("Unable to get file status for " + 
path.toString(), e);
@@ -541,7 +547,10 @@ public abstract class AbstractDFSCarbonFile implements 
CarbonFile {
   @Override
   public String[] getLocations() throws IOException {
     BlockLocation[] blkLocations;
-    FileStatus fileStatus = fileSystem.getFileStatus(path);
+    FileStatus fileStatus = this.fileStatus;
+    if (fileStatus == null) {
+      fileStatus = fileSystem.getFileStatus(path);
+    }
     if (fileStatus instanceof LocatedFileStatus) {
       blkLocations = ((LocatedFileStatus) fileStatus).getBlockLocations();
     } else {

Reply via email to