QiangCai commented on a change in pull request #3816:
URL: https://github.com/apache/carbondata/pull/3816#discussion_r447373586



##########
File path: 
hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
##########
@@ -232,47 +233,40 @@
   private List<Segment> getFilteredSegment(JobContext job, List<Segment> 
validSegments,
       boolean validationRequired, ReadCommittedScope readCommittedScope) {
     Segment[] segmentsToAccess = getSegmentsToAccess(job, readCommittedScope);
-    List<Segment> segmentToAccessSet =
-        new ArrayList<>(new HashSet<>(Arrays.asList(segmentsToAccess)));
-    List<Segment> filteredSegmentToAccess = new ArrayList<>();
     if (segmentsToAccess.length == 0 || 
segmentsToAccess[0].getSegmentNo().equalsIgnoreCase("*")) {
-      filteredSegmentToAccess.addAll(validSegments);
+      return validSegments;
     } else {

Review comment:
       remove this line

##########
File path: 
hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
##########
@@ -232,47 +233,40 @@
   private List<Segment> getFilteredSegment(JobContext job, List<Segment> 
validSegments,
       boolean validationRequired, ReadCommittedScope readCommittedScope) {
     Segment[] segmentsToAccess = getSegmentsToAccess(job, readCommittedScope);
-    List<Segment> segmentToAccessSet =
-        new ArrayList<>(new HashSet<>(Arrays.asList(segmentsToAccess)));
-    List<Segment> filteredSegmentToAccess = new ArrayList<>();
     if (segmentsToAccess.length == 0 || 
segmentsToAccess[0].getSegmentNo().equalsIgnoreCase("*")) {
-      filteredSegmentToAccess.addAll(validSegments);
+      return validSegments;
     } else {
+      Map<String, Segment> segmentToAccessMap = new 
HashMap<>(segmentsToAccess.length);
+      for (Segment segment : segmentsToAccess) {
+        segmentToAccessMap.put(segment.getSegmentNo(), segment);
+      }
+      Map<String, Segment> filteredSegmentToAccess = new HashMap<>();
       for (Segment validSegment : validSegments) {
-        int index = segmentToAccessSet.indexOf(validSegment);
-        if (index > -1) {
-          // In case of in progress reading segment, segment file name is set 
to the property itself
-          if (segmentToAccessSet.get(index).getSegmentFileName() != null
-              && validSegment.getSegmentFileName() == null) {
-            filteredSegmentToAccess.add(segmentToAccessSet.get(index));
+        String segmentNoOfValidSegment = validSegment.getSegmentNo();
+        if (segmentToAccessMap.containsKey(segmentNoOfValidSegment)) {
+          if (segmentToAccessMap.get(segmentNoOfValidSegment)
+              .getSegmentFileName() != null && 
validSegment.getSegmentFileName() == null) {
+            filteredSegmentToAccess.put(segmentNoOfValidSegment,
+                    segmentToAccessMap.get(segmentNoOfValidSegment));
           } else {
-            filteredSegmentToAccess.add(validSegment);
-          }
-        }
-      }
-      if (filteredSegmentToAccess.size() != segmentToAccessSet.size() && 
!validationRequired) {
-        for (Segment segment : segmentToAccessSet) {
-          if (!filteredSegmentToAccess.contains(segment)) {
-            filteredSegmentToAccess.add(segment);
+            filteredSegmentToAccess.put(segmentNoOfValidSegment, validSegment);
           }

Review comment:
       String validSegmentNo = validSegment.getSegmentNo();
         Segment segmentToAccess = segmentToAccessMap.get(validSegmentNo);
         if (null != segmentToAccess) {
           if (validSegment.getSegmentFileName() != null ||
               segmentToAccess.getSegmentFileName() == null) {
             segmentToAccess = validSegment;
           }
           filteredSegmentToAccess.put(validSegmentNo, segmentToAccess);
         }

##########
File path: 
hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
##########
@@ -232,47 +233,40 @@
   private List<Segment> getFilteredSegment(JobContext job, List<Segment> 
validSegments,
       boolean validationRequired, ReadCommittedScope readCommittedScope) {
     Segment[] segmentsToAccess = getSegmentsToAccess(job, readCommittedScope);
-    List<Segment> segmentToAccessSet =
-        new ArrayList<>(new HashSet<>(Arrays.asList(segmentsToAccess)));
-    List<Segment> filteredSegmentToAccess = new ArrayList<>();
     if (segmentsToAccess.length == 0 || 
segmentsToAccess[0].getSegmentNo().equalsIgnoreCase("*")) {
-      filteredSegmentToAccess.addAll(validSegments);
+      return validSegments;
     } else {
+      Map<String, Segment> segmentToAccessMap = new 
HashMap<>(segmentsToAccess.length);
+      for (Segment segment : segmentsToAccess) {
+        segmentToAccessMap.put(segment.getSegmentNo(), segment);
+      }
+      Map<String, Segment> filteredSegmentToAccess = new HashMap<>();
       for (Segment validSegment : validSegments) {
-        int index = segmentToAccessSet.indexOf(validSegment);
-        if (index > -1) {
-          // In case of in progress reading segment, segment file name is set 
to the property itself
-          if (segmentToAccessSet.get(index).getSegmentFileName() != null
-              && validSegment.getSegmentFileName() == null) {
-            filteredSegmentToAccess.add(segmentToAccessSet.get(index));
+        String segmentNoOfValidSegment = validSegment.getSegmentNo();
+        if (segmentToAccessMap.containsKey(segmentNoOfValidSegment)) {
+          if (segmentToAccessMap.get(segmentNoOfValidSegment)
+              .getSegmentFileName() != null && 
validSegment.getSegmentFileName() == null) {
+            filteredSegmentToAccess.put(segmentNoOfValidSegment,
+                    segmentToAccessMap.get(segmentNoOfValidSegment));
           } else {
-            filteredSegmentToAccess.add(validSegment);
-          }
-        }
-      }
-      if (filteredSegmentToAccess.size() != segmentToAccessSet.size() && 
!validationRequired) {
-        for (Segment segment : segmentToAccessSet) {
-          if (!filteredSegmentToAccess.contains(segment)) {
-            filteredSegmentToAccess.add(segment);
+            filteredSegmentToAccess.put(segmentNoOfValidSegment, validSegment);
           }
         }
       }
-      // TODO: add validation for set segments access based on valid segments 
in table status
-      if (filteredSegmentToAccess.size() != segmentToAccessSet.size() && 
!validationRequired) {
-        for (Segment segment : segmentToAccessSet) {
-          if (!filteredSegmentToAccess.contains(segment)) {
-            filteredSegmentToAccess.add(segment);
+      if (filteredSegmentToAccess.size() != segmentToAccessMap.size() && 
!validationRequired) {

Review comment:
       !validationRequired && filteredSegmentToAccess.size() != 
segmentToAccessMap.size()

##########
File path: 
hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
##########
@@ -232,47 +233,40 @@
   private List<Segment> getFilteredSegment(JobContext job, List<Segment> 
validSegments,
       boolean validationRequired, ReadCommittedScope readCommittedScope) {
     Segment[] segmentsToAccess = getSegmentsToAccess(job, readCommittedScope);
-    List<Segment> segmentToAccessSet =
-        new ArrayList<>(new HashSet<>(Arrays.asList(segmentsToAccess)));
-    List<Segment> filteredSegmentToAccess = new ArrayList<>();
     if (segmentsToAccess.length == 0 || 
segmentsToAccess[0].getSegmentNo().equalsIgnoreCase("*")) {
-      filteredSegmentToAccess.addAll(validSegments);
+      return validSegments;
     } else {
+      Map<String, Segment> segmentToAccessMap = new 
HashMap<>(segmentsToAccess.length);
+      for (Segment segment : segmentsToAccess) {
+        segmentToAccessMap.put(segment.getSegmentNo(), segment);
+      }

Review comment:
       Map<String, Segment> segmentToAccessMap = 
Arrays.stream(segmentsToAccess).collect(Collectors.toMap(Segment::getSegmentNo, 
segment -> segment));




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to