keith-turner commented on code in PR #2811:
URL: https://github.com/apache/accumulo/pull/2811#discussion_r982208133


##########
core/src/main/java/org/apache/accumulo/core/file/rfile/RelativeKey.java:
##########
@@ -221,6 +188,17 @@ public void readFields(DataInput in) throws IOException {
     this.prevKey = this.key;
   }
 
+  private byte[] getData(DataInput in, byte same, byte commonPrefix, 
Supplier<ByteSequence> data)

Review Comment:
   I suggested using `fieldBit` in a previous comment instead of `rowSame`.  
What ever you end using in the other method it would be good to make the 
variable name here `same` consistent with the name in the other method.  Could 
change `rowSame` to `same` in the other method to make it consistent with this 
method.
   
   Below are suggested renamings for overall consistency.
   
   ```suggestion
     private byte[] getData(DataInput in, byte fieldBit, byte commonPrefix, 
Supplier<ByteSequence> fieldReader)
   ```



##########
core/src/main/java/org/apache/accumulo/core/file/rfile/RelativeKey.java:
##########
@@ -135,6 +118,17 @@ else if (cvCommonPrefixLen > 1)
       fieldsSame |= DELETED;
   }
 
+  private int getCommonPrefixLen(ByteSequence prevKeyScratch, ByteSequence 
keyScratch, byte rowSame,
+      byte commonPrefix) {
+    int commonPrefixLen = getCommonPrefix(prevKeyScratch, keyScratch);
+    if (commonPrefixLen == -1) {
+      fieldsSame |= rowSame;

Review Comment:
   Need some other variable name for `rowSame` (because it not always a row 
field thats being dealt with), tried using `fieldBit` instead.
   
   ```suggestion
     private int getCommonPrefixLen(ByteSequence prevKeyScratch, ByteSequence 
keyScratch, byte fieldBit,
         byte commonPrefix) {
       int commonPrefixLen = getCommonPrefix(prevKeyScratch, keyScratch);
       if (commonPrefixLen == -1) {
         fieldsSame |= fieldBit;
   ```



##########
core/src/main/java/org/apache/accumulo/core/file/rfile/RelativeKey.java:
##########
@@ -174,40 +168,13 @@ public void readFields(DataInput in) throws IOException {
       fieldsPrefixed = 0;
     }
 
-    byte[] row, cf, cq, cv;
-    long ts;
-
-    if ((fieldsSame & ROW_SAME) == ROW_SAME) {
-      row = prevKey.getRowData().toArray();
-    } else if ((fieldsPrefixed & ROW_COMMON_PREFIX) == ROW_COMMON_PREFIX) {
-      row = readPrefix(in, prevKey.getRowData());
-    } else {
-      row = read(in);
-    }
-
-    if ((fieldsSame & CF_SAME) == CF_SAME) {
-      cf = prevKey.getColumnFamilyData().toArray();
-    } else if ((fieldsPrefixed & CF_COMMON_PREFIX) == CF_COMMON_PREFIX) {
-      cf = readPrefix(in, prevKey.getColumnFamilyData());
-    } else {
-      cf = read(in);
-    }
-
-    if ((fieldsSame & CQ_SAME) == CQ_SAME) {
-      cq = prevKey.getColumnQualifierData().toArray();
-    } else if ((fieldsPrefixed & CQ_COMMON_PREFIX) == CQ_COMMON_PREFIX) {
-      cq = readPrefix(in, prevKey.getColumnQualifierData());
-    } else {
-      cq = read(in);
-    }
+    final byte[] row, cf, cq, cv;
+    final long ts;
 
-    if ((fieldsSame & CV_SAME) == CV_SAME) {
-      cv = prevKey.getColumnVisibilityData().toArray();
-    } else if ((fieldsPrefixed & CV_COMMON_PREFIX) == CV_COMMON_PREFIX) {
-      cv = readPrefix(in, prevKey.getColumnVisibilityData());
-    } else {
-      cv = read(in);
-    }
+    row = getData(in, ROW_SAME, ROW_COMMON_PREFIX, () -> prevKey.getRowData());

Review Comment:
   This is a really nice change.



##########
server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java:
##########
@@ -396,80 +396,89 @@ Set<StoredTabletFile> getCandidates(Set<StoredTabletFile> 
currFiles, CompactionK
           if (isCompactionStratConfigured)
             return Set.of();
 
-          switch (selectStatus) {
-            case NOT_ACTIVE:
-            case CANCELED: {
-              Set<StoredTabletFile> candidates = new HashSet<>(currFiles);
-              candidates.removeAll(allCompactingFiles);
-              return Collections.unmodifiableSet(candidates);
-            }
-            case NEW:
-            case SELECTING:
-              return Set.of();
-            case SELECTED: {
-              Set<StoredTabletFile> candidates = new HashSet<>(currFiles);
-              candidates.removeAll(allCompactingFiles);
-              if (getNanoTime() - selectedTimeNanos < 
selectedExpirationDuration.toNanos()) {
-                candidates.removeAll(selectedFiles);
-              }
-              return Collections.unmodifiableSet(candidates);
-            }
-            case RESERVED: {
-              Set<StoredTabletFile> candidates = new HashSet<>(currFiles);
-              candidates.removeAll(allCompactingFiles);
-              candidates.removeAll(selectedFiles);
-              return Collections.unmodifiableSet(candidates);
-            }
-            default:
-              throw new AssertionError();
-          }
+          return handleSystemCompaction(currFiles);
         }
         case SELECTOR:
           // intentional fall through
         case USER:
-          switch (selectStatus) {
-            case NOT_ACTIVE:
-            case NEW:
-            case SELECTING:
-            case CANCELED:
-              return Set.of();
-            case SELECTED:
-            case RESERVED: {
-              if (selectKind == kind) {
-                Set<StoredTabletFile> candidates = new 
HashSet<>(selectedFiles);
-                candidates.removeAll(allCompactingFiles);
-                candidates = Collections.unmodifiableSet(candidates);
-                Preconditions.checkState(currFiles.containsAll(candidates),
-                    "selected files not in all files %s %s", candidates, 
currFiles);
-                return candidates;
-              } else {
-                return Set.of();
-              }
-            }
-            default:
-              throw new AssertionError();
-          }
+          return handleUserSelectorCompaction(currFiles, kind);
         case CHOP: {
-          switch (chopStatus) {
-            case NOT_ACTIVE:
-            case SELECTING:
-            case MARKING:
-              return Set.of();
-            case SELECTED: {
-              if (selectStatus == FileSelectionStatus.NEW
-                  || selectStatus == FileSelectionStatus.SELECTING)
-                return Set.of();
-
-              var filesToChop = getFilesToChop(currFiles);
-              filesToChop.removeAll(allCompactingFiles);
-              if (selectStatus == FileSelectionStatus.SELECTED
-                  || selectStatus == FileSelectionStatus.RESERVED)
-                filesToChop.removeAll(selectedFiles);
-              return Collections.unmodifiableSet(filesToChop);
-            }
-            default:
-              throw new AssertionError();
+          return handleChopCompaction(currFiles);
+        }
+        default:
+          throw new AssertionError();
+      }
+    }
+
+    private Set<StoredTabletFile> handleChopCompaction(Set<StoredTabletFile> 
currFiles) {
+      switch (chopStatus) {
+        case NOT_ACTIVE:
+        case SELECTING:
+        case MARKING:
+          return Set.of();
+        case SELECTED: {
+          if (selectStatus == FileSelectionStatus.NEW
+              || selectStatus == FileSelectionStatus.SELECTING)
+            return Set.of();
+
+          var filesToChop = getFilesToChop(currFiles);
+          filesToChop.removeAll(allCompactingFiles);
+          if (selectStatus == FileSelectionStatus.SELECTED
+              || selectStatus == FileSelectionStatus.RESERVED)
+            filesToChop.removeAll(selectedFiles);
+          return Collections.unmodifiableSet(filesToChop);
+        }
+        default:
+          throw new AssertionError();
+      }
+    }
+
+    private Set<StoredTabletFile> 
handleUserSelectorCompaction(Set<StoredTabletFile> currFiles,
+        CompactionKind kind) {
+      switch (selectStatus) {
+        case NOT_ACTIVE:
+        case NEW:
+        case SELECTING:
+        case CANCELED:
+          return Set.of();
+        case SELECTED:
+        case RESERVED: {
+          if (selectKind == kind) {
+            Set<StoredTabletFile> candidates = Sets.difference(selectedFiles, 
allCompactingFiles);
+            Preconditions.checkState(currFiles.containsAll(candidates),
+                "selected files not in all files %s %s", candidates, 
currFiles);
+            return Collections.unmodifiableSet(candidates);

Review Comment:
   This code is called in a sync block in which `selectedFiles` and 
`allCompactingFiles` will not be changing.  The functions `Sets.difference()` 
and `Collections.unmodifiableSet` may keep references to the passed in sets but 
will not copy.  So this function may return a set that internally references   
`selectedFiles` and `allCompactingFiles` which may be changing.  Its therefore 
important to copy as the original code did.
   
   The following change will copy.  I have no idea if its less or more 
efficient than the original code, but I think `Sets.difference()` plus 
`Set.copyOf()` is more readable than the original code so I would go with it. 
   
   ```suggestion
               //must create a copy because the sets passed to Sets.difference 
could change after this method returns
               return Set.copyOf(candidates);
   ```
   
   I looked around at the other changes and I think all of those copy, but it 
would be good if you could double check for any other cases like this in the 
changes in this file.



##########
server/tserver/src/main/java/org/apache/accumulo/tserver/scan/LookupTask.java:
##########
@@ -95,9 +95,14 @@ public void run() {
       // check the time so that the read ahead thread is not monopolized
       while (iter.hasNext() && bytesAdded < maxResultsSize
           && (System.currentTimeMillis() - startTime) < maxScanTime) {
-        Entry<KeyExtent,List<Range>> entry = iter.next();
-        KeyExtent extent = entry.getKey();
-        List<Range> ranges = entry.getValue();
+        final KeyExtent extent;
+        final List<Range> ranges;
+        {
+          final Entry<KeyExtent,List<Range>> entry = iter.next();
+          extent = entry.getKey();
+          ranges = entry.getValue();
+        }

Review Comment:
   Was this little code block created just to narrowly scope `entry`?



##########
core/src/main/java/org/apache/accumulo/core/iteratorsImpl/system/LocalityGroupIterator.java:
##########
@@ -207,35 +208,33 @@ static final Collection<LocalityGroup> _seek(HeapIterator 
hiter, LocalityGroupCo
        * other
        */
       if (!inclusive) {
-        for (Entry<ByteSequence,LocalityGroup> entry : 
lgContext.groupByCf.entrySet()) {
-          if (!cfSet.contains(entry.getKey())) {
-            groups.add(entry.getValue());
-          }
-        }
+        lgContext.groupByCf.entrySet().stream().filter(entry -> 
!cfSet.contains(entry.getKey()))
+            .map(Entry::getValue).forEach(groups::add);
       } else if (lgContext.groupByCf.size() <= cfSet.size()) {
-        for (Entry<ByteSequence,LocalityGroup> entry : 
lgContext.groupByCf.entrySet()) {
-          if (cfSet.contains(entry.getKey())) {
-            groups.add(entry.getValue());
-          }
-        }
+        lgContext.groupByCf.entrySet().stream().filter(entry -> 
cfSet.contains(entry.getKey()))
+            .map(Entry::getValue).forEach(groups::add);
       } else {
-        for (ByteSequence cf : cfSet) {
-          LocalityGroup group = lgContext.groupByCf.get(cf);
-          if (group != null) {
-            groups.add(group);
-          }
-        }
+        
cfSet.stream().map(lgContext.groupByCf::get).filter(Objects::nonNull).forEach(groups::add);
       }
     }
 
-    for (LocalityGroup lgr : groups) {
-      lgr.getIterator().seek(range, EMPTY_CF_SET, false);
-      hiter.addSource(lgr.getIterator());
-    }
-
     return groups;
   }
 
+  private static Set<ByteSequence> getCfSet(Collection<ByteSequence> 
columnFamilies) {
+    final Set<ByteSequence> cfSet;
+    if (columnFamilies.isEmpty()) {
+      cfSet = Collections.emptySet();
+    } else {
+      if (columnFamilies instanceof Set<?>) {
+        cfSet = (Set<ByteSequence>) columnFamilies;
+      } else {
+        cfSet = new HashSet<>(columnFamilies);

Review Comment:
   Could do the following, might be better since its a fixed size set that is 
not later mutated.
   
   ```suggestion
           cfSet = Set.copyOf(columnFamilies);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to