keith-turner commented on code in PR #2811:
URL: https://github.com/apache/accumulo/pull/2811#discussion_r982208133
##########
core/src/main/java/org/apache/accumulo/core/file/rfile/RelativeKey.java:
##########
@@ -221,6 +188,17 @@ public void readFields(DataInput in) throws IOException {
this.prevKey = this.key;
}
+ private byte[] getData(DataInput in, byte same, byte commonPrefix,
Supplier<ByteSequence> data)
Review Comment:
I suggested using `fieldBit` in a previous comment instead of `rowSame`.
What ever you end using in the other method it would be good to make the
variable name here `same` consistent with the name in the other method. Could
change `rowSame` to `same` in the other method to make it consistent with this
method.
Below are suggested renamings for overall consistency.
```suggestion
private byte[] getData(DataInput in, byte fieldBit, byte commonPrefix,
Supplier<ByteSequence> fieldReader)
```
##########
core/src/main/java/org/apache/accumulo/core/file/rfile/RelativeKey.java:
##########
@@ -135,6 +118,17 @@ else if (cvCommonPrefixLen > 1)
fieldsSame |= DELETED;
}
+ private int getCommonPrefixLen(ByteSequence prevKeyScratch, ByteSequence
keyScratch, byte rowSame,
+ byte commonPrefix) {
+ int commonPrefixLen = getCommonPrefix(prevKeyScratch, keyScratch);
+ if (commonPrefixLen == -1) {
+ fieldsSame |= rowSame;
Review Comment:
Need some other variable name for `rowSame` (because it not always a row
field thats being dealt with), tried using `fieldBit` instead.
```suggestion
private int getCommonPrefixLen(ByteSequence prevKeyScratch, ByteSequence
keyScratch, byte fieldBit,
byte commonPrefix) {
int commonPrefixLen = getCommonPrefix(prevKeyScratch, keyScratch);
if (commonPrefixLen == -1) {
fieldsSame |= fieldBit;
```
##########
core/src/main/java/org/apache/accumulo/core/file/rfile/RelativeKey.java:
##########
@@ -174,40 +168,13 @@ public void readFields(DataInput in) throws IOException {
fieldsPrefixed = 0;
}
- byte[] row, cf, cq, cv;
- long ts;
-
- if ((fieldsSame & ROW_SAME) == ROW_SAME) {
- row = prevKey.getRowData().toArray();
- } else if ((fieldsPrefixed & ROW_COMMON_PREFIX) == ROW_COMMON_PREFIX) {
- row = readPrefix(in, prevKey.getRowData());
- } else {
- row = read(in);
- }
-
- if ((fieldsSame & CF_SAME) == CF_SAME) {
- cf = prevKey.getColumnFamilyData().toArray();
- } else if ((fieldsPrefixed & CF_COMMON_PREFIX) == CF_COMMON_PREFIX) {
- cf = readPrefix(in, prevKey.getColumnFamilyData());
- } else {
- cf = read(in);
- }
-
- if ((fieldsSame & CQ_SAME) == CQ_SAME) {
- cq = prevKey.getColumnQualifierData().toArray();
- } else if ((fieldsPrefixed & CQ_COMMON_PREFIX) == CQ_COMMON_PREFIX) {
- cq = readPrefix(in, prevKey.getColumnQualifierData());
- } else {
- cq = read(in);
- }
+ final byte[] row, cf, cq, cv;
+ final long ts;
- if ((fieldsSame & CV_SAME) == CV_SAME) {
- cv = prevKey.getColumnVisibilityData().toArray();
- } else if ((fieldsPrefixed & CV_COMMON_PREFIX) == CV_COMMON_PREFIX) {
- cv = readPrefix(in, prevKey.getColumnVisibilityData());
- } else {
- cv = read(in);
- }
+ row = getData(in, ROW_SAME, ROW_COMMON_PREFIX, () -> prevKey.getRowData());
Review Comment:
This is a really nice change.
##########
server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java:
##########
@@ -396,80 +396,89 @@ Set<StoredTabletFile> getCandidates(Set<StoredTabletFile>
currFiles, CompactionK
if (isCompactionStratConfigured)
return Set.of();
- switch (selectStatus) {
- case NOT_ACTIVE:
- case CANCELED: {
- Set<StoredTabletFile> candidates = new HashSet<>(currFiles);
- candidates.removeAll(allCompactingFiles);
- return Collections.unmodifiableSet(candidates);
- }
- case NEW:
- case SELECTING:
- return Set.of();
- case SELECTED: {
- Set<StoredTabletFile> candidates = new HashSet<>(currFiles);
- candidates.removeAll(allCompactingFiles);
- if (getNanoTime() - selectedTimeNanos <
selectedExpirationDuration.toNanos()) {
- candidates.removeAll(selectedFiles);
- }
- return Collections.unmodifiableSet(candidates);
- }
- case RESERVED: {
- Set<StoredTabletFile> candidates = new HashSet<>(currFiles);
- candidates.removeAll(allCompactingFiles);
- candidates.removeAll(selectedFiles);
- return Collections.unmodifiableSet(candidates);
- }
- default:
- throw new AssertionError();
- }
+ return handleSystemCompaction(currFiles);
}
case SELECTOR:
// intentional fall through
case USER:
- switch (selectStatus) {
- case NOT_ACTIVE:
- case NEW:
- case SELECTING:
- case CANCELED:
- return Set.of();
- case SELECTED:
- case RESERVED: {
- if (selectKind == kind) {
- Set<StoredTabletFile> candidates = new
HashSet<>(selectedFiles);
- candidates.removeAll(allCompactingFiles);
- candidates = Collections.unmodifiableSet(candidates);
- Preconditions.checkState(currFiles.containsAll(candidates),
- "selected files not in all files %s %s", candidates,
currFiles);
- return candidates;
- } else {
- return Set.of();
- }
- }
- default:
- throw new AssertionError();
- }
+ return handleUserSelectorCompaction(currFiles, kind);
case CHOP: {
- switch (chopStatus) {
- case NOT_ACTIVE:
- case SELECTING:
- case MARKING:
- return Set.of();
- case SELECTED: {
- if (selectStatus == FileSelectionStatus.NEW
- || selectStatus == FileSelectionStatus.SELECTING)
- return Set.of();
-
- var filesToChop = getFilesToChop(currFiles);
- filesToChop.removeAll(allCompactingFiles);
- if (selectStatus == FileSelectionStatus.SELECTED
- || selectStatus == FileSelectionStatus.RESERVED)
- filesToChop.removeAll(selectedFiles);
- return Collections.unmodifiableSet(filesToChop);
- }
- default:
- throw new AssertionError();
+ return handleChopCompaction(currFiles);
+ }
+ default:
+ throw new AssertionError();
+ }
+ }
+
+ private Set<StoredTabletFile> handleChopCompaction(Set<StoredTabletFile>
currFiles) {
+ switch (chopStatus) {
+ case NOT_ACTIVE:
+ case SELECTING:
+ case MARKING:
+ return Set.of();
+ case SELECTED: {
+ if (selectStatus == FileSelectionStatus.NEW
+ || selectStatus == FileSelectionStatus.SELECTING)
+ return Set.of();
+
+ var filesToChop = getFilesToChop(currFiles);
+ filesToChop.removeAll(allCompactingFiles);
+ if (selectStatus == FileSelectionStatus.SELECTED
+ || selectStatus == FileSelectionStatus.RESERVED)
+ filesToChop.removeAll(selectedFiles);
+ return Collections.unmodifiableSet(filesToChop);
+ }
+ default:
+ throw new AssertionError();
+ }
+ }
+
+ private Set<StoredTabletFile>
handleUserSelectorCompaction(Set<StoredTabletFile> currFiles,
+ CompactionKind kind) {
+ switch (selectStatus) {
+ case NOT_ACTIVE:
+ case NEW:
+ case SELECTING:
+ case CANCELED:
+ return Set.of();
+ case SELECTED:
+ case RESERVED: {
+ if (selectKind == kind) {
+ Set<StoredTabletFile> candidates = Sets.difference(selectedFiles,
allCompactingFiles);
+ Preconditions.checkState(currFiles.containsAll(candidates),
+ "selected files not in all files %s %s", candidates,
currFiles);
+ return Collections.unmodifiableSet(candidates);
Review Comment:
This code is called in a sync block in which `selectedFiles` and
`allCompactingFiles` will not be changing. The functions `Sets.difference()`
and `Collections.unmodifiableSet` may keep references to the passed in sets but
will not copy. So this function may return a set that internally references
`selectedFiles` and `allCompactingFiles` which may be changing. Its therefore
important to copy as the original code did.
The following change will copy. I have no idea if its less or more
efficient than the original code, but I think `Sets.difference()` plus
`Set.copyOf()` is more readable than the original code so I would go with it.
```suggestion
//must create a copy because the sets passed to Sets.difference
could change after this method returns
return Set.copyOf(candidates);
```
I looked around at the other changes and I think all of those copy, but it
would be good if you could double check for any other cases like this in the
changes in this file.
##########
server/tserver/src/main/java/org/apache/accumulo/tserver/scan/LookupTask.java:
##########
@@ -95,9 +95,14 @@ public void run() {
// check the time so that the read ahead thread is not monopolized
while (iter.hasNext() && bytesAdded < maxResultsSize
&& (System.currentTimeMillis() - startTime) < maxScanTime) {
- Entry<KeyExtent,List<Range>> entry = iter.next();
- KeyExtent extent = entry.getKey();
- List<Range> ranges = entry.getValue();
+ final KeyExtent extent;
+ final List<Range> ranges;
+ {
+ final Entry<KeyExtent,List<Range>> entry = iter.next();
+ extent = entry.getKey();
+ ranges = entry.getValue();
+ }
Review Comment:
Was this little code block created just to narrowly scope `entry`?
##########
core/src/main/java/org/apache/accumulo/core/iteratorsImpl/system/LocalityGroupIterator.java:
##########
@@ -207,35 +208,33 @@ static final Collection<LocalityGroup> _seek(HeapIterator
hiter, LocalityGroupCo
* other
*/
if (!inclusive) {
- for (Entry<ByteSequence,LocalityGroup> entry :
lgContext.groupByCf.entrySet()) {
- if (!cfSet.contains(entry.getKey())) {
- groups.add(entry.getValue());
- }
- }
+ lgContext.groupByCf.entrySet().stream().filter(entry ->
!cfSet.contains(entry.getKey()))
+ .map(Entry::getValue).forEach(groups::add);
} else if (lgContext.groupByCf.size() <= cfSet.size()) {
- for (Entry<ByteSequence,LocalityGroup> entry :
lgContext.groupByCf.entrySet()) {
- if (cfSet.contains(entry.getKey())) {
- groups.add(entry.getValue());
- }
- }
+ lgContext.groupByCf.entrySet().stream().filter(entry ->
cfSet.contains(entry.getKey()))
+ .map(Entry::getValue).forEach(groups::add);
} else {
- for (ByteSequence cf : cfSet) {
- LocalityGroup group = lgContext.groupByCf.get(cf);
- if (group != null) {
- groups.add(group);
- }
- }
+
cfSet.stream().map(lgContext.groupByCf::get).filter(Objects::nonNull).forEach(groups::add);
}
}
- for (LocalityGroup lgr : groups) {
- lgr.getIterator().seek(range, EMPTY_CF_SET, false);
- hiter.addSource(lgr.getIterator());
- }
-
return groups;
}
+ private static Set<ByteSequence> getCfSet(Collection<ByteSequence>
columnFamilies) {
+ final Set<ByteSequence> cfSet;
+ if (columnFamilies.isEmpty()) {
+ cfSet = Collections.emptySet();
+ } else {
+ if (columnFamilies instanceof Set<?>) {
+ cfSet = (Set<ByteSequence>) columnFamilies;
+ } else {
+ cfSet = new HashSet<>(columnFamilies);
Review Comment:
Could do the following, might be better since its a fixed size set that is
not later mutated.
```suggestion
cfSet = Set.copyOf(columnFamilies);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]