EdColeman commented on code in PR #4127:
URL: https://github.com/apache/accumulo/pull/4127#discussion_r1440933311
##########
core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java:
##########
@@ -312,6 +336,73 @@ public CompactionPlan makePlan(PlanningParameters params) {
}
}
+ static int getMaxTabletFiles(ServiceEnvironment.Configuration configuration)
{
+ int maxTabletFiles =
Integer.parseInt(configuration.get(Property.TABLE_FILE_MAX.getKey()));
+ if (maxTabletFiles <= 0) {
+ maxTabletFiles =
+
Integer.parseInt(configuration.get(Property.TSERV_SCAN_MAX_OPENFILES.getKey()))
- 1;
+ }
+ return maxTabletFiles;
+ }
+
+ /**
+ * Searches for the highest compaction ratio that is less than the
configured ratio that will
+ * lower the number of files.
+ */
+ private Collection<CompactableFile>
findFilesToCompactWithLowerRatio(PlanningParameters params,
+ long maxSizeToCompact, int maxTabletFiles) {
+ double lowRatio = 1.0;
+ double highRatio = params.getRatio();
+
+ Preconditions.checkArgument(highRatio >= lowRatio);
+
+ var candidates = Set.copyOf(params.getCandidates());
+ Collection<CompactableFile> found = Set.of();
+
+ int goalCompactionSize = candidates.size() - maxTabletFiles + 1;
+ if (goalCompactionSize > maxFilesToCompact) {
+ // The tablet is way over max tablet files, so multiple compactions will
be needed. Therefore,
+ // do not set a goal size for this compaction and find the largest
compaction ratio that will
+ // compact some set of files.
+ goalCompactionSize = 0;
+ }
+
+ // Do a binary search of the compaction ratios.
+ while (highRatio - lowRatio > .1) {
+ double ratioToCheck = (highRatio - lowRatio) / 2 + lowRatio;
+
+ // This is continually resorting the list of files in the following
call, could optimize this
+ var filesToCompact =
+ findDataFilesToCompact(candidates, ratioToCheck, maxFilesToCompact,
maxSizeToCompact);
+
+ log.trace("Tried ratio {} and found {} {} {}", ratioToCheck,
filesToCompact,
+ filesToCompact.size() >= goalCompactionSize, goalCompactionSize);
+
+ if (filesToCompact.isEmpty() || filesToCompact.size() <
goalCompactionSize) {
+ highRatio = ratioToCheck;
+ } else {
+ lowRatio = ratioToCheck;
+ found = filesToCompact;
+ }
+ }
+
+ if (found.isEmpty() && lowRatio == 1.0) {
+ // in this case the data must be really skewed, operator intervention
may be needed.
+ log.warn(
+ "Attempted to lower compaction ration from {} to {} for {} because
there are {} files "
+ + "and the max tablet files is {}, however no set of files to
compact were found.",
+ params.getRatio(), highRatio, params.getTableId(),
params.getCandidates().size(),
+ maxTabletFiles);
+ }
+
+ log.trace(
Review Comment:
This may be better if logged at info, maybe even warn?
The case for info would be to make sure it shows up in the logs so that
it can be traced why the compactions are occurring when they do not meet the
configured compaction ratio.
The case for warn would be to call attention to the fact that the current
compaction configuration may not be adequate - maybe they would want to raise
the number of files, increase the allowed file size, or something else?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]