Github user Ethanlm commented on a diff in the pull request:
https://github.com/apache/storm/pull/2754#discussion_r208726809
--- Diff: storm-webapp/src/main/java/org/apache/storm/daemon/logviewer/handler/LogviewerLogSearchHandler.java ---
@@ -388,63 +414,83 @@ private Integer tryParseIntParam(String paramName, String value) throws InvalidRequestException {
}
}
+ /**
+ * Find the first N matches of the target string in the given files.
+ * @param logs all candidate log files to search
+ * @param numMatches number of matches expected
+ * @param fileOffset number of log files to skip before searching (unclear metric; see the question below)
+ * @param startByteOffset number of bytes to skip in the first log file searched (later files start at 0)
+ * @param targetStr the string to search for
+ * @return all matched results
+ */
@VisibleForTesting
- Matched findNMatches(List<File> logs, int numMatches, int fileOffset, int offset, String search) {
+ Matched findNMatches(List<File> logs, int numMatches, int fileOffset, int startByteOffset, String targetStr) {
logs = drop(logs, fileOffset);
+ LOG.debug("{} files to scan", logs.size());
List<Map<String, Object>> matches = new ArrayList<>();
int matchCount = 0;
+ int scannedFiles = 0;
+ //TODO: Unnecessarily convoluted loop that should be optimized
while (true) {
if (logs.isEmpty()) {
break;
}
File firstLog = logs.get(0);
- Map<String, Object> theseMatches;
+ Map<String, Object> matchInLog;
try {
LOG.debug("Looking through {}", firstLog);
- theseMatches = substringSearch(firstLog, search, numMatches - matchCount, offset);
+ matchInLog = substringSearch(firstLog, targetStr, numMatches - matchCount, startByteOffset);
+ scannedFiles++;
} catch (InvalidRequestException e) {
LOG.error("Can't search past end of file.", e);
- theseMatches = new HashMap<>();
+ matchInLog = new HashMap<>();
}
String fileName = WorkerLogs.getTopologyPortWorkerLog(firstLog);
+ //This section simply puts the formatted log file name and the corresponding port into the match entry.
final List<Map<String, Object>> newMatches = new ArrayList<>(matches);
- Map<String, Object> currentFileMatch = new HashMap<>(theseMatches);
+ Map<String, Object> currentFileMatch = new HashMap<>(matchInLog);
currentFileMatch.put("fileName", fileName);
Path firstLogAbsPath;
try {
firstLogAbsPath = firstLog.getCanonicalFile().toPath();
} catch (IOException e) {
throw new RuntimeException(e);
}
+ //Why do we need to start from scratch to retrieve just the port here?
currentFileMatch.put("port", truncatePathToLastElements(firstLogAbsPath, 2).getName(0).toString());
newMatches.add(currentFileMatch);
- int newCount = matchCount + ((List<?>)theseMatches.get("matches")).size();
+ int newCount = matchCount + ((List<?>)matchInLog.get("matches")).size();
- //theseMatches is never empty! As guaranteed by the #get().size() method above
+ //matchInLog is never empty! As guaranteed by the #get().size() method above
if (newCount == matchCount) {
// matches and matchCount are not changed
logs = rest(logs);
- offset = 0;
+ startByteOffset = 0;
fileOffset = fileOffset + 1;
} else if (newCount >= numMatches) {
matches = newMatches;
break;
} else {
matches = newMatches;
logs = rest(logs);
- offset = 0;
+ startByteOffset = 0;
fileOffset = fileOffset + 1;
matchCount = newCount;
}
}
- return new Matched(fileOffset, search, matches);
+ LOG.info("scanned {} files", scannedFiles);
+ //fileOffset is not being used and it behaves inconsistently (showing
+ // (index of the file the search ends on - 1) if [enough matches] else (index of the file the search ends on)).
+ // I don't think we should expose the data publicly if it's not used.
+ // Can I drop this field or change its behavior so it's used for the metric [numScannedFiles]?
--- End diff ---
You can file a separate jira for this
---
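
[Editor's sketch] To make the questions flagged in the diff concrete (the TODO about the convoluted while-loop and the unused fileOffset / [numScannedFiles] idea), here is a minimal, self-contained Java sketch of the same loop shape. It is not Storm's implementation: the FindNMatchesSketch class and its Result type are made up for illustration, matching is done with a plain indexOf over the whole file instead of substringSearch, and the byte offset is treated as a character index for simplicity. It only shows how the scan could be written as a bounded for-loop that skips the first fileOffset files, applies startByteOffset to the first scanned file only, stops at numMatches, and counts scanned files.

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

public class FindNMatchesSketch {

    /** Hypothetical result holder; Storm's real method returns a Matched object instead. */
    public static final class Result {
        public final List<Integer> matchOffsets; // offset of each match within its file
        public final int scannedFiles;           // files actually opened, i.e. [numScannedFiles]

        Result(List<Integer> matchOffsets, int scannedFiles) {
            this.matchOffsets = matchOffsets;
            this.scannedFiles = scannedFiles;
        }
    }

    /**
     * Scan candidate files in order, skipping the first fileOffset files, applying
     * startByteOffset only to the first scanned file, and stopping once numMatches
     * matches have been collected. Offsets are character indexes (a simplification).
     */
    public static Result findNMatches(List<Path> logs, int numMatches, int fileOffset,
                                      int startByteOffset, String targetStr) throws IOException {
        List<Integer> found = new ArrayList<>();
        int scannedFiles = 0;

        for (int i = fileOffset; i < logs.size() && found.size() < numMatches; i++) {
            scannedFiles++;
            String content = new String(Files.readAllBytes(logs.get(i)), StandardCharsets.UTF_8);

            // Only the first scanned file honors the caller-supplied offset; every
            // later file is searched from the beginning, mirroring the
            // startByteOffset = 0 resets in the diff above.
            int from = (i == fileOffset) ? startByteOffset : 0;

            int idx = content.indexOf(targetStr, from);
            while (idx >= 0 && found.size() < numMatches) {
                found.add(idx);
                idx = content.indexOf(targetStr, idx + targetStr.length());
            }
        }
        return new Result(found, scannedFiles);
    }
}

Called as, say, findNMatches(paths, 10, 0, 0, "Exception"), the sketch returns both the match offsets and the scanned-file count, which is roughly what reporting [numScannedFiles] instead of the inconsistent fileOffset value would look like.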