================
@@ -240,6 +241,121 @@ inline std::string StripAnsiTerminalCodes(llvm::StringRef
str) {
return stripped;
}
+/// Return the width of \p str as rendered to the terminal, measured after
+/// its ANSI escape codes have been stripped.
+///
+/// NOTE(review): the result of llvm::sys::locale::columnWidth is converted to
+/// size_t here. If that call can report an error as a negative value, it
+/// would surface as a very large width - confirm callers tolerate that.
+inline size_t ColumnWidth(llvm::StringRef str) {
+ // Measure only the text left once the escape codes are removed.
+ const std::string visible_text = ansi::StripAnsiTerminalCodes(str);
+ return static_cast<size_t>(llvm::sys::locale::columnWidth(visible_text));
+}
+
+/// Trim the given string to the given visible length, at a word boundary.
+/// Visible length means its width when rendered to the terminal.
+/// The string can include ANSI codes and Unicode.
+///
+/// For a single word string, that word is returned in its entirety regardless
+/// of its visible length.
+///
+/// This function is similar to TrimAndPad, except that it must split on a word
+/// boundary. So there are some notable differences:
+/// * Has a special case for single words that exceed desired visible
+/// length.
+/// * Must track whether the most recent modification was on a word boundary
+/// or not.
+/// * If the trimming finishes without the result ending on a word boundary,
+/// it must find the nearest boundary to that trim point by trimming more.
+inline std::string TrimAtWordBoundary(llvm::StringRef str,
+ size_t visible_length) {
+ str = str.trim();
+ if (str.empty())
+ return str.str();
+
+ auto first_whitespace = str.find_first_of(" \t\n");
+ // No whitespace means a single word, which we cannot split.
+ if (first_whitespace == llvm::StringRef::npos)
+ return str.str();
+
+ // If the first word of a multi-word string is too wide, return that whole
+ // word only.
+ auto to_first_word_boundary = str.substr(0, first_whitespace);
+ // We use ansi::ColumnWidth here because it can handle ANSI and Unicode.
+ if (static_cast<size_t>(ansi::ColumnWidth(to_first_word_boundary)) >
+ visible_length)
+ return to_first_word_boundary.str();
+
+ std::string result;
+ result.reserve(visible_length);
+ // When there is Unicode or ANSI codes, the visible length will not equal
+ // result.size(), so we track it separately.
+ size_t result_visible_length = 0;
+
+ // The loop below makes many adjustments, and we never know which will be the
+ // last. This tracks whether the most recent adjustment put us at a word
+ // boundary and is checked after the main loop.
+ bool at_word_boundary = false;
+
+ // Trim the string to the given visible length.
+ while (!str.empty() && result_visible_length < visible_length) {
+ auto [left, escape, right] = FindNextAnsiSequence(str);
+ str = right;
+
+ // We know that left does not include ANSI codes. Compute its visible length
+ // and if it fits, append it together with the invisible escape code.
+ size_t column_width = llvm::sys::locale::columnWidth(left);
+ if (result_visible_length + column_width <= visible_length) {
+ result.append(left).append(escape);
+ result_visible_length += column_width;
+ at_word_boundary = right.empty() || std::isspace(right[0]);
+
+ continue;
+ }
+
+ // The string might contain unicode which means it's not safe to truncate.
+ // Repeatedly trim the string until it is valid Unicode and fits.
+ llvm::StringRef trimmed = left;
+
+ // A word break can happen at the character we trim to, or the one we
+ // trimmed before that (we are going backwards, so before in the loop is
+ // after in the string).
+
+ // A word break can happen at the point we trim, or just beyond that point.
+ // In other words: at the current back of trimmed, or what was the back last
+ // time around. following_char records the character popped in the previous
+ // loop iteration.
+ std::optional<char> following_char = std::nullopt;
+ while (!trimmed.empty()) {
+ int trimmed_width = llvm::sys::locale::columnWidth(trimmed);
+ if (
+ // If we have a partial Unicode character, keep trimming.
+ trimmed_width !=
+ llvm::sys::unicode::ColumnWidthErrors::ErrorInvalidUTF8 &&
----------------
DavidSpickett wrote:
I'm applying this fix to TrimAndPad also, in
https://github.com/llvm/llvm-project/pull/183299.
https://github.com/llvm/llvm-project/pull/183314
_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits