llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-lldb Author: David Spickett (DavidSpickett) <details> <summary>Changes</summary> As seen in #<!-- -->177570, this code has a bunch of corner cases, does not handle ANSI codes properly and does not handle unicode at all. That's enough to fix that we need some tests to make it clear where we're starting from. The body of OutputFormattedUsageText is moved into a utility in the AnsiTerminal.h header and tests added to the existing AnsiTerminalTest.cpp. Some results are known to be wrong. Some that cause crashes are commented out, to be enabled once fixed. --- Full diff: https://github.com/llvm/llvm-project/pull/180947.diff 3 Files Affected: - (modified) lldb/include/lldb/Utility/AnsiTerminal.h (+64) - (modified) lldb/source/Interpreter/Options.cpp (+1-50) - (modified) lldb/unittests/Utility/AnsiTerminalTest.cpp (+68) ``````````diff diff --git a/lldb/include/lldb/Utility/AnsiTerminal.h b/lldb/include/lldb/Utility/AnsiTerminal.h index aaaf94c6c5f7f..0e696e49eda0b 100644 --- a/lldb/include/lldb/Utility/AnsiTerminal.h +++ b/lldb/include/lldb/Utility/AnsiTerminal.h @@ -99,6 +99,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Locale.h" +#include "lldb/Utility/Stream.h" + #include <string> namespace lldb_private { @@ -287,6 +289,68 @@ inline size_t ColumnWidth(llvm::StringRef str) { return llvm::sys::locale::columnWidth(stripped); } +// Output text that may contain ANSI codes, word wrapped (wrapped at whitespace) +// to the given stream. The indent level of the stream is counted towards the +// output line length. +// FIXME: This contains several bugs and does not handle unicode. +inline void OutputWordWrappedLines(Stream &strm, + // FIXME: should be StringRef, but triggers + // a crash when changed. + const std::string &text, + uint32_t output_max_columns) { + const size_t visible_length = ansi::ColumnWidth(text); + + // Will it all fit on one line? + if (static_cast<uint32_t>(visible_length + strm.GetIndentLevel()) < + output_max_columns) { + // Output it as a single line. + strm.Indent(text); + strm.EOL(); + return; + } + + // We need to break it up into multiple lines. We can do this based on the + // formatted text because we know that: + // * We only break lines on whitespace, therefore we will not break in the + // middle of a Unicode character or escape code. + // * Escape codes are so far not applied to multiple words, so there is no + // risk of breaking up a phrase and the escape code being incorrectly + // applied to the indent too. + + const int max_text_width = output_max_columns - strm.GetIndentLevel() - 1; + int start = 0; + int end = start; + const int final_end = visible_length; + + while (end < final_end) { + // Don't start the 'text' on a space, since we're already outputting the + // indentation. + while ((start < final_end) && (text[start] == ' ')) + start++; + + end = start + max_text_width; + if (end > final_end) { + end = final_end; + } else { + // If we're not at the end of the text, make sure we break the line on + // white space. + while (end > start && text[end] != ' ' && text[end] != '\t' && + text[end] != '\n') + end--; + } + + const int sub_len = end - start; + if (start != 0) + strm.EOL(); + strm.Indent(); + assert(start < final_end); + assert(start + sub_len <= final_end); + strm.PutCString(llvm::StringRef(text.c_str() + start, sub_len)); + start = end + 1; + } + strm.EOL(); +} + } // namespace ansi } // namespace lldb_private diff --git a/lldb/source/Interpreter/Options.cpp b/lldb/source/Interpreter/Options.cpp index 521882ab3be8c..0bda2a912e1a1 100644 --- a/lldb/source/Interpreter/Options.cpp +++ b/lldb/source/Interpreter/Options.cpp @@ -274,57 +274,8 @@ void Options::OutputFormattedUsageText(Stream &strm, } actual_text.append( ansi::FormatAnsiTerminalCodes(option_def.usage_text, use_color)); - const size_t visible_length = ansi::ColumnWidth(actual_text); - - // Will it all fit on one line? - if (static_cast<uint32_t>(visible_length + strm.GetIndentLevel()) < - output_max_columns) { - // Output it as a single line. - strm.Indent(actual_text); - strm.EOL(); - return; - } - // We need to break it up into multiple lines. We can do this based on the - // formatted text because we know that: - // * We only break lines on whitespace, therefore we will not break in the - // middle of a Unicode character or escape code. - // * Escape codes are so far not applied to multiple words, so there is no - // risk of breaking up a phrase and the escape code being incorrectly - // applied to the indent too. - - const int max_text_width = output_max_columns - strm.GetIndentLevel() - 1; - int start = 0; - int end = start; - const int final_end = visible_length; - - while (end < final_end) { - // Don't start the 'text' on a space, since we're already outputting the - // indentation. - while ((start < final_end) && (actual_text[start] == ' ')) - start++; - - end = start + max_text_width; - if (end > final_end) { - end = final_end; - } else { - // If we're not at the end of the text, make sure we break the line on - // white space. - while (end > start && actual_text[end] != ' ' && - actual_text[end] != '\t' && actual_text[end] != '\n') - end--; - } - - const int sub_len = end - start; - if (start != 0) - strm.EOL(); - strm.Indent(); - assert(start < final_end); - assert(start + sub_len <= final_end); - strm.PutCString(llvm::StringRef(actual_text.c_str() + start, sub_len)); - start = end + 1; - } - strm.EOL(); + ansi::OutputWordWrappedLines(strm, actual_text, output_max_columns); } bool Options::SupportsLongOption(const char *long_option) { diff --git a/lldb/unittests/Utility/AnsiTerminalTest.cpp b/lldb/unittests/Utility/AnsiTerminalTest.cpp index cef73ffaf9136..19c27de4cd886 100644 --- a/lldb/unittests/Utility/AnsiTerminalTest.cpp +++ b/lldb/unittests/Utility/AnsiTerminalTest.cpp @@ -9,6 +9,7 @@ #include "gtest/gtest.h" #include "lldb/Utility/AnsiTerminal.h" +#include "lldb/Utility/StreamString.h" using namespace lldb_private; @@ -116,3 +117,70 @@ TEST(AnsiTerminal, TrimAndPad) { EXPECT_EQ("12❤️4❤️", ansi::TrimAndPad("12❤️4❤️", 5)); EXPECT_EQ("12❤️45", ansi::TrimAndPad("12❤️45❤️", 5)); } + +static void TestLines(const std::string &input, int indent, + uint32_t output_max_columns, + const llvm::StringRef &expected) { + StreamString strm; + strm.SetIndentLevel(indent); + ansi::OutputWordWrappedLines(strm, input, output_max_columns); + EXPECT_EQ(expected, strm.GetString()); +} + +TEST(AnsiTerminal, OutputWordWrappedLines) { + TestLines("", 0, 0, "\n"); + TestLines("", 0, 1, "\n"); + TestLines("", 2, 1, "\n"); + + // FIXME: crashes + // TestLines("abc", 0, 1, "\n"); + // FIXME: crashes + // TestLines("abc", 0, 2, "\n"); + // FIXME: should be "ab\nc\n" + TestLines("abc", 0, 3, "\n\nc\n"); + TestLines("abc", 0, 4, "abc\n"); + // Indent is counted as using up columns. + TestLines("abc", 1, 5, " abc\n"); + // FIXME: This output is correctly indented but the content + // is mangled. + TestLines("abc", 2, 5, " \n \n c\n"); + + // FIXME: Should skip leading whitespace and result in "abc\n". + TestLines(" abc", 0, 4, "\n\nbc\n"); + + // FIXME: Should be "abc\ndef\n". + TestLines("abc def", 0, 4, "abc\n\nef\n"); + TestLines("abc def", 0, 5, "abc\ndef\n"); + // Length is 6, 7 required. Has to split at whitespace. + TestLines("abc def", 0, 6, "abc\ndef\n"); + // FIXME: This should split after abc, and not print + // more whitespace on the end of the line or the start + // of the new one. Resulting in "abc\ndef\n". + TestLines("abc def", 0, 6, "abc \ndef\n"); + + const char *fox_str = "The quick brown fox."; + TestLines(fox_str, 0, 30, "The quick brown fox.\n"); + TestLines(fox_str, 5, 30, " The quick brown fox.\n"); + TestLines(fox_str, 0, 15, "The quick\nbrown fox.\n"); + // FIXME: Trim the spaces off of the end of the first line. + TestLines("The quick brown fox.", 0, 15, + "The quick \nbrown fox.\n"); + + // As ANSI codes do not add to visible length, the results + // should be the same as the plain text verison. + const char *fox_str_ansi = "\x1B[4mT\x1B[0mhe quick brown fox."; + TestLines(fox_str_ansi, 0, 30, "\x1B[4mT\x1B[0mhe quick brown fox.\n"); + TestLines(fox_str_ansi, 5, 30, " \x1B[4mT\x1B[0mhe quick brown fox.\n"); + // FIXME: Account for ANSI codes not contributing to visible length. + TestLines(fox_str_ansi, 0, 15, "\x1B[4mT\x1B[0mhe\nquick br\n"); + + const std::string fox_str_emoji = "🦊 The quick brown fox. 🦊"; + TestLines(fox_str_emoji, 0, 30, "🦊 The quick brown fox. 🦊\n"); + // FIXME: This crashes when max columns is exactly 31. + // TestLines(fox_str_emoji, 5, 31, " 🦊 The quick brown fox. 🦊\n"); + TestLines(fox_str_emoji, 5, 32, " 🦊 The quick brown fox. 🦊\n"); + // FIXME: Final fox is missing. + TestLines(fox_str_emoji, 0, 15, "🦊 The quick\nbrown fox. \n"); + // FIXME: should not split the middle of an emoji. + TestLines("🦊🦊🦊 🦊🦊", 0, 5, "\n\n\n\n\n\n\n\n\xF0\x9F\xA6\n"); +} `````````` </details> https://github.com/llvm/llvm-project/pull/180947 _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
