Whilst investigating PR preprocessor/78324 I noticed that the
substring location code currently doesn't handle raw strings
correctly: it fails to skip the 'R', opening quote, delimiter
and opening parenthesis.

For example, an attempt to underline chars 4-7 with caret at 6 of
this raw string yields this erroneous output:
   __emit_string_literal_range (R"foo(0123456789)foo",
                                    ~~^~

With the patch, the correct range/caret is printed:

   __emit_string_literal_range (R"foo(0123456789)foo",
                                          ~~^~

Successfully bootstrapped&regrtested on x86_64-pc-linux-gnu.

Committed to trunk as r242552.

gcc/ChangeLog:
        * input.c
        (selftest::test_lexer_string_locations_raw_string_one_line): New
        function.
        (selftest::test_lexer_string_locations_raw_string_multiline): New
        function.
        (selftest::input_c_tests): Call the new functions, via
        for_each_line_table_case.

gcc/testsuite/ChangeLog:
        * gcc.dg/plugin/diagnostic-test-string-literals-1.c
        (test_raw_string_one_liner): New function.
        (test_raw_string_multiline): New function.

libcpp/ChangeLog:
        * charset.c (cpp_interpret_string_1): Skip locations from
        loc_reader when advancing 'p' when handling raw strings.
---
 gcc/input.c                                        | 74 ++++++++++++++++++++++
 .../plugin/diagnostic-test-string-literals-1.c     | 33 ++++++++++
 libcpp/charset.c                                   | 13 +++-
 3 files changed, 119 insertions(+), 1 deletion(-)

diff --git a/gcc/input.c b/gcc/input.c
index c2042e8..728f4dd 100644
--- a/gcc/input.c
+++ b/gcc/input.c
@@ -3156,6 +3156,78 @@ test_lexer_string_locations_long_line (const line_table_case &case_)
                          i, 2, 7 + i, 7 + i);
 }
 
+/* Test of locations within a raw string that doesn't contain a newline.  */
+
+static void
+test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
+{
+  /* .....................00.0000000111111111122.
+     .....................12.3456789012345678901.  */
+  const char *content = ("R\"foo(0123456789)foo\"\n");
+  lexer_test test (case_, content, NULL);
+
+  /* Verify that we get the expected token back.  */
+  const cpp_token *tok = test.get_token ();
+  ASSERT_EQ (tok->type, CPP_STRING);
+
+  /* Verify that cpp_interpret_string works.  */
+  cpp_string dst_string;
+  const enum cpp_ttype type = CPP_STRING;
+  bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
+                                     &dst_string, type);
+  ASSERT_TRUE (result);
+  ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
+  free (const_cast <unsigned char *> (dst_string.text));
+
+  if (!should_have_column_data_p (line_table->highest_location))
+    return;
+
+  /* 0-9, plus the nil terminator.  */
+  ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
+  for (int i = 0; i < 11; i++)
+    ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
+                         i, 1, 7 + i, 7 + i);
+}
+
+/* Test of locations within a raw string that contains a newline.  */
+
+static void
+test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
+{
+  /* .....................00.0000.
+     .....................12.3456.  */
+  const char *content = ("R\"foo(\n"
+  /* .....................00000.
+     .....................12345.  */
+                        "hello\n"
+                        "world\n"
+  /* .....................00000.
+     .....................12345.  */
+                        ")foo\"\n");
+  lexer_test test (case_, content, NULL);
+
+  /* Verify that we get the expected token back.  */
+  const cpp_token *tok = test.get_token ();
+  ASSERT_EQ (tok->type, CPP_STRING);
+
+  /* Verify that cpp_interpret_string works.  */
+  cpp_string dst_string;
+  const enum cpp_ttype type = CPP_STRING;
+  bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
+                                     &dst_string, type);
+  ASSERT_TRUE (result);
+  ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
+  free (const_cast <unsigned char *> (dst_string.text));
+
+  if (!should_have_column_data_p (line_table->highest_location))
+    return;
+
+  /* Currently we don't support locations within raw strings that
+     contain newlines.  */
+  ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
+                                 "range endpoints are on different lines");
+}
+
 /* Test of lexing char constants.  */
 
 static void
@@ -3297,6 +3369,8 @@ input_c_tests ()
   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
   for_each_line_table_case (test_lexer_string_locations_non_string);
   for_each_line_table_case (test_lexer_string_locations_long_line);
+  for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
+  for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
   for_each_line_table_case (test_lexer_char_constants);
 
   test_reading_source_line ();
diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c
index 3d5ff6d..3e44936 100644
--- a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c
+++ b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c
@@ -194,6 +194,39 @@ test_L (void)
 }
 
 void
+test_raw_string_one_liner (void)
+{
+  /* Digits 0-9.  */
+  __emit_string_literal_range (R"foo(0123456789)foo", /* { dg-warning "range" } */
+                              6, 4, 7);
+/* { dg-begin-multiline-output "" }
+   __emit_string_literal_range (R"foo(0123456789)foo",
+                                          ~~^~
+   { dg-end-multiline-output "" } */
+}
+
+void
+test_raw_string_multiline (void)
+{
+  __emit_string_literal_range (R"foo(
+hello
+world
+)foo",
+                              6, 4, 7);
+  /* { dg-error "unable to read substring location: range endpoints are on different lines" "" { target *-*-* } .-5 } */
+  /* { dg-begin-multiline-output "" }
+   __emit_string_literal_range (R"foo(
+                                ^~~~~~
+ hello
+ ~~~~~                           
+ world
+ ~~~~~                           
+ )foo",
+ ~~~~~                           
+   { dg-end-multiline-output "" } */
+}
+
+void
 test_macro (void)
 {
 #define START "01234"  /* { dg-warning "range" } */
diff --git a/libcpp/charset.c b/libcpp/charset.c
index e77270a..9cd1e10 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -1564,10 +1564,21 @@ cpp_interpret_string_1 (cpp_reader *pfile, const cpp_string *from, size_t count,
 
          /* Skip over 'R"'.  */
          p += 2;
+         if (loc_reader)
+           {
+             loc_reader->get_next ();
+             loc_reader->get_next ();
+           }
          prefix = p;
          while (*p != '(')
-           p++;
+           {
+             p++;
+             if (loc_reader)
+               loc_reader->get_next ();
+           }
          p++;
+         if (loc_reader)
+           loc_reader->get_next ();
          limit = from[i].text + from[i].len;
          if (limit >= p + (p - prefix) + 1)
            limit -= (p - prefix) + 1;
-- 
1.8.5.3

Reply via email to