On 8/6/25 9:26 AM, David Faust wrote:
CTF array encoding uses uint32 for number of elements.  This means there
is a hard upper limit on array types which the format can represent.


Hi David,

Thanks for the patch.

A few nits, but otherwise the patch looks good. As you noted in the bugzilla, further changes will be needed to fix the ctf_die_bitsize () return type and its consumers.

GCC internally was also using a uint32_t for this, which would overflow
when translating from DWARF for arrays with more with more than

typo: "with more with more"

UINT32_MAX elements.  Use an unsigned HOST_WIDE_INT instead to fetch
the array bound, and fall back to CTF_K_UNKNOWN if the array cannot
be represented in CTF.

Tested on x86_64-pc-linux-gnu.

        PR debug/121411

gcc/

        * dwarf2ctf.cc (gen_ctf_subrange_type): Use unsigned HWI for
        array_num_elements.  Fallback to CTF_K_UNKNOWN if the array
        type has too many elements for CTF to represent.

gcc/testsuite/

        * gcc.dg/debug/ctf/ctf-array-7.c: New test.
---
  gcc/dwarf2ctf.cc                             | 12 +++++++---
  gcc/testsuite/gcc.dg/debug/ctf/ctf-array-7.c | 23 ++++++++++++++++++++
  2 files changed, 32 insertions(+), 3 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-array-7.c

diff --git a/gcc/dwarf2ctf.cc b/gcc/dwarf2ctf.cc
index 7de3696a4d7..b0ad18a556c 100644
--- a/gcc/dwarf2ctf.cc
+++ b/gcc/dwarf2ctf.cc
@@ -361,7 +361,7 @@ gen_ctf_subrange_type (ctf_container_ref ctfc, ctf_dtdef_ref array_elems_type,
dw_attr_node *upper_bound_at;
    dw_die_ref array_index_type;
-  uint32_t array_num_elements;
+  unsigned HOST_WIDE_INT array_num_elements;
if (dw_get_die_tag (c) == DW_TAG_subrange_type)
      {
@@ -376,9 +376,9 @@ gen_ctf_subrange_type (ctf_container_ref ctfc, ctf_dtdef_ref array_elems_type,
        if (upper_bound_at
          && AT_class (upper_bound_at) == dw_val_class_unsigned_const)
        /* This is the upper bound index.  */
-       array_num_elements = get_AT_unsigned (c, DW_AT_upper_bound) + 1;
+       array_num_elements = AT_unsigned (get_AT (c, DW_AT_upper_bound)) + 1;
        else if (get_AT (c, DW_AT_count))
-       array_num_elements = get_AT_unsigned (c, DW_AT_count);
+       array_num_elements = AT_unsigned (get_AT (c, DW_AT_count));
        else
        {
          /* This is a VLA of some kind.  */
@@ -388,6 +388,12 @@ gen_ctf_subrange_type (ctf_container_ref ctfc, ctf_dtdef_ref array_elems_type,
    else
      gcc_unreachable ();
+ if (array_num_elements > UINT32_MAX)
+    {
+      /* The array cannot be encoded in CTF.  */
+      return gen_ctf_unknown_type (ctfc);
+    }
+

Would it be possible to annotate this with "TBD_CTF_REPRESENTATION_LIMIT" in the comment?

    /* Ok, mount and register the array type.  Note how the array
       type we register here is the type of the elements in
       subsequent "dimensions", if there are any.  */
diff --git a/gcc/testsuite/gcc.dg/debug/ctf/ctf-array-7.c b/gcc/testsuite/gcc.dg/debug/ctf/ctf-array-7.c
new file mode 100644
index 00000000000..01accc7c18f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/debug/ctf/ctf-array-7.c
@@ -0,0 +1,23 @@
+/* CTF generation for array which cannot be encoded in CTF.
+
+   CTF encoding uses a uint32 for number of elements in an array which
+   means there is a hard upper limit on sizes of arrays which can be
+   represented.  Arrays with too many elements are encoded with
+   CTF_K_UNKNOWN to indicate that they cannot be represented.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O0 -gctf -dA" } */
+
+int   rep[0xffffffff];
+int unrep[0x100000000];
+
+/* One dimension can be represented, other cannot.
+   Result is a (representable) array with unknown element type.  */
+int unrepdim [0xab][0x100000007];
+
+/* Two CTF_K_ARRAY, one (shared) CTF_K_UNKNOWN.  */
+/* { dg-final { scan-assembler-times "0x12000000\[\t \]+\[^\n\]*ctt_info" 2 } } */
+/* { dg-final { scan-assembler-times "0x2000000\[\t \]+\[^\n\]*ctt_info" 1 } } */
+
+/* { dg-final { scan-assembler-times "\[\t \]+0xffffffff\[\t \]+\[^\n\]*cta_nelems" 1 } } */
+/* { dg-final { scan-assembler-times "\[\t \]+0xab\[\t \]+\[^\n\]*cta_nelems" 1 } } */

Reply via email to