This patch reimplements the analyzer's implementation of strcpy using
the region_model::scan_for_null_terminator infrastructure, so that e.g.
it can complain about out-of-bounds reads/writes, unterminated strings,
etc.

gcc/analyzer/ChangeLog:
        PR analyzer/105899
        * kf.cc (kf_strcpy::impl_call_pre): Reimplement using
        check_for_null_terminated_string_arg.
        * region-model.cc (region_model::get_store_bytes): Shortcut
        reading all of a string_region.
        (region_model::scan_for_null_terminator): Use get_store_value for
        the bytes rather than "unknown" when returning an unknown length.
        (region_model::write_bytes): New.
        * region-model.h (region_model::write_bytes): New decl.

gcc/testsuite/ChangeLog:
        PR analyzer/105899
        * gcc.dg/analyzer/out-of-bounds-diagram-16.c: New test.
        * gcc.dg/analyzer/strcpy-1.c: Add test coverage.
        * gcc.dg/analyzer/strcpy-3.c: Likewise.
        * gcc.dg/analyzer/strcpy-4.c: New test.
---
 gcc/analyzer/kf.cc                            | 32 +++++-------
 gcc/analyzer/region-model.cc                  | 32 ++++++++++--
 gcc/analyzer/region-model.h                   |  4 ++
 .../analyzer/out-of-bounds-diagram-16.c       | 31 +++++++++++
 gcc/testsuite/gcc.dg/analyzer/strcpy-1.c      | 22 ++++++++
 gcc/testsuite/gcc.dg/analyzer/strcpy-3.c      |  1 +
 gcc/testsuite/gcc.dg/analyzer/strcpy-4.c      | 51 +++++++++++++++++++
 7 files changed, 150 insertions(+), 23 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-16.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/strcpy-4.c

diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index 59f46bab581c..6b33cd159dac 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -1135,29 +1135,25 @@ void
 kf_strcpy::impl_call_pre (const call_details &cd) const
 {
   region_model *model = cd.get_model ();
-  region_model_manager *mgr = cd.get_manager ();
+  region_model_context *ctxt = cd.get_ctxt ();
 
   const svalue *dest_sval = cd.get_arg_svalue (0);
   const region *dest_reg = model->deref_rvalue (dest_sval, cd.get_arg_tree (0),
-                                        cd.get_ctxt ());
-  const svalue *src_sval = cd.get_arg_svalue (1);
-  const region *src_reg = model->deref_rvalue (src_sval, cd.get_arg_tree (1),
-                                       cd.get_ctxt ());
-  const svalue *src_contents_sval = model->get_store_value (src_reg,
-                                                           cd.get_ctxt ());
-  cd.check_for_null_terminated_string_arg (1);
-
+                                                   ctxt);
+  /* strcpy returns the initial param.  */
   cd.maybe_set_lhs (dest_sval);
 
-  /* Try to get the string size if SRC_REG is a string_region.  */
-  const svalue *copied_bytes_sval = model->get_string_size (src_reg);
-  /* Otherwise, check if the contents of SRC_REG is a string.  */
-  if (copied_bytes_sval->get_kind () == SK_UNKNOWN)
-    copied_bytes_sval = model->get_string_size (src_contents_sval);
-
-  const region *sized_dest_reg
-    = mgr->get_sized_region (dest_reg, NULL_TREE, copied_bytes_sval);
-  model->set_value (sized_dest_reg, src_contents_sval, cd.get_ctxt ());
+  const svalue *bytes_to_copy;
+  if (const svalue *num_bytes_read_sval
+       = cd.check_for_null_terminated_string_arg (1, &bytes_to_copy))
+    {
+      model->write_bytes (dest_reg, num_bytes_read_sval, bytes_to_copy, ctxt);
+    }
+  else
+    {
+      if (cd.get_ctxt ())
+       cd.get_ctxt ()->terminate_path ();
+    }
 }
 
 /* Handler for "strdup" and "__builtin_strdup".  */
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 7a2f81f36e0f..cc8d895d9665 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -3460,6 +3460,13 @@ region_model::get_store_bytes (const region *base_reg,
                               const byte_range &bytes,
                               region_model_context *ctxt) const
 {
+  /* Shortcut reading all of a string_region.  */
+  if (bytes.get_start_byte_offset () == 0)
+    if (const string_region *string_reg = base_reg->dyn_cast_string_region ())
+      if (bytes.m_size_in_bytes
+         == TREE_STRING_LENGTH (string_reg->get_string_cst ()))
+       return m_mgr->get_or_create_initial_value (base_reg);
+
   const svalue *index_sval
     = m_mgr->get_or_create_int_cst (size_type_node,
                                    bytes.get_start_byte_offset ());
@@ -3533,14 +3540,14 @@ region_model::scan_for_null_terminator (const region 
*reg,
   if (offset.symbolic_p ())
     {
       if (out_sval)
-       *out_sval = m_mgr->get_or_create_unknown_svalue (NULL_TREE);
+       *out_sval = get_store_value (reg, nullptr);
       return m_mgr->get_or_create_unknown_svalue (size_type_node);
     }
   byte_offset_t src_byte_offset;
   if (!offset.get_concrete_byte_offset (&src_byte_offset))
     {
       if (out_sval)
-       *out_sval = m_mgr->get_or_create_unknown_svalue (NULL_TREE);
+       *out_sval = get_store_value (reg, nullptr);
       return m_mgr->get_or_create_unknown_svalue (size_type_node);
     }
   const byte_offset_t initial_src_byte_offset = src_byte_offset;
@@ -3582,7 +3589,7 @@ region_model::scan_for_null_terminator (const region *reg,
          if (is_terminated.is_unknown ())
            {
              if (out_sval)
-               *out_sval = m_mgr->get_or_create_unknown_svalue (NULL_TREE);
+               *out_sval = get_store_value (reg, nullptr);
              return m_mgr->get_or_create_unknown_svalue (size_type_node);
            }
 
@@ -3621,7 +3628,7 @@ region_model::scan_for_null_terminator (const region *reg,
   if (c.has_symbolic_bindings_p ())
     {
       if (out_sval)
-       *out_sval = m_mgr->get_or_create_unknown_svalue (NULL_TREE);
+       *out_sval = get_store_value (reg, nullptr);
       return m_mgr->get_or_create_unknown_svalue (size_type_node);
     }
 
@@ -3638,7 +3645,7 @@ region_model::scan_for_null_terminator (const region *reg,
   if (base_reg->can_have_initial_svalue_p ())
     {
       if (out_sval)
-       *out_sval = m_mgr->get_or_create_unknown_svalue (NULL_TREE);
+       *out_sval = get_store_value (reg, nullptr);
       return m_mgr->get_or_create_unknown_svalue (size_type_node);
     }
   else
@@ -3801,6 +3808,21 @@ region_model::zero_fill_region (const region *reg)
   m_store.zero_fill_region (m_mgr->get_store_manager(), reg);
 }
 
+/* Copy NUM_BYTES_SVAL of SVAL to DEST_REG.
+   Use CTXT to report any warnings associated with the copy
+   (e.g. out-of-bounds writes).  */
+
+void
+region_model::write_bytes (const region *dest_reg,
+                          const svalue *num_bytes_sval,
+                          const svalue *sval,
+                          region_model_context *ctxt)
+{
+  const region *sized_dest_reg
+    = m_mgr->get_sized_region (dest_reg, NULL_TREE, num_bytes_sval);
+  set_value (sized_dest_reg, sval, ctxt);
+}
+
 /* Mark REG as having unknown content.  */
 
 void
diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index 3979bf124783..9c6e60bbe824 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -367,6 +367,10 @@ class region_model
   void purge_region (const region *reg);
   void fill_region (const region *reg, const svalue *sval);
   void zero_fill_region (const region *reg);
+  void write_bytes (const region *dest_reg,
+                   const svalue *num_bytes_sval,
+                   const svalue *sval,
+                   region_model_context *ctxt);
   void mark_region_as_unknown (const region *reg, uncertainty_t *uncertainty);
 
   tristate eval_condition (const svalue *lhs,
diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-16.c 
b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-16.c
new file mode 100644
index 000000000000..b0fb409267ea
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-16.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */
+
+#include <string.h>
+#include "analyzer-decls.h"
+
+char *test_fixed_size_heap_2_invalid (void)
+{
+  char str[] = "abc";
+  char *p = __builtin_malloc (strlen (str)); /* { dg-message "\\(1\\) 
capacity: 3 bytes" } */
+  if (!p)
+    return NULL;
+  strcpy (p, str); /* { dg-warning "heap-based buffer overflow" } */
+  return p;
+}
+
+/* { dg-begin-multiline-output "" }
+  ┌──────────────────────────────────────────────────────────────────────┐
+  │                           write of 4 bytes                           │
+  └──────────────────────────────────────────────────────────────────────┘
+                            │                                   │
+                            │                                   │
+                            v                                   v
+  ┌───────────────────────────────────────────────────┐┌─────────────────┐
+  │          buffer allocated on heap at (1)          ││after valid range│
+  └───────────────────────────────────────────────────┘└─────────────────┘
+  ├─────────────────────────┬─────────────────────────┤├────────┬────────┤
+                            │                                   │
+                   ╭────────┴────────╮                ╭─────────┴────────╮
+                   │capacity: 3 bytes│                │overflow of 1 byte│
+                   ╰─────────────────╯                ╰──────────────────╯
+   { dg-end-multiline-output "" } */
diff --git a/gcc/testsuite/gcc.dg/analyzer/strcpy-1.c 
b/gcc/testsuite/gcc.dg/analyzer/strcpy-1.c
index d21e77175119..30341061f4cc 100644
--- a/gcc/testsuite/gcc.dg/analyzer/strcpy-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/strcpy-1.c
@@ -30,3 +30,25 @@ char *test_uninitialized (char *dst)
   return strcpy (dst, buf); /* { dg-warning "use of uninitialized value 
'buf\\\[0\\\]'" } */
   /* { dg-message "while looking for null terminator for argument 2 
\\('&buf'\\) of 'strcpy'..." "event" { target *-*-* } .-1 } */
 }
+
+extern void external_fn (void *ptr);
+
+char *test_external_fn (void)
+{
+  char src[10];
+  char dst[10];
+  external_fn (src);
+  strcpy (dst, src);
+  __analyzer_eval (strlen (dst) == strlen (src)); /* { dg-warning "UNKNOWN" } 
*/
+  // TODO: ideally would be TRUE  
+}
+
+void test_sprintf_strcpy (const char *a, const char *b)
+{
+  char buf_1[10];
+  char buf_2[10];
+  __builtin_sprintf (buf_1, "%s/%s", a, b);
+  strcpy (buf_2, buf_1);
+  __analyzer_eval (strlen (buf_1) == strlen (buf_2)); /* { dg-warning 
"UNKNOWN" } */
+  // TODO: ideally would be TRUE  
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/strcpy-3.c 
b/gcc/testsuite/gcc.dg/analyzer/strcpy-3.c
index a38f9a7641fe..abb49bc39f27 100644
--- a/gcc/testsuite/gcc.dg/analyzer/strcpy-3.c
+++ b/gcc/testsuite/gcc.dg/analyzer/strcpy-3.c
@@ -20,4 +20,5 @@ void test_1 (void)
   __analyzer_eval (result[3] == 'l'); /* { dg-warning "TRUE" } */
   __analyzer_eval (result[4] == 'o'); /* { dg-warning "TRUE" } */
   __analyzer_eval (result[5] == 0); /* { dg-warning "TRUE" } */
+  __analyzer_eval (strlen (result) == 5); /* { dg-warning "TRUE" } */
 }
diff --git a/gcc/testsuite/gcc.dg/analyzer/strcpy-4.c 
b/gcc/testsuite/gcc.dg/analyzer/strcpy-4.c
new file mode 100644
index 000000000000..435a4cadee9d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/strcpy-4.c
@@ -0,0 +1,51 @@
+/* { dg-additional-options "-Wno-stringop-overflow" } */
+
+#include <string.h>
+#include "analyzer-decls.h"
+
+void
+test_fixed_size_stack_1 (void)
+{
+  char buf[3];
+  strcpy (buf, "abc"); /* { dg-warning "stack-based buffer overflow" } */
+}
+
+char *test_fixed_size_heap_1 (void)
+{
+  char str[] = "abc";
+  char *p = __builtin_malloc (3);
+  if (!p)
+    return NULL;
+  strcpy (p, str); /* { dg-warning "heap-based buffer overflow" } */
+  return p;
+}
+
+char *test_fixed_size_heap_2_invalid (void)
+{
+  char str[] = "abc";
+  char *p = __builtin_malloc (strlen (str));
+  if (!p)
+    return NULL;
+  strcpy (p, str); /* { dg-warning "heap-based buffer overflow" } */
+  return p;
+}
+
+char *test_fixed_size_heap_2_valid (void)
+{
+  char str[] = "abc";
+  char *p = __builtin_malloc (strlen (str) + 1);
+  if (!p)
+    return NULL;
+  strcpy (p, str); /* { dg-bogus "" } */
+  __analyzer_eval (strlen (p) == 3); /* { dg-warning "TRUE" } */
+  return p;
+}
+
+char *test_dynamic_size_heap_1 (const char *src)
+{
+  char *p = __builtin_malloc (strlen (src));
+  if (!p)
+    return NULL;
+  strcpy (p, src); // TODO: write of null terminator is oob
+  return p;
+}
-- 
2.26.3

Reply via email to