The attached patch has been bootstrapped and regression tested. It addresses
PR analyzer/98797, which was opened for the expected failures in
gcc/testsuite/gcc.dg/analyzer/casts-1.c. It fixes those failures as well as
additional manifestations of the same bug. That testsuite file has been
updated to no longer expect failures and to include additional tests for the
other manifestations I found.

Some additional work can probably be done to handle further cases, but this
is substantially more robust than the current behavior.

Thanks.
commit f3654c5a322f241c8cc551f88257a495689ed9d9
Author: Brian Sobulefsky <brian.sobulef...@protonmail.com>
Date:   Tue Feb 9 16:25:43 2021 -0800

    Address the bug found in test file gcc/testsuite/gcc.dg/analyzer/casts-1.c,
    as recorded by the XFAIL, and some simpler and more complex versions found
    during the investigation of it.  PR analyzer/98797 was opened for these
    bugs.
    
    The simplest manifestation of this bug can be replicated with:
    
    char arr[] = {'A', 'B', 'C', 'D'};
    char *pt = arr;
    __analyzer_eval(*(pt + 0) == 'A');
    __analyzer_eval(*(pt + 1) == 'B');
    //etc
    
    The result of the eval calls is "UNKNOWN" because the analyzer is unable
    to determine the value at the pointer.  Note that array accesses (such as
    arr[0]) are handled correctly.  The code responsible for this is in
    region-model-manager.cc, in region_model_manager::maybe_fold_sub_svalue.
    The relevant section is commented /* Handle getting individual chars from
    a STRING_CST */.  This section only had a case for an element_region; a
    case needed to be added for an offset_region.
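
    In outline, the added offset_region case mirrors the existing
    element_region case, keyed off the constant byte offset (the exact code
    is in the region-model-manager.cc hunk below):

    else if (const offset_region *offset_reg
          = subregion->dyn_cast_offset_region ())
      {
        const svalue *offset_sval = offset_reg->get_byte_offset ();
        if (tree cst_offset = offset_sval->maybe_get_constant ())
          if (const svalue *char_sval
              = maybe_get_char_from_string_cst (cst, cst_offset))
            return get_or_create_cast (type, char_sval);
      }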
    
    Additionally, when the offset was 0, as in *pt or *(pt + 0), the function
    region_model_manager::get_offset_region was failing to make the needed
    offset region at all.  This was due to the check for a constant zero
    offset, which returned get_cast_region instead.  A special case is needed
    for when PARENT has an array type whose element type matches TYPE; in
    that case get_offset_region is allowed to continue to its normal
    conclusion.
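
    In outline (the hunk below also normalizes BYTE_OFFSET to an
    integer_type_node constant in the array case):

    if (tree cst_offset = byte_offset->maybe_get_constant ())
      {
        /* PARENT is an array whose element type matches TYPE: keep
           building a normal offset_region rather than a cast_region.  */
        if (parent->get_type ()
            && TREE_CODE (parent->get_type ()) == ARRAY_TYPE
            && TREE_TYPE (parent->get_type ()) == type)
          ;
        else if (zerop (cst_offset))
          return get_cast_region (parent, type);
      }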
    
    The original bug noted in gcc/testsuite/gcc.dg/analyzer/casts-1.c was for
    the case:
    
    struct s1
    {
      char a;
      char b;
      char c;
      char d;
    };
    
    struct s2
    {
      char arr[4];
    };
    
    struct s2 x = {{'A', 'B', 'C', 'D'}};
    struct s1 *p = (struct s1 *)&x;
    __analyzer_eval (p->a == 'A');
    //etc
    
    This requires adding a case for a field_region to the same section of
    region_model_manager::maybe_fold_sub_svalue that gets individual
    characters from a string constant.
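
    In outline, the field_region case converts the field's constant bit
    offset to a byte offset and reuses maybe_get_char_from_string_cst; the
    exact code, including the byte-boundary check, is in the hunk below:

    else if (const field_region *field_reg
          = subregion->dyn_cast_field_region ())
      {
        region_offset field_offset = field_reg->get_offset ();
        if (!field_offset.symbolic_p ())
          {
            /* Convert the field's bit offset to a byte offset (when it
               falls on a byte boundary) and look up the character.  */
            ...
          }
      }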
    
    Finally, the above only works when struct s2 is a single-field struct.
    The more general case is:
    
    struct s2
    {
      char arr1[2];
      char arr2[2];
    };
    
    struct s2 x = {{'A', 'B'}, {'C', 'D'}};
    struct s1 *p = (struct s1 *)&x;
    __analyzer_eval (p->a == 'A');
    //etc
    
    Here the array will never be found by the get_any_binding routines.  A
    new routine is added to class binding_cluster that checks whether the
    region being searched is a field_region of a cast_region and, if so,
    tries to find a field of the original region that contains the region
    under examination.  This new function is named
    binding_cluster::get_parent_cast_binding.  It is called from
    get_binding_recursive.
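
    In outline (condensed; the full implementation, including the bit-offset
    arithmetic, is in the store.cc hunk below):

    const svalue *
    binding_cluster::get_parent_cast_binding (store_manager *mgr,
                                              const region *reg,
                                              enum binding_kind kind) const
    {
      /* Only handle a field_region whose parent is a cast_region.  */
      const region *parent_reg = reg->get_parent_region ();
      const cast_region *parent_cast
        = parent_reg ? parent_reg->dyn_cast_cast_region () : NULL;
      const field_region *field_reg = reg->dyn_cast_field_region ();
      if (!parent_cast || !field_reg)
        return NULL;

      /* Walk TYPE_FIELDS of the original region's RECORD_TYPE for a field
         whose bit range entirely covers REG, recurse via
         get_binding_recursive on that field, and return the sub-svalue for
         the covered element of a byte-aligned char array.  */
      ...
    }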
    
    gcc/analyzer/ChangeLog:
            PR analyzer/98797
            * region-model-manager.cc
            (region_model_manager::get_offset_region): Add check for a PARENT
            array whose type matches TYPE, and in that case skip returning
            get_cast_region and conclude the function normally.
            (region_model_manager::maybe_fold_sub_svalue): Update the section
            getting a character from a STRING_CST to include cases for an
            offset_region and a field_region.
            * store.cc (binding_cluster::get_binding_recursive): Add call to
            new function get_parent_cast_binding.
            (binding_cluster::get_parent_cast_binding): New function.
            * store.h (class binding_cluster): Add declaration for new member
            function get_parent_cast_binding and macros for bit to byte
            conversion.
    
    gcc/testsuite/ChangeLog:
            PR analyzer/98797
            * gcc.dg/analyzer/casts-1.c: Update file to no longer expect
            failures and add test cases for additional bugs solved.

diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc
index dfd2413e914..9aee4c498c6 100644
--- a/gcc/analyzer/region-model-manager.cc
+++ b/gcc/analyzer/region-model-manager.cc
@@ -602,16 +602,46 @@ region_model_manager::maybe_fold_sub_svalue (tree type,
   /* Handle getting individual chars from a STRING_CST.  */
   if (tree cst = parent_svalue->maybe_get_constant ())
     if (TREE_CODE (cst) == STRING_CST)
-      if (const element_region *element_reg
+      {
+       if (const element_region *element_reg
            = subregion->dyn_cast_element_region ())
-       {
-         const svalue *idx_sval = element_reg->get_index ();
-         if (tree cst_idx = idx_sval->maybe_get_constant ())
-           if (const svalue *char_sval
-               = maybe_get_char_from_string_cst (cst, cst_idx))
-             return get_or_create_cast (type, char_sval);
-       }
-
+         {
+           const svalue *idx_sval = element_reg->get_index ();
+           if (tree cst_idx = idx_sval->maybe_get_constant ())
+             if (const svalue *char_sval
+                 = maybe_get_char_from_string_cst (cst, cst_idx))
+               return get_or_create_cast (type, char_sval);
+         }
+       else if (const offset_region *offset_reg
+             = subregion->dyn_cast_offset_region ())
+         {
+           const svalue *offset_sval = offset_reg->get_byte_offset ();
+           if (tree cst_offset = offset_sval->maybe_get_constant ())
+             if (const svalue *char_sval
+                 = maybe_get_char_from_string_cst (cst, cst_offset))
+               return get_or_create_cast (type, char_sval);
+         }
+       else if (const field_region *field_reg
+             = subregion->dyn_cast_field_region ())
+         {
+           region_offset field_offset = field_reg->get_offset ();
+           if (!field_offset.symbolic_p ())
+             {
+               bit_offset_t field_bit_offset = field_offset.get_bit_offset ();
+               HOST_WIDE_INT field_offset_bits
+                   = field_bit_offset.get_val ()[0];
+               if (!(field_offset_bits & BYTE_BOUNDARY_MASK))
+                 {
+                   tree cst_offset
+                      = build_int_cst (integer_type_node,
+                                       field_offset_bits >> BYTE_BIT_CONVERT);
+                   if (const svalue *char_sval
+                       = maybe_get_char_from_string_cst (cst, cst_offset))
+                     return get_or_create_cast (type, char_sval);
+                 }
+             }
+         }
+      }
   /* SUB(INIT(r)).FIELD -> INIT(r.FIELD)
      i.e.
      Subvalue(InitialValue(R1), FieldRegion(R2, F))
@@ -867,8 +897,19 @@ region_model_manager::get_offset_region (const region *parent,
 {
   /* If BYTE_OFFSET is zero, return PARENT.  */
   if (tree cst_offset = byte_offset->maybe_get_constant ())
-    if (zerop (cst_offset))
-      return get_cast_region (parent, type);
+    {
+      /* Special case: PARENT is an array whose element type matches TYPE.  */
+      if (parent->get_type ()
+          && TREE_CODE (parent->get_type ()) == ARRAY_TYPE
+          && TREE_TYPE (parent->get_type ()) == type)
+       {
+         tree offset_int
+             = build_int_cst (integer_type_node, cst_offset->int_cst.val[0]);
+         byte_offset = get_or_create_constant_svalue (offset_int);
+       }
+      else if (zerop (cst_offset))
+       return get_cast_region (parent, type);
+    }
 
   /* Fold OFFSET_REGION(OFFSET_REGION(REG, X), Y)
      to   OFFSET_REGION(REG, (X + Y)).  */
diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc
index abdb336da91..3afba55ce90 100644
--- a/gcc/analyzer/store.cc
+++ b/gcc/analyzer/store.cc
@@ -996,14 +996,129 @@ binding_cluster::get_binding_recursive (store_manager *mgr,
     return sval;
   if (reg != m_base_region)
     if (const region *parent_reg = reg->get_parent_region ())
-      if (const svalue *parent_sval
-         = get_binding_recursive (mgr, parent_reg, kind))
+      {
+       if (const svalue *parent_sval
+           = get_binding_recursive (mgr, parent_reg, kind))
+         {
+           /* Extract child svalue from parent svalue.  */
+           region_model_manager *rmm_mgr = mgr->get_svalue_manager ();
+           return rmm_mgr->get_or_create_sub_svalue (reg->get_type (),
+                                                     parent_sval, reg);
+         }
+       else if (const svalue *parent_cast_field_sval
+           = get_parent_cast_binding (mgr, reg, kind))
+         {
+           /* PARENT_REG is a cast_region and we found a covering binding
+              in the original_region */
+           return parent_cast_field_sval;
+         }
+      }
+  return NULL;
+}
+
+/* Get a binding for access to a field of a cast_region.
+   Note the original motivation for this was access of the form:
+   struct s1 x = {{'A', 'B'}, {'C', 'D'}}; struct s2 *p = (struct s2 *) &x;
+   __analyzer_eval (p->a == 'A');
+   et al. for the other fields.  get_binding_recursive fails because it is
+   unidirectional (from the field_region p->a up the chain of parents).  The
+   routine can probably be expanded, and even further broken out, as other
+   cases are discovered and understood.  */
+const svalue *
+binding_cluster::get_parent_cast_binding (store_manager *mgr,
+                                         const region *reg,
+                                         enum binding_kind kind) const
+{
+
+  /* If we are sure this will never be called incorrectly, we can remove */
+  if (!(mgr && reg))
+    return NULL;
+
+  const region *parent_reg = reg->get_parent_region ();
+
+  /* If it is impossible for a region's parent to be NULL, remove guard */
+  if (!parent_reg)
+    return NULL;
+
+  const cast_region *parent_cast = parent_reg->dyn_cast_cast_region ();
+  const field_region *field_reg = reg->dyn_cast_field_region ();
+
+  /* The first two guards are just safety, this is the real condition */
+  if (!(parent_cast && field_reg))
+    return NULL;
+
+  const region *original_reg = parent_cast->get_original_region ();
+  region_offset reg_offset = field_reg->get_offset ();
+  bit_size_t reg_bit_size;
+  tree original_tree = original_reg->get_type ();
+  tree original_field;
+
+  /* Note assignment to ORIGINAL_FIELD in compound test to
+     save some indentation space (80 comes quickly) */
+  if (original_tree && TREE_CODE(original_tree) == RECORD_TYPE
+      && (original_field = TYPE_FIELDS(original_tree))
+      && !reg_offset.symbolic_p() && field_reg->get_bit_size(&reg_bit_size))
+    {
+
+      bit_offset_t reg_bit_offset = reg_offset.get_bit_offset ();
+      HOST_WIDE_INT start_offset = reg_bit_offset.get_val ()[0];
+      HOST_WIDE_INT end_offset = start_offset + reg_bit_size.get_val ()[0] - 1;
+      tree covering_field = NULL_TREE;
+
+      /* get_field_at_bit_offset in region.cc has a test for RECORD_TYPE, maybe
+        this should be here too? The test is probably moot, since we have to
+        fully cover and survive the get_binding_recursive below. Also
+        consider making the function in region.cc usable outside the file? */
+      for (covering_field = original_field;
+           (original_field = TREE_CHAIN (original_field));)
+       {
+         if (original_field->field_decl.bit_offset->int_cst.val[0]
+             <= start_offset)
+           covering_field = original_field;
+         else
+           break; /* Is the list guaranteed to be in order? */
+       }
+
+      if (covering_field && end_offset
+         <= covering_field->field_decl.bit_offset->int_cst.val[0]
+         + covering_field->decl_common.size->int_cst.val[0] - 1)
        {
-         /* Extract child svalue from parent svalue.  */
+         /* We found a field that entirely covers REG.  The following code
+            should probably be generalized to more cases; as of now it will
+            basically handle the case where a char array has been initialized
+            into the original struct type.  Further work might be to handle
+            when a field of a struct is itself a struct, likely recursively.  */
+
          region_model_manager *rmm_mgr = mgr->get_svalue_manager ();
-         return rmm_mgr->get_or_create_sub_svalue (reg->get_type (),
-                                                   parent_sval, reg);
+         const region *covering_field_reg = rmm_mgr
+             ->get_field_region (original_reg, covering_field);
+
+         if (const svalue *parent_sval = get_binding_recursive (mgr,
+                                               covering_field_reg, kind))
+           {
+
+             HOST_WIDE_INT covering_field_offset = covering_field
+                 ->field_decl.bit_offset->int_cst.val[0];
+
+             if (TREE_CODE(covering_field_reg->get_type()) == ARRAY_TYPE
+                 && !((start_offset - covering_field_offset)
+                       & BYTE_BOUNDARY_MASK))
+               {
+                 const svalue *arr_index = rmm_mgr
+                     ->get_or_create_constant_svalue (
+                         build_int_cst (integer_type_node,
+                           (start_offset - covering_field_offset)
+                            >> BYTE_BIT_CONVERT));
+                 const region *elmt_reg = rmm_mgr
+                     ->get_element_region (covering_field_reg,
+                         TREE_TYPE (covering_field_reg->get_type ()),
+                           arr_index);
+                 return rmm_mgr->get_or_create_sub_svalue (
+                     elmt_reg->get_type (), parent_sval, elmt_reg);
+               }
+           }
        }
+    }
   return NULL;
 }
 
diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h
index 2bcef6c398a..9ddc57fea01 100644
--- a/gcc/analyzer/store.h
+++ b/gcc/analyzer/store.h
@@ -416,6 +416,9 @@ public:
   const svalue *get_binding_recursive (store_manager *mgr,
                                        const region *reg,
                                        enum binding_kind kind) const;
+  const svalue *get_parent_cast_binding (store_manager *mgr,
+                                         const region *reg,
+                                         enum binding_kind kind) const;
   const svalue *get_any_binding (store_manager *mgr,
                                  const region *reg) const;
   const svalue *maybe_get_compound_binding (store_manager *mgr,
@@ -640,4 +643,7 @@ private:
 
 } // namespace ana
 
+#define BYTE_BIT_CONVERT 3
+#define BYTE_BOUNDARY_MASK ((1 << BYTE_BIT_CONVERT) - 1)
+
 #endif /* GCC_ANALYZER_STORE_H */
diff --git a/gcc/testsuite/gcc.dg/analyzer/casts-1.c b/gcc/testsuite/gcc.dg/analyzer/casts-1.c
index 15cd85f77cf..9340615b20f 100644
--- a/gcc/testsuite/gcc.dg/analyzer/casts-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/casts-1.c
@@ -13,6 +13,21 @@ struct s2
   char arr[4];
 };
 
+struct s3
+{
+  char arr1[2];
+  char arr2[2];
+};
+
+void test_0 ()
+{
+  char ar[] = {'A', 'B', 'C', 'D'};
+  char *pt = ar;
+  __analyzer_eval (*(pt + 0) == 'A'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (*(pt + 1) == 'B'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (*(pt + 2) == 'C'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (*(pt + 3) == 'D'); /* { dg-warning "TRUE" } */
+}
 void test_1 ()
 {
   struct s1 x = {'A', 'B', 'C', 'D'};
@@ -38,12 +53,22 @@ void test_2 ()
   __analyzer_eval (x.arr[2] == 'C'); /* { dg-warning "TRUE" } */
   __analyzer_eval (x.arr[3] == 'D'); /* { dg-warning "TRUE" } */
   struct s1 *p = (struct s1 *)&x;
-  __analyzer_eval (p->a == 'A'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
-  /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
-  __analyzer_eval (p->b == 'B'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
-  /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
-  __analyzer_eval (p->c == 'C'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
-  /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
-  __analyzer_eval (p->d == 'D'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
-  /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
+  __analyzer_eval (p->a == 'A'); /* { dg-warning "TRUE" "true" } */
+  __analyzer_eval (p->b == 'B'); /* { dg-warning "TRUE" "true" } */
+  __analyzer_eval (p->c == 'C'); /* { dg-warning "TRUE" "true" } */
+  __analyzer_eval (p->d == 'D'); /* { dg-warning "TRUE" "true" } */
+}
+
+void test_3 ()
+{
+  struct s3 x = {{'A', 'B'}, {'C', 'D'}};
+  __analyzer_eval (x.arr1[0] == 'A'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (x.arr1[1] == 'B'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (x.arr2[0] == 'C'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (x.arr2[1] == 'D'); /* { dg-warning "TRUE" } */
+  struct s1 *p = (struct s1 *)&x;
+  __analyzer_eval (p->a == 'A'); /* { dg-warning "TRUE" "true" } */
+  __analyzer_eval (p->b == 'B'); /* { dg-warning "TRUE" "true" } */
+  __analyzer_eval (p->c == 'C'); /* { dg-warning "TRUE" "true" } */
+  __analyzer_eval (p->d == 'D'); /* { dg-warning "TRUE" "true" } */
 }
