I apologize. I left a dangling space in the patch. I am guessing
that this is against whitespace policy. I had actually corrected
it but forgot to add the file to the commit. Please see the
attached.
commit a9b60f36b499463b49095f28ce8053f6387c506d
Author: Brian Sobulefsky <brian.sobulef...@protonmail.com>
Date:   Tue Feb 9 16:25:43 2021 -0800

    Address the bug found in test file gcc/testsuite/gcc.dg/analyzer/casts-1.c, as
    recorded by the XFAIL, and some simpler and more complex versions found during
    the investigation of it. PR analyzer/98797 was opened for these bugs.
    
    The simplest manifestation of this bug can be replicated with:
    
    char arr[] = {'A', 'B', 'C', 'D'};
    char *pt = ar;
    __analyzer_eval(*(pt + 0) == 'A');
    __analyzer_eval(*(pt + 1) == 'B');
    //etc
    
    The result of the eval call is "UNKNOWN" because the analyzer is unable to
    determine the value at the pointer. Note that array access (such as arr[0]) is
    correctly handled. The code responsible for this is in file
    region-model-manager.cc, function region_model_manager::maybe_fold_sub_svalue.
    The relevant section is commented /* Handle getting individual chars from
    STRING_CST */. This section only had a case for an element_region. A case
    needed to be added for an offset_region.
    
    Additionally, when the offset was 0, such as in *pt or *(pt + 0), the function
    region_model_manager::get_offset_region was failing to make the needed offset
    region at all. This was due to the test for a constant 0 pointer that was then
    returning get_cast_region. A special case is needed for when PARENT is of type
    array_type whose type matches TYPE. In this case, get_offset_region is allowed
    to continue to normal conclusion.
    
    The original bug noted in gcc/testsuite/gcc.dg/analyzer/casts-1.c was for the
    case:
    
    struct s1
    {
      char a;
      char b;
      char c;
      char d;
    };
    
    struct s2
    {
      char arr[4];
    };
    
    struct s2 x = {{'A', 'B', 'C', 'D'}};
    struct s1 *p = (struct s1 *)&x;
    __analyzer_eval (p->a == 'A');
    //etc
    
    This requires a case added to region_model_manager::maybe_fold_sub_svalue in
    the individual characters from string constant section for a field region.
    
    Finally, the prior only works for the case where struct s2 was a single field
    struct. The more general case is:
    
    struct s2
    {
      char arr1[2];
      char arr2[2];
    };
    
    struct s2 x = {{'A', 'B'}, {'C', 'D'}};
    
    Here the array will never be found in the get_any_binding routines. A new
    routine is added to class binding_cluster that checks if the region being
    searched is a field_region of a cast_region, and if so, tries to find a field
    of the original region that contains the region under examination. This new
    function is named binding_cluster::get_parent_cast_binding. It is called from
    get_binding_recursive.
    
    gcc/analyzer/ChangeLog:
            PR analyzer/98797
            * region-model-manager.cc region_model_manager::get_offset_region: Add
            check for a PARENT array whose type matches TYPE, and have this skip
            returning get_cast_region and rather conclude the function normally.
            * region-model-manager.cc region_model_manager::maybe_fold_sub_svalue
            Update the get character from string_cst section to include cases for
            an offset_region and a field_region.
            * store.cc binding_cluster::get_binding_recursive: Add case for call
            to new function get_parent_cast_binding.
            * store.cc binding_cluster::get_parent_cast_binding: New function.
            * store.h class binding_cluster: Add declaration for new member
            function get_parent_class_binding and macros for bit to byte
            conversion.
    
    gcc/testsuite/ChangeLog:
            PR analyzer/98797
            * gcc.dg/analyzer/casts-1.c: Update file to no longer expect failures
            and add test cases for additional bugs solved.

diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc
index dfd2413e914..1fd6b94f20a 100644
--- a/gcc/analyzer/region-model-manager.cc
+++ b/gcc/analyzer/region-model-manager.cc
@@ -602,16 +602,46 @@ region_model_manager::maybe_fold_sub_svalue (tree type,
   /* Handle getting individual chars from a STRING_CST.  */
   if (tree cst = parent_svalue->maybe_get_constant ())
     if (TREE_CODE (cst) == STRING_CST)
-      if (const element_region *element_reg
+      {
+	if (const element_region *element_reg
 	    = subregion->dyn_cast_element_region ())
-	{
-	  const svalue *idx_sval = element_reg->get_index ();
-	  if (tree cst_idx = idx_sval->maybe_get_constant ())
-	    if (const svalue *char_sval
-		= maybe_get_char_from_string_cst (cst, cst_idx))
-	      return get_or_create_cast (type, char_sval);
-	}
-
+	  {
+	    const svalue *idx_sval = element_reg->get_index ();
+	    if (tree cst_idx = idx_sval->maybe_get_constant ())
+	      if (const svalue *char_sval
+		  = maybe_get_char_from_string_cst (cst, cst_idx))
+		return get_or_create_cast (type, char_sval);
+	  }
+	else if (const offset_region *offset_reg
+	      = subregion->dyn_cast_offset_region ())
+	  {
+	    const svalue *offset_sval = offset_reg->get_byte_offset();
+	    if (tree cst_offset = offset_sval->maybe_get_constant ())
+	      if (const svalue *char_sval
+		  = maybe_get_char_from_string_cst (cst, cst_offset))
+		return get_or_create_cast (type, char_sval);
+	  }
+	else if (const field_region *field_reg
+	      = subregion->dyn_cast_field_region ())
+	  {
+	    region_offset field_offset = field_reg->get_offset ();
+	    if (!field_offset.symbolic_p ())
+	      {
+		bit_offset_t field_bit_offset = field_offset.get_bit_offset ();
+		HOST_WIDE_INT field_offset_bits
+		    = field_bit_offset.get_val ()[0];
+		if(!(field_offset_bits & BYTE_BOUNDARY_MASK))
+		  {
+		    tree cst_offset
+		       = build_int_cst (integer_type_node,
+					field_offset_bits >> BYTE_BIT_CONVERT);
+		    if (const svalue *char_sval
+			= maybe_get_char_from_string_cst (cst, cst_offset))
+		      return  get_or_create_cast (type, char_sval);
+		  }
+	      }
+	  }
+      }
   /* SUB(INIT(r)).FIELD -> INIT(r.FIELD)
      i.e.
      Subvalue(InitialValue(R1), FieldRegion(R2, F))
@@ -867,8 +897,19 @@ region_model_manager::get_offset_region (const region *parent,
 {
   /* If BYTE_OFFSET is zero, return PARENT.  */
   if (tree cst_offset = byte_offset->maybe_get_constant ())
-    if (zerop (cst_offset))
-      return get_cast_region (parent, type);
+    {
+      /* Special case: PARENT type is array_type whose type matches TYPE */
+      if (parent->get_type ()
+	    &&  TREE_CODE(parent->get_type ()) == ARRAY_TYPE
+	    &&  TREE_TYPE(parent->get_type ()) == type)
+	{
+	  tree offset_int
+	      = build_int_cst (integer_type_node, cst_offset->int_cst.val[0]);
+	  byte_offset = get_or_create_constant_svalue (offset_int);
+	}
+      else if (zerop (cst_offset))
+	return get_cast_region (parent, type);
+    }
 
   /* Fold OFFSET_REGION(OFFSET_REGION(REG, X), Y)
      to   OFFSET_REGION(REG, (X + Y)).  */
diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc
index abdb336da91..3afba55ce90 100644
--- a/gcc/analyzer/store.cc
+++ b/gcc/analyzer/store.cc
@@ -996,14 +996,129 @@ binding_cluster::get_binding_recursive (store_manager *mgr,
     return sval;
   if (reg != m_base_region)
     if (const region *parent_reg = reg->get_parent_region ())
-      if (const svalue *parent_sval
-	  = get_binding_recursive (mgr, parent_reg, kind))
+      {
+	if (const svalue *parent_sval
+	    = get_binding_recursive (mgr, parent_reg, kind))
+	  {
+	    /* Extract child svalue from parent svalue.  */
+	    region_model_manager *rmm_mgr = mgr->get_svalue_manager ();
+	    return rmm_mgr->get_or_create_sub_svalue (reg->get_type (),
+						      parent_sval, reg);
+	  }
+	else if (const svalue *parent_cast_field_sval
+	    = get_parent_cast_binding (mgr, reg, kind))
+	  {
+	    /* PARENT_REG is a cast_region and we found a covering binding
+	       in the original_region */
+	    return parent_cast_field_sval;
+	  }
+      }
+  return NULL;
+}
+
+/* Get a binding for access to a field of a cast_region.
+   Note the original motivation for this was access of the form:
+   struct s1 x = {{'A', 'B'}; {'C', 'D'}}; struct s2 p = (struct s2) &x;
+   __analyzer_eval (p->a == 'A');
+   et al. for the other fields. get_binding_recursive fails because it is
+   unidirectional (from the field_region p->a up the chain of parents). The
+   routine can probably be expanded, and even further broken out, as other cases
+   are discovered and understood. */
+const svalue *
+binding_cluster::get_parent_cast_binding (store_manager *mgr,
+					  const region *reg,
+					  enum binding_kind kind) const
+{
+
+  /* If we are sure this will never be called incorrectly, we can remove */
+  if (!(mgr && reg))
+    return NULL;
+
+  const region *parent_reg = reg->get_parent_region ();
+
+  /* If it is impossible for a region's parent to be NULL, remove guard */
+  if(!parent_reg)
+    return NULL;
+
+  const cast_region *parent_cast = parent_reg->dyn_cast_cast_region ();
+  const field_region *field_reg = reg->dyn_cast_field_region ();
+
+  /* The first two guards are just safety, this is the real condition */
+  if (!(parent_cast && field_reg))
+    return NULL;
+
+  const region *original_reg = parent_cast->get_original_region ();
+  region_offset reg_offset = field_reg->get_offset ();
+  bit_size_t reg_bit_size;
+  tree original_tree = original_reg->get_type ();
+  tree original_field;
+
+  /* Note assignment to ORIGINAL_FIELD in compound test to
+     save some indentation space (80 comes quickly) */
+  if (original_tree && TREE_CODE(original_tree) == RECORD_TYPE
+      && (original_field = TYPE_FIELDS(original_tree))
+      && !reg_offset.symbolic_p() && field_reg->get_bit_size(&reg_bit_size))
+    {
+
+      bit_offset_t reg_bit_offset = reg_offset.get_bit_offset ();
+      HOST_WIDE_INT start_offset = reg_bit_offset.get_val ()[0];
+      HOST_WIDE_INT end_offset = start_offset + reg_bit_size.get_val ()[0] - 1;
+      tree covering_field = NULL_TREE;
+
+      /* get_field_at_bit_offset in region.cc has a test for RECORD_TYPE, maybe
+	 this should be here too? The test is probably moot, since we have to
+	 fully cover and survive the get_binding_recursive below. Also
+	 consdider making the function in region.cc usable outside the file? */
+      for (covering_field = original_field;
+	    (original_field = TREE_CHAIN (original_field));)
+	{
+	  if (original_field->field_decl.bit_offset->int_cst.val[0]
+	      <= start_offset)
+	    covering_field = original_field;
+	  else
+	    break; /* Is the list guaranteed to be in order? */
+	}
+
+      if (covering_field && end_offset
+	  <= covering_field->field_decl.bit_offset->int_cst.val[0]
+	  + covering_field->decl_common.size->int_cst.val[0] - 1)
 	{
-	  /* Extract child svalue from parent svalue.  */
+	  /* We found a field that entirely covers REG. The following code
+	  should probably be generalized to more cases, as of now it will
+	  basically handle the case where a char array has been initialized
+	  into the original struct type. Further work might be to handle when
+	  a field to a struct is itself a struct, which is likely recursive.*/
+
 	  region_model_manager *rmm_mgr = mgr->get_svalue_manager ();
-	  return rmm_mgr->get_or_create_sub_svalue (reg->get_type (),
-						    parent_sval, reg);
+	  const region *covering_field_reg = rmm_mgr
+	      ->get_field_region (original_reg, covering_field);
+
+	  if (const svalue *parent_sval = get_binding_recursive (mgr,
+						covering_field_reg, kind))
+	    {
+
+	      HOST_WIDE_INT covering_field_offset = covering_field
+		  ->field_decl.bit_offset->int_cst.val[0];
+
+	      if (TREE_CODE(covering_field_reg->get_type()) == ARRAY_TYPE
+		  && !((start_offset - covering_field_offset)
+			& BYTE_BOUNDARY_MASK))
+		{
+		  const svalue *arr_index = rmm_mgr
+		      ->get_or_create_constant_svalue (
+			  build_int_cst (integer_type_node,
+			    (start_offset - covering_field_offset)
+			     >> BYTE_BIT_CONVERT));
+		  const region *elmt_reg = rmm_mgr
+		      ->get_element_region (covering_field_reg,
+			  TREE_TYPE (covering_field_reg->get_type ()),
+			    arr_index);
+		  return rmm_mgr->get_or_create_sub_svalue (
+		      elmt_reg->get_type (), parent_sval, elmt_reg);
+		}
+	    }
 	}
+    }
   return NULL;
 }
 
diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h
index 2bcef6c398a..9ddc57fea01 100644
--- a/gcc/analyzer/store.h
+++ b/gcc/analyzer/store.h
@@ -416,6 +416,9 @@ public:
   const svalue *get_binding_recursive (store_manager *mgr,
 					const region *reg,
 					enum binding_kind kind) const;
+  const svalue *get_parent_cast_binding (store_manager *mgr,
+					  const region *reg,
+					  enum binding_kind kind) const;
   const svalue *get_any_binding (store_manager *mgr,
 				  const region *reg) const;
   const svalue *maybe_get_compound_binding (store_manager *mgr,
@@ -640,4 +643,7 @@ private:
 
 } // namespace ana
 
+#define BYTE_BIT_CONVERT 3
+#define BYTE_BOUNDARY_MASK ((1 << BYTE_BIT_CONVERT) - 1)
+
 #endif /* GCC_ANALYZER_STORE_H */
diff --git a/gcc/testsuite/gcc.dg/analyzer/casts-1.c b/gcc/testsuite/gcc.dg/analyzer/casts-1.c
index 15cd85f77cf..9340615b20f 100644
--- a/gcc/testsuite/gcc.dg/analyzer/casts-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/casts-1.c
@@ -13,6 +13,21 @@ struct s2
   char arr[4];
 };
 
+struct s3
+{
+  char arr1[2];
+  char arr2[2];
+};
+
+void test_0 ()
+{
+  char ar[] = {'A', 'B', 'C', 'D'};
+  char *pt = ar;
+  __analyzer_eval (*(pt + 0) == 'A'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (*(pt + 1) == 'B'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (*(pt + 2) == 'C'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (*(pt + 3) == 'D'); /* { dg-warning "TRUE" } */
+}
 void test_1 ()
 {
   struct s1 x = {'A', 'B', 'C', 'D'};
@@ -38,12 +53,22 @@ void test_2 ()
   __analyzer_eval (x.arr[2] == 'C'); /* { dg-warning "TRUE" } */
   __analyzer_eval (x.arr[3] == 'D'); /* { dg-warning "TRUE" } */
   struct s1 *p = (struct s1 *)&x;
-  __analyzer_eval (p->a == 'A'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
-  /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
-  __analyzer_eval (p->b == 'B'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
-  /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
-  __analyzer_eval (p->c == 'C'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
-  /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
-  __analyzer_eval (p->d == 'D'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
-  /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
+  __analyzer_eval (p->a == 'A'); /* { dg-warning "TRUE" "true" } */
+  __analyzer_eval (p->b == 'B'); /* { dg-warning "TRUE" "true" } */
+  __analyzer_eval (p->c == 'C'); /* { dg-warning "TRUE" "true" } */
+  __analyzer_eval (p->d == 'D'); /* { dg-warning "TRUE" "true" } */
+}
+
+void test_3 ()
+{
+  struct s3 x = {{'A', 'B'}, {'C', 'D'}};
+  __analyzer_eval (x.arr1[0] == 'A'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (x.arr1[1] == 'B'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (x.arr2[0] == 'C'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (x.arr2[1] == 'D'); /* { dg-warning "TRUE" } */
+  struct s1 *p = (struct s1 *)&x;
+  __analyzer_eval (p->a == 'A'); /* { dg-warning "TRUE" "true" } */
+  __analyzer_eval (p->b == 'B'); /* { dg-warning "TRUE" "true" } */
+  __analyzer_eval (p->c == 'C'); /* { dg-warning "TRUE" "true" } */
+  __analyzer_eval (p->d == 'D'); /* { dg-warning "TRUE" "true" } */
 }

Reply via email to