[pushed] analyzer: support diagnostics that don't have a stmt

2023-09-15 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-4042-gb09193fb0686b7.

gcc/analyzer/ChangeLog:
* analyzer.cc (get_stmt_location): Handle null stmt.
* diagnostic-manager.cc (saved_diagnostic::saved_diagnostic): Copy
m_loc from ploc.
(saved_diagnostic::operator==): Compare m_loc.
(saved_diagnostic::calc_best_epath): Only use m_stmt_finder if
m_loc is unknown.
(dedupe_key::dedupe_key): Initialize m_loc.
(dedupe_key::operator==): Compare m_loc.
(dedupe_key::get_location): Use m_loc if it's known.
(dedupe_key::m_loc): New field.
(diagnostic_manager::emit_saved_diagnostic): Only call
get_emission_location if m_loc is unknown, preferring to use m_loc
if it's available.
* diagnostic-manager.h (saved_diagnostic::m_loc): New field.
(pending_location::pending_location): Initialize m_loc.  Add
overload taking a location_t rather than a stmt/stmt_finder.
(pending_location::m_loc): New field.
---
 gcc/analyzer/analyzer.cc   |  2 ++
 gcc/analyzer/diagnostic-manager.cc | 35 +++---
 gcc/analyzer/diagnostic-manager.h  | 19 +++-
 3 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc
index 5091fb7a583..94c5cf242b2 100644
--- a/gcc/analyzer/analyzer.cc
+++ b/gcc/analyzer/analyzer.cc
@@ -41,6 +41,8 @@ namespace ana {
 location_t
 get_stmt_location (const gimple *stmt, function *fun)
 {
+  if (!stmt)
+return UNKNOWN_LOCATION;
   if (get_pure_location (stmt->location) == UNKNOWN_LOCATION)
 {
   /* Workaround for missing location information for clobber
diff --git a/gcc/analyzer/diagnostic-manager.cc 
b/gcc/analyzer/diagnostic-manager.cc
index b652e7032e9..972413a751a 100644
--- a/gcc/analyzer/diagnostic-manager.cc
+++ b/gcc/analyzer/diagnostic-manager.cc
@@ -678,14 +678,13 @@ saved_diagnostic::saved_diagnostic (const state_machine 
*sm,
   /* stmt_finder could be on-stack; we want our own copy that can
  outlive that.  */
   m_stmt_finder (ploc.m_finder ? ploc.m_finder->clone () : NULL),
+  m_loc (ploc.m_loc),
   m_var (var), m_sval (sval), m_state (state),
   m_d (std::move (d)), m_trailing_eedge (NULL),
   m_idx (idx),
   m_best_epath (NULL), m_problem (NULL),
   m_notes ()
 {
-  gcc_assert (m_stmt || m_stmt_finder);
-
   /* We must have an enode in order to be able to look for paths
  through the exploded_graph to this diagnostic.  */
   gcc_assert (m_enode);
@@ -704,6 +703,7 @@ saved_diagnostic::operator== (const saved_diagnostic 
) const
  && m_snode == other.m_snode
  && m_stmt == other.m_stmt
  /* We don't compare m_stmt_finder.  */
+ && m_loc == other.m_loc
  && pending_diagnostic::same_tree_p (m_var, other.m_var)
  && m_state == other.m_state
  && m_d->equal_p (*other.m_d)
@@ -833,8 +833,8 @@ saved_diagnostic::dump_as_dot_node (pretty_printer *pp) 
const
 
 /* Use PF to find the best exploded_path for this saved_diagnostic,
and store it in m_best_epath.
-   If m_stmt is still NULL, use m_stmt_finder on the epath to populate
-   m_stmt.
+   If we don't have a specific location in m_loc and m_stmt is still NULL,
+   use m_stmt_finder on the epath to populate m_stmt.
Return true if a best path was found.  */
 
 bool
@@ -853,12 +853,15 @@ saved_diagnostic::calc_best_epath (epath_finder *pf)
 return false;
 
   gcc_assert (m_best_epath);
-  if (m_stmt == NULL)
+  if (m_loc == UNKNOWN_LOCATION)
 {
-  gcc_assert (m_stmt_finder);
-  m_stmt = m_stmt_finder->find_stmt (*m_best_epath);
+  if (m_stmt == NULL)
+   {
+ gcc_assert (m_stmt_finder);
+ m_stmt = m_stmt_finder->find_stmt (*m_best_epath);
+   }
+  gcc_assert (m_stmt);
 }
-  gcc_assert (m_stmt);
 
   return true;
 }
@@ -1212,9 +1215,9 @@ class dedupe_key
 {
 public:
   dedupe_key (const saved_diagnostic )
-  : m_sd (sd), m_stmt (sd.m_stmt)
+  : m_sd (sd), m_stmt (sd.m_stmt), m_loc (sd.m_loc)
   {
-gcc_assert (m_stmt);
+gcc_assert (m_stmt || m_loc != UNKNOWN_LOCATION);
   }
 
   hashval_t hash () const
@@ -1227,11 +1230,15 @@ public:
   bool operator== (const dedupe_key ) const
   {
 return (m_sd == other.m_sd
-   && m_stmt == other.m_stmt);
+   && m_stmt == other.m_stmt
+   && m_loc == other.m_loc);
   }
 
   location_t get_location () const
   {
+if (m_loc != UNKNOWN_LOCATION)
+  return m_loc;
+gcc_assert (m_stmt);
 return m_stmt->location;
   }
 
@@ -1260,6 +1267,7 @@ public:
 
   const saved_diagnostic _sd;
   const gimple *m_stmt;
+  location_t m_loc;
 };
 
 /* Traits for use by dedupe_winners.  */
@@ -1543,8 +1551,9 @@ diagnostic_manager::emit_saved_diagnostic (const 
exploded_graph ,
 
   emission_path.prepare_for_emission (sd.m_d.get ());
 
-  location_t loc
-= get_emission_location (sd.m_stmt, 

[pushed] analyzer: introduce pending_location

2023-09-15 Thread David Malcolm via Gcc-patches
No functional change intended.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-4041-g759a1a52ea615d.

gcc/analyzer/ChangeLog:
* analyzer.h (struct pending_location): New forward decl.
* diagnostic-manager.cc (saved_diagnostic::saved_diagnostic):
Replace params "enode", "snode", "stmt", and "stmt_finder" with
"ploc".
(diagnostic_manager::add_diagnostic): Likewise for both overloads.
* diagnostic-manager.h (saved_diagnostic::saved_diagnostic):
Likewise.
(struct pending_location): New.
(diagnostic_manager::add_diagnostic): Replace params "enode",
"snode", "stmt", and "stmt_finder" with "ploc".
* engine.cc (impl_region_model_context::warn): Update call to
add_diagnostic for above change.
(impl_sm_context::warn): Likewise.
(impl_region_model_context::on_state_leak): Likewise.
* infinite-recursion.cc
(exploded_graph::detect_infinite_recursion): Likewise.
---
 gcc/analyzer/analyzer.h|  1 +
 gcc/analyzer/diagnostic-manager.cc | 44 +-
 gcc/analyzer/diagnostic-manager.h  | 36 ++--
 gcc/analyzer/engine.cc | 28 +--
 gcc/analyzer/infinite-recursion.cc |  6 +++-
 5 files changed, 71 insertions(+), 44 deletions(-)

diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h
index 208b85026fc..777293ff4b9 100644
--- a/gcc/analyzer/analyzer.h
+++ b/gcc/analyzer/analyzer.h
@@ -90,6 +90,7 @@ class reachable_regions;
 class bounded_ranges;
 class bounded_ranges_manager;
 
+struct pending_location;
 class pending_diagnostic;
 class pending_note;
 struct event_loc_info;
diff --git a/gcc/analyzer/diagnostic-manager.cc 
b/gcc/analyzer/diagnostic-manager.cc
index b3da2a982f2..b652e7032e9 100644
--- a/gcc/analyzer/diagnostic-manager.cc
+++ b/gcc/analyzer/diagnostic-manager.cc
@@ -664,22 +664,20 @@ epath_finder::dump_feasible_path (const exploded_node 
*target_enode,
 
 /* class saved_diagnostic.  */
 
-/* saved_diagnostic's ctor.
-   Take ownership of D and STMT_FINDER.  */
+/* saved_diagnostic's ctor.  */
 
 saved_diagnostic::saved_diagnostic (const state_machine *sm,
-   const exploded_node *enode,
-   const supernode *snode, const gimple *stmt,
-   const stmt_finder *stmt_finder,
+   const pending_location ,
tree var,
const svalue *sval,
state_machine::state_t state,
std::unique_ptr d,
unsigned idx)
-: m_sm (sm), m_enode (enode), m_snode (snode), m_stmt (stmt),
- /* stmt_finder could be on-stack; we want our own copy that can
-outlive that.  */
-  m_stmt_finder (stmt_finder ? stmt_finder->clone () : NULL),
+: m_sm (sm), m_enode (ploc.m_enode), m_snode (ploc.m_snode),
+  m_stmt (ploc.m_stmt),
+  /* stmt_finder could be on-stack; we want our own copy that can
+ outlive that.  */
+  m_stmt_finder (ploc.m_finder ? ploc.m_finder->clone () : NULL),
   m_var (var), m_sval (sval), m_state (state),
   m_d (std::move (d)), m_trailing_eedge (NULL),
   m_idx (idx),
@@ -1102,9 +1100,7 @@ diagnostic_manager::diagnostic_manager (logger *logger, 
engine *eng,
 
 bool
 diagnostic_manager::add_diagnostic (const state_machine *sm,
-   exploded_node *enode,
-   const supernode *snode, const gimple *stmt,
-   const stmt_finder *finder,
+   const pending_location ,
tree var,
const svalue *sval,
state_machine::state_t state,
@@ -1114,15 +1110,16 @@ diagnostic_manager::add_diagnostic (const state_machine 
*sm,
 
   /* We must have an enode in order to be able to look for paths
  through the exploded_graph to the diagnostic.  */
-  gcc_assert (enode);
+  gcc_assert (ploc.m_enode);
 
   /* If this warning is ultimately going to be rejected by a -Wno-analyzer-*
  flag, reject it now.
  We can only do this for diagnostics where we already know the stmt,
  and thus can determine the emission location.  */
-  if (stmt)
+  if (ploc.m_stmt)
 {
-  location_t loc = get_emission_location (stmt, snode->m_fun, *d);
+  location_t loc
+   = get_emission_location (ploc.m_stmt, ploc.m_snode->m_fun, *d);
   int option = d->get_controlling_option ();
   if (!warning_enabled_at (loc, option))
{
@@ -1135,14 +1132,14 @@ diagnostic_manager::add_diagnostic (const state_machine 
*sm,
 }
 
   saved_diagnostic *sd
-= new saved_diagnostic (sm, enode, snode, stmt, finder, var, sval,
-   state, std::move 

[pushed] analyzer: handle volatile ops

2023-09-15 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-4040-g6319b5b2d46690.

gcc/analyzer/ChangeLog:
* region-model.cc (region_model::get_gassign_result): Handle
volatile ops by using a conjured_svalue.

gcc/testsuite/ChangeLog:
* c-c++-common/analyzer/volatile-1.c: New test.
---
 gcc/analyzer/region-model.cc   | 11 +++
 .../c-c++-common/analyzer/volatile-1.c | 18 ++
 2 files changed, 29 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/analyzer/volatile-1.c

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 2e774c2824e..c4e68661ef1 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -729,6 +729,17 @@ region_model::get_gassign_result (const gassign *assign,
   region_model_context *ctxt)
 {
   tree lhs = gimple_assign_lhs (assign);
+
+  if (gimple_has_volatile_ops (assign)
+  && !gimple_clobber_p (assign))
+{
+  conjured_purge p (this, ctxt);
+  return m_mgr->get_or_create_conjured_svalue (TREE_TYPE (lhs),
+  assign,
+  get_lvalue (lhs, ctxt),
+  p);
+}
+
   tree rhs1 = gimple_assign_rhs1 (assign);
   enum tree_code op = gimple_assign_rhs_code (assign);
   switch (op)
diff --git a/gcc/testsuite/c-c++-common/analyzer/volatile-1.c 
b/gcc/testsuite/c-c++-common/analyzer/volatile-1.c
new file mode 100644
index 000..f8440d41220
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/analyzer/volatile-1.c
@@ -0,0 +1,18 @@
+#include "../../gcc.dg/analyzer/analyzer-decls.h"
+
+volatile int g;
+
+void test_global (void)
+{
+  int v1 = g;
+  int v2 = g;
+  __analyzer_eval (v1 == v2); /* { dg-warning "UNKNOWN" } */
+}
+
+void test_local (void)
+{
+  volatile int x = 0;
+  int v1 = x;
+  int v2 = x;
+  __analyzer_eval (v1 == v2); /* { dg-warning "UNKNOWN" } */
+}
-- 
2.26.3



[pushed] diagnostics: support multithreaded diagnostic paths

2023-09-14 Thread David Malcolm via Gcc-patches
This patch extends the existing diagnostic_path class so that as well
as a list of events, there is a list of named threads, with each event
being associated with one of the threads.

No GCC diagnostics take advantage of this, but GCC plugins may find a
use for this; an example is provided in the testsuite.

Given that there is still a single list of events within a
diagnostic_path, the events in a diagnostic_path have a specific global
ordering even if they are in multiple threads.

Within the SARIF serialization, the patch adds the "executionOrder"
property to threadFlowLocation objects (SARIF v2.1.0 3.38.11).  This is
1-based in order to match the human-readable numbering of events shown
in messages emitted by pretty-printer.cc's "%@".

With -fdiagnostics-path-format=separate-events, the threads are not
shown.

With -fdiagnostics-path-format=inline-events, the threads and the
per-thread stack activity are tracked and visualized separately.  An
example can be seen in the testsuite.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-4006-g3a1e9f3ed7aa49.

gcc/analyzer/ChangeLog:
* checker-event.h (checker_event::get_thread_id): New.
* checker-path.h (class checker_path): Implement thread-related
vfuncs via a single simple_diagnostic_thread instance named
"main".

gcc/ChangeLog:
* diagnostic-event-id.h (diagnostic_thread_id_t): New typedef.
* diagnostic-format-sarif.cc (class sarif_thread_flow): New.
(sarif_thread_flow::sarif_thread_flow): New.
(sarif_builder::make_code_flow_object): Reimplement, creating
per-thread threadFlow objects, populating them with the relevant
events.
(sarif_builder::make_thread_flow_object): Delete, moving the
code into sarif_builder::make_code_flow_object.
(sarif_builder::make_thread_flow_location_object): Add
"path_event_idx" param.  Use it to set "executionOrder"
property.
* diagnostic-path.h (diagnostic_event::get_thread_id): New
pure-virtual vfunc.
(class diagnostic_thread): New.
(diagnostic_path::num_threads): New pure-virtual vfunc.
(diagnostic_path::get_thread):  New pure-virtual vfunc.
(diagnostic_path::multithreaded_p): New decl.
(simple_diagnostic_event::simple_diagnostic_event): Add optional
thread_id param.
(simple_diagnostic_event::get_thread_id): New accessor.
(simple_diagnostic_event::m_thread_id): New.
(class simple_diagnostic_thread): New.
(simple_diagnostic_path::simple_diagnostic_path): Move definition
to diagnostic.cc.
(simple_diagnostic_path::num_threads): New.
(simple_diagnostic_path::get_thread): New.
(simple_diagnostic_path::add_thread): New.
(simple_diagnostic_path::add_thread_event): New.
(simple_diagnostic_path::m_threads): New.
* diagnostic-show-locus.cc (layout::layout): Add pretty_printer
param for overriding the context's printer.
(diagnostic_show_locus): Likewise.
* diagnostic.cc (simple_diagnostic_path::simple_diagnostic_path):
Move here from diagnostic-path.h.  Add main thread.
(simple_diagnostic_path::num_threads): New.
(simple_diagnostic_path::get_thread): New.
(simple_diagnostic_path::add_thread): New.
(simple_diagnostic_path::add_thread_event): New.
(simple_diagnostic_event::simple_diagnostic_event): Add thread_id
param and use it to initialize m_thread_id.  Reformat.
* diagnostic.h: Add pretty_printer param for overriding the
context's printer.
* tree-diagnostic-path.cc: Add #define INCLUDE_VECTOR.
(can_consolidate_events): Compare thread ids.
(class per_thread_summary): New.
(event_range::event_range): Add per_thread_summary arg.
(event_range::print): Add "pp" param and use it rather than dc's
printer.
(event_range::m_thread_id): New field.
(event_range::m_per_thread_summary): New field.
(path_summary::multithreaded_p): New.
(path_summary::get_events_for_thread_id): New.
(path_summary::m_per_thread_summary): New field.
(path_summary::m_thread_id_to_events): New field.
(path_summary::get_or_create_events_for_thread_id): New.
(path_summary::path_summary): Create per_thread_summary instances
as needed and associate the event_range instances with them.
(base_indent): Move here from print_path_summary_as_text.
(per_frame_indent): Likewise.
(class thread_event_printer): New, adapted from parts of
print_path_summary_as_text.
(print_path_summary_as_text): Make static.  Reimplement by
moving most of the existing code to class thread_event_printer,
capturing state as per-thread as appropriate.
(default_tree_diagnostic_path_printer): Add missing 'break' on
final case.


[pushed] analyzer: fix missing return in compatible_epath_p

2023-09-14 Thread David Malcolm via Gcc-patches
On Mon, 2023-09-11 at 10:23 +0200, Andreas Schwab via Gcc-patches wrote:
> ../../gcc/analyzer/diagnostic-manager.cc: In function 'bool 
> ana::compatible_epath_p(const exploded_path*, const exploded_path*)':
> ../../gcc/analyzer/diagnostic-manager.cc:969:1: warning: control reaches end 
> of non-void function [-Wreturn-type]

Sorry about this; should be fixed by the following patch.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-4005-g59f6185b59f711.

gcc/analyzer/ChangeLog:
* diagnostic-manager.cc (compatible_epath_p): Fix missing return.
---
 gcc/analyzer/diagnostic-manager.cc | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/analyzer/diagnostic-manager.cc 
b/gcc/analyzer/diagnostic-manager.cc
index a010f4ba1e1..b3da2a982f2 100644
--- a/gcc/analyzer/diagnostic-manager.cc
+++ b/gcc/analyzer/diagnostic-manager.cc
@@ -966,6 +966,14 @@ compatible_epath_p (const exploded_path *lhs_path,
   /* A superedge was found for only one of the two paths.  */
   return false;
 }
+
+  /* A superedge was found for only one of the two paths.  */
+  if (lhs_eedge_idx >= 0 || rhs_eedge_idx >= 0)
+return false;
+
+  /* Both paths were drained up entirely.
+ No discriminant was found.  */
+  return true;
 }
 
 
-- 
2.26.3



Re: [PATCH] ggc, jit: forcibly clear GTY roots in jit

2023-09-14 Thread David Malcolm via Gcc-patches
On Tue, 2023-09-12 at 15:20 -0400, Antoni Boucher wrote:

FWIW I've pushed the "ggc, jit: forcibly clear GTY roots in jit" to
trunk after retesting it, as r14-4003-geaa8e8541349df.

> I added it to bugzilla here:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111396

I don't yet have a fix for this issue.

Dave


> 
> Since this only reproduces part of the issue, please let me test
> again
> with rustc_codegen_gcc after adding the missing fix.
> 
> I confirmed that the fix is in
> https://github.com/antoyo/gcc/commit/9d5b6b20efa20825926196759d50706a604c64a8
> so you might as well include all of this (except the linetable
> condition in toplev.cc).
> 
> On Tue, 2023-09-12 at 14:38 -0400, David Malcolm wrote:
> > On Tue, 2023-09-12 at 13:36 -0400, Antoni Boucher wrote:
> > > In the mean time, here's a (Rust) reproducer for the issue:
> > > 
> > > fn main() {
> > >     for _ in 0..5 {
> > >     let context = Context::default();
> > >     context.add_command_line_option("-flto");
> > >    
> > > context.set_optimization_level(OptimizationLevel::Aggressive);
> > >     context.add_driver_option("-nostdlib");
> > > 
> > >     let int_type = context.new_type::();
> > > 
> > >     let function = context.new_function(None,
> > > FunctionType::Exported, int_type, &[], "main", false);
> > >     let block = function.new_block("start");
> > >     let value = context.new_rvalue_from_int(int_type, 42);
> > >     block.end_with_return(None, value);
> > > 
> > >     context.compile_to_file(OutputKind::Executable,
> > > "my_exe");
> > >     }
> > > }
> > 
> > Can we get this in bugzilla please?  If you generate a .c version
> > of
> > the context (via gcc_jit_context_dump_reproducer_to_file) I can try
> > to
> > debug it.
> > 
> > Thanks
> > Dave
> > 
> 



[pushed] analyzer: use unique_ptr for rejected_constraint

2023-09-14 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-4004-g8878f7ab1cb9ed.

gcc/analyzer/ChangeLog:
* diagnostic-manager.cc (process_worklist_item): Use
std::unique_ptr rather than plain rejected_constraint *.
* engine.cc (exploded_path::feasible_p): Likewise.
(feasibility_state::maybe_update_for_edge): Likewise.
* exploded-graph.h (feasibility_problem::feasibility_problem):
Likewise.
(feasibility_problem::~feasibility_problem): Delete.
(feasibility_problem::m_rc): Use std::unique_ptr.
(feasibility_state::maybe_update_for_edge): Likewise.
* feasible-graph.cc (feasible_graph::add_feasibility_problem):
Likewise.
* feasible-graph.h (class infeasible_node): Likewise.
(feasible_graph::add_feasibility_problem): Likewise.
* region-model.cc (region_model::add_constraint): Likewise.
(region_model::maybe_update_for_edge): Likewise.
(region_model::apply_constraints_for_gcond): Likewise.
(region_model::apply_constraints_for_gswitch): Likewise.
(region_model::apply_constraints_for_exception): Likewise.
* region-model.h (class region_model): Likewise for decls.

Signed-off-by: David Malcolm 
---
 gcc/analyzer/diagnostic-manager.cc |  4 ++--
 gcc/analyzer/engine.cc | 16 +++---
 gcc/analyzer/exploded-graph.h  |  9 
 gcc/analyzer/feasible-graph.cc |  7 +++---
 gcc/analyzer/feasible-graph.h  |  9 
 gcc/analyzer/region-model.cc   | 35 --
 gcc/analyzer/region-model.h| 10 -
 7 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/gcc/analyzer/diagnostic-manager.cc 
b/gcc/analyzer/diagnostic-manager.cc
index 0dd375d99e0..a010f4ba1e1 100644
--- a/gcc/analyzer/diagnostic-manager.cc
+++ b/gcc/analyzer/diagnostic-manager.cc
@@ -516,7 +516,7 @@ process_worklist_item (feasible_worklist *worklist,
}
 
   feasibility_state succ_state (fnode->get_state ());
-  rejected_constraint *rc = NULL;
+  std::unique_ptr rc;
   if (succ_state.maybe_update_for_edge (logger, succ_eedge, ))
{
  gcc_assert (rc == NULL);
@@ -560,7 +560,7 @@ process_worklist_item (feasible_worklist *worklist,
  gcc_assert (rc);
  fg->add_feasibility_problem (fnode,
   succ_eedge,
-  rc);
+  std::move (rc));
 
  /* Give up if there have been too many infeasible edges.  */
  if (fg->get_num_infeasible ()
diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc
index 736a41ecdaf..1e7750dcbdc 100644
--- a/gcc/analyzer/engine.cc
+++ b/gcc/analyzer/engine.cc
@@ -4697,7 +4697,7 @@ exploded_path::feasible_p (logger *logger,
 eedge->m_src->m_index,
 eedge->m_dest->m_index);
 
-  rejected_constraint *rc = NULL;
+  std::unique_ptr  rc;
   if (!state.maybe_update_for_edge (logger, eedge, ))
{
  gcc_assert (rc);
@@ -4707,11 +4707,10 @@ exploded_path::feasible_p (logger *logger,
  const program_point _point = src_enode.get_point ();
  const gimple *last_stmt
= src_point.get_supernode ()->get_last_stmt ();
- *out = make_unique (edge_idx, *eedge,
-  last_stmt, rc);
+ *out = ::make_unique (edge_idx, *eedge,
+last_stmt,
+std::move (rc));
}
- else
-   delete rc;
  return false;
}
 
@@ -4837,9 +4836,10 @@ feasibility_state::feasibility_state (const 
feasibility_state )
Otherwise, return false and write to *OUT_RC.  */
 
 bool
-feasibility_state::maybe_update_for_edge (logger *logger,
- const exploded_edge *eedge,
- rejected_constraint **out_rc)
+feasibility_state::
+maybe_update_for_edge (logger *logger,
+  const exploded_edge *eedge,
+  std::unique_ptr *out_rc)
 {
   const exploded_node _enode = *eedge->m_src;
   const program_point _point = src_enode.get_point ();
diff --git a/gcc/analyzer/exploded-graph.h b/gcc/analyzer/exploded-graph.h
index 6e9a5ef58c7..cb64c2a180a 100644
--- a/gcc/analyzer/exploded-graph.h
+++ b/gcc/analyzer/exploded-graph.h
@@ -949,18 +949,17 @@ public:
   feasibility_problem (unsigned eedge_idx,
   const exploded_edge ,
   const gimple *last_stmt,
-  rejected_constraint *rc)
+  std::unique_ptr rc)
   : m_eedge_idx (eedge_idx), m_eedge (eedge),
-m_last_stmt (last_stmt), m_rc (rc)
+m_last_stmt (last_stmt), m_rc (std::move (rc))
   {}
-  ~feasibility_problem () 

Re: [WIP RFC] analyzer: Move gcc.dg/analyzer tests to c-c++-common (3) [PR96395]

2023-09-13 Thread David Malcolm via Gcc-patches
On Mon, 2023-09-11 at 19:44 +0200, priour...@gmail.com wrote:
> From: benjamin priour 
> 
> Hi,
> 
> Patch below is mostly done, just have to check the formatting.
> Although, I'd like your feedback on how to manage named_constants
> from enum in C++.
> 
> I've checked and the analyzer works as expected with them.
> However, C++ FE makes it so that given
> 
> enum
> {
>   NAMED = 0x1
> };
> 
> then in analyzer-language.cc:maybe_stash_named_constant
> 
>     tree t = tu.lookup_constant_by_id (id);
>     ...
>     logger->log ("%qs: %qE", name, t);
> 
> t is printed as 1 in C, but NAMED in C++.
> Should it be left as a "FE specificity",
> or should we aim for 1 in C++ as well ?

Thanks for the patch.

It seems that the patch consists of three parts:
(a) adding kf_bzero
(b) refactoring/moving c_translation_unit so it can be used by g++
(c) a whole bunch of tests being moved, some of which may depend on (a)
and (b); are there some that don't?

Given how big the (c) changes look in a "diff", I'd prefer the (a)
and (b) changes to be split out as preliminaries, for readability.  

Presumably this change could be made part of (a):
* gcc.dg/analyzer/bzero-1.c: Moved to...
* c-c++-common/analyzer/bzero-1.c: ...here.

Does anything in the patch actually use (b)?  IIRC it's used by the
file-descriptor tests, so fd-*.c, pipe-*.c, etc.

As for your question, lookup_constant_by_id should return an
INTEGER_CST (or NULL_TREE), so presumably we want t to be printed as
'1' with both frontends.

Dave



Re: [PATCH] testsuite work-around compound-assignment-1.c C++ failures on various targets [PR111377]

2023-09-13 Thread David Malcolm via Gcc-patches
On Tue, 2023-09-12 at 09:02 +0200, Jakub Jelinek wrote:
> On Mon, Sep 11, 2023 at 11:11:30PM +0200, Jakub Jelinek via Gcc-
> patches wrote:
> > On Mon, Sep 11, 2023 at 07:27:57PM +0200, Benjamin Priour via Gcc-
> > patches wrote:
> > > Thanks for the report,
> > > 
> > > After investigation it seems the location of the new dejagnu
> > > directive for
> > > C++ differs depending on the configuration.
> > > The expected warning is still emitted, but its location differs
> > > slightly.
> > > I expect it to be not an issue per se of the analyzer, but a
> > > divergence in
> > > the FE between the two configurations.
> > 
> > I think the divergence is whether called_by_test_5b returns the
> > struct
> > in registers or in memory.  If in memory (like in the x86_64 -m32
> > case), we have
> >   [compound-assignment-1.c:71:21] D.3191 = called_by_test_5b ();
> > [return slot optimization]
> >   [compound-assignment-1.c:71:21 discrim 1] D.3191 ={v}
> > {CLOBBER(eol)};
> >   [compound-assignment-1.c:72:1] return;
> > in the IL, while if in registers (like x86_64 -m64 case), just
> >   [compound-assignment-1.c:71:21] D.3591 = called_by_test_5b ();
> >   [compound-assignment-1.c:72:1] return;
> > 
> > If you just want to avoid the differences, putting } on the same
> > line as the
> > call might be a usable workaround for that.
> 
> Here is the workaround in patch form.  Tested on x86_64-linux -m32/-
> m64, ok
> for trunk?

Yes, thanks!

Dave

> 
> 2023-09-12  Jakub Jelinek  
> 
> PR testsuite/111377
> * c-c++-common/analyzer/compound-assignment-1.c (test_5b):
> Move
> closing } to the same line as the call to work-around
> differences in
> diagnostics line.
> 
> --- gcc/testsuite/c-c++-common/analyzer/compound-assignment-
> 1.c.jj  2023-09-11 11:05:47.523727789 +0200
> +++ gcc/testsuite/c-c++-common/analyzer/compound-assignment-1.c 2023-
> 09-12 08:58:52.854231161 +0200
> @@ -68,5 +68,8 @@ called_by_test_5b (void)
>  
>  void test_5b (void)
>  {
> -  called_by_test_5b ();
> -} /* { dg-warning "leak of '.ptr_wrapper::ptr'" "" {
> target c++ } } */
> +  called_by_test_5b (); }
> +/* { dg-warning "leak of '.ptr_wrapper::ptr'" "" { target
> c++ } .-1 } */
> +/* The closing } above is intentionally on the same line as the
> call, because
> +   otherwise the exact line of the diagnostics depends on whether
> the
> +   called_by_test_5b () call satisfies aggregate_value_p or not.  */
> 
> 
> Jakub
> 



Re: [PATCH] ggc, jit: forcibly clear GTY roots in jit

2023-09-12 Thread David Malcolm via Gcc-patches
On Tue, 2023-09-12 at 13:36 -0400, Antoni Boucher wrote:
> In the mean time, here's a (Rust) reproducer for the issue:
> 
> fn main() {
>     for _ in 0..5 {
>     let context = Context::default();
>     context.add_command_line_option("-flto");
>    
> context.set_optimization_level(OptimizationLevel::Aggressive);
>     context.add_driver_option("-nostdlib");
> 
>     let int_type = context.new_type::();
> 
>     let function = context.new_function(None,
> FunctionType::Exported, int_type, &[], "main", false);
>     let block = function.new_block("start");
>     let value = context.new_rvalue_from_int(int_type, 42);
>     block.end_with_return(None, value);
> 
>     context.compile_to_file(OutputKind::Executable, "my_exe");
>     }
> }

Can we get this in bugzilla please?  If you generate a .c version of
the context (via gcc_jit_context_dump_reproducer_to_file) I can try to
debug it.

Thanks
Dave



Re: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]

2023-09-11 Thread David Malcolm via Gcc-patches
On Sun, 2023-09-10 at 22:12 -0400, Eric Feng wrote:
> On Thu, Sep 7, 2023 at 1:28 PM David Malcolm 
> wrote:
> 
> > On Mon, 2023-09-04 at 22:13 -0400, Eric Feng wrote:
> > 

[...snip...]

> > 
> > 
> > I know you're emulating the old behavior I implemented way back in
> > cpychecker, but I don't like that behavior :(
> > 
> > Specifically, although the patch improves the behavior for symbolic
> > values, it regresses the precision of wording for the concrete
> > values
> > case.  If we have e.g. a concrete ob_refcnt of 2, whereas we only
> > have
> > 1 pointer, then it's more readable to say:
> > 
> >   warning: expected ‘obj’ to have reference count: ‘1’ but
> > ob_refcnt
> > field is ‘2’
> > 
> > than:
> > 
> >   warning: expected ‘obj’ to have reference count: N + ‘1’ but
> > ob_refcnt
> > field is N + ‘2’
> > 
> > ...and we shouldn't quote concrete numbers, the message should be:
> > 
> >   warning: expected ‘obj’ to have reference count of 1 but
> > ob_refcnt field
> > is 2
> 
> 
> > or better:
> > 
> >   warning: ‘*obj’ is pointed to by 1 pointer but 'ob_refcnt' field
> > is 2
> > 
> > 
> > Can you move the unwrapping of the svalue from the tests below into
> > the
> > emit vfunc?  That way the m_actual_refcnt doesn't have to be a
> > constant_svalue; you could have logic in the emit vfunc to print
> > readable messages based on what kind of svalue it is.
> > 
> > Rather than 'N', it might be better to say 'initial'; how about:
> > 
> >   warning: ‘*obj’ is pointed to by 0 additional pointers but
> > 'ob_refcnt'
> > field has increased by 1
> >   warning: ‘*obj’ is pointed to by 1 additional pointer but
> > 'ob_refcnt'
> > field has increased by 2
> >   warning: ‘*obj’ is pointed to by 1 additional pointer but
> > 'ob_refcnt'
> > field is unchanged
> >   warning: ‘*obj’ is pointed to by 2 additional pointers but
> > 'ob_refcnt'
> > field has decreased by 1
> >   warning: ‘*obj’ is pointed to by 1 fewer pointers but 'ob_refcnt'
> > field
> > is unchanged
> > 
> > and similar?
> > 
> 
> That makes sense to me as well (indeed I was just emulating the old
> behavior)! Will experiment and keep you posted on a revised patch
> with this
> in mind.  This is somewhat of a minor detail but can we emit ‘*obj’
> as
> bolded text in the diagnostic message? Currently, I can emit this
> (including the asterisk) like so: '*%E'. But unlike using %qE, it
> doesn't
> bold the body of the single quotations. Is this possible?

Yes.

You could use %< and %> to get the colorized (and localized) quotes
(see pretty-print.cc), but better would probably be to pass a tree for
the *obj, rather than obj.  You can make this by building a MEM_REF
tree node wrapping the pointer (you can see an example of this in the
RK_SYMBOLIC case of region_model::get_representative_path_var_1).

Dave



Re: [PATH] [CLEANUP] Remove trailing whitespace characters

2023-09-11 Thread David Malcolm via Gcc-patches
On Sun, 2023-09-10 at 16:36 +0200, Guillaume Gomez wrote:
> When going through the code, I saw a lot of trailing whitespace
> characters so I decided to write a small script that would remove
> them. I didn't expect there would be so many though... Not sure if
> patch with so many changes are accepted like this or if I should send
> more focused one.

I'm not sure either.

Some notes on the patch:

- the ChangeLog sensibly makes use of "Likewise", but for the initial
file in each ChangeLog it incorrectly also says "Likewise".  When these
are copied into the individual ChangeLog files by the "Daily bump"
cronjob, the Subject line from the commit won't be visible [1], so the
thing that "Likewise" refers to won't be present.  So that initial file
in each category should read "Remove trailing whitespace characters".

- the patch touches the testsuite.  Note that not all source files in
the testsuite are UTF-8 encoded, and we want the testsuite to contain a
variety of source formatting idioms (and examples of badly formatted
source code).

- some of our source files use U+000C, the form feed character, and the
patch eliminates these.  I think this is an old convention used to
indicate a major change of topic within the source file.  Perhaps it
leads to a page break when printing the source file?  Personally I
dislike this convention, and feel a suitable big comment line would be
clearer such as:

/* Name of new topic.

   General comments about the new topic, where useful.  */

for such "high-level" source file organizational boundaries, but perhaps
people like and use the form feed characters?

Hope this is constructive
Dave

[1] see e.g. a134b6ce8e5c589f8c1b58cdf124cd4a916b0e8c


> 
> Anyway, for posterity, here is the python script I used:
> 
> ```
> from os import listdir
> from os.path import isfile, join
> 
> 
> def clean_file(p):
>     if not p.endswith(".cc") and not p.endswith(".h"):
>     return
>     with open(p, 'r', encoding='utf8') as f:
>     content = f.read().split('\n')
>     updated = 0
>     i = 0
>     while i < len(content):
>     s = content[i].rstrip()
>     if s != content[i]:
>     updated += 1
>     content[i] = s
>     i += 1
>     if updated == 0:
>     return
>     with open(p, 'w', encoding='utf8') as f:
>     f.write('\n'.join(content))
> 
> 
> def recur_read(p):
>     for f in listdir(p):
>     full_path = join(p, f)
>     if isfile(full_path):
>     clean_file(full_path)
>     else:
>     recur_read(full_path)
> 
> recur_read(".")
> ```
> 
> Cordially.



Re: [Patch] OpenMP (C only): omp allocate - extend parsing support, improve diagnostic (was: [Patch] OpenMP (C only): omp allocate - handle stack vars, improve diagnostic)

2023-09-11 Thread David Malcolm via Gcc-patches
On Mon, 2023-09-11 at 13:54 +0200, Jakub Jelinek wrote:
> Hi!
> 
> One question to David below, CCed.
> 
> On Mon, Sep 11, 2023 at 01:44:07PM +0200, Tobias Burnus wrote:

[...]

> 
> > +
> > + if (DECL_SOURCE_LOCATION (allocator) >
> > DECL_SOURCE_LOCATION (var))
> > +   {
> > + error_at (OMP_CLAUSE_LOCATION (nl),
> > +   "allocator variable %qD must be declared
> > before %qD",
> > +   allocator, var);
> > + inform (DECL_SOURCE_LOCATION (allocator), "declared
> > here");
> > + inform (DECL_SOURCE_LOCATION (var), "declared here");
> 
> I think this will be confusing to users when the inform is the same
> in both
> cases.  I'd use "allocator declared here" in the first case.
> 
> And, am really not sure if one can just simply compare location_t
> like that.
> Isn't there some function which determines what source location is
> before
> another one?  David?

Indeed, the numerical ordering of location_t values doesn't fully
correspond to declaration order.

Please use
  linemap_compare_locations
or
  linemap_location_before_p

> 
> > +    if (EXPR_LOCATION (*l) < DECL_SOURCE_LOCATION
> > (var))
> > +  break;
> 
> Likewise.
> 


Dave
> 


[pushed] analyzer: basic support for computed gotos (PR analyzer/110529)

2023-09-07 Thread David Malcolm via Gcc-patches
PR analyzer/110529 notes that -fanalyzer was giving up on execution
paths that follow a computed goto, due to ignoring CFG edges with the
flag EDGE_ABNORMAL set.

This patch implements enough handling for them to allow analysis of
such execution paths to continue.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3796-g1b761fede44afa.

gcc/analyzer/ChangeLog:
PR analyzer/110529
* program-point.cc (program_point::on_edge): Don't reject
EDGE_ABNORMAL for computed gotos.
* region-model.cc (region_model::maybe_update_for_edge): Handle
computed goto statements.
(region_model::apply_constraints_for_ggoto): New.
* region-model.h (region_model::apply_constraints_for_ggoto): New decl.
* supergraph.cc (supernode::get_label): New.
* supergraph.h (supernode::get_label): New decl.

gcc/testsuite/ChangeLog:
PR analyzer/110529
* c-c++-common/analyzer/computed-goto-1.c: New test.
* gcc.dg/analyzer/computed-goto-pr110529.c: New test.
---
 gcc/analyzer/program-point.cc | 17 +-
 gcc/analyzer/region-model.cc  | 39 +++-
 gcc/analyzer/region-model.h   |  3 +
 gcc/analyzer/supergraph.cc| 13 
 gcc/analyzer/supergraph.h |  2 +
 .../c-c++-common/analyzer/computed-goto-1.c   | 60 +++
 .../gcc.dg/analyzer/computed-goto-pr110529.c  | 27 +
 7 files changed, 158 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/analyzer/computed-goto-1.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/computed-goto-pr110529.c

diff --git a/gcc/analyzer/program-point.cc b/gcc/analyzer/program-point.cc
index f2d6490f0c04..d7db2f522394 100644
--- a/gcc/analyzer/program-point.cc
+++ b/gcc/analyzer/program-point.cc
@@ -426,9 +426,22 @@ program_point::on_edge (exploded_graph ,
   {
const cfg_superedge *cfg_sedge = as_a  (succ);
 
-   /* Reject abnormal edges; we special-case setjmp/longjmp.  */
if (cfg_sedge->get_flags () & EDGE_ABNORMAL)
- return false;
+ {
+   const supernode *src_snode = cfg_sedge->m_src;
+   if (gimple *last_stmt = src_snode->get_last_stmt ())
+ if (last_stmt->code == GIMPLE_GOTO)
+   {
+ /* For the program_point aspect here, consider all
+out-edges from goto stmts to be valid; we'll
+consider state separately.  */
+ return true;
+   }
+
+   /* Reject other kinds of abnormal edges;
+  we special-case setjmp/longjmp.  */
+   return false;
+ }
   }
   break;
 
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 999480e55ef7..a351e5cd214b 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -4997,7 +4997,7 @@ region_model::maybe_update_for_edge (const superedge 
,
   if (last_stmt == NULL)
 return true;
 
-  /* Apply any constraints for conditionals/switch statements.  */
+  /* Apply any constraints for conditionals/switch/computed-goto statements.  
*/
 
   if (const gcond *cond_stmt = dyn_cast  (last_stmt))
 {
@@ -5013,6 +5013,12 @@ region_model::maybe_update_for_edge (const superedge 
,
ctxt, out);
 }
 
+  if (const ggoto *goto_stmt = dyn_cast  (last_stmt))
+{
+  const cfg_superedge *cfg_sedge = as_a  ();
+  return apply_constraints_for_ggoto (*cfg_sedge, goto_stmt, ctxt);
+}
+
   /* Apply any constraints due to an exception being thrown.  */
   if (const cfg_superedge *cfg_sedge = dyn_cast  
())
 if (cfg_sedge->get_flags () & EDGE_EH)
@@ -5267,6 +5273,37 @@ region_model::apply_constraints_for_gswitch (const 
switch_cfg_superedge ,
   return sat;
 }
 
+/* Given an edge reached by GOTO_STMT, determine appropriate constraints
+   for the edge to be taken.
+
+   If they are feasible, add the constraints and return true.
+
+   Return false if the constraints contradict existing knowledge
+   (and so the edge should not be taken).  */
+
+bool
+region_model::apply_constraints_for_ggoto (const cfg_superedge ,
+  const ggoto *goto_stmt,
+  region_model_context *ctxt)
+{
+  tree dest = gimple_goto_dest (goto_stmt);
+  const svalue *dest_sval = get_rvalue (dest, ctxt);
+
+  /* If we know we were jumping to a specific label.  */
+  if (tree dst_label = edge.m_dest->get_label ())
+{
+  const label_region *dst_label_reg
+   = m_mgr->get_region_for_label (dst_label);
+  const svalue *dst_label_ptr
+   = m_mgr->get_ptr_svalue (ptr_type_node, dst_label_reg);
+
+  if (!add_constraint (dest_sval, EQ_EXPR, dst_label_ptr, ctxt))
+   return false;
+}
+
+  return true;
+}
+
 /* Apply any constraints due to an exception being thrown 

[pushed] analyzer: fix -Wunused-parameter warnings

2023-09-07 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3793-g18f1f79ec5b1f1.

gcc/analyzer/ChangeLog:
* region-model.h: fix -Wunused-parameter warnings
---
 gcc/analyzer/region-model.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index 625f68805361..1ac3a32b7a41 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -793,8 +793,8 @@ class region_model_context
 class noop_region_model_context : public region_model_context
 {
 public:
-  bool warn (std::unique_ptr d,
-const stmt_finder *custom_finder) override { return false; }
+  bool warn (std::unique_ptr,
+const stmt_finder *) override { return false; }
   void add_note (std::unique_ptr) override;
   void add_event (std::unique_ptr) override;
   void on_svalue_leak (const svalue *) override {}
@@ -1200,7 +1200,7 @@ class test_region_model_context : public 
noop_region_model_context
 {
 public:
   bool warn (std::unique_ptr d,
-const stmt_finder *custom_finder) final override
+const stmt_finder *) final override
   {
 m_diagnostics.safe_push (d.release ());
 return true;
-- 
2.26.3



Re: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]

2023-09-07 Thread David Malcolm via Gcc-patches
On Mon, 2023-09-04 at 22:13 -0400, Eric Feng wrote:

> Hi Dave,

Hi Eric, thanks for the patch.

> 
> Recently I've been working on symbolic value support for the reference
> count checker. I've attached a patch for it below; let me know if it looks
> OK for trunk. Thanks!
> 
> Best,
> Eric
> 
> ---
> 
> This patch enhances the reference count checker in the CPython plugin by
> adding support for symbolic values. Whereas previously we were only able
> to check the reference count of PyObject* objects created in the scope
> of the function; we are now able to emit diagnostics on reference count
> mismatch of objects that were, for example, passed in as a function
> parameter.
> 
> rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but 
> ob_refcnt field is N + ‘2’
> 6 |   return obj;
>   |  ^~~

[...snip...]

>  create mode 100644 gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> 
> diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c 
> b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> index bf1982e79c3..d7ecd7fce09 100644
> --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> @@ -314,17 +314,20 @@ public:
>{
>  diagnostic_metadata m;
>  bool warned;
> -// just assuming constants for now
> -auto actual_refcnt
> - = m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant ();
> -auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue ()->get_constant 
> ();
> -warned = warning_meta (rich_loc, m, get_controlling_option (),
> -"expected %qE to have "
> -"reference count: %qE but ob_refcnt field is: %qE",
> -m_reg_tree, actual_refcnt, ob_refcnt);
> -
> -// location_t loc = rich_loc->get_loc ();
> -// foo (loc);
> +
> +const auto *actual_refcnt_constant
> + = m_actual_refcnt->dyn_cast_constant_svalue ();
> +const auto *ob_refcnt_constant = m_ob_refcnt->dyn_cast_constant_svalue 
> ();
> +if (!actual_refcnt_constant || !ob_refcnt_constant)
> +  return false;
> +
> +auto actual_refcnt = actual_refcnt_constant->get_constant ();
> +auto ob_refcnt = ob_refcnt_constant->get_constant ();
> +warned = warning_meta (
> + rich_loc, m, get_controlling_option (),
> + "expected %qE to have "
> + "reference count: N + %qE but ob_refcnt field is N + %qE",
> + m_reg_tree, actual_refcnt, ob_refcnt);
>  return warned;

I know you're emulating the old behavior I implemented way back in
cpychecker, but I don't like that behavior :(

Specifically, although the patch improves the behavior for symbolic
values, it regresses the precision of wording for the concrete values
case.  If we have e.g. a concrete ob_refcnt of 2, whereas we only have
1 pointer, then it's more readable to say:

  warning: expected ‘obj’ to have reference count: ‘1’ but ob_refcnt
field is ‘2’

than:

  warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt field 
is N + ‘2’

...and we shouldn't quote concrete numbers, the message should be:

  warning: expected ‘obj’ to have reference count of 1 but ob_refcnt field is 2

or better:

  warning: ‘*obj’ is pointed to by 1 pointer but 'ob_refcnt' field is 2


Can you move the unwrapping of the svalue from the tests below into the
emit vfunc?  That way the m_actual_refcnt doesn't have to be a
constant_svalue; you could have logic in the emit vfunc to print
readable messages based on what kind of svalue it is.

Rather than 'N', it might be better to say 'initial'; how about:

  warning: ‘*obj’ is pointed to by 0 additional pointers but 'ob_refcnt' field 
has increased by 1
  warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt' field 
has increased by 2
  warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt' field 
is unchanged
  warning: ‘*obj’ is pointed to by 2 additional pointers but 'ob_refcnt' field 
has decreased by 1
  warning: ‘*obj’ is pointed to by 1 fewer pointers but 'ob_refcnt' field is 
unchanged

and similar?

Maybe have a flag that tracks whether we're talking about a concrete
value that's absolute versus a concrete value that's relative to the
initial value?


[...snip...]


> @@ -369,6 +368,19 @@ increment_region_refcnt (hash_map 
> , const region *key)
>refcnt = existed ? refcnt + 1 : 1;
>  }
>  
> +static const region *
> +get_region_from_svalue (const svalue *sval, region_model_manager *mgr)
> +{
> +  const auto *region_sval = sval->dyn_cast_region_svalue ();
> +  if (region_sval)
> +return region_sval->get_pointee ();
> +
> +  const auto *initial_sval = sval->dyn_cast_initial_svalue ();
> +  if (initial_sval)
> +return mgr->get_symbolic_region (initial_sval);
> +
> +  return nullptr;
> +}

This is dereferencing a pointer, right?

Can the caller use region_model::deref_rvalue instead?


[...snip...]

> +static void
> +unwrap_any_ob_refcnt_sval 

Re: [PATCH v2] analyzer: Call off a superseding when diagnostics are unrelated [PR110830]

2023-09-06 Thread David Malcolm via Gcc-patches
On Wed, 2023-09-06 at 21:16 +0200, priour...@gmail.com wrote:

[...snip...]

> Signed-off-by: benjamin priour 
> Co-authored-by: david malcolm 

Please also add:

  Signed-off-by: David Malcolm 

[...snip...]

> 
> +static bool
> +compatible_epath_p (const exploded_path *lhs_path,
> +   const exploded_path *rhs_path)
> +{
> +  gcc_assert (lhs_path);
> +  gcc_assert (rhs_path);
> +  gcc_assert (rhs_path->length () > 0);
> +  gcc_assert (rhs_path->length () > 0);
> +  int lhs_eedge_idx = lhs_path->length () -1;
> +  int rhs_eedge_idx = rhs_path->length () -1;

Minor formatting nit: there should be a space between the '-' and the
'1' in the above lines, hence:

  int lhs_eedge_idx = lhs_path->length () - 1;
  int rhs_eedge_idx = rhs_path->length () - 1;

[...snip...]

OK for trunk with those changes

Thanks
Dave



Re: [PATCH] analyzer: Move gcc.dg/analyzer tests to c-c++-common (2) [PR96395]

2023-09-06 Thread David Malcolm via Gcc-patches
On Wed, 2023-09-06 at 15:50 +0200, Benjamin Priour wrote:
> Hi David,
> Thanks for the review.
> 
> 
> 
> On Tue, Sep 5, 2023 at 1:53 PM David Malcolm 
> wrote:
> 
> > On Mon, 2023-09-04 at 20:00 +0200, priour...@gmail.com wrote:
> > 
> > 
> [...snip...]
> 
> 
> > All of these "new" tests (apart from the "-noexcept" ones) look
> > like
> > they're meant to be existing tests that were moved, but where the
> > copy
> > of the test in gcc.dg/analyzer didn't get deleted, so they show up
> > as a
> > duplicate.  See the details below.
> > 
> 
> > >   * c-c++-common/analyzer/file-pr58237-noexcept.c: New test.
> > 
> > When duplicating a test like this, the test isn't entirely "new",
> > so
> > please say something like this in the ChangeLog entry, to make it
> > clear
> > where it came from:
> > 
> > 
> I actually wasn't sure about these -noexcept tests. They were part
> of gcc.dg/analyzer, thus only gcc was running them. Exceptions
> were not disabled *explicitly*, but since it was C, they weren't
> enabled
> either.
> 
> Therefore, the -noexcept tests are basically a copy, but with an
> explicit
> -fno-exceptions specification.
> When I duplicated them in that way I was thinking about making it
> clear
> that these tests fail in C++ with exceptions enabled, so that we
> would
> already
> have easy-to-spot failing tests to challenge a future exception
> support.

Ah, OK; let's go with your approach.

> 
> Though perhaps *not* duplicating the tests but rather simply specify
> -fno-exceptions,
> with a comment "Fails with exceptions" may be better.

[...snip...]

> > > @@ -45,7 +45,7 @@ void test(int n)
> > >    struct iter *it = iter_new (0, n, 1);
> > >    while (!iter_done_p (it))
> > >  {
> > > -  __analyzer_eval (it->val < n); /* { dg-warning "TRUE"
> > > "true" {
> > xfail *-*-* } } */
> > > +  __analyzer_eval (it->val < n); /* { dg-warning "TRUE"
> > > "true" } */
> > >    /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
> > >    /* TODO(xfail^^^): ideally we ought to figure out i > 0
> > > after 1st
> > iteration.  */
> > > 
> > 
> > Presumably due to the change to
> > region_model::add_constraints_from_binop, right?
> > Looking at that dg-bogus "UNKNOWN", do we still get an UNKNOWN
> > here, or
> > can that line be removed?
> > If so, then the 3rd comment can presumably become:
> > 
> > 
> The bogus here still makes sense - without it there is an excess error
> -.
> I had checked for it because I too thought it could be removed.
> If I remember it correctly, we get UNKNOWN during the widening pass.

(nods)

> 
> 
> > >    /* TODO: ideally we ought to figure out i > 0 after 1st
> > iteration.  */
> > 
> > [...snip...]
> > 
> > 
> > 
> [...snip...]
> 
> Thanks for spotting the files I forgot to remove from gcc.dg.
> Sorry about them, I had messed up my test folder when checking for
> arm-eabi,
> and I apparently missed some duplicates when retrieving my save.
> 
> As for the files the likes of inlining-*.c, i.e. noted as Moved
> to/...here.
> at the end of the ChangeLog, some tests checking for multiline
> outputs
> are so heavily rewritten that git marks them as Removed/New test
> instead of moved. I've manually edited that, but perhaps I shouldn't
> ?
> 
> I have successfully regstrapped the improvements you suggested.

Thanks.  Did you want me to doublecheck the updated patch?  Otherwise
feel free to push it to trunk.


> Part 3 of this series of patches I hope will be regstrapped for
> Friday.

Thanks; I'm impressed at how much progress you've made on this problem.

Dave



Re: [PATCH 2/2] Experiment with adding an error code to an error

2023-09-06 Thread David Malcolm via Gcc-patches
On Wed, 2023-09-06 at 15:53 +0200, Arthur Cohen wrote:
> From: David Malcolm 

This is probably something for the gcc-rust maintainers to review
(rather than me self-reviewing with my "diagnostics maintainer" hat
on).

Doesn't have a ChangeLog entry, FWIW.
Doesn't have a signed-off-by, so here's one:

Signed-off-by: David Malcolm 

[...snip...]

> diff --git a/gcc/rust/rust-gcc-diagnostics.cc b/gcc/rust/rust-gcc-
> diagnostics.cc
> index 72d2c068541..58c0a5654ea 100644
> --- a/gcc/rust/rust-gcc-diagnostics.cc
> +++ b/gcc/rust/rust-gcc-diagnostics.cc

[...snip...]

> +void
> +rust_be_error_at (const RichLocation , const ErrorCode
> code,
> + const std::string )
> +{
> +  /* TODO: 'error_at' would like a non-'const' 'rich_location *'. 

The above comment should refer to "error_meta", rather than
"error_at"...

> */
> +  rich_location _loc = const_cast (location.get
> ());
> +  diagnostic_metadata m;
> +  rust_error_code_rule rule (code);
> +  m.add_rule (rule);
> +  error_meta (_loc, m, "%s", errmsg.c_str ());

... to match this call.

[...snip...]

Otherwise, LGTM, but as I said, this is more in the gcc-rust
maintainers' area.

Dave



Re: [PATCH 1/2] diagnostics: add error_meta

2023-09-06 Thread David Malcolm via Gcc-patches
On Wed, 2023-09-06 at 15:53 +0200, Arthur Cohen wrote:
> From: David Malcolm 

I guess I can review this patch :)

Needs a ChangeLog entry, so here's one:

gcc/ChangeLog
* diagnostic-core.h (error_meta): New decl.
* diagnostic.cc (error_meta): New.

Also, needs a signed-off-by, so here's one:

Signed-off-by: David Malcolm 


OK for trunk with those added.

Thanks
Dave



> 
> ---
>  gcc/diagnostic-core.h |  3 +++
>  gcc/diagnostic.cc | 15 +++
>  2 files changed, 18 insertions(+)
> 
> diff --git a/gcc/diagnostic-core.h b/gcc/diagnostic-core.h
> index 7334c79e8e6..c9e27fd2e6e 100644
> --- a/gcc/diagnostic-core.h
> +++ b/gcc/diagnostic-core.h
> @@ -92,6 +92,9 @@ extern void error_n (location_t, unsigned
> HOST_WIDE_INT, const char *,
>  extern void error_at (location_t, const char *, ...)
> ATTRIBUTE_GCC_DIAG(2,3);
>  extern void error_at (rich_location *, const char *, ...)
>    ATTRIBUTE_GCC_DIAG(2,3);
> +extern void error_meta (rich_location *, const diagnostic_metadata
> &,
> +   const char *, ...)
> +  ATTRIBUTE_GCC_DIAG(3,4);
>  extern void fatal_error (location_t, const char *, ...)
> ATTRIBUTE_GCC_DIAG(2,3)
>   ATTRIBUTE_NORETURN;
>  /* Pass one of the OPT_W* from options.h as the second parameter. 
> */
> diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
> index c523f215bae..65c0cfbf11a 100644
> --- a/gcc/diagnostic.cc
> +++ b/gcc/diagnostic.cc
> @@ -2108,6 +2108,21 @@ error_at (rich_location *richloc, const char
> *gmsgid, ...)
>    va_end (ap);
>  }
>  
> +/* Same as above, but with metadata.  */
> +
> +void
> +error_meta (rich_location *richloc, const diagnostic_metadata
> ,
> +   const char *gmsgid, ...)
> +{
> +  gcc_assert (richloc);
> +
> +  auto_diagnostic_group d;
> +  va_list ap;
> +  va_start (ap, gmsgid);
> +  diagnostic_impl (richloc, , -1, gmsgid, , DK_ERROR);
> +  va_end (ap);
> +}
> +
>  /* "Sorry, not implemented."  Use for a language feature which is
>     required by the relevant specification but not implemented by
> GCC.
>     An object file will not be produced.  */



[pushed] analyzer: implement kf_strstr [PR105899]

2023-09-06 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3741-gf2d7a4001a3388.

gcc/analyzer/ChangeLog:
PR analyzer/105899
* kf.cc (class kf_strstr): New.
(kf_strstr::impl_call_post): New.
(register_known_functions): Register it.

gcc/testsuite/ChangeLog:
PR analyzer/105899
* c-c++-common/analyzer/strstr-1.c: New test.
---
 gcc/analyzer/kf.cc| 96 +++
 .../c-c++-common/analyzer/strstr-1.c  | 54 +++
 2 files changed, 150 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/analyzer/strstr-1.c

diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index 8a45c329c282..92959891fe44 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -1585,6 +1585,100 @@ public:
   }
 };
 
+/* Handler for "strstr" and "__builtin_strstr".
+ extern char *strstr (const char* str, const char* substr);
+   See e.g. https://en.cppreference.com/w/c/string/byte/strstr  */
+
+class kf_strstr : public builtin_known_function
+{
+public:
+  bool matches_call_types_p (const call_details ) const final override
+  {
+return (cd.num_args () == 2
+   && cd.arg_is_pointer_p (0)
+   && cd.arg_is_pointer_p (1));
+  }
+  enum built_in_function builtin_code () const final override
+  {
+return BUILT_IN_STRSTR;
+  }
+  void impl_call_pre (const call_details ) const final override
+  {
+cd.check_for_null_terminated_string_arg (0);
+cd.check_for_null_terminated_string_arg (1);
+  }
+  void impl_call_post (const call_details ) const final override;
+};
+
+void
+kf_strstr::impl_call_post (const call_details ) const
+{
+  class strstr_call_info : public call_info
+  {
+  public:
+strstr_call_info (const call_details , bool found)
+: call_info (cd), m_found (found)
+{
+}
+
+label_text get_desc (bool can_colorize) const final override
+{
+  if (m_found)
+   return make_label_text (can_colorize,
+   "when %qE returns non-NULL",
+   get_fndecl ());
+  else
+   return make_label_text (can_colorize,
+   "when %qE returns NULL",
+   get_fndecl ());
+}
+
+bool update_model (region_model *model,
+  const exploded_edge *,
+  region_model_context *ctxt) const final override
+{
+  const call_details cd (get_call_details (model, ctxt));
+  if (tree lhs_type = cd.get_lhs_type ())
+   {
+ region_model_manager *mgr = model->get_manager ();
+ const svalue *result;
+ if (m_found)
+   {
+ const svalue *str_sval = cd.get_arg_svalue (0);
+ const region *str_reg
+   = model->deref_rvalue (str_sval, cd.get_arg_tree (0),
+  cd.get_ctxt ());
+ /* We want str_sval + OFFSET for some unknown OFFSET.
+Use a conjured_svalue to represent the offset,
+using the str_reg as the id of the conjured_svalue.  */
+ const svalue *offset
+   = mgr->get_or_create_conjured_svalue (size_type_node,
+ cd.get_call_stmt (),
+ str_reg,
+ conjured_purge (model,
+ ctxt));
+ result = mgr->get_or_create_binop (lhs_type, POINTER_PLUS_EXPR,
+str_sval, offset);
+   }
+ else
+   result = mgr->get_or_create_int_cst (lhs_type, 0);
+ cd.maybe_set_lhs (result);
+   }
+  return true;
+}
+  private:
+bool m_found;
+  };
+
+  /* Body of kf_strstr::impl_call_post.  */
+  if (cd.get_ctxt ())
+{
+  cd.get_ctxt ()->bifurcate (make_unique (cd, false));
+  cd.get_ctxt ()->bifurcate (make_unique (cd, true));
+  cd.get_ctxt ()->terminate_path ();
+}
+}
+
 class kf_ubsan_bounds : public internal_known_function
 {
   /* Empty.  */
@@ -1806,6 +1900,8 @@ register_known_functions (known_function_manager )
 kfm.add ("__builtin_strndup", make_unique ());
 kfm.add ("strlen", make_unique ());
 kfm.add ("__builtin_strlen", make_unique ());
+kfm.add ("strstr", make_unique ());
+kfm.add ("__builtin_strstr", make_unique ());
 
 register_atomic_builtins (kfm);
 register_varargs_builtins (kfm);
diff --git a/gcc/testsuite/c-c++-common/analyzer/strstr-1.c 
b/gcc/testsuite/c-c++-common/analyzer/strstr-1.c
new file mode 100644
index ..469e6a817d0d
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/analyzer/strstr-1.c
@@ -0,0 +1,54 @@
+/* See e.g. https://en.cppreference.com/w/c/string/byte/strstr  */
+
+/* { dg-additional-options "-fpermissive" { target c++ } } */
+
+#include 

[pushed] analyzer: implement kf_strncpy [PR105899]

2023-09-06 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3740-gb51cde34d4e750.

gcc/analyzer/ChangeLog:
PR analyzer/105899
* kf.cc (class kf_strncpy): New.
(kf_strncpy::impl_call_post): New.
(register_known_functions): Register it.
* region-model.cc (region_model::read_bytes): Handle unknown
number of bytes.

gcc/testsuite/ChangeLog:
PR analyzer/105899
* c-c++-common/analyzer/null-terminated-strings-2.c: New test.
* c-c++-common/analyzer/overlapping-buffers.c: Update dg-bogus
directives to avoid clashing with note from <string.h> that might
happen to have the same line number.  Add strncpy test coverage.
* c-c++-common/analyzer/strncpy-1.c: New test.
* gcc.dg/analyzer/null-terminated-strings-1.c
(test_filled_nonzero): New.
(test_filled_zero): New.
(test_filled_symbolic): New.
---
 gcc/analyzer/kf.cc| 182 ++
 gcc/analyzer/region-model.cc  |   2 +
 .../analyzer/null-terminated-strings-2.c  |  17 ++
 .../analyzer/overlapping-buffers.c|  24 ++-
 .../c-c++-common/analyzer/strncpy-1.c | 157 +++
 .../analyzer/null-terminated-strings-1.c  |  24 +++
 6 files changed, 398 insertions(+), 8 deletions(-)
 create mode 100644 
gcc/testsuite/c-c++-common/analyzer/null-terminated-strings-2.c
 create mode 100644 gcc/testsuite/c-c++-common/analyzer/strncpy-1.c

diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index a62227729991..8a45c329c282 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -1375,6 +1375,186 @@ make_kf_strlen ()
   return make_unique ();
 }
 
+/* Handler for "strncpy" and "__builtin_strncpy".
+   See e.g. https://en.cppreference.com/w/c/string/byte/strncpy
+
+ extern char *strncpy (char *dst, const char *src, size_t count);
+
+   Handle this by splitting into two outcomes:
+   (a) truncated read from "src" of "count" bytes,
+   writing "count" bytes to "dst"
+   (b) read from "src" of up to (and including) the null terminator,
+   where the number of bytes read < "count" bytes,
+   writing those bytes to "dst", and zero-filling the rest,
+   up to "count".  */
+
+class kf_strncpy : public builtin_known_function
+{
+public:
+  bool matches_call_types_p (const call_details ) const final override
+  {
+return (cd.num_args () == 3
+   && cd.arg_is_pointer_p (0)
+   && cd.arg_is_pointer_p (1)
+   && cd.arg_is_integral_p (2));
+  }
+  enum built_in_function builtin_code () const final override
+  {
+return BUILT_IN_STRNCPY;
+  }
+  void impl_call_post (const call_details ) const final override;
+};
+
+void
+kf_strncpy::impl_call_post (const call_details ) const
+{
+  class strncpy_call_info : public call_info
+  {
+  public:
+strncpy_call_info (const call_details ,
+  const svalue *num_bytes_with_terminator_sval,
+  bool truncated_read)
+: call_info (cd),
+  m_num_bytes_with_terminator_sval (num_bytes_with_terminator_sval),
+  m_truncated_read (truncated_read)
+{
+}
+
+label_text get_desc (bool can_colorize) const final override
+{
+  if (m_truncated_read)
+   return make_label_text (can_colorize,
+   "when %qE truncates the source string",
+   get_fndecl ());
+  else
+   return make_label_text (can_colorize,
+   "when %qE copies the full source string",
+   get_fndecl ());
+}
+
+bool update_model (region_model *model,
+  const exploded_edge *,
+  region_model_context *ctxt) const final override
+{
+  const call_details cd (get_call_details (model, ctxt));
+
+  const svalue *dest_sval = cd.get_arg_svalue (0);
+  const region *dest_reg
+   = model->deref_rvalue (dest_sval, cd.get_arg_tree (0), ctxt);
+
+  const svalue *src_sval = cd.get_arg_svalue (1);
+  const region *src_reg
+   = model->deref_rvalue (src_sval, cd.get_arg_tree (1), ctxt);
+
+  const svalue *count_sval = cd.get_arg_svalue (2);
+
+  /* strncpy returns the initial param.  */
+  cd.maybe_set_lhs (dest_sval);
+
+  const svalue *num_bytes_read_sval;
+  if (m_truncated_read)
+   {
+ /* Truncated read.  */
+ num_bytes_read_sval = count_sval;
+
+ if (m_num_bytes_with_terminator_sval)
+   {
+ /* The terminator is after the limit.  */
+ if (!model->add_constraint (m_num_bytes_with_terminator_sval,
+ GT_EXPR,
+ count_sval,
+ ctxt))
+   return false;
+   }
+ else
+   {
+ /* We don't know where the terminator is, or if there is 

[pushed] analyzer: add ctxt to fill_region/zero_fill_region

2023-09-06 Thread David Malcolm via Gcc-patches
I noticed that region_model's fill_region/zero_fill_region member
functions weren't checking that the write to the region was valid.

Fixed thusly.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3739-gb923978a6ec447.

gcc/analyzer/ChangeLog:
* kf.cc (kf_calloc::impl_call_pre): Pass ctxt to zero_fill_region.
(kf_memset::impl_call_pre): Move responsibility for calling
check_region_for_write to fill_region.
* region-model.cc (region_model::on_assignment): Pass ctxt to
zero_fill_region.
(region_model::fill_region): Add "ctxt" param, using it to call
check_region_for_write.
(region_model::zero_fill_region): Likewise.
* region-model.h (region_model::fill_region): Add "ctxt" param.
(region_model::zero_fill_region): Likewise.

gcc/testsuite/ChangeLog:
* gcc.dg/plugin/analyzer_cpython_plugin.c: Pass ctxt to
zero_fill_region.
---
 gcc/analyzer/kf.cc|  7 ++-
 gcc/analyzer/region-model.cc  | 19 ++-
 gcc/analyzer/region-model.h   |  7 +--
 .../gcc.dg/plugin/analyzer_cpython_plugin.c   |  2 +-
 4 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index e5bd7459f271..a62227729991 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -358,7 +358,7 @@ kf_calloc::impl_call_pre (const call_details ) const
 = model->get_or_create_region_for_heap_alloc (prod_sval, cd.get_ctxt ());
   const region *sized_reg
 = mgr->get_sized_region (new_reg, NULL_TREE, prod_sval);
-  model->zero_fill_region (sized_reg);
+  model->zero_fill_region (sized_reg, cd.get_ctxt ());
   if (cd.get_lhs_type ())
 {
   const svalue *ptr_sval
@@ -650,10 +650,7 @@ kf_memset::impl_call_pre (const call_details &cd) const
   const region *sized_dest_reg = mgr->get_sized_region (dest_reg,
NULL_TREE,
num_bytes_sval);
-  model->check_region_for_write (sized_dest_reg,
-nullptr,
-cd.get_ctxt ());
-  model->fill_region (sized_dest_reg, fill_value_u8);
+  model->fill_region (sized_dest_reg, fill_value_u8, cd.get_ctxt ());
 
   cd.maybe_set_lhs (dest_sval);
 }
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 82bc3b2c3826..6be0ad72aaae 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -1204,7 +1204,7 @@ region_model::on_assignment (const gassign *assign, 
region_model_context *ctxt)
/* Any CONSTRUCTOR that survives to this point is either
   just a zero-init of everything, or a vector.  */
if (!CONSTRUCTOR_NO_CLEARING (rhs1))
- zero_fill_region (lhs_reg);
+ zero_fill_region (lhs_reg, ctxt);
unsigned ix;
tree index;
tree val;
@@ -3929,19 +3929,28 @@ region_model::purge_region (const region *reg)
   m_store.purge_region (m_mgr->get_store_manager(), reg);
 }
 
-/* Fill REG with SVAL.  */
+/* Fill REG with SVAL.
+   Use CTXT to report any warnings associated with the write
+   (e.g. out-of-bounds).  */
 
 void
-region_model::fill_region (const region *reg, const svalue *sval)
+region_model::fill_region (const region *reg,
+  const svalue *sval,
+  region_model_context *ctxt)
 {
+  check_region_for_write (reg, nullptr, ctxt);
   m_store.fill_region (m_mgr->get_store_manager(), reg, sval);
 }
 
-/* Zero-fill REG.  */
+/* Zero-fill REG.
+   Use CTXT to report any warnings associated with the write
+   (e.g. out-of-bounds).  */
 
 void
-region_model::zero_fill_region (const region *reg)
+region_model::zero_fill_region (const region *reg,
+   region_model_context *ctxt)
 {
+  check_region_for_write (reg, nullptr, ctxt);
   m_store.zero_fill_region (m_mgr->get_store_manager(), reg);
 }
 
diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index bb50ff12b12e..625f68805361 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -370,8 +370,11 @@ class region_model
   void set_value (tree lhs, tree rhs, region_model_context *ctxt);
   void clobber_region (const region *reg);
   void purge_region (const region *reg);
-  void fill_region (const region *reg, const svalue *sval);
-  void zero_fill_region (const region *reg);
+  void fill_region (const region *reg,
+   const svalue *sval,
+   region_model_context *ctxt);
+  void zero_fill_region (const region *reg,
+region_model_context *ctxt);
   void write_bytes (const region *dest_reg,
const svalue *num_bytes_sval,
const svalue *sval,
diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c 

[PATCH] ggc, jit: forcibly clear GTY roots in jit

2023-09-06 Thread David Malcolm via Gcc-patches
As part of Antoyo's work on supporting LTO in rustc_codegen_gcc, he
noticed an ICE inside libgccjit when compiling certain rust files.

Debugging libgccjit showed that outdated information from a previous
in-memory compile was referring to ad-hoc locations in the previous
compile's line_table.

The issue turned out to be the function decls in internal_fn_fnspec_array
from the previous compile keeping alive the symtab nodes for these
functions, and from this finding other functions in the previous
compile, walking their CFGs, and finding ad-hoc data pointers in an edge
with a location_t using ad-hoc data from the previous line_table
instance, and thus a use-after-free ICE attempting to use this ad-hoc
data.

Previously in toplev::finalize we've fixed global state "piecemeal" by
calling out to individual source_name_cc_finalize functions.  However,
it occurred to me that we have run-time information on where the
GTY-marked pointers are.

Hence this patch takes something of a "big hammer" approach by adding a
new ggc_common_finalize that walks the GC roots, zeroing all of the
pointers.  I stepped through this in the debugger and observed that, in
particular, this correctly zeroes the internal_fn_fnspec_array at the end
of a libgccjit compile.  Antoyo reports that this fixes the ICE for him.
Doing so uncovered an ICE with libgccjit in dwarf2cfi.cc due to reuse of
global variables from the previous compile, which this patch also fixes.

I noticed that in ggc_mark_roots when clearing deletable roots we only
clear the initial element in each gcc_root_tab_t.  This looks like a
latent bug to me, which the patch fixes.  That said, there don't seem to
be any deletable roots where the number of elements != 1.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

OK for trunk?

Thanks
Dave

gcc/ChangeLog:
* dwarf2cfi.cc (dwarf2cfi_cc_finalize): New.
* dwarf2out.h (dwarf2cfi_cc_finalize): New decl.
* ggc-common.cc (ggc_mark_roots): Multiply by rti->nelt when
clearing the deletable gcc_root_tab_t.
(ggc_common_finalize): New.
* ggc.h (ggc_common_finalize): New decl.
* toplev.cc (toplev::finalize): Call dwarf2cfi_cc_finalize and
ggc_common_finalize.
---
 gcc/dwarf2cfi.cc  |  9 +
 gcc/dwarf2out.h   |  1 +
 gcc/ggc-common.cc | 23 ++-
 gcc/ggc.h |  2 ++
 gcc/toplev.cc |  3 +++
 5 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/gcc/dwarf2cfi.cc b/gcc/dwarf2cfi.cc
index ddc728f4ad00..f1777c0a4cf1 100644
--- a/gcc/dwarf2cfi.cc
+++ b/gcc/dwarf2cfi.cc
@@ -3822,4 +3822,13 @@ make_pass_dwarf2_frame (gcc::context *ctxt)
   return new pass_dwarf2_frame (ctxt);
 }
 
+void dwarf2cfi_cc_finalize ()
+{
+  add_cfi_insn = NULL;
+  add_cfi_vec = NULL;
+  cur_trace = NULL;
+  cur_row = NULL;
+  cur_cfa = NULL;
+}
+
 #include "gt-dwarf2cfi.h"
diff --git a/gcc/dwarf2out.h b/gcc/dwarf2out.h
index 870b56a6a372..61a996050ff9 100644
--- a/gcc/dwarf2out.h
+++ b/gcc/dwarf2out.h
@@ -419,6 +419,7 @@ struct fixed_point_type_info
 } scale_factor;
 };
 
+void dwarf2cfi_cc_finalize (void);
 void dwarf2out_cc_finalize (void);
 
 /* Some DWARF internals are exposed for the needs of DWARF-based debug
diff --git a/gcc/ggc-common.cc b/gcc/ggc-common.cc
index bed7a9d4d021..95803fa95a17 100644
--- a/gcc/ggc-common.cc
+++ b/gcc/ggc-common.cc
@@ -86,7 +86,7 @@ ggc_mark_roots (void)
 
   for (rt = gt_ggc_deletable_rtab; *rt; rt++)
 for (rti = *rt; rti->base != NULL; rti++)
-  memset (rti->base, 0, rti->stride);
+  memset (rti->base, 0, rti->stride * rti->nelt);
 
   for (rt = gt_ggc_rtab; *rt; rt++)
 ggc_mark_root_tab (*rt);
@@ -1293,3 +1293,24 @@ report_heap_memory_use ()
 SIZE_AMOUNT (MALLINFO_FN ().arena));
 #endif
 }
+
+/* Forcibly clear all GTY roots.  */
+
+void
+ggc_common_finalize ()
+{
+  const struct ggc_root_tab *const *rt;
+  const_ggc_root_tab_t rti;
+
+  for (rt = gt_ggc_deletable_rtab; *rt; rt++)
+for (rti = *rt; rti->base != NULL; rti++)
+  memset (rti->base, 0, rti->stride * rti->nelt);
+
+  for (rt = gt_ggc_rtab; *rt; rt++)
+for (rti = *rt; rti->base != NULL; rti++)
+  memset (rti->base, 0, rti->stride * rti->nelt);
+
+  for (rt = gt_pch_scalar_rtab; *rt; rt++)
+for (rti = *rt; rti->base != NULL; rti++)
+  memset (rti->base, 0, rti->stride * rti->nelt);
+}
diff --git a/gcc/ggc.h b/gcc/ggc.h
index 34108e2f0061..3280314f8481 100644
--- a/gcc/ggc.h
+++ b/gcc/ggc.h
@@ -368,4 +368,6 @@ inline void gt_ggc_mx (unsigned long int) { }
 inline void gt_ggc_mx (long long int) { }
 inline void gt_ggc_mx (unsigned long long int) { }
 
+extern void ggc_common_finalize ();
+
 #endif
diff --git a/gcc/toplev.cc b/gcc/toplev.cc
index 6c1a6f443c16..db62e3e995ec 100644
--- a/gcc/toplev.cc
+++ b/gcc/toplev.cc
@@ -2336,6 +2336,7 @@ toplev::finalize (void)
   cgraph_cc_finalize ();
   cgraphunit_cc_finalize ();
   symtab_thunks_cc_finalize ();
+  dwarf2cfi_cc_finalize ();
   

Re: [PATCH] analyzer: Move gcc.dg/analyzer tests to c-c++-common (2) [PR96395]

2023-09-05 Thread David Malcolm via Gcc-patches
On Mon, 2023-09-04 at 20:00 +0200, priour...@gmail.com wrote:


> Hi,
> 
> The second patch of this series.
> Regstrapped on x86_64-linux-gnu off trunk 
> a7d052b3200c7928d903a0242b8cfd75d131e374.

Thanks for the patch.

Overall, looks like great work, but there are a few nitpicks to be
fixed, see below...

[...snip...]
 
> Second batch of moving tests from under gcc.dg/analyzer into
> c-c++-common/analyzer.
> 
> Prior to this patch the analyzer was not unwrapping ordering
> binop_svalue, such as LT_EXPR, when evaluating conditions.
> 
> Therefore when an ordering conditional was stored, the analyzer
> was missing out on some constraints, which led to false positives.
> 
> Signed-off-by: benjamin priour 

[...snip...]

>   * gcc.dg/analyzer/inlining-7.c: Moved to...
>   * c-c++-common/analyzer/inlining-7.c: ...here.
>   * c-c++-common/analyzer/compound-assignment-1.c: New test.

All of these "new" tests (apart from the "-noexcept" ones) look like
they're meant to be existing tests that were moved, but where the copy
of the test in gcc.dg/analyzer didn't get deleted, so they show up as a
duplicate.  See the details below.

>   * c-c++-common/analyzer/file-pr58237-noexcept.c: New test.

When duplicating a test like this, the test isn't entirely "new", so
please say something like this in the ChangeLog entry, to make it clear
where it came from:

* c-c++-common/analyzer/file-pr58237-noexcept.c: New test,
based on gcc.dg/analyzer/file-pr58237.c.

>   * c-c++-common/analyzer/fopen-2.c: New test.

Looks like fopen-2.c is a move of the parts of gcc.dg/analyzer/fopen-1.c
that can also be C++, so please state that in the ChangeLog.

>   * c-c++-common/analyzer/infinite-recursion.c: New test.
>   * c-c++-common/analyzer/malloc-paths-9-noexcept.c: New test.

Likewise, please say where the -noexcept.c test came from.


>   * c-c++-common/analyzer/pr109577-noexcept.c: New test.

Likewise for this -noexcept test.

>   * c-c++-common/analyzer/pr93355-localealias-feasibility-noexcept.c: New 
> test.

Likewise for this -noexcept test.

>   * c-c++-common/analyzer/pr94362-1.c: New test.
>   * c-c++-common/analyzer/pr99193-1-noexcept.c: New test.

Likewise for this -noexcept test.

>   * c-c++-common/analyzer/scope-1.c: New test.
>   * c-c++-common/analyzer/setjmp-2.c: New test.
>   * c-c++-common/analyzer/setjmp-5.c: New test.
>   * c-c++-common/analyzer/setjmp-9.c: New test.
>   * c-c++-common/analyzer/signal-4a.c: New test.
>   * c-c++-common/analyzer/signal-4b.c: New test.

[...snip...]

> diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
> index 82bc3b2c382..43b4bc1cc5b 100644
> --- a/gcc/analyzer/region-model.cc
> +++ b/gcc/analyzer/region-model.cc
> @@ -4486,6 +4486,14 @@ region_model::add_constraints_from_binop (const svalue 
> *outer_lhs,
> return true;
>   }
>return false;
> +case GE_EXPR:
> +case GT_EXPR:
> +case LE_EXPR:
> +case LT_EXPR:
> +  if (!is_true)
> + inner_op = invert_tree_comparison (inner_op, false /* honor_nans */);
> +  *out = add_constraint (inner_lhs, inner_op, inner_rhs, ctxt);
> +  return true;
>  }
>  }
>  

Nice - thanks.

Can this be combined with the EQ_EXPR and NE_EXPR cases? (possibly
updating the comment)  The code looks identical to me, but I might be
misreading it.

[...snip...]

> diff --git a/gcc/testsuite/c-c++-common/analyzer/compound-assignment-1.c 
> b/gcc/testsuite/c-c++-common/analyzer/compound-assignment-1.c
> new file mode 100644
> index 000..b208f58f09f
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/analyzer/compound-assignment-1.c
> @@ -0,0 +1,72 @@
> +#include <stdlib.h>
> +
> +struct ptr_wrapper
> +{
> +  int *ptr;
> +};
> +
> +struct ptr_wrapper
> +test_1 (void)
> +{
> +  struct ptr_wrapper r;
> +  r.ptr = (int *) malloc (sizeof (int));
> +  return r;
> +}

This looks the same as gcc.dg/analyzer/compound-assignment-1.c

Should this be a move, rather than a new file?  i.e. is the patch
missing a deletion of the file in the old location?

[...snip...]

> diff --git a/gcc/testsuite/c-c++-common/analyzer/infinite-recursion.c 
> b/gcc/testsuite/c-c++-common/analyzer/infinite-recursion.c
> new file mode 100644
> index 000..6b7d25cfabe
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/analyzer/infinite-recursion.c

Likewise here for infinite-recursion.c.

[...snip...]

> diff --git 
> a/gcc/testsuite/gcc.dg/analyzer/loop-0-up-to-n-by-1-with-iter-obj.c 
> b/gcc/testsuite/c-c++-common/analyzer/loop-0-up-to-n-by-1-with-iter-obj.c
> similarity index 97%
> rename from gcc/testsuite/gcc.dg/analyzer/loop-0-up-to-n-by-1-with-iter-obj.c
> rename to 
> gcc/testsuite/c-c++-common/analyzer/loop-0-up-to-n-by-1-with-iter-obj.c
> index 0172c9b324c..1b657697ef4 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/loop-0-up-to-n-by-1-with-iter-obj.c
> +++ b/gcc/testsuite/c-c++-common/analyzer/loop-0-up-to-n-by-1-with-iter-obj.c
> @@ -1,6 

Re: [PATCH] diagnostics: Delete config pointer before overwriting it.

2023-09-01 Thread David Malcolm via Gcc-patches
On Fri, 2023-09-01 at 21:16 +0200, Mikael Morin via Gcc-patches wrote:
> Hello,
> 
> this is a fix for a small memory leak in the fortran frontend.
> Tested on x86_64-pc-linux-gnu, nothing stands out besides the
> apparently well-known guality instability.
> OK for master ? 

LGTM, thanks!

Dave



Re: [PATCH] analyzer: call off a superseding when diagnostics are unrelated [PR110830]

2023-09-01 Thread David Malcolm via Gcc-patches
On Fri, 2023-09-01 at 21:59 +0200, priour...@gmail.com wrote:
> From: benjamin priour 
> 
> Hi,
> 
> Patch successfully regstrapped off trunk
> 7f2ed06ddc825e8a4e0edfd1d66b5156e6dc1d34
> on x86_64-linux-gnu.
> 
> Is it OK for trunk ?
> 
> Thanks,
> Benjamin.
> 

[...snip...]

>  
> +/* Walk up the two paths to each of their common conditional
> +   branching.  At each branching, make sure both diagnostics'
> +   paths branched similarly.  If there is at least one where
> +   both paths go down a different outcome, then the paths
> +   are incompatible and this function returns FALSE.
> +   Otherwise return TRUE.
> +
> +   Incompatible paths:
> +
> +   
> +   /  \
> +  /    \
> +    true  false
> + |  |
> +    ...    ...
> + |  |
> +    ...   stmt x
> + |
> +   stmt x
> +
> +   Both LHS_PATH and RHS_PATH final enodes should be
> +   over the same gimple statement.  */
> +
> +static bool
> +compatible_epath_p (const exploded_path *lhs_path,
> +   const exploded_path *rhs_path)
> +{
> +  gcc_assert (lhs_path);
> +  gcc_assert (rhs_path);
> +  int i;
> +  const exploded_edge *outer_eedge;
> +  FOR_EACH_VEC_ELT_REVERSE (lhs_path->m_edges, i, outer_eedge)
> +    {
> +  const superedge *outer_sedge = outer_eedge->m_sedge;
> +  if (!outer_sedge || !outer_eedge->m_src)
> +   continue;
> +  const program_point &outer_src_point = outer_eedge->m_src->get_point 
> ();
> +  switch (outer_src_point.get_kind ())
> +   {
> + case PK_AFTER_SUPERNODE:
> +   if (const cfg_superedge *cfg_outer_sedge
> +   = outer_sedge->dyn_cast_cfg_superedge ())
> + {
> +   int j;
> +   const exploded_edge *inner_eedge;
> +   FOR_EACH_VEC_ELT_REVERSE (rhs_path->m_edges, j, inner_eedge)
> + {
> +   const superedge *inner_sedge = inner_eedge->m_sedge;
> +   if (!inner_sedge || !inner_eedge->m_src)
> + continue;
> +   const program_point &inner_src_point
> + = inner_eedge->m_src->get_point ();
> +   switch (inner_src_point.get_kind ())
> + {
> +   case PK_AFTER_SUPERNODE:
> + if (inner_src_point.get_stmt ()
> + != outer_src_point.get_stmt ())
> +   continue;
> + if (const cfg_superedge *cfg_inner_sedge
> + = inner_sedge->dyn_cast_cfg_superedge ())
> +   {
> + if (cfg_inner_sedge->true_value_p ()
> + != cfg_outer_sedge->true_value_p ())
> +   return false;
> +   }
> + break;
> +   default:
> + break;
> + }
> + }
> + }
> +   break;
> +
> + default:
> +   break;
> +   }
> +    }
> +    return true;
> +}

[...snip...]

Thanks for the patch.  I think the high-level idea is good, but I'm not
sure the implementation is correct:

- it is O(n^2), where n is the length of exploded_path.
- it walks backwards through the LHS path, and for each eedge from a
PK_AFTER_SUPERNODE it walks backwards from the end of the RHS epath; it
only looks at the "true" flag on CFG edges.  I think this works for
simple cases, but the way it restarts the rhs_path iteration from the
end of the rhs_path each time "feels" incorrect.

An eedge from a PK_AFTER_SUPERNODE is presumably just an eedge that has
a non-NULL m_sedge i.e. an exploded edge relating to an edge in the
supergraph.  Rather than looking at flags, can we simply compare
superedge pointers?  For example, if we care that we followed the
"true" path of a conditional in both lhs and rhs epaths, we can look to
see if both have an eedge where the superedge is the cfg_superedge
wrapping the CFG "true" edge i.e. I think we can simply compare the
superedge pointers.

Or is there some detail here that I'm misunderstanding?

I *think* it's possible to implement it in O(n) with something like
this:  (warning: untested code follows!)

  /* For compatibility, there should effectively be the same
 vector of superedges followed in both epaths.
 Walk backwards through each epath, looking at the superedges.  */
  // FIXME: really?  Benjamin, have I understood this correctly?

  gcc_assert (lhs_path->length () > 0);
  gcc_assert (rhs_path->length () > 0);

  int lhs_idx = lhs_path->length () - 1;
  int rhs_idx = rhs_path->length () - 1;

  while (lhs_idx >= 0 && rhs_idx >= 0)
{
  /* Find next LHS superedge, if any.  */
  while (lhs_idx >= 0)
{
  const exploded_edge *lhs_eedge = lhs_path->m_edges[lhs_idx];
  if (lhs_eedge->m_sedge)
break;
  else
 

Re: [PATCH] analyzer: Add support of placement new and improved operator new [PR105948,PR94355]

2023-09-01 Thread David Malcolm via Gcc-patches
On Fri, 2023-09-01 at 16:48 +0200, Benjamin Priour wrote:
> Patch has been updated as per your suggestions and successfully
> regstrapped
> on x86_64-linux-gnu.
> 
> call_details::maybe_get_arg_region is now
> /* If argument IDX's svalue at the callsite is of pointer type,
>     return the region it points to.
>     Otherwise return NULL.  */
> 
> const region *
>  call_details::deref_ptr_arg (unsigned idx) const
>  {
>    const svalue *ptr_sval = get_arg_svalue (idx);
>    return m_model->deref_rvalue (ptr_sval, get_arg_tree (idx),
> m_ctxt);
>  }
> 
> 
> New test is
> 
> +
> +void test_binop ()
> +{
> +  char *p = (char *) malloc (4);
> +  if (!p)
> +    return;
> +  int32_t *i = ::new (p + 1) int32_t; /* { dg-warning "heap-based
> buffer
> overflow" } */
> +  *i = 42; /* { dg-warning "heap-based buffer overflow" } */
> +  free (p);
> +}
> 
> Is it OK for trunk ?
> I didn't resend the whole patch as it otherwise was OK.

Yes, thanks.

Dave



Re: [PATCH] analyzer: implement reference count checking for CPython plugin [PR107646]

2023-09-01 Thread David Malcolm via Gcc-patches
On Fri, 2023-09-01 at 04:49 +0200, Hans-Peter Nilsson wrote:
> (Looks like this was committed as r14-3580-g597b9ec69bca8a)
> 
> > Cc: g...@gcc.gnu.org, gcc-patches@gcc.gnu.org, Eric Feng
> > 
> > From: Eric Feng via Gcc 
> 
> > gcc/testsuite/ChangeLog:
> >   PR analyzer/107646
> > * gcc.dg/plugin/analyzer_cpython_plugin.c: Implements
> > reference count
> >   * checking for PyObjects.
> > * gcc.dg/plugin/cpython-plugin-test-2.c: Moved to...
> > * gcc.dg/plugin/cpython-plugin-test-PyList_Append.c:
> > ...here (and
> >   * added more tests).
> > * gcc.dg/plugin/cpython-plugin-test-1.c: Moved to...
> > * gcc.dg/plugin/cpython-plugin-test-no-plugin.c: ...here
> > (and added
> >   * more tests).
> > * gcc.dg/plugin/plugin.exp: New tests.
> > * gcc.dg/plugin/cpython-plugin-test-PyList_New.c: New test.
> > * gcc.dg/plugin/cpython-plugin-test-PyLong_FromLong.c: New
> > test.
> > * gcc.dg/plugin/cpython-plugin-test-refcnt-checking.c: New
> > test.
> 
> It seems this was more or less a rewrite, but that said,
> it's generally preferable to always *add* tests, never *modify* them.
> 
> >  .../gcc.dg/plugin/analyzer_cpython_plugin.c   | 376
> > +-
> 
> ^^^ Ouch!  Was it not within reason to keep that test as it
> was, and just add another test?
> 
> Anyway, the test after rewrite fails, and for some targets
> like cris-elf and apparently m68k-linux, yields an error.
> I see a PR was already opened.
> 
> Also, mostly for future reference, several files in the
> patch miss a final newline, as seen by a "\ No newline at
> end of file"-marker.
> 
> I think I found the problem; a mismatch between default C++
> language standard between host-gcc and target-gcc.
> 
> (It's actually *not* as simple as "auto var = typeofvar()"
> not being recognized in C++11 --or else there'd be an error
> for the hash_set declaration too, which I just changed for
> consistency-- but it's close enough for me.)
> 
> With this, retesting plugin.exp for cris-elf works.
> 
> Ok to commit?

Sorry about the failing tests.

Thanks for the patch; please go ahead and commit.

Dave

> 
> -- >8 --
> From: Hans-Peter Nilsson 
> Date: Fri, 1 Sep 2023 04:36:03 +0200
> Subject: [PATCH] testsuite: Fix analyzer_cpython_plugin.c
> declarations, PR testsuite/111264
> 
> Also, add missing newline at end of file.
> 
> PR testsuite/111264
> * gcc.dg/plugin/analyzer_cpython_plugin.c: Make declarations
> C++11-compatible.
> ---
>  gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> index 7af520436549..bf1982e79c37 100644
> --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> @@ -477,8 +477,8 @@ pyobj_refcnt_checker (const region_model *model,
>    if (!ctxt)
>  return;
>  
> -  auto region_to_refcnt = hash_map ();
> -  auto seen_regions = hash_set ();
> +  hash_map region_to_refcnt;
> +  hash_set seen_regions;
>  
>    count_pyobj_references (model, region_to_refcnt, retval,
> seen_regions);
>    check_refcnts (model, old_model, retval, ctxt, region_to_refcnt);
> @@ -561,7 +561,7 @@ public:
>  if (!ctxt)
>    return;
>  region_model *model = cd.get_model ();
> -    auto region_to_refcnt = hash_map ();
> +    hash_map region_to_refcnt;
>  count_all_references(model, region_to_refcnt);
>  dump_refcnt_info(region_to_refcnt, model, ctxt);
>    }
> @@ -1330,4 +1330,4 @@ plugin_init (struct plugin_name_args
> *plugin_info,
>    sorry_no_analyzer ();
>  #endif
>    return 0;
> -}
> \ No newline at end of file
> +}



Re: [PATCH] analyzer: Add support of placement new and improved operator new [PR105948,PR94355]

2023-08-31 Thread David Malcolm via Gcc-patches
On Fri, 2023-09-01 at 00:04 +0200, priour...@gmail.com wrote:


> Hi, 
> 
> Successfully regstrapped off trunk 7f2ed06ddc825e8a4e0edfd1d66b5156e6dc1d34
> on x86_64-linux-gnu.
> 
> Is it OK for trunk ?

Hi Benjamin.

Thanks for the patch.  It's OK as-is, but it doesn't cover every
case...

[...snip...]

> diff --git a/gcc/analyzer/call-details.cc b/gcc/analyzer/call-details.cc
> index 66fb0fe871e..8d60e928b15 100644
> --- a/gcc/analyzer/call-details.cc
> +++ b/gcc/analyzer/call-details.cc
> @@ -295,6 +295,17 @@ call_details::get_arg_svalue (unsigned idx) const
>return m_model->get_rvalue (arg, m_ctxt);
>  }
>  
> +/* If argument IDX's svalue at the callsite is a region_svalue,
> +   return the region it points to.
> +   Otherwise return NULL.  */
> +
> +const region *
> +call_details::maybe_get_arg_region (unsigned idx) const
> +{
> +  const svalue *sval = get_arg_svalue (idx);
> +  return sval->maybe_get_region ();
> +}
> +

Is this the correct thing to be doing?  It's used in the following...

[...snip...]

> diff --git a/gcc/analyzer/kf-lang-cp.cc b/gcc/analyzer/kf-lang-cp.cc
> index 393b4f25e79..4450892dfa2 100644
> --- a/gcc/analyzer/kf-lang-cp.cc
> +++ b/gcc/analyzer/kf-lang-cp.cc

[...snip...]

> @@ -54,28 +90,75 @@ public:
>  region_model *model = cd.get_model ();
>  region_model_manager *mgr = cd.get_manager ();
>  const svalue *size_sval = cd.get_arg_svalue (0);
> -const region *new_reg
> -  = model->get_or_create_region_for_heap_alloc (size_sval, cd.get_ctxt 
> ());
> -if (cd.get_lhs_type ())
> +region_model_context *ctxt = cd.get_ctxt ();
> +const gcall *call = cd.get_call_stmt ();
> +
> +/* If the call was actually a placement new, check that accessing
> +   the buffer lhs is placed into does not result in out-of-bounds.  */
> +if (is_placement_new_p (call))
>{
> - const svalue *ptr_sval
> -   = mgr->get_ptr_svalue (cd.get_lhs_type (), new_reg);
> - cd.maybe_set_lhs (ptr_sval);
> + const region *ptr_reg = cd.maybe_get_arg_region (1);
> + if (ptr_reg && cd.get_lhs_type ())
> +   {

...which will only fire if arg 1 is a region_svalue.  This won't
trigger if you have e.g. a binop_svalue for pointer arithmetic.

What happens e.g. for this one-off-the-end bug:

  void *p = malloc (4);
  if (!p)
return;
  int32_t *i = ::new (p + 1) int32_t;
  *i = 42;

So maybe call_details::maybe_get_arg_region should instead be:

/* Return the region that argument IDX points to.  */

const region *
call_details::deref_ptr_arg (unsigned idx) const
{
  const svalue *ptr_sval = get_arg_svalue (idx);
  return m_model->deref_rvalue (ptr_sval, get_arg_tree (idx), m_ctxt);
}

(caveat: I didn't test this)

> + const region *base_reg = ptr_reg->get_base_region ();
> + const svalue *num_bytes_sval = cd.get_arg_svalue (0);
> + const region *sized_new_reg
> + = mgr->get_sized_region (base_reg,
> +  cd.get_lhs_type (),
> +  num_bytes_sval);

Why do you use the base_reg here, rather than just ptr_reg?

In the example above, the *(p + 1) has base region
heap_allocated_region, but the ptr_reg is one byte higher; hence
check_region_for_write of 4 bytes ought to detect a problem with
writing 4 bytes to *(p + 1), but wouldn't complain about the write to
*p.

...assuming that I'm reading this code correctly.

> + model->check_region_for_write (sized_new_reg,
> +nullptr,
> +ctxt);
> + const svalue *ptr_sval
> +   = mgr->get_ptr_svalue (cd.get_lhs_type (), sized_new_reg);
> + cd.maybe_set_lhs (ptr_sval);
> +   }
> +  }

[...snip...]

The patch is OK for trunk as is; but please can you look into the
above.

If the above is a problem, you can either do another version of the
patch, or do it as a followup patch (whichever you're more comfortable
with, but it might be best to get the patch into trunk as-is, given
that the GSoC period is nearly over).

Thanks
Dave



Re: [PATCH] analyzer: implement reference count checking for CPython plugin [PR107646]

2023-08-31 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-30 at 18:15 -0400, Eric Feng wrote:
> On Tue, Aug 29, 2023 at 5:14 PM David Malcolm 
> wrote:
> > 
> > On Tue, 2023-08-29 at 13:28 -0400, Eric Feng wrote:
> > > Additionally, by using the old model and the pointer per your
> > > suggestion,
> > > we are able to find the representative tree and emit a more
> > > accurate
> > > diagnostic!
> > > 
> > > rc3.c:23:10: warning: expected ‘item’ to have reference count:
> > > ‘1’
> > > but ob_refcnt field is: ‘2’
> > >    23 |   return list;
> > >   |  ^~~~
> > >   ‘create_py_object’: events 1-4
> > >     |
> > >     |    4 |   PyObject* item = PyLong_FromLong(3);
> > >     |  |    ^~
> > >     |  |    |
> > >     |  |    (1) when ‘PyLong_FromLong’
> > > succeeds
> > >     |    5 |   PyObject* list = PyList_New(1);
> > >     |  |    ~
> > >     |  |    |
> > >     |  |    (2) when ‘PyList_New’ succeeds
> > >     |..
> > >     |   14 |   PyList_Append(list, item);
> > >     |  |   ~
> > >     |  |   |
> > >     |  |   (3) when ‘PyList_Append’ succeeds, moving buffer
> > >     |..
> > >     |   23 |   return list;
> > >     |  |  
> > >     |  |  |
> > >     |  |  (4) here
> > >     |
> > 
> > Excellent, that's a big improvement.
> > 
> > > 
> > > If a representative tree is not found, I decided we should just
> > > bail
> > > out
> > > of emitting a diagnostic for now, to avoid confusing the user on
> > > what
> > > the problem is.
> > 
> > Fair enough.
> > 
> > > 
> > > I've attached the patch for this (on top of the previous one)
> > > below.
> > > If
> > > it also looks good, I can merge it with the last patch and push
> > > it in
> > > at
> > > the same time.
> > 
> > I don't mind either way, but please can you update the tests so
> > that we
> > have some automated test coverage that the correct name is being
> > printed in the warning.
> > 
> > Thanks
> > Dave
> > 
> 
> Sorry — forgot to hit 'reply all' in the previous e-mail. Resending
> to
> preserve our chain on the list:
> 
> ---
> 
> Thanks; pushed to trunk with nits fixed:
> https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=597b9ec69bca8acb7a3d65641c0a730de8b27ed4
> .

Thanks; looks good.

Do you want to add this to the GCC 14 part of the "History" section on
the wiki page:
  https://gcc.gnu.org/wiki/StaticAnalyzer
or should I?

> 
> Incidentally, I updated my formatting settings in VSCode, which I've
> previously mentioned in passing. In case anyone is interested:
> 
> "C_Cpp.clang_format_style": "{ BasedOnStyle: GNU, UseTab: Always,
> TabWidth: 8, IndentWidth: 2, BinPackParameters: false,
> AlignAfterOpenBracket: Align,
> AllowAllParametersOfDeclarationOnNextLine: true }",
> 
> This fixes some issues with the indent width and also ensures
> function
> parameters of appropriate length are aligned properly and on a new
> line each (like the rest of the analyzer code).

Thanks
Dave




Re: Analyzer failure due to missing header

2023-08-30 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-30 at 23:24 +0200, FX Coudert wrote:
> > std::max and std::min, introduced by d99d73c77d1e and 2bad0eeb5573,
> > are not available because <algorithm> is not included.
> 
> I originally thought this was only seen in cross-compilers, but it
> actually broke bootstrap on darwin.
> Attached patch restores it, OK to commit?

LGTM

Thanks
Dave



Re: [pushed] analyzer: fix ICE in text art strings support

2023-08-30 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-30 at 11:52 +0530, Prathamesh Kulkarni wrote:
> On Wed, 30 Aug 2023 at 04:21, David Malcolm 
> wrote:
> > 
> > On Tue, 2023-08-29 at 11:01 +0530, Prathamesh Kulkarni wrote:
> > > On Fri, 25 Aug 2023 at 18:15, David Malcolm via Gcc-patches
> > >  wrote:
> > > > 
> > > > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
> > > > Pushed to trunk as r14-3481-g99a3fcb8ff0bf2.
> > > Hi David,
> > > It seems the new tests FAIL on arm for LTO bootstrap config:
> > > https://ci.linaro.org/job/tcwg_bootstrap_check--master-arm-check_bootstrap_lto-build/263/artifact/artifacts/06-check_regression/fails.sum/*view*/
> > 
> > Sorry about this.
> > 
> > Looking at e.g. the console.log.xz, I just see the status of the
> > failing tests.
> > 
> > Is there an easy way to get at the stderr from the tests without
> > rerunning this?
> > 
> > Otherwise, I'd appreciate help with reproducing this.
> Hi David,
> I have attached make check log for the failing tests.
> To reproduce, I configured and built gcc with following options on
> armv8 machine:
> ../gcc/configure --enable-languages=c,c++,fortran --with-float=hard
> --with-fpu=neon-fp-armv8 --with-mode=thumb --with-arch=armv8-a
> --disable-werror --with-build-config=bootstrap-lto
> make -j$(nproc)

Thanks.

Looks a lot like PR analyzer/110483, which I'm working on now (sorry!)

What's the endianness of the host?


Specifically, the pertinent part of the log is:

FAIL: gcc.dg/analyzer/out-of-bounds-diagram-17.c (test for excess errors)
Excess errors:
   ┌─┬─┬┬┬┐┌─┬─┬─┐
   │ [1] │ [1] │[1] │[1] │[1] ││ [1] │ [1] │ [1] │
   ├─┼─┼┼┼┤├─┼─┼─┤
   │ ' ' │ 'w' │'o' │'r' │'l' ││ 'd' │ '!' │ NUL │
   ├─┴─┴┴┴┴┴─┴─┴─┤
   │  string literal (type: 'char[8]')   │
   └─┘
  │ ││││  │ │ │
  │ ││││  │ │ │
  v vvvv  v v v
  ┌─┬┬┐┌─┐
  │ [0] │  ...   │[9] ││ │
  ├─┴┴┤│after valid range│
  │ 'buf' (type: 'char[10]')  ││ │
  └───┘└─┘
  ├─┬─┤├┬┤
│   │
  ╭─┴╮╭─┴─╮
  │capacity: 10 bytes││overflow of 3 bytes│
  ╰──╯╰───╯

where the issue seems to be all those [1], which are meant to be index
[0], [1], [2], etc.


Dave


Re: [pushed] analyzer: fix ICE in text art strings support

2023-08-29 Thread David Malcolm via Gcc-patches
On Tue, 2023-08-29 at 11:01 +0530, Prathamesh Kulkarni wrote:
> On Fri, 25 Aug 2023 at 18:15, David Malcolm via Gcc-patches
>  wrote:
> > 
> > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
> > Pushed to trunk as r14-3481-g99a3fcb8ff0bf2.
> Hi David,
> It seems the new tests FAIL on arm for LTO bootstrap config:
> https://ci.linaro.org/job/tcwg_bootstrap_check--master-arm-check_bootstrap_lto-build/263/artifact/artifacts/06-check_regression/fails.sum/*view*/

Sorry about this.

Looking at e.g. the console.log.xz, I just see the status of the
failing tests.

Is there an easy way to get at the stderr from the tests without
rerunning this?

Otherwise, I'd appreciate help with reproducing this.

Thanks
Dave

> Please let me know if you need any help in reproducing these
> failures.
> 
> Thanks,
> Prathamesh
> > 
> > gcc/analyzer/ChangeLog:
> >     * access-diagram.cc (class string_region_spatial_item):
> > Remove
> >     assumption that the string is written to the start of the
> > cluster.
> > 
> > gcc/testsuite/ChangeLog:
> >     * gcc.dg/analyzer/out-of-bounds-diagram-17.c: New test.
> >     * gcc.dg/analyzer/out-of-bounds-diagram-18.c: New test.
> >     * gcc.dg/analyzer/out-of-bounds-diagram-19.c: New test.
> > ---
> >  gcc/analyzer/access-diagram.cc    | 57 ---
> > 
> >  .../analyzer/out-of-bounds-diagram-17.c   | 34 +++
> >  .../analyzer/out-of-bounds-diagram-18.c   | 38 +
> >  .../analyzer/out-of-bounds-diagram-19.c   | 45 +++
> >  4 files changed, 155 insertions(+), 19 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-
> > diagram-17.c
> >  create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-
> > diagram-18.c
> >  create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-
> > diagram-19.c
> > 
> > diff --git a/gcc/analyzer/access-diagram.cc b/gcc/analyzer/access-
> > diagram.cc
> > index d7b669a4e38e..a51d594b5b2c 100644
> > --- a/gcc/analyzer/access-diagram.cc
> > +++ b/gcc/analyzer/access-diagram.cc
> > @@ -1509,10 +1509,16 @@ public:
> >    out.add_all_bytes_in_range (m_actual_bits);
> >  else
> >    {
> > -   byte_range head_of_string (0, m_ellipsis_head_len);
> > +   byte_range bytes (0, 0);
> > > +   bool valid = m_actual_bits.as_concrete_byte_range (&bytes);
> > +   gcc_assert (valid);
> > +   byte_range head_of_string (bytes.get_start_byte_offset (),
> > +  m_ellipsis_head_len);
> >     out.add_all_bytes_in_range (head_of_string);
> >     byte_range tail_of_string
> > - (TREE_STRING_LENGTH (string_cst) - m_ellipsis_tail_len,
> > + ((bytes.get_start_byte_offset ()
> > +   + TREE_STRING_LENGTH (string_cst)
> > +   - m_ellipsis_tail_len),
> >    m_ellipsis_tail_len);
> >     out.add_all_bytes_in_range (tail_of_string);
> >     /* Adding the above pair of ranges will also effectively
> > add
> > @@ -1535,11 +1541,14 @@ public:
> >  tree string_cst = get_string_cst ();
> >  if (m_show_full_string)
> >    {
> > -   for (byte_offset_t byte_idx = bytes.get_start_byte_offset
> > ();
> > -   byte_idx < bytes.get_next_byte_offset ();
> > -   byte_idx = byte_idx + 1)
> > -    add_column_for_byte (t, btm, sm, byte_idx,
> > - byte_idx_table_y, byte_val_table_y);
> > +   for (byte_offset_t byte_idx_within_cluster
> > + = bytes.get_start_byte_offset ();
> > +   byte_idx_within_cluster < bytes.get_next_byte_offset
> > ();
> > +   byte_idx_within_cluster = byte_idx_within_cluster + 1)
> > +    add_column_for_byte
> > +  (t, btm, sm, byte_idx_within_cluster,
> > +   byte_idx_within_cluster - bytes.get_start_byte_offset
> > (),
> > +   byte_idx_table_y, byte_val_table_y);
> > 
> >     if (m_show_utf8)
> >  {
> > @@ -1566,10 +1575,13 @@ public:
> >  = decoded_char.m_start_byte - TREE_STRING_POINTER
> > (string_cst);
> >    byte_size_t size_in_bytes
> >  = decoded_char.m_next_byte -
> > decoded_char.m_start_byte;
> > -  byte_range bytes (start_byte_idx, size_in_bytes);
> > +  byte_range cluster_bytes_for_codepoint
> > +    (start_byte_idx + bytes.get_start_

[pushed] analyzer: new warning: -Wanalyzer-overlapping-buffers [PR99860]

2023-08-29 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3556-g034d99e81484fb.

gcc/ChangeLog:
PR analyzer/99860
* Makefile.in (ANALYZER_OBJS): Add analyzer/ranges.o.

gcc/analyzer/ChangeLog:
PR analyzer/99860
* analyzer-selftests.cc (selftest::run_analyzer_selftests): Call
selftest::analyzer_ranges_cc_tests.
* analyzer-selftests.h (selftest::run_analyzer_selftests): New
decl.
* analyzer.opt (Wanalyzer-overlapping-buffers): New option.
* call-details.cc: Include "analyzer/ranges.h" and "make-unique.h".
(class overlapping_buffers): New.
(call_details::complain_about_overlap): New.
* call-details.h (call_details::complain_about_overlap): New decl.
* kf.cc (kf_memcpy_memmove::impl_call_pre): Call
cd.complain_about_overlap for memcpy and memcpy_chk.
(kf_strcat::impl_call_pre): Call cd.complain_about_overlap.
(kf_strcpy::impl_call_pre): Likewise.
* ranges.cc: New file.
* ranges.h: New file.

gcc/ChangeLog:
PR analyzer/99860
* doc/invoke.texi: Add -Wanalyzer-overlapping-buffers.

gcc/testsuite/ChangeLog:
PR analyzer/99860
* c-c++-common/analyzer/overlapping-buffers.c: New test.
---
 gcc/Makefile.in   |   1 +
 gcc/analyzer/analyzer-selftests.cc|   1 +
 gcc/analyzer/analyzer-selftests.h |   1 +
 gcc/analyzer/analyzer.opt |   4 +
 gcc/analyzer/call-details.cc  | 106 ++
 gcc/analyzer/call-details.h   |   5 +
 gcc/analyzer/kf.cc|  19 +-
 gcc/analyzer/ranges.cc| 324 ++
 gcc/analyzer/ranges.h |  96 ++
 gcc/doc/invoke.texi   |  20 ++
 .../analyzer/overlapping-buffers.c| 147 
 11 files changed, 722 insertions(+), 2 deletions(-)
 create mode 100644 gcc/analyzer/ranges.cc
 create mode 100644 gcc/analyzer/ranges.h
 create mode 100644 gcc/testsuite/c-c++-common/analyzer/overlapping-buffers.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 78779546459f..5930b52462aa 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1313,6 +1313,7 @@ ANALYZER_OBJS = \
analyzer/pending-diagnostic.o \
analyzer/program-point.o \
analyzer/program-state.o \
+   analyzer/ranges.o \
analyzer/region.o \
analyzer/region-model.o \
analyzer/region-model-asm.o \
diff --git a/gcc/analyzer/analyzer-selftests.cc 
b/gcc/analyzer/analyzer-selftests.cc
index 63b8cdfa1369..d06b4c374430 100644
--- a/gcc/analyzer/analyzer-selftests.cc
+++ b/gcc/analyzer/analyzer-selftests.cc
@@ -55,6 +55,7 @@ run_analyzer_selftests ()
   analyzer_function_set_cc_tests ();
   analyzer_program_point_cc_tests ();
   analyzer_program_state_cc_tests ();
+  analyzer_ranges_cc_tests ();
   analyzer_region_model_cc_tests ();
   analyzer_sm_file_cc_tests ();
   analyzer_sm_signal_cc_tests ();
diff --git a/gcc/analyzer/analyzer-selftests.h 
b/gcc/analyzer/analyzer-selftests.h
index d848ed9bc941..de494bfceae8 100644
--- a/gcc/analyzer/analyzer-selftests.h
+++ b/gcc/analyzer/analyzer-selftests.h
@@ -38,6 +38,7 @@ extern void analyzer_constraint_manager_cc_tests ();
 extern void analyzer_function_set_cc_tests ();
 extern void analyzer_program_point_cc_tests ();
 extern void analyzer_program_state_cc_tests ();
+extern void analyzer_ranges_cc_tests ();
 extern void analyzer_region_model_cc_tests ();
 extern void analyzer_sm_file_cc_tests ();
 extern void analyzer_sm_signal_cc_tests ();
diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt
index 7917473d1223..25df89d9c06b 100644
--- a/gcc/analyzer/analyzer.opt
+++ b/gcc/analyzer/analyzer.opt
@@ -154,6 +154,10 @@ Wanalyzer-out-of-bounds
 Common Var(warn_analyzer_out_of_bounds) Init(1) Warning
 Warn about code paths in which a write or read to a buffer is out-of-bounds.
 
+Wanalyzer-overlapping-buffers
+Common Var(warn_analyzer_overlapping_buffers) Init(1) Warning
+Warn about code paths in which undefined behavior would occur due to 
overlapping buffers.
+
 Wanalyzer-possible-null-argument
 Common Var(warn_analyzer_possible_null_argument) Init(1) Warning
 Warn about code paths in which a possibly-NULL value is passed to a 
must-not-be-NULL function argument.
diff --git a/gcc/analyzer/call-details.cc b/gcc/analyzer/call-details.cc
index ce1f859c9996..66fb0fe871e2 100644
--- a/gcc/analyzer/call-details.cc
+++ b/gcc/analyzer/call-details.cc
@@ -34,8 +34,10 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-pretty-print.h"
 #include "analyzer/region-model.h"
 #include "analyzer/call-details.h"
+#include "analyzer/ranges.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "make-unique.h"
 
 #if ENABLE_ANALYZER
 
@@ -405,6 +407,110 @@ check_for_null_terminated_string_arg (unsigned arg_idx,
 

Re: [PATCH] analyzer: implement reference count checking for CPython plugin [PR107646]

2023-08-29 Thread David Malcolm via Gcc-patches
On Tue, 2023-08-29 at 13:28 -0400, Eric Feng wrote:
> Additionally, by using the old model and the pointer per your
> suggestion,
> we are able to find the representative tree and emit a more accurate
> diagnostic!
> 
> rc3.c:23:10: warning: expected ‘item’ to have reference count: ‘1’
> but ob_refcnt field is: ‘2’
>    23 |   return list;
>   |  ^~~~
>   ‘create_py_object’: events 1-4
>     |
>     |    4 |   PyObject* item = PyLong_FromLong(3);
>     |  |    ^~
>     |  |    |
>     |  |    (1) when ‘PyLong_FromLong’ succeeds
>     |    5 |   PyObject* list = PyList_New(1);
>     |  |    ~
>     |  |    |
>     |  |    (2) when ‘PyList_New’ succeeds
>     |..
>     |   14 |   PyList_Append(list, item);
>     |  |   ~
>     |  |   |
>     |  |   (3) when ‘PyList_Append’ succeeds, moving buffer
>     |..
>     |   23 |   return list;
>     |  |  
>     |  |  |
>     |  |  (4) here
>     |

Excellent, that's a big improvement.

> 
> If a representative tree is not found, I decided we should just bail
> out
> of emitting a diagnostic for now, to avoid confusing the user on what
> the problem is.

Fair enough.

> 
> I've attached the patch for this (on top of the previous one) below.
> If
> it also looks good, I can merge it with the last patch and push it in
> at
> the same time.

I don't mind either way, but please can you update the tests so that we
have some automated test coverage that the correct name is being
printed in the warning.

Thanks
Dave



Re: [PATCH] analyzer: implement reference count checking for CPython plugin [PR107646]

2023-08-29 Thread David Malcolm via Gcc-patches
On Tue, 2023-08-29 at 00:31 -0400, Eric Feng wrote:
> Hi Dave,

Hi Eric.

Thanks for the updated patch.

A few nits below; this is OK for trunk with them fixed...

[...snip...]

> 
> gcc/analyzer/ChangeLog:
>   PR analyzer/107646
>   * engine.cc (impl_region_model_context::warn): New optional parameter.
>   * exploded-graph.h (class impl_region_model_context): Likewise.
>   * region-model.cc (region_model::pop_frame): New callback feature for
>   * region_model::pop_frame.
>   * region-model.h (struct append_regions_cb_data): Likewise.
>   (class region_model): Likewise.
>   (class region_model_context): New optional parameter.
>   (class region_model_context_decorator): Likewise.
> 
> gcc/testsuite/ChangeLog:
>   PR analyzer/107646
>   * gcc.dg/plugin/analyzer_cpython_plugin.c: Implements reference count
>   * checking for PyObjects.
>   * gcc.dg/plugin/cpython-plugin-test-2.c: Moved to...
>   * gcc.dg/plugin/cpython-plugin-test-PyList_Append.c: ...here (and
>   * added more tests).
>   * gcc.dg/plugin/cpython-plugin-test-1.c: Moved to...
>   * gcc.dg/plugin/cpython-plugin-test-no-plugin.c: ...here (and added
>   * more tests).
>   * gcc.dg/plugin/plugin.exp: New tests.
>   * gcc.dg/plugin/cpython-plugin-test-PyList_New.c: New test.
>   * gcc.dg/plugin/cpython-plugin-test-PyLong_FromLong.c: New test.
>   * gcc.dg/plugin/cpython-plugin-test-refcnt-checking.c: New test.

The ChangeLog formatting here seems wrong; lines starting with a '*'
should refer to a filename.  Continuation lines begin with just a tab
character.

[...snip...]

> diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
> index 10b2a59e787..440ea6d828d 100644
> --- a/gcc/analyzer/region-model.h
> +++ b/gcc/analyzer/region-model.h

[...snip...]

> @@ -840,7 +865,8 @@ private:
>  class region_model_context_decorator : public region_model_context
>  {
>   public:
> -  bool warn (std::unique_ptr<pending_diagnostic> d) override
> +  bool warn (std::unique_ptr<pending_diagnostic> d,
> +  const stmt_finder *custom_finder)
>{
>  if (m_inner)
>return m_inner->warn (std::move (d));

This should presumably pass the custom_finder on to the 2nd argument of
m_inner->warn, rather than have the inner call to warn implicitly use
the NULL default arg.

[...snip...]

> diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-1.c 
> b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-no-plugin.c
> similarity index 100%
> rename from gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-1.c
> rename to gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-no-plugin.c

Looks like
  "-no-Python-h.c"
would be a better suffix than
  "-no-plugin.c"
as it's the include that's missing, not the plugin.

[...snip...]

> diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp 
> b/gcc/testsuite/gcc.dg/plugin/plugin.exp
> index e1ed2d2589e..cbef6da8d86 100644
> --- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
> +++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
> @@ -161,8 +161,9 @@ set plugin_test_list [list \
> taint-CVE-2011-0521-6.c \
> taint-antipatterns-1.c } \
>  { analyzer_cpython_plugin.c \
> -   cpython-plugin-test-1.c \
> -   cpython-plugin-test-2.c } \
> +   cpython-plugin-test-PyList_Append.c \
> +   cpython-plugin-test-PyList_New.c \
> +   cpython-plugin-test-PyLong_FromLong.c } \

Looks like this is missing:
  cpython-plugin-test-no-plugin.c
and
  cpython-plugin-test-refcnt-checking.c
(though as noted above "cpython-plugin-test-no-Python-h.c" would be a
better name for the former)

so it wasn't actually compiling these tests.

Be sure to doublecheck that these tests pass when updating.

[...snip...]

OK for trunk with the above nits fixed.

Dave



Re: [PATCH] libgccjit: Add support for `restrict` attribute on function parameters

2023-08-29 Thread David Malcolm via Gcc-patches
On Tue, 2023-08-29 at 17:15 +0200, Guillaume Gomez wrote:
> We finished the investigation and found out the issue: when passing
> arguments by value to functions, rustc still provides "NoAlias" as
> attribute to the argument whereas it should never be passed in this
> case. Luckily for us, in case the argument is a function pointer
> coming from a struct field, it crashes GCC, which is what allowed us
> to figure out about this. A code which reproduces this bug:

[...snip...]

> So in short: the patch in the previous mail which added this check:
> 
> ```
> RETURN_NULL_IF_FAIL (type->is_pointer (), NULL, NULL, "not a pointer
> type");
> ```
> 
> is correct and ready. 

Thanks.  I've gone ahead and pushed it to gcc trunk for you as r14-
3552-g29763b002459cb.

[...snip...]

Dave



[pushed] analyzer: improve strdup handling [PR105899]

2023-08-29 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3549-gf687fc1ff6d4a4.

gcc/analyzer/ChangeLog:
PR analyzer/105899
* kf.cc (kf_strdup::impl_call_pre): Set size of
dynamically-allocated buffer.  Simulate copying the string from
the source region to the new buffer.

gcc/testsuite/ChangeLog:
PR analyzer/105899
* c-c++-common/analyzer/pr99193-2.c: Add
-Wno-analyzer-too-complex.
* gcc.dg/analyzer/strdup-1.c: Include "analyzer-decls.h".
(test_concrete_strlen): New.
(test_symbolic_strlen): New.
---
 gcc/analyzer/kf.cc| 28 +--
 .../c-c++-common/analyzer/pr99193-2.c |  2 ++
 gcc/testsuite/gcc.dg/analyzer/strdup-1.c  | 27 ++
 3 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index 333ffd9751ae..37792aed909c 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -1301,17 +1301,27 @@ public:
   void impl_call_pre (const call_details &cd) const final override
   {
 region_model *model = cd.get_model ();
+region_model_context *ctxt = cd.get_ctxt ();
 region_model_manager *mgr = cd.get_manager ();
-cd.check_for_null_terminated_string_arg (0);
-/* Ideally we'd get the size here, and simulate copying the bytes.  */
-const region *new_reg
-  = model->get_or_create_region_for_heap_alloc (NULL, cd.get_ctxt ());
-model->mark_region_as_unknown (new_reg, NULL);
-if (cd.get_lhs_type ())
+const svalue *bytes_to_copy;
+if (const svalue *num_bytes_read_sval
+   = cd.check_for_null_terminated_string_arg (0, true, &bytes_to_copy))
   {
-   const svalue *ptr_sval
- = mgr->get_ptr_svalue (cd.get_lhs_type (), new_reg);
-   cd.maybe_set_lhs (ptr_sval);
+   const region *new_reg
+ = model->get_or_create_region_for_heap_alloc (num_bytes_read_sval,
+   ctxt);
+   model->write_bytes (new_reg, num_bytes_read_sval, bytes_to_copy, ctxt);
+   if (cd.get_lhs_type ())
+ {
+   const svalue *ptr_sval
+ = mgr->get_ptr_svalue (cd.get_lhs_type (), new_reg);
+   cd.maybe_set_lhs (ptr_sval);
+ }
+  }
+else
+  {
+   if (ctxt)
+ ctxt->terminate_path ();
   }
   }
 };
diff --git a/gcc/testsuite/c-c++-common/analyzer/pr99193-2.c 
b/gcc/testsuite/c-c++-common/analyzer/pr99193-2.c
index 791b857dcc56..12326ef61f1c 100644
--- a/gcc/testsuite/c-c++-common/analyzer/pr99193-2.c
+++ b/gcc/testsuite/c-c++-common/analyzer/pr99193-2.c
@@ -3,6 +3,8 @@
Based on 
https://github.com/libguestfs/libguestfs/blob/f19fd566f6387ce7e4d82409528c9dde374d25e0/df/main.c#L404
which is GPLv2 or later.  */
 
+/* { dg-additional-options "-Wno-analyzer-too-complex" } */
+
 typedef __SIZE_TYPE__ size_t;
 typedef __builtin_va_list va_list;
 
diff --git a/gcc/testsuite/gcc.dg/analyzer/strdup-1.c 
b/gcc/testsuite/gcc.dg/analyzer/strdup-1.c
index f6c176f174eb..11bc964922b5 100644
--- a/gcc/testsuite/gcc.dg/analyzer/strdup-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/strdup-1.c
@@ -1,6 +1,8 @@
 #include 
 #include 
 
+#include "analyzer-decls.h"
+
 extern void requires_nonnull (void *ptr)
   __attribute__((nonnull));
 
@@ -52,3 +54,28 @@ char *test_uninitialized (void)
   return strdup (buf); /* { dg-warning "use of uninitialized value 
'buf\\\[0\\\]'" } */
   /* { dg-message "while looking for null terminator for argument 1 
\\(''\\) of 'strdup'..." "event" { target *-*-* } .-1 } */
 }
+
+char *test_concrete_strlen (void)
+{
+  char *p = strdup ("abc");
+  if (!p)
+return p;
+  __analyzer_eval (__analyzer_get_strlen (p) == 3); /* { dg-warning "TRUE" } */
+  __analyzer_eval (p[0] == 'a'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (p[1] == 'b'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (p[2] == 'c'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (p[3] == '\0'); /* { dg-warning "TRUE" } */
+  return p;
+}
+
+char *test_symbolic_strlen (const char *p)
+{
+  char *q = strdup (p);
+  if (!q)
+return q;
+  __analyzer_eval (__analyzer_get_strlen (p) == __analyzer_get_strlen (q)); /* 
{ dg-warning "UNKNOWN" } */
+  // TODO: should be TRUE
+  __analyzer_eval (p[0] == q[0]); /* { dg-warning "UNKNOWN" } */
+  // TODO: should be TRUE
+  return q;
+}
-- 
2.26.3



Re: [PATCH v2] analyzer: Move gcc.dg/analyzer tests to c-c++-common (1) [PR96395]

2023-08-26 Thread David Malcolm via Gcc-patches
On Sat, 2023-08-26 at 14:22 +0200, priour...@gmail.com wrote:


> From: benjamin priour 
> 
> Hi,
> 
> Updated version of the patch, regstrapping the changes described in
> https://gcc.gnu.org/pipermail/gcc-patches/2023-August/628455.html.
> 
> Regstrapped off trunk 66be6ed81f369573824f1a8f5a3538a63472292f
> on x86_64-linux-gnu.
> 
> OK for trunk ?

Thanks for the v2 patch.

This is almost ready, some minor nits below...

[...snip...]

> 
> gcc/analyzer/ChangeLog:
> 
>   analyzer/PR 96395

This should be PR analyzer/96395.

>   * analyzer.h (class known_function): Add virtual casts
>   to builtin_known_function.
>   (class builtin_known_function): New subclass of known_function
>   for builtins.

[...snip...]
 
> 
> gcc/testsuite/ChangeLog:
> 
>   analyzer/PR 96395

Likewise here.

>   * gcc.dg/analyzer/aliasing-3.c: Moved to...
>   * c-c++-common/analyzer/aliasing-3.c: ...here.
>   * gcc.dg/analyzer/aliasing-pr106473.c: Moved to...
>   * c-c++-common/analyzer/aliasing-pr106473.c: ...here.

[...snip...]
 
> diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h
> index 93a28b4b5cf..d42771f1e20 100644
> --- a/gcc/analyzer/analyzer.h
> +++ b/gcc/analyzer/analyzer.h

[...snip...]

> @@ -279,6 +283,26 @@ public:
>{
>  return;
>}
> +
> +  virtual const builtin_known_function *
> +  dyn_cast_builtin_kf () const { return NULL; }
> +  virtual builtin_known_function *
> +  dyn_cast_builtin_kf () { return NULL; }

As noted in the review of v1, I don't think we ever work with non-const
known_function pointers, so we don't need the non-const version of the
vfunc.

[...snip...]

> diff --git a/gcc/testsuite/c-c++-common/analyzer/pr61861.c 
> b/gcc/testsuite/c-c++-common/analyzer/pr61861.c
> new file mode 100644
> index 000..bb9e039ebd5
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/analyzer/pr61861.c
> @@ -0,0 +1,3 @@
> +/* { dg-additional-options "-Wno-int-conversion" } */
> +/* { dg-skip-if "-Wno-int-conversion for C++" { c++ } } */
> +#include "../../gcc.dg/pr61861.c"

For tests like this that aren't going to be portable to C++, let's keep
it in the gcc.dg subdirectory, with a suitable comment, rather than
moving them and having a dg-skip-if on it.

Perhaps:

/* C only: -Wno-int-conversion is not valid for C++.  */

That way we can easily grep for the absence of "C only" when checking
to see which analyzer tests below gcc.dg still need considering for
moving below c-c++-common.  The string "C only" is conveniently short.

[...snip...]

> diff --git a/gcc/testsuite/gcc.dg/analyzer/pr95152-4.c 
> b/gcc/testsuite/c-c++-common/analyzer/pr95152-4.c
> similarity index 55%
> rename from gcc/testsuite/gcc.dg/analyzer/pr95152-4.c
> rename to gcc/testsuite/c-c++-common/analyzer/pr95152-4.c
> index f2a72cad01c..5ebbae85aee 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/pr95152-4.c
> +++ b/gcc/testsuite/c-c++-common/analyzer/pr95152-4.c
> @@ -1,4 +1,6 @@
> +/* { dg-skip-if "'-Wno-pointer-to-int-cast' invalid for C++" { c++ } } */
>  /* { dg-additional-options "-Wno-pointer-to-int-cast" } */

Likewise here.

[...snip...]

> diff --git a/gcc/testsuite/gcc.dg/analyzer/pr95152-5.c 
> b/gcc/testsuite/c-c++-common/analyzer/pr95152-5.c
> similarity index 61%
> rename from gcc/testsuite/gcc.dg/analyzer/pr95152-5.c
> rename to gcc/testsuite/c-c++-common/analyzer/pr95152-5.c
> index 604b78458c7..fbc4753e0b4 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/pr95152-5.c
> +++ b/gcc/testsuite/c-c++-common/analyzer/pr95152-5.c
> @@ -1,3 +1,4 @@
> +/* { dg-skip-if "'-Wno-incompatible-pointer-types' invalid for C++" { c++ } 
> } */
>  /* { dg-additional-options "-Wno-incompatible-pointer-types" } */
>  void foo(void)
>  {

Likewise here.

[...snip...]

> diff --git a/gcc/testsuite/gcc.dg/analyzer/write-to-function-1.c 
> b/gcc/testsuite/c-c++-common/analyzer/write-to-function-1.c
> similarity index 81%
> rename from gcc/testsuite/gcc.dg/analyzer/write-to-function-1.c
> rename to gcc/testsuite/c-c++-common/analyzer/write-to-function-1.c
> index c1bece632ce..dd4adc13141 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/write-to-function-1.c
> +++ b/gcc/testsuite/c-c++-common/analyzer/write-to-function-1.c
> @@ -1,3 +1,5 @@
> +/* { dg-skip-if "c++ does not allow for conversion from function pointer to 
> 'void *'" { c++ } } */
> +

Likewise here.

[...snip...]

> diff --git a/gcc/testsuite/gcc.dg/analyzer/pr104369-1.c 
> b/gcc/testsuite/gcc.dg/analyzer/pr104369-1.c
> index c05137bb219..788a2059013 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/pr104369-1.c
> +++ b/gcc/testsuite/gcc.dg/analyzer/pr104369-1.c
> @@ -1,5 +1,8 @@
>  /* { dg-additional-options "-Wno-analyzer-too-complex -Wno-analyzer-fd-leak" 
> } */
>  // TODO: remove need for these options
> +/* This test needs not be moved to c-c++-common/analyzer as C++
> +   does not support transparent_union. */
> +

Perhaps update this comment to:

/* C only: C++ does not support transparent_union.*/

so that we can grep for (the 

Re: [PATCH] analyzer: Move gcc.dg/analyzer tests to c-c++-common (1).

2023-08-25 Thread David Malcolm via Gcc-patches
On Fri, 2023-08-25 at 14:48 +0200, Benjamin Priour wrote:
> Hi David,
> 
> Thanks for the review.
> 
> On Fri, Aug 25, 2023 at 2:12 AM David Malcolm 
> wrote:
> 
> > > From: benjamin priour 
> > > 
> > > Hi,
> > > 
> > > > Below is the first batch of a series of patches to transition
> > > > the analyzer testsuite from gcc.dg/analyzer to c-c++-
> > > > common/analyzer.
> > > > I do not know how long this series will be, thus the patch was not
> > > > numbered.
> > > > For the vast majority of the tests, the transition only required
> > > > some
> > > > adjustments to the syntax and casts to be C++-friendly, or to
> > > > adjust
> > > > the warning regexes to fit the C++ FE.
> > > 
> > > The most noteworthy change is in the handling of known_functions,
> > > as described in the below patch.
> > 
> > Hi Benjamin.
> > 
> > Many thanks for putting this together, it looks like it was a lot
> > of
> > work.
> > 
> > > Successfully regstrapped on x86_64-linux-gnu off trunk
> > > 18befd6f050e70f11ecca1dd58624f0ee3c68cc7.
> > 
> > Did you compare the before/after results from DejaGnu somehow?
> > 
> > Note that I've pushed 9 patches to the analyzer since
> > 18befd6f050e70f11ecca1dd58624f0ee3c68cc7 and some of those touch
> > the
> > files below, so it's worth rebasing and double-checking the
> > results.
> > 
> > 

[...snip...]

> 
> > I confess I'm still a little hazy as to the whole builtin_kf logic,
> > but
> > I trust you that this is needed.
> > 
> > Please can you add a paragraph to this comment to explain the
> > motivation here (perhaps giving examples?)
> > 
> > > +
> > > +const builtin_known_function *
> > > +region_model::get_builtin_kf (const gcall *call,
> > > +    region_model_context *ctxt /* = NULL
> > > */)
> > const
> > > +{
> > > +  region_model *mut_this = const_cast <region_model *> (this);
> > > +  tree callee_fndecl = mut_this->get_fndecl_for_call (call,
> > > ctxt);
> > > +  if (! callee_fndecl)
> > > +    return NULL;
> > > +
> > > +  call_details cd (call, mut_this, ctxt);
> > > +  if (const known_function *kf = get_known_function
> > > (callee_fndecl, cd))
> > > +    return kf->dyn_cast_builtin_kf ();
> > > +
> > > +  return NULL;
> > > +}
> > > +
> > 
> > 
> The new comment is as follow:
> 
> /* Get any builtin_known_function for CALL and emit any warning to
> CTXT
>    if not NULL.
> 
>    The call must match all assumptions made by the known_function
> (such as
>    e.g. "argument 1's type must be a pointer type").
> 
>    Return NULL if no builtin_known_function is found, or it does
>    not match the assumption(s).
> 
>    Internally calls get_known_function to find a known_function and
> cast it
>    to a builtin_known_function.
> 
>    For instance, calloc is a C builtin, defined in gcc/builtins.def
>    by the DEF_LIB_BUILTIN macro. Such builtins are recognized by the
>    analyzer by their name, so that even in C++ or if the user
> redeclares
>    them but mismatch their signature, they are still recognized as
> builtins.
> 
>    Cases when a supposed builtin is not flagged as one by the FE:
> 
>     The C++ FE does not recognize calloc as a builtin if it has not
> been
>     included from a standard header, but the C FE does. Hence in C++
> if
>     CALL comes from a calloc and stdlib is not included,
>     gcc/tree.h:fndecl_built_in_p (CALL) would be false.
> 
>     In C code, a __SIZE_TYPE__ calloc (__SIZE_TYPE__, __SIZE_TYPE__)
> user
>     declaration has obviously a mismatching signature from the
> standard, and
>     its function_decl tree won't be unified by
>     gcc/c-decl.cc:match_builtin_function_types.
> 
>    Yet in both cases the analyzer should treat the calls as a builtin
> calloc
>    so that extra attributes unspecified by the standard but added by
> GCC
>    (e.g. sprintf attributes in gcc/builtins.def), useful for the
> detection
> of
>    dangerous behavior, are indeed processed.
> 
>    Therefore for those cases when a "builtin flag" is not added by
> the FE,
>    builtins' kf are derived from builtin_known_function, whose method
>    builtin_known_function::builtin_decl returns the builtin's
>    function_decl tree as defined in gcc/builtins.def, with all the
> extra
>    attributes.  */
> 
> I hope it clarifies the new kf subclass's purpose.

Thanks!

[...snip...]

> > 
> 
> > > diff --git a/gcc/testsuite/gcc.dg/analyzer/sprintf-1.c
> > b/gcc/testsuite/gcc.dg/analyzer/sprintf-1.c
> > > index f8dc806d619..e94c0561665 100644
> > > --- a/gcc/testsuite/gcc.dg/analyzer/sprintf-1.c
> > > +++ b/gcc/testsuite/gcc.dg/analyzer/sprintf-1.c
> > > @@ -1,53 +1,14 @@
> > >  /* See e.g. https://en.cppreference.com/w/c/io/fprintf
> > >     and
> > > https://www.man7.org/linux/man-pages/man3/sprintf.3.html */
> > > 
> > > +/* { dg-skip-if "C++ fpermissive already throws an error" { c++
> > > } } */
> > 
> > Given that this is in the gcc.dg directory, this directive
> > presumably
> > never skips.
> > 
> > Is the intent here to document that
> > (a) this set of tests is just 

[pushed] analyzer: fix ICE in text art strings support

2023-08-25 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3481-g99a3fcb8ff0bf2.

gcc/analyzer/ChangeLog:
* access-diagram.cc (class string_region_spatial_item): Remove
assumption that the string is written to the start of the cluster.

gcc/testsuite/ChangeLog:
* gcc.dg/analyzer/out-of-bounds-diagram-17.c: New test.
* gcc.dg/analyzer/out-of-bounds-diagram-18.c: New test.
* gcc.dg/analyzer/out-of-bounds-diagram-19.c: New test.
---
 gcc/analyzer/access-diagram.cc| 57 ---
 .../analyzer/out-of-bounds-diagram-17.c   | 34 +++
 .../analyzer/out-of-bounds-diagram-18.c   | 38 +
 .../analyzer/out-of-bounds-diagram-19.c   | 45 +++
 4 files changed, 155 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-17.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-18.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-19.c

diff --git a/gcc/analyzer/access-diagram.cc b/gcc/analyzer/access-diagram.cc
index d7b669a4e38e..a51d594b5b2c 100644
--- a/gcc/analyzer/access-diagram.cc
+++ b/gcc/analyzer/access-diagram.cc
@@ -1509,10 +1509,16 @@ public:
   out.add_all_bytes_in_range (m_actual_bits);
 else
   {
-   byte_range head_of_string (0, m_ellipsis_head_len);
+   byte_range bytes (0, 0);
+   bool valid = m_actual_bits.as_concrete_byte_range (&bytes);
+   gcc_assert (valid);
+   byte_range head_of_string (bytes.get_start_byte_offset (),
+  m_ellipsis_head_len);
out.add_all_bytes_in_range (head_of_string);
byte_range tail_of_string
- (TREE_STRING_LENGTH (string_cst) - m_ellipsis_tail_len,
+ ((bytes.get_start_byte_offset ()
+   + TREE_STRING_LENGTH (string_cst)
+   - m_ellipsis_tail_len),
   m_ellipsis_tail_len);
out.add_all_bytes_in_range (tail_of_string);
/* Adding the above pair of ranges will also effectively add
@@ -1535,11 +1541,14 @@ public:
 tree string_cst = get_string_cst ();
 if (m_show_full_string)
   {
-   for (byte_offset_t byte_idx = bytes.get_start_byte_offset ();
-   byte_idx < bytes.get_next_byte_offset ();
-   byte_idx = byte_idx + 1)
-add_column_for_byte (t, btm, sm, byte_idx,
- byte_idx_table_y, byte_val_table_y);
+   for (byte_offset_t byte_idx_within_cluster
+ = bytes.get_start_byte_offset ();
+   byte_idx_within_cluster < bytes.get_next_byte_offset ();
+   byte_idx_within_cluster = byte_idx_within_cluster + 1)
+add_column_for_byte
+  (t, btm, sm, byte_idx_within_cluster,
+   byte_idx_within_cluster - bytes.get_start_byte_offset (),
+   byte_idx_table_y, byte_val_table_y);
 
if (m_show_utf8)
 {
@@ -1566,10 +1575,13 @@ public:
 = decoded_char.m_start_byte - TREE_STRING_POINTER (string_cst);
   byte_size_t size_in_bytes
 = decoded_char.m_next_byte - decoded_char.m_start_byte;
-  byte_range bytes (start_byte_idx, size_in_bytes);
+  byte_range cluster_bytes_for_codepoint
+(start_byte_idx + bytes.get_start_byte_offset (),
+ size_in_bytes);
 
   const table::rect_t code_point_table_rect
-= btm.get_table_rect (_string_reg, bytes,
+= btm.get_table_rect (_string_reg,
+  cluster_bytes_for_codepoint,
   utf8_code_point_table_y, 1);
   char buf[100];
   sprintf (buf, "U+%04x", decoded_char.m_ch);
@@ -1579,7 +1591,8 @@ public:
   if (show_unichars)
 {
   const table::rect_t character_table_rect
-= btm.get_table_rect (_string_reg, bytes,
+= btm.get_table_rect (_string_reg,
+  cluster_bytes_for_codepoint,
   utf8_character_table_y, 1);
   if (cpp_is_printable_char (decoded_char.m_ch))
 t.set_cell_span (character_table_rect,
@@ -1598,12 +1611,14 @@ public:
   {
/* Head of string.  */
for (int byte_idx = 0; byte_idx < m_ellipsis_head_len; byte_idx++)
- add_column_for_byte (t, btm, sm, byte_idx,
+ add_column_for_byte (t, btm, sm,
+  byte_idx + bytes.get_start_byte_offset (),
+  byte_idx,
   byte_idx_table_y, byte_val_table_y);
 
/* Ellipsis (two rows high).  */
const byte_range ellipsis_bytes
- (m_ellipsis_head_len,
+ (m_ellipsis_head_len + bytes.get_start_byte_offset (),
   TREE_STRING_LENGTH (string_cst)
   - 

Re: [PATCH] analyzer: Move gcc.dg/analyzer tests to c-c++-common (1).

2023-08-24 Thread David Malcolm via Gcc-patches
> From: benjamin priour 
> 
> Hi,
> 
> Below is the first batch of a series of patches to transition
> the analyzer testsuite from gcc.dg/analyzer to c-c++-common/analyzer.
> I do not know how long this series will be, thus the patch was not
> numbered.
> 
> For the vast majority of the tests, the transition only required some
> adjustment of the syntax and casts to be C++-friendly, or adjusting
> the warning regexes to fit the C++ FE.
> 
> The most noteworthy change is in the handling of known_functions,
> as described in the below patch.

Hi Benjamin.

Many thanks for putting this together, it looks like it was a lot of
work.

> Successfully regstrapped on x86_64-linux-gnu off trunk
> 18befd6f050e70f11ecca1dd58624f0ee3c68cc7.

Did you compare the before/after results from DejaGnu somehow?

Note that I've pushed 9 patches to the analyzer since
18befd6f050e70f11ecca1dd58624f0ee3c68cc7 and some of those touch the
files below, so it's worth rebasing and double-checking the results.

> Is it OK for trunk ?

It's *almost* ready; various comments inline below, throughout...

> 
> Thanks,
> Benjamin.
> 
> Patch below.
> ---
> 
> First batch of moving tests from under gcc.dg/analyzer into
> c-c++-common/analyzer.
> 
> C builtins are not recognized as such by C++, therefore
> this patch no longer uses tree.h:fndecl_built_in_p to recognize
> a builtin function, but rather the function names.
> 
> Thus functions named as C builtins - such as calloc, sprintf ... -
> are recognized as such both in C and C++ sources by the analyzer.
> 
> For user-declared functions named after builtins, the builtin's
> function_decl tree is now preferred over the function_decl the user
> declared, even when the FE considers their declarations to mismatch
> (-Wbuiltin-declaration-mismatch emitted). This mainly matters
> in the handling of function attributes: the analyzer uses
> the builtin's attributes as defined in gcc/builtins.def.
> 
> Signed-off-by: benjamin priour 
> 
> gcc/analyzer/ChangeLog:

Please add
PR analyzer/96395
to the ChangeLog entries, and [PR96395] to the end of the Subject of
the commit, so that these get tracked within that bug as they get
pushed.

> 
>   * analyzer.h (class known_function): Add virtual casts to
>   builtin_known_function.
>   (class builtin_known_function): New subclass of known_function
>   for builtins.
>   * kf.cc (class kf_alloca): Now derived from
>   builtin_known_function
>   (class kf_calloc): Likewise.
>   (class kf_free): Likewise.
>   (class kf_malloc): Likewise.
>   (class kf_memcpy_memmove): Likewise.
>   (class kf_memset): Likewise.
>   (class kf_realloc): Likewise.
>   (class kf_strchr): Likewise.
>   (class kf_sprintf): Likewise.
>   (class kf_strcpy): Likewise.
>   (class kf_strdup): Likewise.
>   (class kf_strlen): Likewise.
>   (class kf_strndup): Likewise.
>   (register_known_functions): Builtins are now registered as
>   known_functions by name rather than by their BUILTIN_CODE.
>   * known-function-manager.cc (get_normal_builtin): New overload.
>   * known-function-manager.h: New overload declaration.
>   * region-model.cc (region_model::get_builtin_kf): New function.
>   * region-model.h (class region_model): Add declaration of
>   get_builtin_kf.
>   * sm-fd.cc: For calls recognized as builtins, use the attributes
>   of that builtin as defined in gcc/builtins.def rather than the user's.
>   * sm-malloc.cc (malloc_state_machine::on_stmt): Likewise.
> 
> gcc/testsuite/ChangeLog:

Add
PR analyzer/96395
here, as well, please.

> 
>   * gcc.dg/analyzer/aliasing-3.c: Moved to...
>   * c-c++-common/analyzer/aliasing-3.c: ...here.

[...snip...]

> diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h
> index 93a28b4b5cf..63a220c9b6d 100644
> --- a/gcc/analyzer/analyzer.h
> +++ b/gcc/analyzer/analyzer.h
> @@ -128,6 +128,10 @@ struct interesting_t;
>  
>  class feasible_node;
>  
> +class known_function;
> +  class builtin_known_function;
> +  class internal_known_function;
> +
>  /* Forward decls of functions.  */
>  
>  extern void dump_tree (pretty_printer *pp, tree t);
> @@ -279,6 +283,28 @@ public:
>{
>  return;
>}
> +
> +  virtual const builtin_known_function *
> +  dyn_cast_builtin_kf () const { return NULL; }
> +  virtual builtin_known_function *
> +  dyn_cast_builtin_kf () { return NULL; }

I don't think we ever work with non-const known_function pointers, so
we don't need this non-const version of the vfunc.

> +};
> +
> +/* Subclass of known_function for builtin functions.  */
> +
> +class builtin_known_function : public known_function
> +{
> +public:
> +  virtual enum built_in_function builtin_code () const = 0;
> +  tree builtin_decl () const {
> +gcc_assert (builtin_code () < END_BUILTINS);
> +return builtin_info[builtin_code ()].decl;
> +  }
> +
> +  virtual const builtin_known_function *
> + 

[PATCH 8/9] analyzer: handle strlen(BITS_WITHIN) [PR105899]

2023-08-24 Thread David Malcolm via Gcc-patches
gcc/analyzer/ChangeLog:
PR analyzer/105899
* region-model.cc (fragment::has_null_terminator): Handle
SK_BITS_WITHIN.
---
 gcc/analyzer/region-model.cc | 21 -
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 6574ec140074..025b555d7b97 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -3357,10 +3357,29 @@ struct fragment
}
break;
 
+  case SK_BITS_WITHIN:
+   {
+ const bits_within_svalue *bits_within_sval
+   = (const bits_within_svalue *)m_sval;
+ byte_range bytes (0, 0);
+ if (bits_within_sval->get_bits ().as_byte_range (&bytes))
+   {
+ const svalue *inner_sval = bits_within_sval->get_inner_svalue ();
+ fragment f (byte_range
+ (start_read_offset - bytes.get_start_bit_offset (),
+  std::max (bytes.m_size_in_bytes,
+ available_bytes)),
+ inner_sval);
+ return f.has_null_terminator (start_read_offset, out_bytes_read);
+   }
+   }
+   break;
+
   default:
// TODO: it may be possible to handle other cases here.
-   return tristate::TS_UNKNOWN;
+   break;
   }
+return tristate::TS_UNKNOWN;
   }
 
   static tristate
-- 
2.26.3



[PATCH 7/9] analyzer: handle INIT_VAL(ELEMENT_REG(STRING_REG), CONSTANT_SVAL) [PR105899]

2023-08-24 Thread David Malcolm via Gcc-patches
gcc/analyzer/ChangeLog:
PR analyzer/105899
* region-model-manager.cc
(region_model_manager::get_or_create_initial_value): Simplify
INIT_VAL(ELEMENT_REG(STRING_REG), CONSTANT_SVAL) to
CONSTANT_SVAL(STRING[N]).
---
 gcc/analyzer/region-model-manager.cc | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/gcc/analyzer/region-model-manager.cc 
b/gcc/analyzer/region-model-manager.cc
index 65b719056c84..22246876f8f9 100644
--- a/gcc/analyzer/region-model-manager.cc
+++ b/gcc/analyzer/region-model-manager.cc
@@ -310,6 +310,25 @@ region_model_manager::get_or_create_initial_value (const 
region *reg,
 get_or_create_initial_value (original_reg));
 }
 
+  /* Simplify:
+   INIT_VAL(ELEMENT_REG(STRING_REG), CONSTANT_SVAL)
+ to:
+   CONSTANT_SVAL(STRING[N]).  */
+  if (const element_region *element_reg = reg->dyn_cast_element_region ())
+if (tree cst_idx = element_reg->get_index ()->maybe_get_constant ())
+  if (const string_region *string_reg
+ = element_reg->get_parent_region ()->dyn_cast_string_region ())
+   if (tree_fits_shwi_p (cst_idx))
+ {
+   HOST_WIDE_INT idx = tree_to_shwi (cst_idx);
+   tree string_cst = string_reg->get_string_cst ();
+   if (idx >= 0 && idx <= TREE_STRING_LENGTH (string_cst))
+ {
+   int ch = TREE_STRING_POINTER (string_cst)[idx];
+   return get_or_create_int_cst (reg->get_type (), ch);
+ }
+ }
+
   /* INIT_VAL (*UNKNOWN_PTR) -> UNKNOWN_VAL.  */
   if (reg->symbolic_for_unknown_ptr_p ())
 return get_or_create_unknown_svalue (reg->get_type ());
-- 
2.26.3



[pushed 0/9] analyzer: strlen, strcpy, and strcat [PR105899]

2023-08-24 Thread David Malcolm via Gcc-patches
This patch kit makes improvements to the analyzer's new strlen
implementation, and wires it up to strcpy and strcat.

For example, given:

  #include <string.h>

  void test (void)
  {
char buf[10];
strcpy (buf, "hello world!");
  }

we now emit:

demo.c: In function ‘test’:
demo.c:6:3: warning: stack-based buffer overflow [CWE-121] 
[-Wanalyzer-out-of-bounds]
6 |   strcpy (buf, "hello world!");
  |   ^~~~
  ‘test’: events 1-2
|
|5 |   char buf[10];
|  |^~~
|  ||
|  |(1) capacity: 10 bytes
|6 |   strcpy (buf, "hello world!");
|  |   
|  |   |
|  |   (2) out-of-bounds write from byte 10 till byte 12 but ‘buf’ ends 
at byte 10
|
demo.c:6:3: note: write of 3 bytes to beyond the end of ‘buf’
6 |   strcpy (buf, "hello world!");
  |   ^~~~
demo.c:6:3: note: valid subscripts for ‘buf’ are ‘[0]’ to ‘[9]’

  ┌─┬─┬┬┬┬┬┬┬┬┐┌─┬─┬─┐
  │ [0] │ [1] │[2] │[3] │[4] │[5] │[6] │[7] │[8] │[9] ││[10] │[11] │[12] │
  ├─┼─┼┼┼┼┼┼┼┼┤├─┼─┼─┤
  │ ‘h’ │ ‘e’ │‘l’ │‘l’ │‘o’ │‘ ’ │‘w’ │‘o’ │‘r’ │‘l’ ││ ‘d’ │ ‘!’ │ NUL │
  ├─┴─┴┴┴┴┴┴┴┴┴┴─┴─┴─┤
  │  string literal (type: ‘char[13]’)   │
  └──┘
 │ │││││││││  │ │ │
 │ │││││││││  │ │ │
 v vvvvvvvvv  v v v
  ┌─┬┬┐┌─┐
  │ [0] │  ...   │[9] ││ │
  ├─┴┴┤│after valid range│
  │ ‘buf’ (type: ‘char[10]’)  ││ │
  └───┘└─┘
  ├─┬─┤├┬┤
│   │
  ╭─┴╮  ╭───┴──╮
  │capacity: 10 bytes│  │⚠️  overflow of 3 bytes│
  ╰──╯  ╰──╯

in addition to the pre-existing:

demo.c:6:3: warning: ‘__builtin_memcpy’ writing 13 bytes into a region of size 
10 overflows the destination [-Wstringop-overflow=]
demo.c:5:8: note: destination object ‘buf’ of size 10
5 |   char buf[10];
  |^~~

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3461-g9aaec66917c96a through to
r14-3469-gbbdc0e0d0042ae.

David Malcolm (9):
  analyzer: add logging to impl_path_context
  analyzer: handle symbolic bindings in scan_for_null_terminator
[PR105899]
  analyzer: reimplement kf_strcpy [PR105899]
  analyzer: eliminate region_model::get_string_size [PR105899]
  analyzer: reimplement kf_memcpy_memmove
  analyzer: handle strlen(INIT_VAL(STRING_REG)) [PR105899]
  analyzer: handle INIT_VAL(ELEMENT_REG(STRING_REG), CONSTANT_SVAL)
[PR105899]
  analyzer: handle strlen(BITS_WITHIN) [PR105899]
  analyzer: implement kf_strcat [PR105899]

 gcc/analyzer/call-details.cc  |  12 +-
 gcc/analyzer/call-details.h   |   5 +-
 gcc/analyzer/engine.cc|  13 +-
 gcc/analyzer/kf.cc| 116 +---
 gcc/analyzer/region-model-manager.cc  |  19 ++
 gcc/analyzer/region-model.cc  | 261 +-
 gcc/analyzer/region-model.h   |  22 +-
 gcc/doc/invoke.texi   |   1 +
 .../analyzer/out-of-bounds-diagram-16.c   |  31 +++
 gcc/testsuite/gcc.dg/analyzer/sprintf-1.c |  11 +
 gcc/testsuite/gcc.dg/analyzer/strcat-1.c  | 136 +
 gcc/testsuite/gcc.dg/analyzer/strcpy-1.c  |  22 ++
 gcc/testsuite/gcc.dg/analyzer/strcpy-3.c  |   8 +
 gcc/testsuite/gcc.dg/analyzer/strcpy-4.c  |  51 
 14 files changed, 601 insertions(+), 107 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-16.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/strcat-1.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/strcpy-4.c

-- 
2.26.3



[PATCH 6/9] analyzer: handle strlen(INIT_VAL(STRING_REG)) [PR105899]

2023-08-24 Thread David Malcolm via Gcc-patches
gcc/analyzer/ChangeLog:
PR analyzer/105899
* region-model.cc (fragment::has_null_terminator): Move STRING_CST
handling to fragment::string_cst_has_null_terminator; also use it to
handle INIT_VAL(STRING_REG).
(fragment::string_cst_has_null_terminator): New, from above.

gcc/testsuite/ChangeLog:
PR analyzer/105899
* gcc.dg/analyzer/strcpy-3.c (test_2): New.
---
 gcc/analyzer/region-model.cc | 68 
 gcc/testsuite/gcc.dg/analyzer/strcpy-3.c |  7 +++
 2 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 00c306ab7dae..6574ec140074 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -3310,27 +3310,10 @@ struct fragment
  switch (TREE_CODE (cst))
{
case STRING_CST:
- {
-   /* Look for the first 0 byte within STRING_CST
-  from START_READ_OFFSET onwards.  */
-   const HOST_WIDE_INT num_bytes_to_search
- = std::min ((TREE_STRING_LENGTH (cst)
- - rel_start_read_offset_hwi),
-available_bytes_hwi);
-   const char *start = (TREE_STRING_POINTER (cst)
-+ rel_start_read_offset_hwi);
-   if (num_bytes_to_search >= 0)
- if (const void *p = memchr (start, 0,
- num_bytes_to_search))
-   {
- *out_bytes_read = (const char *)p - start + 1;
- return tristate (true);
-   }
-
-   *out_bytes_read = available_bytes;
-   return tristate (false);
- }
- break;
+ return string_cst_has_null_terminator (cst,
+rel_start_read_offset_hwi,
+available_bytes_hwi,
+out_bytes_read);
case INTEGER_CST:
  if (rel_start_read_offset_hwi == 0
  && integer_onep (TYPE_SIZE_UNIT (TREE_TYPE (cst
@@ -3357,12 +3340,55 @@ struct fragment
}
}
break;
+
+  case SK_INITIAL:
+   {
+ const initial_svalue *initial_sval = (const initial_svalue *)m_sval;
+ const region *reg = initial_sval->get_region ();
+ if (const string_region *string_reg = reg->dyn_cast_string_region ())
+   {
+ tree string_cst = string_reg->get_string_cst ();
+ return string_cst_has_null_terminator (string_cst,
+rel_start_read_offset_hwi,
+available_bytes_hwi,
+out_bytes_read);
+   }
+ return tristate::TS_UNKNOWN;
+   }
+   break;
+
   default:
// TODO: it may be possible to handle other cases here.
return tristate::TS_UNKNOWN;
   }
   }
 
+  static tristate
+  string_cst_has_null_terminator (tree string_cst,
+ HOST_WIDE_INT rel_start_read_offset_hwi,
+ HOST_WIDE_INT available_bytes_hwi,
+ byte_offset_t *out_bytes_read)
+  {
+/* Look for the first 0 byte within STRING_CST
+   from START_READ_OFFSET onwards.  */
+const HOST_WIDE_INT num_bytes_to_search
+  = std::min ((TREE_STRING_LENGTH (string_cst)
+ - rel_start_read_offset_hwi),
+available_bytes_hwi);
+const char *start = (TREE_STRING_POINTER (string_cst)
++ rel_start_read_offset_hwi);
+if (num_bytes_to_search >= 0)
+  if (const void *p = memchr (start, 0,
+ num_bytes_to_search))
+   {
+ *out_bytes_read = (const char *)p - start + 1;
+ return tristate (true);
+   }
+
+*out_bytes_read = available_bytes_hwi;
+return tristate (false);
+  }
+
   byte_range m_byte_range;
   const svalue *m_sval;
 };
diff --git a/gcc/testsuite/gcc.dg/analyzer/strcpy-3.c 
b/gcc/testsuite/gcc.dg/analyzer/strcpy-3.c
index abb49bc39f27..a7b324fc445e 100644
--- a/gcc/testsuite/gcc.dg/analyzer/strcpy-3.c
+++ b/gcc/testsuite/gcc.dg/analyzer/strcpy-3.c
@@ -22,3 +22,10 @@ void test_1 (void)
   __analyzer_eval (result[5] == 0); /* { dg-warning "TRUE" } */
   __analyzer_eval (strlen (result) == 5); /* { dg-warning "TRUE" } */
 }
+
+void test_2 (void)
+{
+  char buf[16];
+  __builtin_strcpy (buf, "abc");
+  __analyzer_eval (strlen (buf) == 3); /* { dg-warning "TRUE" } */
+}
-- 
2.26.3



[PATCH 2/9] analyzer: handle symbolic bindings in scan_for_null_terminator [PR105899]

2023-08-24 Thread David Malcolm via Gcc-patches
gcc/analyzer/ChangeLog:
PR analyzer/105899
* region-model.cc (iterable_cluster::iterable_cluster): Add
symbolic binding keys to m_symbolic_bindings.
(iterable_cluster::has_symbolic_bindings_p): New.
(iterable_cluster::m_symbolic_bindings): New field.
(region_model::scan_for_null_terminator): Treat clusters with
symbolic bindings as having unknown strlen.

gcc/testsuite/ChangeLog:
PR analyzer/105899
* gcc.dg/analyzer/sprintf-1.c: Include "analyzer-decls.h".
(test_strlen_1): New.
---
 gcc/analyzer/region-model.cc  | 15 +++
 gcc/testsuite/gcc.dg/analyzer/sprintf-1.c | 11 +++
 2 files changed, 26 insertions(+)

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 99817aee3a93..7a2f81f36e0f 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -3420,6 +3420,8 @@ public:
if (concrete_key->get_byte_range (&fragment_bytes))
  m_fragments.safe_push (fragment (fragment_bytes, sval));
  }
+   else
+ m_symbolic_bindings.safe_push (key);
   }
 m_fragments.qsort (fragment::cmp_ptrs);
   }
@@ -3440,8 +3442,14 @@ public:
 return false;
   }
 
+  bool has_symbolic_bindings_p () const
+  {
+return !m_symbolic_bindings.is_empty ();
+  }
+
 private:
   auto_vec m_fragments;
+  auto_vec m_symbolic_bindings;
 };
 
 /* Simulate reading the bytes at BYTES from BASE_REG.
@@ -3610,6 +3618,13 @@ region_model::scan_for_null_terminator (const region 
*reg,
   /* No binding for this base_region, or no binding at src_byte_offset
  (or a symbolic binding).  */
 
+  if (c.has_symbolic_bindings_p ())
+{
+  if (out_sval)
+   *out_sval = m_mgr->get_or_create_unknown_svalue (NULL_TREE);
+  return m_mgr->get_or_create_unknown_svalue (size_type_node);
+}
+
   /* TODO: the various special-cases seen in
  region_model::get_store_value.  */
 
diff --git a/gcc/testsuite/gcc.dg/analyzer/sprintf-1.c 
b/gcc/testsuite/gcc.dg/analyzer/sprintf-1.c
index f8dc806d6192..e7c2b3089c5b 100644
--- a/gcc/testsuite/gcc.dg/analyzer/sprintf-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/sprintf-1.c
@@ -1,6 +1,8 @@
 /* See e.g. https://en.cppreference.com/w/c/io/fprintf
and https://www.man7.org/linux/man-pages/man3/sprintf.3.html */
 
+#include "analyzer-decls.h"
+
 extern int
 sprintf(char* dst, const char* fmt, ...)
   __attribute__((__nothrow__));
@@ -64,3 +66,12 @@ test_fmt_not_terminated (char *dst)
   return sprintf (dst, fmt); /* { dg-warning "stack-based buffer over-read" } 
*/
   /* { dg-message "while looking for null terminator for argument 2 
\\(''\\) of 'sprintf'..." "event" { target *-*-* } .-1 } */
 }
+
+void
+test_strlen_1 (void)
+{
+  char buf[10];
+  sprintf (buf, "msg: %s\n", "abc");
+  __analyzer_eval (__builtin_strlen (buf) == 8); /* { dg-warning "UNKNOWN" } */
+  // TODO: ideally would be TRUE  
+}
-- 
2.26.3



[PATCH 9/9] analyzer: implement kf_strcat [PR105899]

2023-08-24 Thread David Malcolm via Gcc-patches
gcc/analyzer/ChangeLog:
PR analyzer/105899
* call-details.cc
(call_details::check_for_null_terminated_string_arg): Split into
overloads, one taking just an arg_idx, the other a new
"include_terminator" param.
* call-details.h: Likewise.
* kf.cc (class kf_strcat): New.
(kf_strcpy::impl_call_pre): Update for change to
check_for_null_terminated_string_arg.
(register_known_functions): Register kf_strcat.
* region-model.cc
(region_model::check_for_null_terminated_string_arg): Split into
overloads, one taking just an arg_idx, the other a new
"include_terminator" param.  When returning an svalue, handle
"include_terminator" being false by subtracting one.
* region-model.h
(region_model::check_for_null_terminated_string_arg): Split into
overloads, one taking just an arg_idx, the other a new
"include_terminator" param.

gcc/ChangeLog:
PR analyzer/105899
* doc/invoke.texi (Static Analyzer Options): Add "strcat" to the
list of functions known to the analyzer.

gcc/testsuite/ChangeLog:
PR analyzer/105899
* gcc.dg/analyzer/strcat-1.c: New test.
---
 gcc/analyzer/call-details.cc |  12 +-
 gcc/analyzer/call-details.h  |   5 +-
 gcc/analyzer/kf.cc   |  72 ++--
 gcc/analyzer/region-model.cc |  63 +--
 gcc/analyzer/region-model.h  |   6 +-
 gcc/doc/invoke.texi  |   1 +
 gcc/testsuite/gcc.dg/analyzer/strcat-1.c | 136 +++
 7 files changed, 275 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/strcat-1.c

diff --git a/gcc/analyzer/call-details.cc b/gcc/analyzer/call-details.cc
index 8f5b28ce6c26..ce1f859c9996 100644
--- a/gcc/analyzer/call-details.cc
+++ b/gcc/analyzer/call-details.cc
@@ -386,13 +386,23 @@ call_details::lookup_function_attribute (const char 
*attr_name) const
   return lookup_attribute (attr_name, TYPE_ATTRIBUTES (allocfntype));
 }
 
+void
+call_details::check_for_null_terminated_string_arg (unsigned arg_idx) const
+{
+  check_for_null_terminated_string_arg (arg_idx, false, nullptr);
+}
+
 const svalue *
 call_details::
 check_for_null_terminated_string_arg (unsigned arg_idx,
+ bool include_terminator,
  const svalue **out_sval) const
 {
   region_model *model = get_model ();
-  return model->check_for_null_terminated_string_arg (*this, arg_idx, 
out_sval);
+  return model->check_for_null_terminated_string_arg (*this,
+ arg_idx,
+ include_terminator,
+ out_sval);
 }
 
 } // namespace ana
diff --git a/gcc/analyzer/call-details.h b/gcc/analyzer/call-details.h
index 58b5ccd2acde..ae528e4ab116 100644
--- a/gcc/analyzer/call-details.h
+++ b/gcc/analyzer/call-details.h
@@ -72,9 +72,12 @@ public:
 
   tree lookup_function_attribute (const char *attr_name) const;
 
+  void
+  check_for_null_terminated_string_arg (unsigned arg_idx) const;
   const svalue *
   check_for_null_terminated_string_arg (unsigned arg_idx,
-   const svalue **out_sval = nullptr) 
const;
+   bool include_terminator,
+   const svalue **out_sval) const;
 
 private:
   const gcall *m_call;
diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index 3eddbe200387..36d9d10bb013 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -1106,6 +1106,61 @@ public:
   /* Currently a no-op.  */
 };
 
+/* Handler for "strcat" and "__builtin_strcat_chk".  */
+
+class kf_strcat : public known_function
+{
+public:
+  kf_strcat (unsigned int num_args) : m_num_args (num_args) {}
+  bool matches_call_types_p (const call_details &cd) const final override
+  {
+return (cd.num_args () == m_num_args
+   && cd.arg_is_pointer_p (0)
+   && cd.arg_is_pointer_p (1));
+  }
+
+  void impl_call_pre (const call_details &cd) const final override
+  {
+region_model *model = cd.get_model ();
+region_model_manager *mgr = cd.get_manager ();
+
+const svalue *dest_sval = cd.get_arg_svalue (0);
+const region *dest_reg = model->deref_rvalue (dest_sval, cd.get_arg_tree 
(0),
+ cd.get_ctxt ());
+
+const svalue *dst_strlen_sval
+  = cd.check_for_null_terminated_string_arg (0, false, nullptr);
+if (!dst_strlen_sval)
+  {
+   if (cd.get_ctxt ())
+ cd.get_ctxt ()->terminate_path ();
+   return;
+  }
+
+const svalue *bytes_to_copy;
+const svalue *num_src_bytes_read_sval
+  = cd.check_for_null_terminated_string_arg (1, true, &bytes_to_copy);
+if (!num_src_bytes_read_sval)
+  {
+   if 

[PATCH 3/9] analyzer: reimplement kf_strcpy [PR105899]

2023-08-24 Thread David Malcolm via Gcc-patches
This patch reimplements the analyzer's implementation of strcpy using
the region_model::scan_for_null_terminator infrastructure, so that e.g.
it can complain about out-of-bounds reads/writes, unterminated strings,
etc.

gcc/analyzer/ChangeLog:
PR analyzer/105899
* kf.cc (kf_strcpy::impl_call_pre): Reimplement using
check_for_null_terminated_string_arg.
* region-model.cc (region_model::get_store_bytes): Shortcut
reading all of a string_region.
(region_model::scan_for_null_terminator): Use get_store_value for
the bytes rather than "unknown" when returning an unknown length.
(region_model::write_bytes): New.
* region-model.h (region_model::write_bytes): New decl.

gcc/testsuite/ChangeLog:
PR analyzer/105899
* gcc.dg/analyzer/out-of-bounds-diagram-16.c: New test.
* gcc.dg/analyzer/strcpy-1.c: Add test coverage.
* gcc.dg/analyzer/strcpy-3.c: Likewise.
* gcc.dg/analyzer/strcpy-4.c: New test.
---
 gcc/analyzer/kf.cc| 32 +---
 gcc/analyzer/region-model.cc  | 32 ++--
 gcc/analyzer/region-model.h   |  4 ++
 .../analyzer/out-of-bounds-diagram-16.c   | 31 +++
 gcc/testsuite/gcc.dg/analyzer/strcpy-1.c  | 22 
 gcc/testsuite/gcc.dg/analyzer/strcpy-3.c  |  1 +
 gcc/testsuite/gcc.dg/analyzer/strcpy-4.c  | 51 +++
 7 files changed, 150 insertions(+), 23 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-16.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/strcpy-4.c

diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index 59f46bab581c..6b33cd159dac 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -1135,29 +1135,25 @@ void
 kf_strcpy::impl_call_pre (const call_details ) const
 {
   region_model *model = cd.get_model ();
-  region_model_manager *mgr = cd.get_manager ();
+  region_model_context *ctxt = cd.get_ctxt ();
 
   const svalue *dest_sval = cd.get_arg_svalue (0);
   const region *dest_reg = model->deref_rvalue (dest_sval, cd.get_arg_tree (0),
-cd.get_ctxt ());
-  const svalue *src_sval = cd.get_arg_svalue (1);
-  const region *src_reg = model->deref_rvalue (src_sval, cd.get_arg_tree (1),
-   cd.get_ctxt ());
-  const svalue *src_contents_sval = model->get_store_value (src_reg,
-   cd.get_ctxt ());
-  cd.check_for_null_terminated_string_arg (1);
-
+   ctxt);
+  /* strcpy returns the initial param.  */
   cd.maybe_set_lhs (dest_sval);
 
-  /* Try to get the string size if SRC_REG is a string_region.  */
-  const svalue *copied_bytes_sval = model->get_string_size (src_reg);
-  /* Otherwise, check if the contents of SRC_REG is a string.  */
-  if (copied_bytes_sval->get_kind () == SK_UNKNOWN)
-copied_bytes_sval = model->get_string_size (src_contents_sval);
-
-  const region *sized_dest_reg
-= mgr->get_sized_region (dest_reg, NULL_TREE, copied_bytes_sval);
-  model->set_value (sized_dest_reg, src_contents_sval, cd.get_ctxt ());
+  const svalue *bytes_to_copy;
+  if (const svalue *num_bytes_read_sval
+   = cd.check_for_null_terminated_string_arg (1, &bytes_to_copy))
+{
+  model->write_bytes (dest_reg, num_bytes_read_sval, bytes_to_copy, ctxt);
+}
+  else
+{
+  if (cd.get_ctxt ())
+   cd.get_ctxt ()->terminate_path ();
+}
 }
 
 /* Handler for "strdup" and "__builtin_strdup".  */
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 7a2f81f36e0f..cc8d895d9665 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -3460,6 +3460,13 @@ region_model::get_store_bytes (const region *base_reg,
   const byte_range ,
   region_model_context *ctxt) const
 {
+  /* Shortcut reading all of a string_region.  */
+  if (bytes.get_start_byte_offset () == 0)
+if (const string_region *string_reg = base_reg->dyn_cast_string_region ())
+  if (bytes.m_size_in_bytes
+ == TREE_STRING_LENGTH (string_reg->get_string_cst ()))
+   return m_mgr->get_or_create_initial_value (base_reg);
+
   const svalue *index_sval
 = m_mgr->get_or_create_int_cst (size_type_node,
bytes.get_start_byte_offset ());
@@ -3533,14 +3540,14 @@ region_model::scan_for_null_terminator (const region 
*reg,
   if (offset.symbolic_p ())
 {
   if (out_sval)
-   *out_sval = m_mgr->get_or_create_unknown_svalue (NULL_TREE);
+   *out_sval = get_store_value (reg, nullptr);
   return m_mgr->get_or_create_unknown_svalue (size_type_node);
 }
   byte_offset_t src_byte_offset;
   if (!offset.get_concrete_byte_offset (&src_byte_offset))
 {
   if (out_sval)
-   *out_sval = m_mgr->get_or_create_unknown_svalue 

[PATCH 5/9] analyzer: reimplement kf_memcpy_memmove

2023-08-24 Thread David Malcolm via Gcc-patches
gcc/analyzer/ChangeLog:
* kf.cc (kf_memcpy_memmove::impl_call_pre): Reimplement using
region_model::copy_bytes.
* region-model.cc (region_model::read_bytes): New.
(region_model::copy_bytes): New.
* region-model.h (region_model::read_bytes): New decl.
(region_model::copy_bytes): New decl.
---
 gcc/analyzer/kf.cc   | 14 --
 gcc/analyzer/region-model.cc | 35 +++
 gcc/analyzer/region-model.h  |  9 +
 3 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index 6b33cd159dac..3eddbe200387 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -541,7 +541,6 @@ kf_memcpy_memmove::impl_call_pre (const call_details ) 
const
   const svalue *num_bytes_sval = cd.get_arg_svalue (2);
 
   region_model *model = cd.get_model ();
-  region_model_manager *mgr = cd.get_manager ();
 
   const region *dest_reg
 = model->deref_rvalue (dest_ptr_sval, cd.get_arg_tree (0), cd.get_ctxt ());
@@ -550,15 +549,10 @@ kf_memcpy_memmove::impl_call_pre (const call_details ) 
const
 
   cd.maybe_set_lhs (dest_ptr_sval);
 
-  const region *sized_src_reg
-= mgr->get_sized_region (src_reg, NULL_TREE, num_bytes_sval);
-  const region *sized_dest_reg
-= mgr->get_sized_region (dest_reg, NULL_TREE, num_bytes_sval);
-  const svalue *src_contents_sval
-= model->get_store_value (sized_src_reg, cd.get_ctxt ());
-  model->check_for_poison (src_contents_sval, cd.get_arg_tree (1),
-  sized_src_reg, cd.get_ctxt ());
-  model->set_value (sized_dest_reg, src_contents_sval, cd.get_ctxt ());
+  model->copy_bytes (dest_reg,
+src_reg, cd.get_arg_tree (1),
+num_bytes_sval,
+cd.get_ctxt ());
 }
 
 /* Handler for "memset" and "__builtin_memset".  */
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 1fe66f4719fa..00c306ab7dae 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -3794,6 +3794,41 @@ region_model::write_bytes (const region *dest_reg,
   set_value (sized_dest_reg, sval, ctxt);
 }
 
+/* Read NUM_BYTES_SVAL from SRC_REG.
+   Use CTXT to report any warnings associated with the copy
+   (e.g. out-of-bounds reads, copying of uninitialized values, etc).  */
+
+const svalue *
+region_model::read_bytes (const region *src_reg,
+ tree src_ptr_expr,
+ const svalue *num_bytes_sval,
+ region_model_context *ctxt) const
+{
+  const region *sized_src_reg
+= m_mgr->get_sized_region (src_reg, NULL_TREE, num_bytes_sval);
+  const svalue *src_contents_sval = get_store_value (sized_src_reg, ctxt);
+  check_for_poison (src_contents_sval, src_ptr_expr,
+   sized_src_reg, ctxt);
+  return src_contents_sval;
+}
+
+/* Copy NUM_BYTES_SVAL bytes from SRC_REG to DEST_REG.
+   Use CTXT to report any warnings associated with the copy
+   (e.g. out-of-bounds reads/writes, copying of uninitialized values,
+   etc).  */
+
+void
+region_model::copy_bytes (const region *dest_reg,
+ const region *src_reg,
+ tree src_ptr_expr,
+ const svalue *num_bytes_sval,
+ region_model_context *ctxt)
+{
+  const svalue *data_sval
+= read_bytes (src_reg, src_ptr_expr, num_bytes_sval, ctxt);
+  write_bytes (dest_reg, num_bytes_sval, data_sval, ctxt);
+}
+
 /* Mark REG as having unknown content.  */
 
 void
diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index 41df1885ad5b..b1c705e22c28 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -371,6 +371,15 @@ class region_model
const svalue *num_bytes_sval,
const svalue *sval,
region_model_context *ctxt);
+  const svalue *read_bytes (const region *src_reg,
+   tree src_ptr_expr,
+   const svalue *num_bytes_sval,
+   region_model_context *ctxt) const;
+  void copy_bytes (const region *dest_reg,
+  const region *src_reg,
+  tree src_ptr_expr,
+  const svalue *num_bytes_sval,
+  region_model_context *ctxt);
   void mark_region_as_unknown (const region *reg, uncertainty_t *uncertainty);
 
   tristate eval_condition (const svalue *lhs,
-- 
2.26.3



[PATCH 4/9] analyzer: eliminate region_model::get_string_size [PR105899]

2023-08-24 Thread David Malcolm via Gcc-patches
gcc/analyzer/ChangeLog:
PR analyzer/105899
* region-model.cc (region_model::get_string_size): Delete both.
* region-model.h (region_model::get_string_size): Delete both
decls.
---
 gcc/analyzer/region-model.cc | 29 -
 gcc/analyzer/region-model.h  |  3 ---
 2 files changed, 32 deletions(-)

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index cc8d895d9665..1fe66f4719fa 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -2794,35 +2794,6 @@ region_model::get_capacity (const region *reg) const
   return m_mgr->get_or_create_unknown_svalue (sizetype);
 }
 
-/* Return the string size, including the 0-terminator, if SVAL is a
-   constant_svalue holding a string.  Otherwise, return an unknown_svalue.  */
-
-const svalue *
-region_model::get_string_size (const svalue *sval) const
-{
-  tree cst = sval->maybe_get_constant ();
-  if (!cst || TREE_CODE (cst) != STRING_CST)
-return m_mgr->get_or_create_unknown_svalue (size_type_node);
-
-  tree out = build_int_cst (size_type_node, TREE_STRING_LENGTH (cst));
-  return m_mgr->get_or_create_constant_svalue (out);
-}
-
-/* Return the string size, including the 0-terminator, if REG is a
-   string_region.  Otherwise, return an unknown_svalue.  */
-
-const svalue *
-region_model::get_string_size (const region *reg) const
-{
-  const string_region *str_reg = dyn_cast  (reg);
-  if (!str_reg)
-return m_mgr->get_or_create_unknown_svalue (size_type_node);
-
-  tree cst = str_reg->get_string_cst ();
-  tree out = build_int_cst (size_type_node, TREE_STRING_LENGTH (cst));
-  return m_mgr->get_or_create_constant_svalue (out);
-}
-
 /* If CTXT is non-NULL, use it to warn about any problems accessing REG,
using DIR to determine if this access is a read or write.
Return TRUE if an OOB access was detected.
diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index 9c6e60bbe824..41df1885ad5b 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -469,9 +469,6 @@ class region_model
 
   const svalue *get_capacity (const region *reg) const;
 
-  const svalue *get_string_size (const svalue *sval) const;
-  const svalue *get_string_size (const region *reg) const;
-
   bool replay_call_summary (call_summary_replay ,
const region_model );
 
-- 
2.26.3



[PATCH 1/9] analyzer: add logging to impl_path_context

2023-08-24 Thread David Malcolm via Gcc-patches
gcc/analyzer/ChangeLog:
* engine.cc (impl_path_context::impl_path_context): Add logger
param.
(impl_path_context::bifurcate): Add log message.
(impl_path_context::terminate_path): Likewise.
(impl_path_context::m_logger): New field.
(exploded_graph::process_node): Pass logger to path_ctxt ctor.
---
 gcc/analyzer/engine.cc | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc
index 3700154eec2c..a1908cdb364e 100644
--- a/gcc/analyzer/engine.cc
+++ b/gcc/analyzer/engine.cc
@@ -3848,8 +3848,10 @@ exploded_graph::maybe_create_dynamic_call (const gcall 
*call,
 class impl_path_context : public path_context
 {
 public:
-  impl_path_context (const program_state *cur_state)
+  impl_path_context (const program_state *cur_state,
+logger *logger)
   : m_cur_state (cur_state),
+m_logger (logger),
 m_terminate_path (false)
   {
   }
@@ -3868,6 +3870,9 @@ public:
   void
   bifurcate (std::unique_ptr info) final override
   {
+if (m_logger)
+  m_logger->log ("bifurcating path");
+
 if (m_state_at_bifurcation)
   /* Verify that the state at bifurcation is consistent when we
 split into multiple out-edges.  */
@@ -3884,6 +3889,8 @@ public:
 
   void terminate_path () final override
   {
+if (m_logger)
+  m_logger->log ("terminating path");
 m_terminate_path = true;
   }
 
@@ -3900,6 +3907,8 @@ public:
 private:
   const program_state *m_cur_state;
 
+  logger *m_logger;
+
   /* Lazily-created copy of the state before the split.  */
   std::unique_ptr m_state_at_bifurcation;
 
@@ -4044,7 +4053,7 @@ exploded_graph::process_node (exploded_node *node)
   exactly one stmt, the one that caused the change. */
program_state next_state (state);
 
-   impl_path_context path_ctxt (_state);
+   impl_path_context path_ctxt (_state, logger);
 
uncertainty_t uncertainty;
const supernode *snode = point.get_supernode ();
-- 
2.26.3



[pushed] analyzer: reimplement kf_strlen [PR105899]

2023-08-22 Thread David Malcolm via Gcc-patches
Reimplement kf_strlen in terms of the new string scanning
implementation, sharing strlen's implementation with
__analyzer_get_strlen.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3391-g3242fb533d48ab.

gcc/analyzer/ChangeLog:
PR analyzer/105899
* kf-analyzer.cc (class kf_analyzer_get_strlen): Move to kf.cc.
(register_known_analyzer_functions): Use make_kf_strlen.
* kf.cc (class kf_strlen::impl_call_pre): Replace with
implementation of kf_analyzer_get_strlen from kf-analyzer.cc.
Handle "UNKNOWN" return from check_for_null_terminated_string_arg
by falling back to a conjured svalue.
(make_kf_strlen): New.
(register_known_functions): Use make_kf_strlen.
* known-function-manager.h (make_kf_strlen): New decl.

gcc/testsuite/ChangeLog:
PR analyzer/105899
* gcc.dg/analyzer/null-terminated-strings-1.c: Update expected
results on symbolic values.
* gcc.dg/analyzer/strlen-1.c: New test.
---
 gcc/analyzer/kf-analyzer.cc   | 30 +-
 gcc/analyzer/kf.cc| 56 +--
 gcc/analyzer/known-function-manager.h |  2 +
 .../analyzer/null-terminated-strings-1.c  |  4 +-
 gcc/testsuite/gcc.dg/analyzer/strlen-1.c  | 54 ++
 5 files changed, 85 insertions(+), 61 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/strlen-1.c

diff --git a/gcc/analyzer/kf-analyzer.cc b/gcc/analyzer/kf-analyzer.cc
index c767ebcb6615..7ae598a89123 100644
--- a/gcc/analyzer/kf-analyzer.cc
+++ b/gcc/analyzer/kf-analyzer.cc
@@ -358,33 +358,6 @@ public:
   }
 };
 
-/* Handler for "__analyzer_get_strlen".  */
-
-class kf_analyzer_get_strlen : public known_function
-{
-public:
-  bool matches_call_types_p (const call_details ) const final override
-  {
-return cd.num_args () == 1 && cd.arg_is_pointer_p (0);
-  }
-  void impl_call_pre (const call_details ) const final override
-  {
-if (const svalue *bytes_read = cd.check_for_null_terminated_string_arg (0))
-  {
-   region_model_manager *mgr = cd.get_manager ();
-   /* strlen is (bytes_read - 1).  */
-   const svalue *strlen_sval
- = mgr->get_or_create_binop (size_type_node,
- MINUS_EXPR,
- bytes_read,
- mgr->get_or_create_int_cst 
(size_type_node, 1));
-   cd.maybe_set_lhs (strlen_sval);
-  }
-else
-  cd.set_any_lhs_with_defaults ();
-  }
-};
-
 /* Populate KFM with instances of known functions used for debugging the
analyzer and for writing DejaGnu tests, all with a "__analyzer_" prefix.  */
 
@@ -406,8 +379,7 @@ register_known_analyzer_functions (known_function_manager 
)
   kfm.add ("__analyzer_eval", make_unique ());
   kfm.add ("__analyzer_get_unknown_ptr",
   make_unique ());
-  kfm.add ("__analyzer_get_strlen",
-  make_unique ());
+  kfm.add ("__analyzer_get_strlen", make_kf_strlen ());
 }
 
 } // namespace ana
diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index 1601cf15c685..59f46bab581c 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -1187,7 +1187,7 @@ public:
   }
 };
 
-/* Handle the on_call_pre part of "strlen".  */
+/* Handler for "strlen" and for "__analyzer_get_strlen".  */
 
 class kf_strlen : public known_function
 {
@@ -1196,37 +1196,33 @@ public:
   {
 return (cd.num_args () == 1 && cd.arg_is_pointer_p (0));
   }
-  void impl_call_pre (const call_details ) const final override;
-};
-
-void
-kf_strlen::impl_call_pre (const call_details ) const
-{
-  region_model_context *ctxt = cd.get_ctxt ();
-  region_model *model = cd.get_model ();
-  region_model_manager *mgr = cd.get_manager ();
-
-  const svalue *arg_sval = cd.get_arg_svalue (0);
-  const region *buf_reg
-= model->deref_rvalue (arg_sval, cd.get_arg_tree (0), ctxt);
-  if (const string_region *str_reg
-  = buf_reg->dyn_cast_string_region ())
-{
-  tree str_cst = str_reg->get_string_cst ();
-  /* TREE_STRING_LENGTH is sizeof, not strlen.  */
-  int sizeof_cst = TREE_STRING_LENGTH (str_cst);
-  int strlen_cst = sizeof_cst - 1;
-  if (cd.get_lhs_type ())
+  void impl_call_pre (const call_details ) const final override
+  {
+if (const svalue *bytes_read = cd.check_for_null_terminated_string_arg (0))
+  if (bytes_read->get_kind () != SK_UNKNOWN)
{
- tree t_cst = build_int_cst (cd.get_lhs_type (), strlen_cst);
- const svalue *result_sval
-   = mgr->get_or_create_constant_svalue (t_cst);
- cd.maybe_set_lhs (result_sval);
+ region_model_manager *mgr = cd.get_manager ();
+ /* strlen is (bytes_read - 1).  */
+ const svalue *one = mgr->get_or_create_int_cst (size_type_node, 1);
+ const svalue *strlen_sval = mgr->get_or_create_binop (size_type_node,
+

[pushed 6/6] analyzer: check format strings for null termination [PR105899]

2023-08-21 Thread David Malcolm via Gcc-patches
This patch extends -fanalyzer to check the format strings of calls
to functions marked with '__attribute__ ((format...))'.

The only checking done in this patch is to check that the format string
is a valid null-terminated string; this patch doesn't attempt to check
the content of the format string.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3376-g3b691e0190c6e7.

gcc/analyzer/ChangeLog:
PR analyzer/105899
* call-details.cc (call_details::call_details): New ctor.
* call-details.h (call_details::call_details): New ctor decl.
(struct call_arg_details): Move here from region-model.cc.
* region-model.cc (region_model::check_call_format_attr): New.
(region_model::check_call_args): Call it.
(struct call_arg_details): Move it to call-details.h.
* region-model.h (region_model::check_call_format_attr): New decl.

gcc/testsuite/ChangeLog:
PR analyzer/105899
* gcc.dg/analyzer/attr-format-1.c: New test.
* gcc.dg/analyzer/sprintf-1.c: Update expected results for
now-passing tests.
---
 gcc/analyzer/call-details.cc  |  10 ++
 gcc/analyzer/call-details.h   |  30 +
 gcc/analyzer/region-model.cc  | 125 +-
 gcc/analyzer/region-model.h   |   2 +
 gcc/testsuite/gcc.dg/analyzer/attr-format-1.c |  31 +
 gcc/testsuite/gcc.dg/analyzer/sprintf-1.c |   6 +-
 6 files changed, 172 insertions(+), 32 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/attr-format-1.c

diff --git a/gcc/analyzer/call-details.cc b/gcc/analyzer/call-details.cc
index e497fc58e028..8f5b28ce6c26 100644
--- a/gcc/analyzer/call-details.cc
+++ b/gcc/analyzer/call-details.cc
@@ -58,6 +58,16 @@ call_details::call_details (const gcall *call, region_model 
*model,
 }
 }
 
+/* call_details's ctor: copy CD, but override the context,
+   using CTXT instead.  */
+
+call_details::call_details (const call_details ,
+   region_model_context *ctxt)
+{
+  *this = cd;
+  m_ctxt = ctxt;
+}
+
 /* Get the manager from m_model.  */
 
 region_model_manager *
diff --git a/gcc/analyzer/call-details.h b/gcc/analyzer/call-details.h
index 86f0e68072bd..58b5ccd2acde 100644
--- a/gcc/analyzer/call-details.h
+++ b/gcc/analyzer/call-details.h
@@ -30,6 +30,7 @@ class call_details
 public:
   call_details (const gcall *call, region_model *model,
region_model_context *ctxt);
+  call_details (const call_details , region_model_context *ctxt);
 
   region_model *get_model () const { return m_model; }
   region_model_manager *get_manager () const;
@@ -83,6 +84,35 @@ private:
   const region *m_lhs_region;
 };
 
+/* A bundle of information about a problematic argument at a callsite
+   for use by pending_diagnostic subclasses for reporting and
+   for deduplication.  */
+
+struct call_arg_details
+{
+public:
+  call_arg_details (const call_details , unsigned arg_idx)
+  : m_call (cd.get_call_stmt ()),
+m_called_fndecl (cd.get_fndecl_for_call ()),
+m_arg_idx (arg_idx),
+m_arg_expr (cd.get_arg_tree (arg_idx))
+  {
+  }
+
+  bool operator== (const call_arg_details ) const
+  {
+return (m_call == other.m_call
+   && m_called_fndecl == other.m_called_fndecl
+   && m_arg_idx == other.m_arg_idx
+   && pending_diagnostic::same_tree_p (m_arg_expr, other.m_arg_expr));
+  }
+
+  const gcall *m_call;
+  tree m_called_fndecl;
+  unsigned m_arg_idx; // 0-based
+  tree m_arg_expr;
+};
+
 } // namespace ana
 
 #endif /* GCC_ANALYZER_CALL_DETAILS_H */
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 0fce18896fbc..99817aee3a93 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -1271,14 +1271,108 @@ region_model::on_stmt_pre (const gimple *stmt,
 }
 }
 
+/* Given a call CD with function attribute FORMAT_ATTR, check that the
+   format arg to the call is a valid null-terminated string.  */
+
+void
+region_model::check_call_format_attr (const call_details ,
+ tree format_attr) const
+{
+  /* We assume that FORMAT_ATTR has already been validated.  */
+
+  /* arg0 of the attribute should be the kind of format string
+ that this function expects (e.g. "printf").  */
+  const tree arg0_tree_list = TREE_VALUE (format_attr);
+  if (!arg0_tree_list)
+return;
+
+  /* arg1 of the attribute should be the 1-based parameter index
+ to treat as the format string.  */
+  const tree arg1_tree_list = TREE_CHAIN (arg0_tree_list);
+  if (!arg1_tree_list)
+return;
+  const tree arg1_value = TREE_VALUE (arg1_tree_list);
+  if (!arg1_value)
+return;
+
+  unsigned format_arg_idx = TREE_INT_CST_LOW (arg1_value) - 1;
+  if (cd.num_args () <= format_arg_idx)
+return;
+
+  /* Subclass of annotating_context that
+ adds a note about the format attr to any saved diagnostics.  */
+  class annotating_ctxt 

[pushed 4/6] analyzer: replace -Wanalyzer-unterminated-string with scan_for_null_terminator [PR105899]

2023-08-21 Thread David Malcolm via Gcc-patches
In r14-3169-g325f9e88802daa I added check_for_null_terminated_string_arg
to -fanalyzer, calling it in various places, with a sole check for
unterminated string constants, adding -Wanalyzer-unterminated-string for
this case.

This patch adds region_model::scan_for_null_terminator, which simulates
scanning memory for a zero byte, complaining about uninitialized bytes
and out-of-range accesses seen before any zero byte is seen.

This more flexible approach catches the issues we saw before with
-Wanalyzer-unterminated-string, and also catches uninitialized runs
of bytes, and I believe will be a better way to build checking of C
string operations in the analyzer.

Given that the patch makes -Wanalyzer-unterminated-string redundant
and that this option was only in trunk for 10 days and has no known
users, the patch simply removes the option without a compatibility
fallback.

The patch uses custom events and notes to provide context on where
the issues are coming from.  For example, given:

null-terminated-strings-1.c: In function ‘test_partially_initialized’:
null-terminated-strings-1.c:71:3: warning: use of uninitialized value ‘buf[1]’ [CWE-457] [-Wanalyzer-use-of-uninitialized-value]
   71 |   __analyzer_get_strlen (buf);
      |   ^~~~~~~~~~~~~~~~~~~~~~~~~~~
  ‘test_partially_initialized’: events 1-3
    |
    |   69 |   char buf[16];
    |      |        ^~~
    |      |        |
    |      |        (1) region created on stack here
    |   70 |   buf[0] = 'a';
    |   71 |   __analyzer_get_strlen (buf);
    |      |   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    |      |   |
    |      |   (2) while looking for null terminator for argument 1 (‘&buf’) of ‘__analyzer_get_strlen’...
    |      |   (3) use of uninitialized value ‘buf[1]’ here
    |
analyzer-decls.h:59:22: note: argument 1 of ‘__analyzer_get_strlen’ must be a pointer to a null-terminated string
   59 | extern __SIZE_TYPE__ __analyzer_get_strlen (const char *ptr);
      |                      ^~~~~~~~~~~~~~~~~~~~~

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3374-gfe97f09a0caeff.

gcc/analyzer/ChangeLog:
PR analyzer/105899
* analyzer.opt (Wanalyzer-unterminated-string): Delete.
* call-details.cc
(call_details::check_for_null_terminated_string_arg): Convert
return type from void to const svalue *.  Add param "out_sval".
* call-details.h
(call_details::check_for_null_terminated_string_arg): Likewise.
* kf-analyzer.cc (kf_analyzer_get_strlen::impl_call_pre): Wire up
to result of check_for_null_terminated_string_arg.
* region-model.cc (get_strlen): Delete.
(class unterminated_string_arg): Delete.
(struct fragment): New.
(class iterable_cluster): New.
(region_model::get_store_bytes): New.
(get_tree_for_byte_offset): New.
(region_model::scan_for_null_terminator): New.
(region_model::check_for_null_terminated_string_arg): Convert
return type from void to const svalue *.  Add param "out_sval".
Reimplement in terms of scan_for_null_terminator, dropping the
special-case for -Wanalyzer-unterminated-string.
* region-model.h (region_model::get_store_bytes): New decl.
(region_model::scan_for_null_terminator): New decl.
(region_model::check_for_null_terminated_string_arg): Convert
return type from void to const svalue *.  Add param "out_sval".
* store.cc (concrete_binding::get_byte_range): New.
* store.h (concrete_binding::get_byte_range): New decl.
(store_manager::get_concrete_binding): New overload.

gcc/ChangeLog:
PR analyzer/105899
* doc/invoke.texi: Remove -Wanalyzer-unterminated-string.

gcc/testsuite/ChangeLog:
PR analyzer/105899
* gcc.dg/analyzer/error-1.c: Update expected results to reflect
reimplementation of unterminated string detection.  Add test
coverage for uninitialized buffers.
* gcc.dg/analyzer/null-terminated-strings-1.c: Likewise.
* gcc.dg/analyzer/putenv-1.c: Likewise.
* gcc.dg/analyzer/strchr-1.c: Likewise.
* gcc.dg/analyzer/strcpy-1.c: Likewise.
* gcc.dg/analyzer/strdup-1.c: Likewise.
---
 gcc/analyzer/analyzer.opt |   4 -
 gcc/analyzer/call-details.cc  |   8 +-
 gcc/analyzer/call-details.h   |   4 +-
 gcc/analyzer/kf-analyzer.cc   |  15 +-
 gcc/analyzer/region-model.cc  | 521 +++---
 gcc/analyzer/region-model.h   |  13 +-
 gcc/analyzer/store.cc |   9 +
 gcc/analyzer/store.h  |   7 +
 gcc/doc/invoke.texi   |  13 -
 gcc/testsuite/gcc.dg/analyzer/error-1.c   |  20 +-
 .../analyzer/null-terminated-strings-1.c  | 128 -
 gcc/testsuite/gcc.dg/analyzer/putenv-1.c  |  13 +-
 gcc/testsuite/gcc.dg/analyzer/strchr-1.c  |  10 

[pushed 2/6] analyzer: add ability for context to add events to a saved_diagnostic

2023-08-21 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3372-g2503dd59b588d3.

gcc/analyzer/ChangeLog:
* diagnostic-manager.cc (saved_diagnostic::add_event): New.
(saved_diagnostic::add_any_saved_events): New.
(diagnostic_manager::add_event): New.
(dedupe_winners::emit_best): New.
(diagnostic_manager::emit_saved_diagnostic): Make "sd" param
non-const.  Call saved_diagnostic::add_any_saved_events.
* diagnostic-manager.h (saved_diagnostic::add_event): New decl.
(saved_diagnostic::add_any_saved_events): New decl.
(saved_diagnostic::m_saved_events): New field.
(diagnostic_manager::add_event): New decl.
(diagnostic_manager::emit_saved_diagnostic): Make "sd" param
non-const.
* engine.cc (impl_region_model_context::add_event): New.
* exploded-graph.h (impl_region_model_context::add_event): New decl.
* region-model.cc
(noop_region_model_context::add_event): New.
(region_model_context_decorator::add_event): New.
* region-model.h (region_model_context::add_event): New vfunc.
(noop_region_model_context::add_event): New decl.
(region_model_context_decorator::add_event): New decl.
---
 gcc/analyzer/diagnostic-manager.cc | 45 --
 gcc/analyzer/diagnostic-manager.h  | 12 +++-
 gcc/analyzer/engine.cc |  8 ++
 gcc/analyzer/exploded-graph.h  |  1 +
 gcc/analyzer/region-model.cc   | 13 +
 gcc/analyzer/region-model.h|  6 
 6 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/gcc/analyzer/diagnostic-manager.cc 
b/gcc/analyzer/diagnostic-manager.cc
index 62f78f35dc08..10fea486b8c8 100644
--- a/gcc/analyzer/diagnostic-manager.cc
+++ b/gcc/analyzer/diagnostic-manager.cc
@@ -721,6 +721,15 @@ saved_diagnostic::add_note (std::unique_ptr 
pn)
   m_notes.safe_push (pn.release ());
 }
 
+/* Add EVENT to this diagnostic.  */
+
+void
+saved_diagnostic::add_event (std::unique_ptr event)
+{
+  gcc_assert (event);
+  m_saved_events.safe_push (event.release ());
+}
+
 /* Return a new json::object of the form
{"sm": optional str,
 "enode": int,
@@ -890,6 +899,19 @@ saved_diagnostic::supercedes_p (const saved_diagnostic 
) const
   return m_d->supercedes_p (*other.m_d);
 }
 
+/* Move any saved checker_events from this saved_diagnostic to
+   the end of DST_PATH.  */
+
+void
+saved_diagnostic::add_any_saved_events (checker_path _path)
+{
+  for (auto  : m_saved_events)
+{
+  dst_path.add_event (std::unique_ptr (event));
+  event = nullptr;
+}
+}
+
 /* Emit any pending notes owned by this diagnostic.  */
 
 void
@@ -1057,6 +1079,20 @@ diagnostic_manager::add_note 
(std::unique_ptr pn)
   sd->add_note (std::move (pn));
 }
 
+/* Add EVENT to the most recent saved_diagnostic.  */
+
+void
+diagnostic_manager::add_event (std::unique_ptr event)
+{
+  LOG_FUNC (get_logger ());
+  gcc_assert (event);
+
+  /* Get most recent saved_diagnostic.  */
+  gcc_assert (m_saved_diagnostics.length () > 0);
+  saved_diagnostic *sd = m_saved_diagnostics[m_saved_diagnostics.length () - 
1];
+  sd->add_event (std::move (event));
+}
+
 /* Return a new json::object of the form
{"diagnostics"  : [obj for saved_diagnostic]}.  */
 
@@ -1308,7 +1344,7 @@ public:
   {
saved_diagnostic **slot = m_map.get (key);
gcc_assert (*slot);
-   const saved_diagnostic *sd = *slot;
+   saved_diagnostic *sd = *slot;
dm->emit_saved_diagnostic (eg, *sd);
   }
   }
@@ -1370,7 +1406,7 @@ diagnostic_manager::emit_saved_diagnostics (const 
exploded_graph )
 
 void
 diagnostic_manager::emit_saved_diagnostic (const exploded_graph ,
-  const saved_diagnostic )
+  saved_diagnostic )
 {
   LOG_SCOPE (get_logger ());
   log ("sd[%i]: %qs at SN: %i",
@@ -1395,6 +1431,11 @@ diagnostic_manager::emit_saved_diagnostic (const 
exploded_graph ,
   /* Now prune it to just cover the most pertinent events.  */
   prune_path (_path, sd.m_sm, sd.m_sval, sd.m_state);
 
+  /* Add any saved events to the path, giving contextual information
+ about what the analyzer was simulating as the diagnostic was
+ generated.  These don't get pruned, as they are probably pertinent.  */
+  sd.add_any_saved_events (emission_path);
+
   /* Add a final event to the path, covering the diagnostic itself.
  We use the final enode from the epath, which might be different from
  the sd.m_enode, as the dedupe code doesn't care about enodes, just
diff --git a/gcc/analyzer/diagnostic-manager.h 
b/gcc/analyzer/diagnostic-manager.h
index d3022b888dd5..413ab0c90b14 100644
--- a/gcc/analyzer/diagnostic-manager.h
+++ b/gcc/analyzer/diagnostic-manager.h
@@ -42,6 +42,7 @@ public:
   bool operator== (const saved_diagnostic ) const;
 
   void add_note (std::unique_ptr pn);
+  void add_event (std::unique_ptr event);

[pushed 5/6] analyzer: add kf_fopen

2023-08-21 Thread David Malcolm via Gcc-patches
Add checking to -fanalyzer that both params of calls to "fopen" are
valid null-terminated strings.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3375-g4325c82736d9e8.

gcc/analyzer/ChangeLog:
* kf.cc (class kf_fopen): New.
(register_known_functions): Register it.

gcc/testsuite/ChangeLog:
* gcc.dg/analyzer/fopen-1.c: New test.
---
 gcc/analyzer/kf.cc  | 28 +++
 gcc/testsuite/gcc.dg/analyzer/fopen-1.c | 66 +
 2 files changed, 94 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/fopen-1.c

diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index 6b2db8613768..1601cf15c685 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -420,6 +420,33 @@ kf_error::impl_call_pre (const call_details ) const
   model->check_for_null_terminated_string_arg (cd, fmt_arg_idx);
 }
 
+/* Handler for fopen.
+ FILE *fopen (const char *filename, const char *mode);
+   See e.g. https://en.cppreference.com/w/c/io/fopen
+   https://www.man7.org/linux/man-pages/man3/fopen.3.html
+   
https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/fopen-wfopen?view=msvc-170
  */
+
+class kf_fopen : public known_function
+{
+public:
+  bool matches_call_types_p (const call_details ) const final override
+  {
+return (cd.num_args () == 2
+   && cd.arg_is_pointer_p (0)
+   && cd.arg_is_pointer_p (1));
+  }
+
+  void impl_call_pre (const call_details ) const final override
+  {
+cd.check_for_null_terminated_string_arg (0);
+cd.check_for_null_terminated_string_arg (1);
+cd.set_any_lhs_with_defaults ();
+
+/* fopen's mode param is effectively a mini-DSL, but there are various
+   non-standard extensions, so we don't bother to check it.  */
+  }
+};
+
 /* Handler for "free", after sm-handling.
 
If the ptr points to an underlying heap region, delete the region,
@@ -1422,6 +1449,7 @@ register_known_functions (known_function_manager )
 
   /* Known POSIX functions, and some non-standard extensions.  */
   {
+kfm.add ("fopen", make_unique ());
 kfm.add ("putenv", make_unique ());
 
 register_known_fd_functions (kfm);
diff --git a/gcc/testsuite/gcc.dg/analyzer/fopen-1.c 
b/gcc/testsuite/gcc.dg/analyzer/fopen-1.c
new file mode 100644
index ..e5b00e93b6da
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/fopen-1.c
@@ -0,0 +1,66 @@
+typedef struct FILE FILE;
+FILE *fopen (const char *pathname, const char *mode);
+#define NULL ((void *)0)
+
+FILE *
+test_passthrough (const char *pathname, const char *mode)
+{
+  return fopen (pathname, mode);
+}
+
+FILE *
+test_null_pathname (const char *pathname, const char *mode)
+{
+  return fopen (NULL, mode);
+}
+
+FILE *
+test_null_mode (const char *pathname)
+{
+  return fopen (pathname, NULL);
+}
+
+FILE *
+test_simple_r (void)
+{
+  return fopen ("foo.txt", "r");
+}
+
+FILE *
+test_swapped_args (void)
+{
+  return fopen ("r", "foo.txt"); /* TODO: would be nice to detect this.  */
+}
+
+FILE *
+test_unterminated_pathname (const char *mode)
+{
+  char buf[3] = "abc";
+  return fopen (buf, mode); /* { dg-warning "stack-based buffer over-read" } */
+  /* { dg-message "while looking for null terminator for argument 1 
\\(''\\) of 'fopen'..." "event" { target *-*-* } .-1 } */
+}
+
+FILE *
+test_unterminated_mode (const char *filename)
+{
+  char buf[3] = "abc";
+  return fopen (filename, buf);  /* { dg-warning "stack-based buffer 
over-read" } */
+  /* { dg-message "while looking for null terminator for argument 2 
\\(''\\) of 'fopen'..." "event" { target *-*-* } .-1 } */
+}
+
+FILE *
+test_uninitialized_pathname (const char *mode)
+{
+  char buf[10];
+  return fopen (buf, mode); /* { dg-warning "use of uninitialized value 
'buf\\\[0\\\]'" } */  
+  /* { dg-message "while looking for null terminator for argument 1 
\\(''\\) of 'fopen'..." "event" { target *-*-* } .-1 } */
+}
+
+FILE *
+test_uninitialized_mode (const char *filename)
+{
+  char buf[10];
+  return fopen (filename, buf); /* { dg-warning "use of uninitialized value 
'buf\\\[0\\\]'" } */  
+  /* { dg-message "while looking for null terminator for argument 2 
\\(''\\) of 'fopen'..." "event" { target *-*-* } .-1 } */
+}
+
-- 
2.26.3



[pushed 3/6] analyzer: handle NULL inner context in region_model_context_decorator

2023-08-21 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3373-g1e7b0a5d7a45dc.

gcc/analyzer/ChangeLog:
* region-model.cc (region_model_context_decorator::add_event):
Handle m_inner being NULL.
* region-model.h (class region_model_context_decorator): Likewise.
(annotating_context::warn): Likewise.
---
 gcc/analyzer/region-model.cc |  3 +-
 gcc/analyzer/region-model.h  | 86 
 2 files changed, 60 insertions(+), 29 deletions(-)

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index fa30193943d2..ed93fb89f933 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -5875,7 +5875,8 @@ noop_region_model_context::terminate_path ()
 void
 region_model_context_decorator::add_event (std::unique_ptr 
event)
 {
-  m_inner->add_event (std::move (event));
+  if (m_inner)
+m_inner->add_event (std::move (event));
 }
 
 /* struct model_merger.  */
diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index cdfce0727cf7..a01399c8e85a 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -813,93 +813,118 @@ class region_model_context_decorator : public 
region_model_context
  public:
   bool warn (std::unique_ptr d) override
   {
-return m_inner->warn (std::move (d));
+if (m_inner)
+  return m_inner->warn (std::move (d));
+else
+  return false;
   }
 
   void add_note (std::unique_ptr pn) override
   {
-m_inner->add_note (std::move (pn));
+if (m_inner)
+  m_inner->add_note (std::move (pn));
   }
   void add_event (std::unique_ptr event) override;
 
   void on_svalue_leak (const svalue *sval) override
   {
-m_inner->on_svalue_leak (sval);
+if (m_inner)
+  m_inner->on_svalue_leak (sval);
   }
 
   void on_liveness_change (const svalue_set _svalues,
   const region_model *model) override
   {
-m_inner->on_liveness_change (live_svalues, model);
+if (m_inner)
+  m_inner->on_liveness_change (live_svalues, model);
   }
 
   logger *get_logger () override
   {
-return m_inner->get_logger ();
+if (m_inner)
+  return m_inner->get_logger ();
+else
+  return nullptr;
   }
 
   void on_condition (const svalue *lhs,
 enum tree_code op,
 const svalue *rhs) override
   {
-m_inner->on_condition (lhs, op, rhs);
+if (m_inner)
+  m_inner->on_condition (lhs, op, rhs);
   }
 
   void on_bounded_ranges (const svalue ,
  const bounded_ranges ) override
   {
-m_inner->on_bounded_ranges (sval, ranges);
+if (m_inner)
+  m_inner->on_bounded_ranges (sval, ranges);
   }
 
   void on_pop_frame (const frame_region *frame_reg) override
   {
-m_inner->on_pop_frame (frame_reg);
+if (m_inner)
+  m_inner->on_pop_frame (frame_reg);
   }
 
   void on_unknown_change (const svalue *sval, bool is_mutable) override
   {
-m_inner->on_unknown_change (sval, is_mutable);
+if (m_inner)
+  m_inner->on_unknown_change (sval, is_mutable);
   }
 
   void on_phi (const gphi *phi, tree rhs) override
   {
-m_inner->on_phi (phi, rhs);
+if (m_inner)
+  m_inner->on_phi (phi, rhs);
   }
 
   void on_unexpected_tree_code (tree t,
const dump_location_t ) override
   {
-m_inner->on_unexpected_tree_code (t, loc);
+if (m_inner)
+  m_inner->on_unexpected_tree_code (t, loc);
   }
 
   void on_escaped_function (tree fndecl) override
   {
-m_inner->on_escaped_function (fndecl);
+if (m_inner)
+  m_inner->on_escaped_function (fndecl);
   }
 
   uncertainty_t *get_uncertainty () override
   {
-return m_inner->get_uncertainty ();
+if (m_inner)
+  return m_inner->get_uncertainty ();
+else
+  return nullptr;
   }
 
   void purge_state_involving (const svalue *sval) override
   {
-m_inner->purge_state_involving (sval);
+if (m_inner)
+  m_inner->purge_state_involving (sval);
   }
 
   void bifurcate (std::unique_ptr info) override
   {
-m_inner->bifurcate (std::move (info));
+if (m_inner)
+  m_inner->bifurcate (std::move (info));
   }
 
   void terminate_path () override
   {
-m_inner->terminate_path ();
+if (m_inner)
+  m_inner->terminate_path ();
   }
 
   const extrinsic_state *get_ext_state () const override
   {
-return m_inner->get_ext_state ();
+if (m_inner)
+  return m_inner->get_ext_state ();
+else
+  return nullptr;
   }
 
   bool get_state_map_by_name (const char *name,
@@ -909,20 +934,25 @@ class region_model_context_decorator : public 
region_model_context
  std::unique_ptr *out_sm_context)
 override
   {
-return m_inner->get_state_map_by_name (name, out_smap, out_sm, out_sm_idx,
-  out_sm_context);
+if (m_inner)
+  return m_inner->get_state_map_by_name (name, 

[pushed 1/6] analyzer: convert note_adding_context to annotating_context

2023-08-21 Thread David Malcolm via Gcc-patches
This is enabling work towards the context being able to inject
events into diagnostic paths, rather than just notes after the
warning.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3371-ge40a935db29cfd.


gcc/analyzer/ChangeLog:
* region-model.cc
(class check_external_function_for_access_attr::annotating_ctxt):
Convert to an annotating_context.
* region-model.h (class note_adding_context): Rename to...
(class annotating_context): ...this, updating the "warn" method.
(note_adding_context::make_note): Replace with...
(annotating_context::add_annotations): ...this.
---
 gcc/analyzer/region-model.cc | 12 ++--
 gcc/analyzer/region-model.h  | 14 +++---
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 494a9cdf149e..5c165ff127f8 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -1641,23 +1641,23 @@ check_external_function_for_access_attr (const gcall 
*call,
   if (access->mode == access_write_only
  || access->mode == access_read_write)
{
- /* Subclass of decorated_region_model_context that
+ /* Subclass of annotating_context that
 adds a note about the attr access to any saved diagnostics.  */
- class annotating_ctxt : public note_adding_context
+ class annotating_ctxt : public annotating_context
  {
  public:
annotating_ctxt (tree callee_fndecl,
 const attr_access &access,
 region_model_context *ctxt)
-   : note_adding_context (ctxt),
+   : annotating_context (ctxt),
  m_callee_fndecl (callee_fndecl),
  m_access (access)
{
}
-   std::unique_ptr<pending_note> make_note () final override
+   void add_annotations () final override
{
- return make_unique<reason_attr_access>
-   (m_callee_fndecl, m_access);
+ add_note (make_unique<reason_attr_access>
+   (m_callee_fndecl, m_access));
}
  private:
tree m_callee_fndecl;
diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index 4f09f2e585ac..88772655bc5b 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -922,28 +922,28 @@ protected:
   region_model_context *m_inner;
 };
 
-/* Subclass of region_model_context_decorator that adds a note
-   when saving diagnostics.  */
+/* Subclass of region_model_context_decorator with a hook for adding
+   notes/events when saving diagnostics.  */
 
-class note_adding_context : public region_model_context_decorator
+class annotating_context : public region_model_context_decorator
 {
 public:
   bool warn (std::unique_ptr<pending_diagnostic> d) override
   {
 if (m_inner->warn (std::move (d)))
   {
-   add_note (make_note ());
+   add_annotations ();
return true;
   }
 else
   return false;
   }
 
-  /* Hook to make the new note.  */
-  virtual std::unique_ptr<pending_note> make_note () = 0;
+  /* Hook to add new event(s)/note(s)  */
+  virtual void add_annotations () = 0;
 
 protected:
-  note_adding_context (region_model_context *inner)
+  annotating_context (region_model_context *inner)
   : region_model_context_decorator (inner)
   {
   }
-- 
2.26.3



Re: [PATCH] testsuite: Improve test in dg-require-python-h

2023-08-18 Thread David Malcolm via Gcc-patches
On Thu, 2023-08-17 at 23:30 -0300, Thiago Jung Bauermann wrote:
> If GCC is tested with a sysroot which doesn't contain a Python
> installation (e.g., with a command such as
> "make check-gcc-c FLAGS_UNDER_TEST="--sysroot=/some/path"), but
> there's
> a python3-config in $PATH, then the testsuite will pick up the host's
> Python.h which can't actually be used:
> 
> Executing on host: python3-config --includes    (timeout = 300)
> spawn -ignore SIGHUP python3-config --includes
> -I/usr/include/python3.10 -I/usr/include/python3.10
> Executing on host: /some/sysroot/bin/aarch64-unknown-linux-gnu-gcc --
> sysroot=/some/sysroot/libc -Wl,-dynamic-
> linker=/some/sysroot/libc/lib/ld-linux-aarch64.so.1 -Wl,-
> rpath=/some/sysroot/libc/lib 
> /some/src/gcc.git/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-
> 2.c    -fdiagnostics-plain-output  -
> fplugin=./analyzer_cpython_plugin.so -fanalyzer -
> I/usr/include/python3.10 -I/usr/include/python3.10 -S -o cpython-
> plugin-test-2.s    (timeout = 600)
> spawn -ignore SIGHUP /some/sysroot/bin/aarch64-unknown-linux-gnu-gcc
> --sysroot=/some/sysroot/libc -Wl,-dynamic-
> linker=/some/sysroot/libc/lib/ld-linux-aarch64.so.1 -Wl,-
> rpath=/some/sysroot/libc/lib
> /some/src/gcc.git/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-2.c
> -fdiagnostics-plain-output -fplugin=./analyzer_cpython_plugin.so -
> fanalyzer -I/usr/include/python3.10 -I/usr/include/python3.10 -S -o
> cpython-plugin-test-2.s
> In file included from /usr/include/python3.10/Python.h:8,
>  from
> /some/src/gcc.git/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-
> 2.c:8:
> /usr/include/python3.10/pyconfig.h:9:12: fatal error: aarch64-linux-
> gnu/python3.10/pyconfig.h: No such file or directory
> compilation terminated.
> compiler exited with status 1
> 
> This problem causes these testsuite failures:
> 
> FAIL: gcc.dg/plugin/cpython-plugin-test-2.c -
> fplugin=./analyzer_cpython_plugin.so  (test for warnings, line 17)
> FAIL: gcc.dg/plugin/cpython-plugin-test-2.c -
> fplugin=./analyzer_cpython_plugin.so  (test for warnings, line 18)
> FAIL: gcc.dg/plugin/cpython-plugin-test-2.c -
> fplugin=./analyzer_cpython_plugin.so  (test for warnings, line 21)
> FAIL: gcc.dg/plugin/cpython-plugin-test-2.c -
> fplugin=./analyzer_cpython_plugin.so  (test for warnings, line 31)
> FAIL: gcc.dg/plugin/cpython-plugin-test-2.c -
> fplugin=./analyzer_cpython_plugin.so  (test for warnings, line 32)
> FAIL: gcc.dg/plugin/cpython-plugin-test-2.c -
> fplugin=./analyzer_cpython_plugin.so  (test for warnings, line 35)
> FAIL: gcc.dg/plugin/cpython-plugin-test-2.c -
> fplugin=./analyzer_cpython_plugin.so  (test for warnings, line 45)
> FAIL: gcc.dg/plugin/cpython-plugin-test-2.c -
> fplugin=./analyzer_cpython_plugin.so  (test for warnings, line 55)
> FAIL: gcc.dg/plugin/cpython-plugin-test-2.c -
> fplugin=./analyzer_cpython_plugin.so  (test for warnings, line 63)
> FAIL: gcc.dg/plugin/cpython-plugin-test-2.c -
> fplugin=./analyzer_cpython_plugin.so  (test for warnings, line 66)
> FAIL: gcc.dg/plugin/cpython-plugin-test-2.c -
> fplugin=./analyzer_cpython_plugin.so  (test for warnings, line 68)
> FAIL: gcc.dg/plugin/cpython-plugin-test-2.c -
> fplugin=./analyzer_cpython_plugin.so  (test for warnings, line 69)
> FAIL: gcc.dg/plugin/cpython-plugin-test-2.c -
> fplugin=./analyzer_cpython_plugin.so (test for excess errors)
> Excess errors:
> /usr/include/python3.10/pyconfig.h:9:12: fatal error: aarch64-linux-
> gnu/python3.10/pyconfig.h: No such file or directory
> compilation terminated.
> 
> So try to compile a test file so that the testcase can be marked as
> unsupported instead.
> 
> gcc/testsuite/ChangeLog:
> * gcc/testsuite/lib/target-supports.exp (dg-require-python-
> h): Test
>     whether Python.h can really be used.
> ---
>  gcc/testsuite/lib/target-supports.exp | 14 --
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/testsuite/lib/target-supports.exp
> b/gcc/testsuite/lib/target-supports.exp
> index 92b6f69730e9..5b5f86551844 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -12570,11 +12570,21 @@ proc dg-require-python-h { args } {
>  
>  verbose "ENTER dg-require-python-h" 2
>  
> +    set supported 0
>  set result [remote_exec host "python3-config --includes"]
>  set status [lindex $result 0]
>  if { $status == 0 } {
> -    set python_flags [lindex $result 1]
> -    } else {
> +   # Remove trailing newline from python3-config output.
> +   set python_flags [string trim [lindex $result 1]]
> +   if [check_no_compiler_messages python_h assembly {
> +   #include <Python.h>
> +   int main (void) { return 0; }
> +   } $python_flags] {
> +   set supported 1
> +   }
> +    }
> +
> +    if { $supported == 0 } {
> verbose "Python.h not supported" 2
> upvar dg-do-what dg-do-what
> set dg-do-what [list [lindex ${dg-do-what} 0] "N" "P"]
> 
> 

Re: [PATCH] libgccjit: Add support for `restrict` attribute on function parameters

2023-08-17 Thread David Malcolm via Gcc-patches
On Thu, 2023-08-17 at 17:41 +0200, Guillaume Gomez wrote:
> And now I just discovered that a lot of commits from Antoni's fork
> haven't been sent upstream which is why the ABI count is so high in
> his repository. Fixed that as well.

Thanks for the updated patch; I was about to comment on that.

This version is good for gcc trunk.

Dave

> 
> Le jeu. 17 août 2023 à 17:26, Guillaume Gomez
>  a écrit :
> > 
> > Antoni spotted a typo I made:
> > 
> > I added `LIBGCCJIT_HAVE_gcc_jit_type_get_size` instead of
> > `LIBGCCJIT_HAVE_gcc_jit_type_get_restrict`. Fixed in this patch,
> > sorry
> > for the noise.
> > 
> > Le jeu. 17 août 2023 à 11:30, Guillaume Gomez
> >  a écrit :
> > > 
> > > Hi Dave,
> > > 
> > > > What kind of testing has the patch had? (e.g. did you run "make
> > > > check-
> > > > jit" ?  Has this been in use on real Rust code?)
> > > 
> > > I tested it as Rust backend directly on this code:
> > > 
> > > ```
> > > pub fn foo(a: &mut i32, b: &mut i32, c: &i32) {
> > >     *a += *c;
> > >     *b += *c;
> > > }
> > > ```
> > > 
> > > I ran it with `rustc` (and the GCC backend) with the following
> > > flags:
> > > `-C link-args=-lc --emit=asm -O --crate-type=lib` which gave the
> > > diff
> > > you can see in the attached file. Explanations: the diff on the
> > > right
> > > has the `__restrict__` attribute used whereas on the left it is
> > > the
> > > current version where we don't handle it.
> > > 
> > > As for C testing, I used this code:
> > > 
> > > ```
> > > void t(int *__restrict__ a, int *__restrict__ b, char
> > > *__restrict__ c) {
> > >     *a += *c;
> > >     *b += *c;
> > > }
> > > ```
> > > 
> > > (without the `__restrict__` of course when I need to have a
> > > witness
> > > ASM). I attached the diff as well, this time the file with the
> > > use of
> > > `__restrict__` is on the left. I compiled with the following
> > > flags:
> > > `-S -O3`.
> > > 
> > > > Please add a feature macro:
> > > > #define LIBGCCJIT_HAVE_gcc_jit_type_get_restrict
> > > > (see the similar ones in the header).
> > > 
> > > I added `LIBGCCJIT_HAVE_gcc_jit_type_get_size` and extended the
> > > documentation as well to mention the ABI change.
> > > 
> > > > Please add a new ABI tag (LIBGCCJIT_ABI_25 ?), rather than
> > > > adding this
> > > > to ABI_0.
> > > 
> > > I added `LIBGCCJIT_ABI_34` as `LIBGCCJIT_ABI_33` was the last
> > > one.
> > > 
> > > > This refers to a "cold attribute"; is this a vestige of a copy-
> > > > and-
> > > > paste from a different test case?
> > > 
> > > It is a vestige indeed... Missed this one.
> > > 
> > > > I see that the test scans the generated assembler.  Does the
> > > > test
> > > > actually verify that restrict has an effect, or was that
> > > > another
> > > > vestige from a different test case?
> > > 
> > > No, this time it's what I wanted. Please see the C diff I
> > > provided
> > > above to see that the ASM has a small diff that allowed me to
> > > confirm
> > > that the `__restrict__` attribute was correctly set.
> > > 
> > > > If this test is meant to run at -O3 and thus can't be part of
> > > > test-
> > > > combination.c, please add a comment about it to
> > > > gcc/testsuite/jit.dg/all-non-failing-tests.h (in the
> > > > alphabetical
> > > > place).
> > > 
> > > Below `-O3`, this ASM difference doesn't appear unfortunately.
> > > 
> > > > The patch also needs to add documentation for the new
> > > > entrypoint (in
> > > > topics/types.rst), and for the new ABI tag (in
> > > > topics/compatibility.rst).
> > > 
> > > Added!
> > > 
> > > > Thanks again for the patch; hope the above is constructive
> > > 
> > > It was incredibly useful! Thanks for taking the time to write down
> > > the
> > > explanations.
> > > 
> > > The new patch is attached to this email.
> > > 
> > > Cordially.
> > > 
> > > Le jeu. 17 août 2023 à 01:06, David Malcolm 
> > > a écrit :
> > > > 
> > > > On Wed, 2023-08-16 at 22:06 +0200, Guillaume Gomez via Jit
> > > > wrote:
> > > > > My apologies, forgot to run the commit checkers. Here's the
> > > > > commit
> > > > > with the errors fixed.
> > > > > 
> > > > > Le mer. 16 août 2023 à 18:32, Guillaume Gomez
> > > > >  a écrit :
> > > > > > 
> > > > > > Hi,
> > > > 
> > > > Hi Guillaume, thanks for the patch.
> > > > 
> > > > > > 
> > > > > > This patch adds the possibility to specify the __restrict__
> > > > > > attribute
> > > > > > for function parameters. It is used by the Rust GCC
> > > > > > backend.
> > > > 
> > > > What kind of testing has the patch had? (e.g. did you run "make
> > > > check-
> > > > jit" ?  Has this been in use on real Rust code?)
> > > > 
> > > > Overall, this patch looks close to being ready, but some nits
> > > > below...
> > > > 
> > > > [...]
> > > > 
> > > > > diff --git a/gcc/jit/libgccjit.h b/gcc/jit/libgccjit.h
> > > > > index 60eaf39bff6..2e0d08a06d8 100644
> > > > > --- a/gcc/jit/libgccjit.h
> > > > > +++ b/gcc/jit/libgccjit.h
> > > > > @@ -635,6 +635,10 @@ gcc_jit_type_get_const (gcc_jit_type
> > > > > *type);
> > > > >  

Re: [PATCH] libgccjit: Add support for `restrict` attribute on function parameters

2023-08-16 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-16 at 22:06 +0200, Guillaume Gomez via Jit wrote:
> My apologies, forgot to run the commit checkers. Here's the commit
> with the errors fixed.
> 
> Le mer. 16 août 2023 à 18:32, Guillaume Gomez
>  a écrit :
> > 
> > Hi,

Hi Guillaume, thanks for the patch.

> > 
> > This patch adds the possibility to specify the __restrict__
> > attribute
> > for function parameters. It is used by the Rust GCC backend.

What kind of testing has the patch had? (e.g. did you run "make check-
jit" ?  Has this been in use on real Rust code?)

Overall, this patch looks close to being ready, but some nits below...

[...]

> diff --git a/gcc/jit/libgccjit.h b/gcc/jit/libgccjit.h
> index 60eaf39bff6..2e0d08a06d8 100644
> --- a/gcc/jit/libgccjit.h
> +++ b/gcc/jit/libgccjit.h
> @@ -635,6 +635,10 @@ gcc_jit_type_get_const (gcc_jit_type *type);
>  extern gcc_jit_type *
>  gcc_jit_type_get_volatile (gcc_jit_type *type);
>  
> +/* Given type "T", get type "restrict T".  */
> +extern gcc_jit_type *
> +gcc_jit_type_get_restrict (gcc_jit_type *type);
> +
>  #define LIBGCCJIT_HAVE_SIZED_INTEGERS
>  
>  /* Given types LTYPE and RTYPE, return non-zero if they are
compatible.

Please add a feature macro:
#define LIBGCCJIT_HAVE_gcc_jit_type_get_restrict
(see the similar ones in the header).

> diff --git a/gcc/jit/libgccjit.map b/gcc/jit/libgccjit.map
> index e52de0057a5..b7289b13845 100644
> --- a/gcc/jit/libgccjit.map
> +++ b/gcc/jit/libgccjit.map
> @@ -104,6 +104,7 @@ LIBGCCJIT_ABI_0
>  gcc_jit_type_as_object;
>  gcc_jit_type_get_const;
>  gcc_jit_type_get_pointer;
> +gcc_jit_type_get_restrict;
>  gcc_jit_type_get_volatile;

Please add a new ABI tag (LIBGCCJIT_ABI_25 ?), rather than adding this
to ABI_0.

> diff --git a/gcc/testsuite/jit.dg/test-restrict.c
b/gcc/testsuite/jit.dg/test-restrict.c
> new file mode 100644
> index 000..4c8c4407f91
> --- /dev/null
> +++ b/gcc/testsuite/jit.dg/test-restrict.c
> @@ -0,0 +1,77 @@
> +/* { dg-do compile { target x86_64-*-* } } */
> +
> +#include <stdlib.h>
> +#include <stdio.h>
> +
> +#include "libgccjit.h"
> +
> +/* We don't want set_options() in harness.h to set -O3 to see that
the cold
> +  attribute affects the optimizations. */

This refers to a "cold attribute"; is this a vestige of a copy-and-
paste from a different test case?

I see that the test scans the generated assembler.  Does the test
actually verify that restrict has an effect, or was that another
vestige from a different test case?

> +#define TEST_ESCHEWS_SET_OPTIONS
> +static void set_options (gcc_jit_context *ctxt, const char *argv0)
> +{
> + // Set "-O3".
> + gcc_jit_context_set_int_option(ctxt,
GCC_JIT_INT_OPTION_OPTIMIZATION_LEVEL, 3);
> +}
> +
> +#define TEST_COMPILING_TO_FILE
> +#define OUTPUT_KIND  GCC_JIT_OUTPUT_KIND_ASSEMBLER
> +#define OUTPUT_FILENAME  "output-of-test-restrict.c.s"
> +#include "harness.h"
> +
> +void
> +create_code (gcc_jit_context *ctxt, void *user_data)
> +{
> + /* Let's try to inject the equivalent of:
> +void t(int *__restrict__ a, int *__restrict__ b, char *__restrict__
c) {
> + *a += *c;
> + *b += *c;
> +}
> + */
> + gcc_jit_type *int_type =
> + gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_INT);
> + gcc_jit_type *pint_type = gcc_jit_type_get_pointer(int_type);
> + gcc_jit_type *pint_restrict_type =
gcc_jit_type_get_restrict(pint_type);
> +
> + gcc_jit_type *void_type =
> + gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_VOID);
> +
> + gcc_jit_param *a =
> + gcc_jit_context_new_param (ctxt, NULL,
pint_restrict_type, "a");
> + gcc_jit_param *b =
> + gcc_jit_context_new_param (ctxt, NULL,
pint_restrict_type, "b");
> + gcc_jit_param *c =
> + gcc_jit_context_new_param (ctxt, NULL,
pint_restrict_type, "c");
> + gcc_jit_param *params[3] = {a, b, c};
> +
> + gcc_jit_function *func_t =
> + gcc_jit_context_new_function (ctxt, NULL,
> + GCC_JIT_FUNCTION_EXPORTED,
> + void_type,
> + "t",
> + 3, params,
> + 0);
> +
> + gcc_jit_block *block = gcc_jit_function_new_block (func_t,
NULL);
> +
> + /* *a += *c; */
> + gcc_jit_block_add_assignment_op (
> + block, NULL,
> + gcc_jit_rvalue_dereference (gcc_jit_param_as_rvalue
(a), NULL),
> + GCC_JIT_BINARY_OP_PLUS,
> + gcc_jit_lvalue_as_rvalue (
> + gcc_jit_rvalue_dereference
(gcc_jit_param_as_rvalue (c), NULL)));
> + /* *b += *c; */
> + gcc_jit_block_add_assignment_op (
> + block, NULL,
> + gcc_jit_rvalue_dereference (gcc_jit_param_as_rvalue
(b), NULL),
> + GCC_JIT_BINARY_OP_PLUS,
> + gcc_jit_lvalue_as_rvalue (
> + gcc_jit_rvalue_dereference
(gcc_jit_param_as_rvalue (c), NULL)));
> +

Re: [WIP RFC v2] analyzer: Add support of placement new and improved operator new [PR105948]

2023-08-16 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-16 at 14:19 +0200, priour...@gmail.com wrote:
> From: benjamin priour 
> 
> Hi,
> (s/we/the analyzer/)

Hi Benjamin, thanks for the updated patch.

> 
> I've been continuing my patch of supporting operator new variants
> in the analyzer, and have added a few more test cases.
> 
> 
>     > > If "y" is null then the allocation failed and dereferencing
> "y" will
>     > > cause
>     > > a segfault, not a "use-of-uninitialized-value".
>     > > Thus we should stick to 'dereference of NULL 'y'" only.
>     > > If "y" is non-null then the allocation succeeded and "*y" is
>     > > initialized
>     > > since we are calling a default initialization with the empty
>     > > parenthesis.
>     > 
>     > I *think* it's possible to have the region_model have y
> pointing to a
>     > heap_allocated_region of sizeof(int) size that's been
> initialized, but
>     > still have the malloc state machine part of the program_state
> say that
>     > the pointer is maybe-null.
> 
> By maybe-null are you implying a new sm-malloc state ?

Sorry, I was too vague here.

I was referring to the "unchecked" state in sm-malloc.cc, which
represents a pointer that's been returned from an allocator function,
where the pointer hasn't yet been checked for being null/non-null.

> I am not sure to follow on that front. 
> 
> 
>     >
>     > > This led me to consider having "null-dereference" supersedes
>     > > "use-of-uninitialized-value", but
>     > > new PR 110830 made me reexamine it.
>     > >
>     > > I believe fixing PR 110830 is thus required before submitting
> this
>     > > patch,
>     > > or we would have some extra irrelevant warnings.
>     >
>     > How bad would the problem be?  PR 110830 looks a little
> involved, so is
>     > there a way to get the current patch in without dragging that
> extra
>     > complexity in?
> 
> Having "null-dereference" supersedes "use-of-uninitialized-value"
> would
> cause false negative upon conditional return statement (similarly as
> demonstrated
> in PR 110830).
> 
> Since PR 110830 is off for the moment, I have tried solving this
> differently.
> I have considered using known NULL constraints on
> heap_allocated_region
> as "initialized_value".
> 
> You can see below in the diff of region_model::get_store_value
> two versions of this approach. The version commented out proved to
> solve
> the issue of the spurious "use-of-uninitialized-value" tagging along
> calls to
> "new(std::nothrow) ()". However, this version also shortcircuits the
> diagnostics of the "null-dereference" warning.
> 
> Given
>     /* { dg-additional-options "-O0 -fno-exceptions -fno-analyzer-
> suppress-followups" } */
>     #include <new>
> 
>     struct A
>     {
>   int x;
>   int y;
>     };
> 
>     void test_nonthrowing ()
>     {
>   A* y = new(std::nothrow) A();
>   int z = y->x + 2; /* { dg-warning "dereference of NULL 'y'" }
> */
>   /* { dg-bogus "use of uninitialized value '\\*y'" "" { xfail *-
> *-* } .-1 } */
> 
>   delete y;
>     }
> 
> The analyzer sees gimple
> 
>    :
>   _7 = operator new (8, );
>   if (_7 != 0B)
>     goto ; [INV]
>   else
>     goto ; [INV]

I would have thought that at each branch of this conditional that 
region_model::add_constraint would be called, and within that we'd
reach this code:

4339  /* Notify the context, if any.  This exists so that the state machines
4340 in a program_state can be notified about the condition, and so can
4341 set sm-state for e.g. unchecked->checked, both for cfg-edges, and
4342 when synthesizing constraints as above.  */
4343  if (ctxt)
4344ctxt->on_condition (lhs, op, rhs);

This ought to call impl_region_model_context::on_condition in
engine.cc, which ought to call malloc_state_machine::on_condition in
sm-malloc.cc, and this ought to transition the sm-state of _7.

Is something going wrong somewhere in the things I mentioned above?

> 
>    :
>   MEM[(struct A *)_7].x = 0;
>   MEM[(struct A *)_7].y = 0;
>   iftmp.0_11 = _7;
>   goto ; [INV]
> 
>    :
>   iftmp.0_8 = _7;
> 
>    :
>   # iftmp.0_2 = PHI 
>   y_12 = iftmp.0_2;
>   _1 = y_12->x;

...and at this point we have a deref from y_12, which on the path from
bb 5 ought to be an svalue that has the "null" state in the sm-state
machine, and thus malloc_state_machine::on_stmt ought to complain at
   _1 = y_12->x;
here:

2094  else if (state == m_null)
2095{
2096  tree diag_arg = sm_ctxt->get_diagnostic_tree (arg);
2097  sm_ctxt->warn (node, stmt, arg,
2098 make_unique<null_deref> (*this, diag_arg));
2099  sm_ctxt->set_next_state (stmt, arg, m_stop);
2100}

That's what ought to be happening, and ought to give you the correct warning.



>   z_13 = _1 + 2;
>   y.1_14 = y_12;
>   if (y.1_14 != 0B)
>     goto ; [INV]
>   else
>     goto ; [INV]
> 
>    :
>   *y.1_14 ={v} {CLOBBER};
>   operator delete (y.1_14, 

Re: [RFC] GCC Security policy

2023-08-15 Thread David Malcolm via Gcc-patches
On Mon, 2023-08-14 at 09:26 -0400, Siddhesh Poyarekar wrote:
> Hi,
> 
> Here's the updated draft of the top part of the security policy with all 
> of the recommendations incorporated.
> 
> Thanks,
> Sid
> 
> 
> What is a GCC security bug?
> ===
> 
>  A security bug is one that threatens the security of a system or
>  network, or might compromise the security of data stored on it.
>  In the context of GCC there are multiple ways in which this might
>  happen and they're detailed below.
> 
> Compiler drivers, programs, libgccjit and support libraries
> ---
> 
>  The compiler driver processes source code, invokes other programs
>  such as the assembler and linker and generates the output result,
>  which may be assembly code or machine code.  It is necessary that
>  all source code inputs to the compiler are trusted, since it is
>  impossible for the driver to validate input source code beyond
>  conformance to a programming language standard.
> 
>  The GCC JIT implementation, libgccjit, is intended to be plugged
>  into applications to translate input source code in the application
>  context.  Limitations that apply to the compiler
>  driver, apply here too in terms of sanitizing inputs, so it is
>  recommended that inputs are either sanitized by an external program
>  to allow only trusted, safe execution in the context of the
>  application or the JIT execution context is appropriately sandboxed
>  to contain the effects of any bugs in the JIT or its generated code
>  to the sandboxed environment.

I'd prefer to reword this, as libgccjit was a poor choice of name for
the library (sorry!), to make it clearer it can be used for both ahead-
of-time and just-in-time compilation, and that as used for compilation,
the host considerations apply, not just those of the generated target
code.

How about:

 The libgccjit library can, despite the name, be used both for
 ahead-of-time compilation and for just-in-time compilation.  In both
 cases it can be used to translate input representations (such as
 source code) in the application context; in the latter case the
 generated code is also run in the application context.
 Limitations that apply to the compiler driver, apply here too in
 terms of sanitizing inputs, so it is recommended that inputs are
 either sanitized by an external program to allow only trusted,
 safe compilation and execution in the context of the application,
 or that both the compilation *and* execution context of the code
 are appropriately sandboxed to contain the effects of any bugs in
 libgccjit, the application code using it, or its generated code to
 the sandboxed environment.

...or similar.

[...snip...]

Thanks
Dave



Re: [PATCH v4 4/8] diagnostics: Support obtaining source code lines from generated data buffers

2023-08-15 Thread David Malcolm via Gcc-patches
On Tue, 2023-08-15 at 14:15 -0400, Lewis Hyatt wrote:
> On Tue, Aug 15, 2023 at 12:15:15PM -0400, David Malcolm wrote:
> > On Wed, 2023-08-09 at 18:14 -0400, Lewis Hyatt wrote:
> > > This patch enhances location_get_source_line(), which is the
> > > primary
> > > interface provided by the diagnostics infrastructure to obtain
> > > the line of
> > > source code corresponding to a given location, so that it
> > > understands
> > > generated data locations in addition to normal file-based
> > > locations. This
> > > involves changing the argument to location_get_source_line() from
> > > a plain
> > > file name, to a source_id object that can represent either type
> > > of location.
> > > 

[...]

> > > 
> > > 
> > > diff --git a/gcc/input.cc b/gcc/input.cc
> > > index 9377020b460..790279d4273 100644
> > > --- a/gcc/input.cc
> > > +++ b/gcc/input.cc
> > > @@ -207,6 +207,28 @@ private:
> > >    void maybe_grow ();
> > >  };
> > >  
> > > +/* This is the implementation of cache_data_source for generated
> > > +   data that is already in memory.  */
> > > +class data_cache_slot final : public cache_data_source
> > 
> > It occurred to me: why are we caching accessing a buffer that's
> > already
> > in memory - but we're also caching the line-splitting information,
> > and
> > providing the line-splitting algorithm with a consistent interface
> > to
> > the data, right?
> > 
> 
> Yeah, for the current _Pragma use case, multi-line buffers are not
> going to
> be common, but they can occur. I was mainly motivated by the
> consistent
> interface, and by the assumption that the overhead is not critical
> given a
> diagnostic is being issued.

(nods)

> 
> > [...snip...]
> > 
> > > @@ -397,6 +434,15 @@ diagnostics_file_cache_forcibly_evict_file
> > > (const char *file_path)
> > >    global_dc->m_file_cache->forcibly_evict_file (file_path);
> > >  }
> > >  
> > > +void
> > > +diagnostics_file_cache_forcibly_evict_data (const char *data,
> > > +   unsigned int
> > > data_len)
> > > +{
> > > +  if (!global_dc->m_file_cache)
> > > +    return;
> > > +  global_dc->m_file_cache->forcibly_evict_data (data, data_len);
> > 
> > Maybe we should rename diagnostic_context's m_file_cache to
> > m_source_cache?  (and class file_cache for that matter?)  But if
> > so,
> > that can/should be a followup/separate patch.
> > 
> 
> Yes, we should. Believe it or not, I was trying to minimize the size
> of the
> patch :) 

:)

Thanks for splitting it up, BTW.

[...]


> > 
> > > @@ -912,26 +1000,22 @@ cache_data_source::read_line_num (size_t
> > > line_num,
> > >     If the function fails, a NULL char_span is returned.  */
> > >  
> > >  char_span
> > > -location_get_source_line (const char *file_path, int line)
> > > +location_get_source_line (source_id src, int line)
> > >  {
> > > -  const char *buffer = NULL;
> > > -  ssize_t len;
> > > -
> > > -  if (line == 0)
> > > -    return char_span (NULL, 0);
> > > -
> > > -  if (file_path == NULL)
> > > -    return char_span (NULL, 0);
> > > +  const char_span fail (nullptr, 0);
> > > +  if (!src || line <= 0)
> > > +    return fail;
> > 
> > Looking at source_id's operator bool, are there effectively three
> > kinds
> > of source_id?
> > 
> > (a) file names
> > (b) generated buffer
> > (c) NULL == m_filename_or_buffer
> > 
> > What does (c) mean?  Is it a "something's gone wrong/error" state? 
> > Or
> > is this more a special-case of (a)? (in that the m_len for such a
> > case
> > would be zero)
> > 
> > Should source_id's 2-param ctor have an assert that the ptr is non-
> > NULL?
> > 
> > [...snip...]
> > 
> > The patch is OK for trunk as-is, but note the question about the
> > source_id ctor above.
> > 
> 
> Thanks. (c) has the same meaning as a NULL file name currently does,
> so a
> default-constructed source_id is not an in-memory buffer, but is
> rather a
> NULL filename. linemap_add() for instance, will interpret a NULL
> filename
> for an LC_LEAVE map, as a request to copy it from the natural values
> being
> returned to. I think the source_id constructor needs to accept a NULL
> filename to remain backwards compatible. With the current design of
> source_id, it is safe always to change a 'const char*' file name
> argument to
> a source_id argument instead; it will work just how it did before
> because it
> has an implicit constructor. But if the constructor would assert on a
> non-NULL pointer, that would necessitate changing all call sites that
> currently expect they can pass a NULL pointer there. (For example,
> there are
> several calls to _cpp_do_file_change() within libcpp that take
> advantage of
> being able to pass a NULL filename to linemap_add.)

Yes, it's OK for this ctor to accept NULL;
   source_id (const char *filename = nullptr)
and I see you added the default arg.

I was referring to this ctor:
   source_id (const char *buffer, unsigned buffer_len)
Is it ever OK for "buffer" to be NULL in this 2-param ctor, or can we
assert 

Re: [PATCH v4 3/8] diagnostics: Refactor class file_cache_slot

2023-08-15 Thread David Malcolm via Gcc-patches
On Tue, 2023-08-15 at 13:58 -0400, Lewis Hyatt wrote:
> On Tue, Aug 15, 2023 at 11:43:05AM -0400, David Malcolm wrote:
> > On Wed, 2023-08-09 at 18:14 -0400, Lewis Hyatt wrote:
> > > Class file_cache_slot in input.cc is used to query specific lines
> > > of source
> > > code from a file when needed by diagnostics infrastructure. This
> > > will be
> > > extended in a subsequent patch to support obtaining the source
> > > code from
> > > in-memory generated buffers rather than from a file. The present
> > > patch
> > > refactors class file_cache_slot, putting most of the logic into a
> > > new base
> > > class cache_data_source, in preparation for reusing that code in
> > > the next
> > > patch. There is no change in functionality yet.
> > > 

[...snip...]

> > 
> > I confess I had to reread both this and patch 4/8 to make sense of
> > this; this is probably one of those cases where it's harder to read
> > in
> > patch form than as source, but I think I now understand the new
> > implementation.
> 
> Yes, sorry about that. I hope at least splitting into two patches
> here made it
> a little easier.
> 
> > 
> > Did you try testing this with valgrind (e.g. "make selftest-
> > valgrind")?
> > 
> 
> Oh interesting, was not aware of this. I think it shows that new
> leaks were
> not introduced with the patch series.
> 

[...snip...]

> 
> 
> > I don't think we have any selftest coverage for "\r" in the line-
> > break
> > handling; that would be good to add.
> > 
> > This patch is OK for trunk once the rest of the kit is approved.
> 
> Thank you. To be clear, were you suggesting to add selftest coverage
> for \r
> endings now, or in a follow up?

The former, please, so that we can be sure that the patch doesn't
introduce any buffer overreads etc.

Thanks
Dave



Re: [PATCH v4 8/8] diagnostics: Support generated data locations in SARIF output

2023-08-15 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-09 at 18:14 -0400, Lewis Hyatt wrote:
> The diagnostics routines for SARIF output need to read the source code back
> in, so that they can generate "snippet" and "content" records, so they need to
> be able to cope with generated data locations.  Add support for that in
> diagnostic-format-sarif.cc.
> 
> gcc/ChangeLog:
> 
> * diagnostic-format-sarif.cc (class sarif_builder): Adapt interface
> to support generated data locations.
> (sarif_builder::maybe_make_physical_location_object): Change the
> m_filenames hash_set to support generated data.
> (sarif_builder::make_artifact_location_object): Use a source_id rather
> than a plain file name.
> (sarif_builder::maybe_make_region_object): Adapt to
> expanded_location interface changes.
> (sarif_builder::maybe_make_region_object_for_context): Likewise.
> (sarif_builder::make_artifact_object): Likewise.
> (sarif_builder::make_run_object): Handle generated data.
> (sarif_builder::maybe_make_artifact_content_object): Likewise.
> (get_source_lines): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> * c-c++-common/diagnostic-format-sarif-file-5.c: New test.

I'm not sure if generated data is allowed as part of a SARIF artefact,
or if there's a more standard-compliant way of representing this; SARIF
says an artefact is a "sequence of bytes addressable via a URI".

Can you post a simple example of the generated .sarif JSON please? 
e.g. from the new test, so that we can see what it looks like.

You could run it through:

  python -m json.tool 

to format it for easier reading.


Thanks
Dave



Re: [PATCH v4 6/8] diagnostics: Full support for generated data locations

2023-08-15 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-09 at 18:14 -0400, Lewis Hyatt wrote:
> Previous patches in this series have laid the groundwork for supporting
> source code locations in memory ("generated data") rather than ordinary
> files. This patch completes the support by adding awareness of such
> locations to all places that need to support them. The main changes are to
> diagnostic-show-locus.cc; the others are primarily small tweaks such as
> changing from the FILE to the SRC member when inspecting an
> expanded_location.
> 
> gcc/c-family/ChangeLog:
> 
> * c-format.cc (get_corrected_substring): Use the new overload of
> location_get_source_line() to support generated data.
> * c-indentation.cc (get_visual_column): Likewise.
> (get_first_nws_vis_column): Change argument from a plain file name
> to a source_id.
> (detect_intervening_unindent): Likewise.
> (should_warn_for_misleading_indentation): Pass
> detect_intervening_unindent() the SRC field rather than the FILE
> field from the expanded_location.
> 
> gcc/ChangeLog:
> 
> * gcc-rich-location.cc (blank_line_before_p): Use the new overload
> of location_get_source_line() to support generated data.
> * input.cc (get_source_text_between): Likewise.
> (get_substring_ranges_for_loc): Likewise.
> (get_source_file_content): Change the argument from a plain filename
> to a source_id.
> (location_missing_trailing_newline): Likewise.
> * input.h (get_source_file_content): Adjust prototype.
> (location_missing_trailing_newline): Likewise.
> * diagnostic-show-locus.cc (layout::calculate_x_offset_display): Use
> the new overload of location_get_source_line() to support generated
> data.
> (layout::print_line): Likewise.
> (class line_corrections): Change m_filename from a plain filename to
> a source_id.
> (source_line::source_line): Change argument from a plain filename to
> a source_id.
> (line_corrections::add_hint): Adapt to source_line change.
> (layout::print_trailing_fixits): Adapt to line_corrections change.
> (test_layout_x_offset_display_utf8): Test generated data too.
> (test_layout_x_offset_display_tab): Likewise.
> (test_diagnostic_show_locus_one_liner): Likewise.
> (test_diagnostic_show_locus_one_liner_utf8): Likewise.
> (test_add_location_if_nearby): Likewise.
> (test_diagnostic_show_locus_fixit_lines): Likewise.
> (test_fixit_consolidation): Likewise.
> (test_overlapped_fixit_printing): Likewise.
> (test_overlapped_fixit_printing_utf8): Likewise.
> (test_overlapped_fixit_printing_2): Likewise.
> (test_fixit_insert_containing_newline): Likewise.
> (test_fixit_insert_containing_newline_2): Likewise.
> (test_fixit_replace_containing_newline): Likewise.
> (test_fixit_deletion_affecting_newline): Likewise.
> (test_tab_expansion): Likewise.
> (test_escaping_bytes_1): Likewise.
> (test_escaping_bytes_2): Likewise.
> (test_line_numbers_multiline_range): Likewise.
> (diagnostic_show_locus_cc_tests): Likewise.
> ---
>  gcc/c-family/c-format.cc  |   2 +-
>  gcc/c-family/c-indentation.cc |   8 +-
>  gcc/diagnostic-show-locus.cc  | 227 ++
>  gcc/gcc-rich-location.cc  |   2 +-
>  gcc/input.cc  |  21 ++--
>  gcc/input.h   |   6 +-
>  6 files changed, 136 insertions(+), 130 deletions(-)
> 

Looks OK for trunk as-is (assuming prerequisites, of course), but as I
think you noted elsewhere this probably needs revising if we're going
to reject applying fix-it-hints to locations in generated data buffers.

Thanks
Dave


Re: [PATCH v4 5/8] diagnostics: Support testing generated data in input.cc selftests

2023-08-15 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-09 at 18:14 -0400, Lewis Hyatt wrote:
> Add selftests for the new capabilities in input.cc related to source code
> locations that are stored in memory rather than ordinary files.
> 
> gcc/ChangeLog:
> 
> * input.cc (temp_source_file::do_linemap_add): New function.
> (line_table_case::line_table_case): Add GENERATED_DATA argument.
> (line_table_test::line_table_test): Implement new M_GENERATED_DATA
> argument.
> (for_each_line_table_case): Optionally include generated data
> locations in the set of cases.
> (test_accessing_ordinary_linemaps): Test generated data locations.
> (test_make_location_nonpure_range_endpoints): Likewise.
> (test_line_offset_overflow): Likewise.
> (input_cc_tests): Likewise.
> * selftest.cc (named_temp_file::named_temp_file): Interpret a null
> SUFFIX argument as a request to use in-memory data.
> (named_temp_file::~named_temp_file): Support in-memory data.
> (temp_source_file::temp_source_file): Likewise.
> (temp_source_file::~temp_source_file): Likewise.
> * selftest.h (struct line_map_ordinary): Forward declare.
> (class named_temp_file): Add missing explicit to the constructor.
> (class temp_source_file): Add new members to support in-memory data.
> (class line_table_test): Likewise.
> (for_each_line_table_case): Adjust prototype.
> ---
>  gcc/input.cc    | 81 +
>  gcc/selftest.cc | 53 +---
>  gcc/selftest.h  | 19 ++--
>  3 files changed, 113 insertions(+), 40 deletions(-)
> 

Thanks; looks good to me.

Dave



Re: [PATCH v4 4/8] diagnostics: Support obtaining source code lines from generated data buffers

2023-08-15 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-09 at 18:14 -0400, Lewis Hyatt wrote:
> This patch enhances location_get_source_line(), which is the primary
> interface provided by the diagnostics infrastructure to obtain the line of
> source code corresponding to a given location, so that it understands
> generated data locations in addition to normal file-based locations. This
> involves changing the argument to location_get_source_line() from a plain
> file name, to a source_id object that can represent either type of location.
> 
> gcc/ChangeLog:
> 
> * input.cc (class data_cache_slot): New class.
> (file_cache::lookup_data): New function.
> (diagnostics_file_cache_forcibly_evict_data): New function.
> (file_cache::forcibly_evict_data): New function.
> (file_cache::evicted_cache_tab_entry): Generalize (via a template)
> to work for both file_cache_slot and data_cache_slot.
> (file_cache::add_file): Adapt for new interface to
> evicted_cache_tab_entry.
> (file_cache::add_data): New function.
> (data_cache_slot::create): New function.
> (file_cache::file_cache): Support the new m_data_slots member.
> (file_cache::~file_cache): Likewise.
> (file_cache::lookup_or_add_data): New function.
> (file_cache::lookup_or_add): New function that calls either
> lookup_or_add_data or lookup_or_add_file as appropriate.
> (location_get_source_line): Change the FILE_PATH argument to a
> source_id SRC, and use it to support obtaining source lines from
> generated data as well as from files.
> (location_compute_display_column): Support generated data using the
> new features of location_get_source_line.
> (dump_location_info): Likewise.
> * input.h (location_get_source_line): Adjust prototype. Add a new
> convenience overload taking an expanded_location.
> (class cache_data_source): Declare.
> (class data_cache_slot): Declare.
> (class file_cache): Declare new members.
> (diagnostics_file_cache_forcibly_evict_data): Declare.
> ---
>  gcc/input.cc | 171 ---
>  gcc/input.h  |  23 +--
>  2 files changed, 153 insertions(+), 41 deletions(-)
> 
> diff --git a/gcc/input.cc b/gcc/input.cc
> index 9377020b460..790279d4273 100644
> --- a/gcc/input.cc
> +++ b/gcc/input.cc
> @@ -207,6 +207,28 @@ private:
>    void maybe_grow ();
>  };
>  
> +/* This is the implementation of cache_data_source for generated
> +   data that is already in memory.  */
> +class data_cache_slot final : public cache_data_source

It occurred to me: why are we caching accessing a buffer that's already
in memory - but we're also caching the line-splitting information, and
providing the line-splitting algorithm with a consistent interface to
the data, right?

[...snip...]

> @@ -397,6 +434,15 @@ diagnostics_file_cache_forcibly_evict_file (const char 
> *file_path)
>    global_dc->m_file_cache->forcibly_evict_file (file_path);
>  }
>  
> +void
> +diagnostics_file_cache_forcibly_evict_data (const char *data,
> +   unsigned int data_len)
> +{
> +  if (!global_dc->m_file_cache)
> +    return;
> +  global_dc->m_file_cache->forcibly_evict_data (data, data_len);

Maybe we should rename diagnostic_context's m_file_cache to
m_source_cache?  (and class file_cache for that matter?)  But if so,
that can/should be a followup/separate patch.

[...snip...]
 
> @@ -525,10 +582,22 @@ file_cache_slot::create (const 
> file_cache::input_context _context,
>    return true;
>  }
>  
> +void
> +data_cache_slot::create (const char *data, unsigned int data_len,
> +    unsigned int highest_use_count)
> +{
> +  reset ();
> +  on_create (highest_use_count + 1,
> +    total_lines_num (source_id {data, data_len}));
> +  m_data_begin = data;
> +  m_data_end = data + data_len;
> +}
> +
>  /* file_cache's ctor.  */
>  
>  file_cache::file_cache ()
> -: m_file_slots (new file_cache_slot[num_file_slots])
> +  : m_file_slots (new file_cache_slot[num_file_slots]),
> +    m_data_slots (new data_cache_slot[num_file_slots])

Should "num_file_slots" be renamed to "num_slots"?

I assume you're using the same value for both kinds of slot since the
file_cache::evicted_cache_tab_entry template uses this.  I suppose the
number could be passed in as an argument to that function if we wanted
to have different sizes for the two kinds, but I don't think it
matters.

[...snip...]

> @@ -912,26 +1000,22 @@ cache_data_source::read_line_num (size_t line_num,
>     If the function fails, a NULL char_span is returned.  */
>  
>  char_span
> -location_get_source_line (const char *file_path, int line)
> +location_get_source_line (source_id src, int line)
>  {
> -  const char *buffer = NULL;
> -  ssize_t len;
> -
> -  if (line == 0)
> -    return char_span (NULL, 0);
> -
> -  if (file_path == NULL)
> -    return 

Re: [PATCH v4 3/8] diagnostics: Refactor class file_cache_slot

2023-08-15 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-09 at 18:14 -0400, Lewis Hyatt wrote:
> Class file_cache_slot in input.cc is used to query specific lines of source
> code from a file when needed by diagnostics infrastructure. This will be
> extended in a subsequent patch to support obtaining the source code from
> in-memory generated buffers rather than from a file. The present patch
> refactors class file_cache_slot, putting most of the logic into a new base
> class cache_data_source, in preparation for reusing that code in the next
> patch. There is no change in functionality yet.
> 
> gcc/ChangeLog:
> 
> * input.cc (class file_cache_slot): Refactor functionality into a
> new base class...
> (class cache_data_source): ...here.
> (file_cache::forcibly_evict_file): Adapt for refactoring.
> (file_cache_slot::evict): Renamed to...
> (file_cache_slot::reset): ...this, and partially refactored into
> base class...
> (cache_data_source::reset): ...here.
> (file_cache_slot::get_full_file_content): Moved into base class...
> (cache_data_source::get_full_file_content): ...here.
> (file_cache_slot::create): Adapt for refactoring.
> (file_cache_slot::file_cache_slot): Refactor partially into...
> (cache_data_source::cache_data_source): ...here.
> (file_cache_slot::~file_cache_slot): Refactor partially into...
> (cache_data_source::~cache_data_source): ...here.
> (file_cache_slot::needs_read_p): Remove.
> (file_cache_slot::needs_grow_p): Remove.
> (file_cache_slot::maybe_grow): Adapt for refactoring.
> (file_cache_slot::read_data): Refactored, along with...
> (file_cache_slot::maybe_read_data): this, into...
> (file_cache_slot::get_more_data): ...here.
> (find_end_of_line): Change interface to take a pair of pointers,
> rather than a pointer + length.
> (file_cache_slot::get_next_line): Refactored into...
> (cache_data_source::get_next_line): ...here.
> (file_cache_slot::goto_next_line): Refactored into...
> (cache_data_source::goto_next_line): ...here.
> (file_cache_slot::read_line_num): Refactored into...
> (cache_data_source::read_line_num): ...here.
> (location_get_source_line): Fix const-correctness as necessitated by
> new interface.
> ---
>  gcc/input.cc | 513 +++
>  1 file changed, 235 insertions(+), 278 deletions(-)
> 

I confess I had to reread both this and patch 4/8 to make sense of
this; this is probably one of those cases where it's harder to read in
patch form than as source, but I think I now understand the new
implementation.

Did you try testing this with valgrind (e.g. "make selftest-valgrind")?

I don't think we have any selftest coverage for "\r" in the line-break
handling; that would be good to add.

This patch is OK for trunk once the rest of the kit is approved.

Thanks
Dave



Re: [PATCH v2] analyzer: New option fanalyzer-show-events-in-system-headers [PR110543]

2023-08-14 Thread David Malcolm via Gcc-patches
On Mon, 2023-08-14 at 17:48 +0200, priour...@gmail.com wrote:
> From: benjamin priour 
> 
> Plenty useful, thanks David. I've adjusted a few things, especially
> the artifacts of earlier versions I missed when building the commit.
> 
> I didn't know how to test for warnings within , I couldn't figure a 
> portable test.
> I cannot pinpoint the line the warning is issued at in an inline DejaGNU 
> directive,
> nor can I safely say the stack depth if I check a multiline-output (nor the 
> methods names)
> 
> In the end, I found out an alternative, I am checking for the presence of 
> event "entry of 'main'".
> Indeed, diagnostic_manager::finish_pruning comment's reads
> If all we're left with is in one function, then filter function entry events.
> The provided test case can only goes into main and std::* frames, so if 
> "entry of 'main'" exists,
> it means we are also going into std::* frames.
> 
> I've also adjusted the comment of prune_system_headers, analyzer.opt and 
> added an entry to invoker.texi.
> 
> Successfully regstrapped off trunk
> 54be338589ea93ad4ff53d22adde476a0582537b on x86_64-linux-gnu.

Thanks for the updated patch.

This is ready to push to trunk.

Dave



Re: [PATCH v4 2/8] libcpp: diagnostics: Support generated data in expanded locations

2023-08-11 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-09 at 18:14 -0400, Lewis Hyatt wrote:
> The previous patch in this series introduced the concept of LC_GEN line
> maps. This patch continues on the path to using them to improve _Pragma
> diagnostics, by adding a new source_id SRC member to struct
> expanded_location, which is populated by linemap_expand_location. This
> member allows call sites to detect and handle when a location refers to
> generated data rather than a plain file name.
> 
> The previous FILE member of expanded_location is preserved (although
> redundant with SRC), so that call sites which do not and never will care
> about generated data do not need to be concerned about it. Call sites that
> will care are modified here, to use SRC rather than FILE for comparing
> locations.

Thanks; this seems like a good approach.


[...snip...]

> diff --git a/gcc/edit-context.cc b/gcc/edit-context.cc
> index 6f5bc6b9d8f..15052aec417 100644
> --- a/gcc/edit-context.cc
> +++ b/gcc/edit-context.cc
> @@ -295,7 +295,7 @@ edit_context::apply_fixit (const fixit_hint *hint)
>  {
>    expanded_location start = expand_location (hint->get_start_loc ());
>    expanded_location next_loc = expand_location (hint->get_next_loc ());
> -  if (start.file != next_loc.file)
> +  if (start.src != next_loc.src || start.src.is_buffer ())
>  return false;
>    if (start.line != next_loc.line)
>  return false;

Thinking about fix-it hints, it makes sense to reject attempts to
create fix-it hints within generated strings, as we can't apply them or
visualize them.

Does anywhere in the patch kit do that?  Either of 
  rich_location::maybe_add_fixit
or
  rich_location::reject_impossible_fixit
would be good places to do that.


[...snip...]

> diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h
> index e59123b18c5..76617fe6129 100644
> --- a/libcpp/include/line-map.h
> +++ b/libcpp/include/line-map.h
> @@ -1410,18 +1410,22 @@ linemap_location_before_p (class line_maps *set,
>  
>  typedef struct
>  {
> -  /* The name of the source file involved.  */
> -  const char *file;
> +  /* The file name of the location involved, or NULL if the location
> + is not in an external file.  */
> +  const char *file = nullptr;
>  
> -  /* The line-location in the source file.  */
> -  int line;
> -
> -  int column;
> +  /* A source_id recording the file name and/or the in-memory content,
> + as appropriate.  Users that need to handle in-memory content need
> + to use this rather than FILE.  */
> +  source_id src;
>  
> -  void *data;
> +  /* The line-location in the source file.  */
> +  int line = 0;
> +  int column = 0;
> +  void *data = nullptr;
>  
> -  /* In a system header?. */
> -  bool sysp;
> +  /* In a system header?  */
> +  bool sysp = false;
>  } expanded_location;

I don't know if we've been using default member initialization yet, but
apparently it's C++11, and thus OK.

[...snip...]


This patch looks good to me, but obviously it has dependencies on the
rest of the kit.

Dave



Re: [PATCH v4 1/8] libcpp: Add LC_GEN linemaps to support in-memory buffers

2023-08-11 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-09 at 18:14 -0400, Lewis Hyatt wrote:

Hi Lewis, thanks for the patch...

> Add a new linemap reason LC_GEN which enables encoding the location of data
> that was generated during compilation and does not appear in any source file.
> There could be many use cases, such as, for instance, referring to the content
> of builtin macros (not yet implemented, but an easy lift after this one.) The
> first intended application is to create a place to store the input to a
> _Pragma directive, so that proper locations can be assigned to those
> tokens. This will be done in a subsequent commit.
> 
> The TO_FILE member of struct line_map_ordinary has been changed to a union
> named SRC which can be either a file name, or a pointer to a line_map_data
> struct describing the data. There is no space overhead added to the line
> maps data structures.
> 
> Outside libcpp, this patch includes only the minimal changes implied by the
> adjustment from TO_FILE to SRC in struct line_map_ordinary. Subsequent
> patches will implement the new functionality.
> 
> libcpp/ChangeLog:
> 
> * include/line-map.h (enum lc_reason): Add LC_GEN.
> (struct line_map_data): New struct.
> (struct line_map_ordinary): Change TO_FILE from a char* to a union,
> and rename to SRC.
> (class source_id): New class.
> (ORDINARY_MAP_GENERATED_DATA_P): New function.
> (ORDINARY_MAP_GENERATED_DATA): New function.
> (ORDINARY_MAP_GENERATED_DATA_LEN): New function.
> (ORDINARY_MAP_SOURCE_ID): New function.
> (ORDINARY_MAPS_SAME_FILE_P): New function.
> (ORDINARY_MAP_CONTAINING_FILE_NAME): Declare.
> (LINEMAP_FILE): Adapt to struct line_map_ordinary change.
> (linemap_get_file_highest_location): Likewise.
> * line-map.cc (source_id::operator==): New function.
> (ORDINARY_MAP_CONTAINING_FILE_NAME): New function.
> (linemap_add): Support creating LC_GEN maps.
> (linemap_line_start): Support LC_GEN maps.
> (linemap_check_files_exited): Likewise.
> (linemap_position_for_loc_and_offset): Likewise.
> (linemap_get_expansion_filename): Likewise.
> (linemap_dump): Likewise.
> (linemap_dump_location): Likewise.
> (linemap_get_file_highest_location): Likewise.
> * directives.cc (_cpp_do_file_change): Likewise.
> 
> gcc/c-family/ChangeLog:
> 
> * c-common.cc (try_to_locate_new_include_insertion_point): Recognize
> and ignore LC_GEN maps.
> 
> gcc/cp/ChangeLog:
> 
> * module.cc (module_state::write_ordinary_maps): Recognize and
> ignore LC_GEN maps, and adapt to interface change in struct
> line_map_ordinary.
> (module_state::read_ordinary_maps): Likewise.
> 
> gcc/ChangeLog:
> 
> * diagnostic-show-locus.cc (compatible_locations_p): Adapt to
> interface change in struct line_map_ordinary.
> * input.cc (special_fname_generated): New function.
> (dump_location_info): Support LC_GEN maps.
> (get_substring_ranges_for_loc): Adapt to interface change in struct
> line_map_ordinary.
> * input.h (special_fname_generated): Declare.
> 
> gcc/go/ChangeLog:
> 
> * go-linemap.cc (Gcc_linemap::to_string): Recognize and ignore
> LC_GEN maps.
> ---
>  gcc/c-family/c-common.cc |  11 ++-
>  gcc/cp/module.cc |   8 +-
>  gcc/diagnostic-show-locus.cc |   2 +-
>  gcc/go/go-linemap.cc |   3 +-
>  gcc/input.cc |  27 +-
>  gcc/input.h  |   1 +
>  libcpp/directives.cc |   4 +-
>  libcpp/include/line-map.h    | 144 
>  libcpp/line-map.cc   | 181 +--
>  9 files changed, 299 insertions(+), 82 deletions(-)

[...snip...]

> 
> diff --git a/gcc/diagnostic-show-locus.cc b/gcc/diagnostic-show-locus.cc
> index 0514815b51f..a2aa6b4e0b5 100644
> --- a/gcc/diagnostic-show-locus.cc
> +++ b/gcc/diagnostic-show-locus.cc
> @@ -998,7 +998,7 @@ compatible_locations_p (location_t loc_a, location_t 
> loc_b)
>  are in the same file.  */
>    const line_map_ordinary *ord_map_a = linemap_check_ordinary (map_a);
>    const line_map_ordinary *ord_map_b = linemap_check_ordinary (map_b);
> -  return ord_map_a->to_file == ord_map_b->to_file;
> +  return ORDINARY_MAPS_SAME_FILE_P (ord_map_a, ord_map_b);

My first thought here was: are buffers supported here, or does it have
to be a file?

It turns out that ORDINARY_MAPS_SAME_FILE_P works on both files and
buffers.

This suggests that it would be better named as
ORDINARY_MAPS_SAME_SOURCE_ID_P, but note the comment below, could this
be:

   return ord_map_a->same_source_id_p (ord_map_b);

?

[...snip...]

> diff --git a/gcc/input.cc b/gcc/input.cc
> index eaf301ec7c1..c1735215b29 100644
> --- a/gcc/input.cc
> +++ b/gcc/input.cc

[...snip...]

> @@ -1814,11 +1835,11 @@ 

[pushed] analyzer: new warning: -Wanalyzer-unterminated-string [PR105899]

2023-08-11 Thread David Malcolm via Gcc-patches
This patch adds new functions to the analyzer for checking that
an argument at a callsite is a pointer to a valid null-terminated
string, and uses this for the following known functions:

- error (param 3, the format string)
- error_at_line (param 5, the format string)
- putenv
- strchr (1st param)
- strcpy (2nd param)
- strdup

Currently the check merely detects pointers to unterminated string
constants, and adds a new -Wanalyzer-unterminated-string to complain
about that.  I'm experimenting with detecting other ways in which
a buffer can fail to be null-terminated, and for other problems with
such buffers, but this patch at least adds the framework for wiring
up the check to specific parameters of known_functions.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3169-g325f9e88802daa.

gcc/analyzer/ChangeLog:
PR analyzer/105899
* analyzer.opt (Wanalyzer-unterminated-string): New.
* call-details.cc
(call_details::check_for_null_terminated_string_arg): New.
* call-details.h
(call_details::check_for_null_terminated_string_arg): New decl.
* kf-analyzer.cc (class kf_analyzer_get_strlen): New.
(register_known_analyzer_functions): Register it.
* kf.cc (kf_error::impl_call_pre): Check that format arg is a
valid null-terminated string.
(kf_putenv::impl_call_pre): Likewise for the sole param.
(kf_strchr::impl_call_pre): Likewise for the first param.
(kf_strcpy::impl_call_pre): Likewise for the second param.
(kf_strdup::impl_call_pre): Likewise for the sole param.
* region-model.cc (get_strlen): New.
(struct call_arg_details): New.
(inform_about_expected_null_terminated_string_arg): New.
(class unterminated_string_arg): New.
(region_model::check_for_null_terminated_string_arg): New.
* region-model.h
(region_model::check_for_null_terminated_string_arg): New decl.

gcc/ChangeLog:
PR analyzer/105899
* doc/analyzer.texi (__analyzer_get_strlen): New.
* doc/invoke.texi: Add -Wanalyzer-unterminated-string.

gcc/testsuite/ChangeLog:
PR analyzer/105899
* gcc.dg/analyzer/analyzer-decls.h (__analyzer_get_strlen): New.
* gcc.dg/analyzer/error-1.c (test_error_unterminated): New.
(test_error_at_line_unterminated): New.
* gcc.dg/analyzer/null-terminated-strings-1.c: New test.
* gcc.dg/analyzer/putenv-1.c (test_unterminated): New.
* gcc.dg/analyzer/strchr-1.c (test_unterminated): New.
* gcc.dg/analyzer/strcpy-1.c (test_unterminated): New.
* gcc.dg/analyzer/strdup-1.c (test_unterminated): New.
---
 gcc/analyzer/analyzer.opt |   4 +
 gcc/analyzer/call-details.cc  |   7 +
 gcc/analyzer/call-details.h   |   2 +
 gcc/analyzer/kf-analyzer.cc   |  18 ++
 gcc/analyzer/kf.cc|  11 ++
 gcc/analyzer/region-model.cc  | 163 ++
 gcc/analyzer/region-model.h   |   3 +
 gcc/doc/analyzer.texi |   8 +
 gcc/doc/invoke.texi   |  13 ++
 .../gcc.dg/analyzer/analyzer-decls.h  |   5 +
 gcc/testsuite/gcc.dg/analyzer/error-1.c   |  12 ++
 .../analyzer/null-terminated-strings-1.c  |  30 
 gcc/testsuite/gcc.dg/analyzer/putenv-1.c  |   7 +
 gcc/testsuite/gcc.dg/analyzer/strchr-1.c  |   6 +
 gcc/testsuite/gcc.dg/analyzer/strcpy-1.c  |   6 +
 gcc/testsuite/gcc.dg/analyzer/strdup-1.c  |   6 +
 16 files changed, 301 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/null-terminated-strings-1.c

diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt
index 2760aaa8151..a9bac400ca3 100644
--- a/gcc/analyzer/analyzer.opt
+++ b/gcc/analyzer/analyzer.opt
@@ -214,6 +214,10 @@ Wanalyzer-tainted-size
 Common Var(warn_analyzer_tainted_size) Init(1) Warning
 Warn about code paths in which an unsanitized value is used as a size.
 
+Wanalyzer-unterminated-string
+Common Var(warn_analyzer_unterminated_string) Init(1) Warning
+Warn about code paths which attempt to find the length of an unterminated 
string.
+
 Wanalyzer-use-after-free
 Common Var(warn_analyzer_use_after_free) Init(1) Warning
 Warn about code paths in which a freed value is used.
diff --git a/gcc/analyzer/call-details.cc b/gcc/analyzer/call-details.cc
index 93f4846f674..fa86f55177a 100644
--- a/gcc/analyzer/call-details.cc
+++ b/gcc/analyzer/call-details.cc
@@ -376,6 +376,13 @@ call_details::lookup_function_attribute (const char 
*attr_name) const
   return lookup_attribute (attr_name, TYPE_ATTRIBUTES (allocfntype));
 }
 
+void
+call_details::check_for_null_terminated_string_arg (unsigned arg_idx) const
+{
+  region_model *model = get_model ();
+  model->check_for_null_terminated_string_arg (*this, arg_idx);
+}
+
 } // namespace ana
 
 #endif /* #if ENABLE_ANALYZER */

Re: [PATCH] analyzer: New option fanalyzer-show-events-in-system-headers [PR110543]

2023-08-11 Thread David Malcolm via Gcc-patches
On Fri, 2023-08-11 at 13:51 +0200, priour...@gmail.com wrote:
> From: benjamin priour 

Hi Benjamin, thanks for the patch.

Overall, the patch is close to being ready, but see the various
comments inline below...

> 
> This patch introduces -fanalyzer-show-events-in-system-headers,
> disabled by default.
> 
> This option reduces the noise of the analyzer emitted diagnostics
> when dealing with system headers.
> The new option only affects the display of the diagnostics,
> but doesn't hinder the actual analysis.
> 
> Given a diagnostics path diving into a system header in the form
> [
>   prefix events...,
>   system header call,
>     system header entry,
>     events within system headers...,
>   system header return,
>   suffix events...
> ]
> then disabling the option (either by default or explicitly)
> will shorten the path into:
> [
>   prefix events...,
>   system header call,
>   system header return,
>   suffix events...
> ]
> 
> Signed-off-by: benjamin priour 
> 

[...]

> 
> diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc
> index 5091fb7a583..b27d8e359db 100644
> --- a/gcc/analyzer/analyzer.cc
> +++ b/gcc/analyzer/analyzer.cc
> @@ -274,7 +274,7 @@ is_named_call_p (const_tree fndecl, const char *funcname)
>     Compare with cp/typeck.cc: decl_in_std_namespace_p, but this doesn't
>     rely on being the C++ FE (or handle inline namespaces inside of std).  */
>  
> -static inline bool
> +bool
>  is_std_function_p (const_tree fndecl)
>  {
>    tree name_decl = DECL_NAME (fndecl);
> diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h
> index 579517c23e6..31597079153 100644
> --- a/gcc/analyzer/analyzer.h
> +++ b/gcc/analyzer/analyzer.h
> @@ -386,6 +386,7 @@ extern bool is_special_named_call_p (const gcall *call, 
> const char *funcname,
>  extern bool is_named_call_p (const_tree fndecl, const char *funcname);
>  extern bool is_named_call_p (const_tree fndecl, const char *funcname,
>  const gcall *call, unsigned int num_args);
> +extern bool is_std_function_p (const_tree fndecl);

The analyzer.{cc|h} parts of the patch make is_std_function_p "extern",
but I didn't see any use of it in the rest of the patch.  Did I miss
something, or are the changes to is_std_function_p a vestige from an
earlier version of the patch?

[...]

> diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt
> index 2760aaa8151..d97cd569f52 100644
> --- a/gcc/analyzer/analyzer.opt
> +++ b/gcc/analyzer/analyzer.opt
> @@ -290,6 +290,10 @@ fanalyzer-transitivity
>  Common Var(flag_analyzer_transitivity) Init(0)
>  Enable transitivity of constraints during analysis.
>  
> +fanalyzer-show-events-in-system-headers
> +Common Var(flag_analyzer_show_events_in_system_headers) Init(0)
> +Trim diagnostics path that are too long before emission.
> +

There's a mismatch here between the sense of the name of the option as
opposed to the sense of the description, and the wording isn't quite
accurate.

You could either

(A) rename the option to:
  fanalyzer-hide-events-in-system-headers
and make it be Init(1), and change the sense of the conditional in
diagnostic_manager::prune_path?
That way the user would supply:
  -fno-analyzer-hide-events-in-system-headers

or:

(B) change the wording to something like
"Show events within system headers in analyzer execution paths."
or somesuch

All options should have a corresponding entry in invoke.texi, so please
add one for the new option (have a look at the existing ones).

>  fanalyzer-call-summaries
>  Common Var(flag_analyzer_call_summaries) Init(0)
>  Approximate the effect of function calls to simplify analysis.
> diff --git a/gcc/analyzer/diagnostic-manager.cc 
> b/gcc/analyzer/diagnostic-manager.cc
> index cfca305d552..2a9705a464f 100644
> --- a/gcc/analyzer/diagnostic-manager.cc
> +++ b/gcc/analyzer/diagnostic-manager.cc
> @@ -20,9 +20,11 @@ along with GCC; see the file COPYING3.  If not see
>  
>  #include "config.h"
>  #define INCLUDE_MEMORY
> +#define INCLUDE_VECTOR

I don't see any use of std::vector in the patch; is this a vestige from
an earlier version of the patch?

>  #include "system.h"
>  #include "coretypes.h"
>  #include "tree.h"
> +#include "input.h"
>  #include "pretty-print.h"
>  #include "gcc-rich-location.h"
>  #include "gimple-pretty-print.h"
> @@ -2281,6 +2283,8 @@ diagnostic_manager::prune_path (checker_path *path,
>    path->maybe_log (get_logger (), "path");
>    prune_for_sm_diagnostic (path, sm, sval, state);
>    prune_interproc_events (path);
> +  if (! flag_analyzer_show_events_in_system_headers)
> +    prune_system_headers (path);
>    consolidate_conditions (path);
>    finish_pruning (path);
>    path->maybe_log (get_logger (), "pruned");
> @@ -2667,6 +2671,67 @@ diagnostic_manager::prune_interproc_events 
> (checker_path *path) const
>    while (changed);
>  }
>  
> +/* Remove everything within [call point, IDX]. For consistency,
> +   IDX should represent the return event of the frame to delete,
> + 

Re: [PATCH v2] analyzer: More features for CPython analyzer plugin [PR107646]

2023-08-09 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-09 at 15:22 -0400, Eric Feng wrote:
> Thank you for your help in getting dg-require-python-h working! I can
> confirm that the FAILs are related to differences between the --
> cflags
> affecting the gimple seen by the analyzer. For this reason, I have
> changed it to --includes for now. 

Sounds good.

Eventually we'll probably want to support --cflags, but given that
every distribution probably has its own set of flags, it's a recipe for
an unpleasantly large test matrix, so just using --includes is a good
compromise.

> To be sure, I tested on Python 3.8 as
> well and it works as expected. I have also addressed the following
> comments on the WIP patch as you described.
> 
> -- Update Changelog entry to list new functions being simulated.
> -- Update region_model::get_or_create_region_for_heap_alloc leading
> comment.
> -- Change register_alloc to update_state_machine.
> -- Change move_ptr_sval_non_null to transition_ptr_sval_non_null.
> -- Static helper functions for:
> -- Initializing ob_refcnt field.
> -- Setting ob_type field.
> -- Getting ob_base field.
> -- Initializing heap allocated region for PyObjects.
> -- Incrementing a field by one.
> -- Change arg_is_long_p to arg_is_integral_p.
> -- Extract common failure scenario for reusability.
> 
> The initial WIP patch using 
> 
> /* { dg-options "-fanalyzer -I/usr/include/python3.9" }. */
> 
> has been bootstrapped and regtested on aarch64-unknown-linux-gnu.
> Since
> we did not change any core logic in the revision and the only changes
> within the analyzer core are changing variable names, is it OK for
> trunk? In the meantime, the revised patch is currently going through
> the bootstrap and regtest process.

Thanks for the updated patch.

Unfortunately I just pushed a largish analyzer patch (r14-3114-
g73da34a538ddc2) which may well conflict with your patch, so please
rebase to beyond that.  

Sorry about this.

In particular note that there's no longer a default assignment to the
LHS at a call-site in region_model::on_call_pre; known_function
subclasses are now responsible for assigning to the LHS of the
callsite.  But I suspect that all the known_function subclasses in the
cpython plugin already do that.

Some nits inline below...

[...snip...]

> Some concessions were made to
> simplify the analysis process when comparing kf_PyList_Append with
> the
> real implementation. In particular, PyList_Append performs some
> optimization internally to try and avoid calls to realloc if
> possible. For simplicity, we assume that realloc is called every
> time.
> Also, we grow the size by just 1 (to ensure enough space for adding a
> new element) rather than abide by the heuristics that the actual
> implementation
> follows.

Might be worth capturing these notes as comments in the source (for
class kf_PyList_Append), rather than just within the commit message.

[...snip...]
> 
> diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-
> model.cc
> index e92b3f7b074..c338f045d92 100644
> --- a/gcc/analyzer/region-model.cc
> +++ b/gcc/analyzer/region-model.cc
> @@ -5127,11 +5127,16 @@ region_model::check_dynamic_size_for_floats
> (const svalue *size_in_bytes,
>     Use CTXT to complain about tainted sizes.
>  
>     Reuse an existing heap_allocated_region if it's not being
> referenced by
> -   this region_model; otherwise create a new one.  */
> +   this region_model; otherwise create a new one.
> +
> +   Optionally (update_state_machine) transitions the pointer
> pointing to the
> +   heap_allocated_region from start to assumed non-null.  */
>  
>  const region *
>  region_model::get_or_create_region_for_heap_alloc (const svalue
> *size_in_bytes,
> - 
> region_model_context *ctxt)
> +   region_model_context *ctxt,
> +   bool update_state_machine,
> +   const call_details *cd)
>  {
>    /* Determine which regions are referenced in this region_model, so
> that
>   we can reuse an existing heap_allocated_region if it's not in
> use on
> @@ -5153,6 +5158,17 @@
> region_model::get_or_create_region_for_heap_alloc (const svalue
> *size_in_bytes,
>    if (size_in_bytes)
>  if (compat_types_p (size_in_bytes->get_type (), size_type_node))
>    set_dynamic_extents (reg, size_in_bytes, ctxt);
> +
> +   if (update_state_machine && cd)
> +   {
> +   const svalue *ptr_sval = nullptr;
> +   if (cd->get_lhs_type ())
> +   ptr_sval = m_mgr->get_ptr_svalue (cd->get_lhs_type (), reg);
> +   else
> +   ptr_sval = m_mgr->get_ptr_svalue (NULL_TREE, reg);
> +   transition_ptr_sval_non_null (ctxt,
> ptr_sval);

This if/else is redundant: the "else" is only reached if cd-
>get_lhs_type () is null, in which case you pass in NULL_TREE, so it
works the same either way.  Or am I missing something?

Also, it looks like something weird's happening with 

Re: [PATCH] testsuite: Fix gcc.dg/analyzer/allocation-size-multiline-[123].c [PR 110426]

2023-08-09 Thread David Malcolm via Gcc-patches
On Tue, 2023-08-08 at 15:01 +, Christophe Lyon wrote:
> For 32-bit newlib targets (e.g. arm-eabi)  int32_t is "long int".
> 
> Like previous patches in these tests, update the matching regexps to
> match "aka (long )?int".
> 
> Tested on arm-eabi and aarch64-linux-gnu.

Sorry about this breakage.

These tests used to emit the information as multiple messages, but were
consolidated as a side-effect of r14-3001-g021077b94741c9.

I've just committed r14-3114-g73da34a538ddc2, a cleanup of the analyzer
code, which has a side-effect of splitting the messages back up.  I
believe that r14-3114 restores these tests to their pre-r14-3001 state,
but I might have messed up.

Does r14-3114-g73da34a538ddc2 fix the issues for you, or is some
patching still needed?

Dave


> 
> 2023-08-08  Christophe Lyon  
> 
> gcc/testsuite/
> PR analyzer/110426
> * gcc.dg/analyzer/allocation-size-multiline-1.c: Handle
> int32_t being "long int".
> * gcc.dg/analyzer/allocation-size-multiline-2.c: Likewise.
> * gcc.dg/analyzer/allocation-size-multiline-3.c: Likewise.
> ---
>  gcc/testsuite/gcc.dg/analyzer/allocation-size-multiline-1.c | 6 +++-
> --
>  gcc/testsuite/gcc.dg/analyzer/allocation-size-multiline-2.c | 6 +++-
> --
>  gcc/testsuite/gcc.dg/analyzer/allocation-size-multiline-3.c | 4 ++--
>  3 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/analyzer/allocation-size-multiline-
> 1.c b/gcc/testsuite/gcc.dg/analyzer/allocation-size-multiline-1.c
> index 9938ba237a0..b56e4b4e8e1 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/allocation-size-multiline-1.c
> +++ b/gcc/testsuite/gcc.dg/analyzer/allocation-size-multiline-1.c
> @@ -16,7 +16,7 @@ void test_constant_1 (void)
>  |   int32_t *ptr = __builtin_malloc (1);
>  |  ^~~~
>  |  |
> -    |  (1) allocated 1 bytes and assigned to
> 'int32_t *' {aka 'int *'} here; 'sizeof (int32_t {aka int})' is '4'
> +    |  (1) allocated 1 bytes and assigned to
> 'int32_t *' {aka '{re:long :re?}int *'} here; 'sizeof (int32_t {aka
> {re:long :re?}int})' is '4'
>  |
>     { dg-end-multiline-output "" } */
>  
> @@ -34,7 +34,7 @@ void test_constant_2 (void)
>  |   int32_t *ptr = __builtin_malloc (2);
>  |  ^~~~
>  |  |
> -    |  (1) allocated 2 bytes and assigned to
> 'int32_t *' {aka 'int *'} here; 'sizeof (int32_t {aka int})' is '4'
> +    |  (1) allocated 2 bytes and assigned to
> 'int32_t *' {aka '{re:long :re?}int *'} here; 'sizeof (int32_t {aka
> {re:long :re?}int})' is '4'
>  |
>     { dg-end-multiline-output "" } */
>  
> @@ -52,6 +52,6 @@ void test_symbolic (int n)
>  |   int32_t *ptr = __builtin_malloc (n * 2);
>  |  ^~~~
>  |  |
> -    |  (1) allocated 'n * 2' bytes and assigned to
> 'int32_t *' {aka 'int *'} here; 'sizeof (int32_t {aka int})' is '4'
> +    |  (1) allocated 'n * 2' bytes and assigned to
> 'int32_t *' {aka '{re:long :re?}int *'} here; 'sizeof (int32_t {aka
> {re:long :re?}int})' is '4'
>  |
>     { dg-end-multiline-output "" } */
> diff --git a/gcc/testsuite/gcc.dg/analyzer/allocation-size-multiline-
> 2.c b/gcc/testsuite/gcc.dg/analyzer/allocation-size-multiline-2.c
> index 9e1269cbb7a..8912913a78c 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/allocation-size-multiline-2.c
> +++ b/gcc/testsuite/gcc.dg/analyzer/allocation-size-multiline-2.c
> @@ -16,7 +16,7 @@ void test_constant_1 (void)
>  |   int32_t *ptr = __builtin_alloca (1);
>  |  ^~~~
>  |  |
> -    |  (1) allocated 1 bytes and assigned to
> 'int32_t *' {aka 'int *'} here; 'sizeof (int32_t {aka int})' is '4'
> +    |  (1) allocated 1 bytes and assigned to
> 'int32_t *' {aka '{re:long :re?}int *'} here; 'sizeof (int32_t {aka
> {re:long :re?}int})' is '4'
>  |
>     { dg-end-multiline-output "" } */
>  
> @@ -33,7 +33,7 @@ void test_constant_2 (void)
>  |   int32_t *ptr = __builtin_alloca (2);
>  |  ^~~~
>  |  |
> -    |  (1) allocated 2 bytes and assigned to
> 'int32_t *' {aka 'int *'} here; 'sizeof (int32_t {aka int})' is '4'
> +    |  (1) allocated 2 bytes and assigned to
> 'int32_t *' {aka '{re:long :re?}int *'} here; 'sizeof (int32_t {aka
> {re:long :re?}int})' is '4'
>  |
>     { dg-end-multiline-output "" } */
>  
> @@ -50,7 +50,7 @@ void test_symbolic (int n)
>  |   int32_t *ptr = __builtin_alloca (n * 2);
>  |  ^~~~
>  |  |
> -    |  (1) allocated 'n * 2' bytes and assigned to
> 'int32_t *' {aka 'int *'} here; 'sizeof (int32_t {aka int})' is '4'
> +    |  (1) 

[pushed] analyzer: remove default return value from region_model::on_call_pre

2023-08-09 Thread David Malcolm via Gcc-patches
Previously, the code for simulating calls to external functions in
region_model::on_call_pre wrote a default svalue to the LHS of the
call statement, which could be further overwritten by known_function
subclasses.

Unfortunately, this led to messy hacks, such as when the default svalue
was an allocation: the LHS would be written to with two different
heap-allocated regions, requiring special-case cleanups to avoid the
stray state from the first heap allocation leading to state explosions;
see r14-3001-g021077b94741c9.

The following patch eliminates this write of a default svalue to the LHS
of callsite.  Instead, all known_function implementations that have a
return value are now responsible for setting the LHS themselves.  A new
call_details::set_any_lhs_with_defaults function is provided to make it
easy to get the old behavior.

On working through the various known_function subclasses, I noticed that
memset was using the default behavior.  This patch updates this so that
it's now known to return its first parameter.

Cleaning this up eliminates various doubling of saved_diagnostics (e.g.
for dubious_allocation_size) where it was generating a diagnostic for
both writes to the LHS, deduplicating them to the first diagnostic (with
the default LHS), and then failing to create a region_creation_event
when emitting the diagnostic, leading to the fallback wording in
dubious_allocation_size::describe_final_event, such as:

  (1) allocated 42 bytes and assigned to ‘int32_t *’ {aka ‘int *’} here; 
‘sizeof (int32_t {aka int})’ is ‘4’

Without the double write to the LHS, it creates a region_creation_event,
so we get the allocation and the assignment as two separate events in
the diagnostic path, e.g.:

  (1) allocated 42 bytes here
  (2) assigned to ‘int32_t *’ {aka ‘int *’} here; ‘sizeof (int32_t {aka int})’ 
is ‘4’

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3114-g73da34a538ddc2.

gcc/analyzer/ChangeLog:
* analyzer.h (class pure_known_function_with_default_return): New
subclass.
* call-details.cc (const_fn_p): Move here from region-model.cc.
(maybe_get_const_fn_result): Likewise.
(get_result_size_in_bytes): Likewise.
(call_details::set_any_lhs_with_defaults): New function, based on
code in region_model::on_call_pre.
* call-details.h (call_details::set_any_lhs_with_defaults): New
decl.
* diagnostic-manager.cc
(diagnostic_manager::emit_saved_diagnostic): Log the index of the
saved_diagnostic.
* kf.cc (pure_known_function_with_default_return::impl_call_pre):
New.
(kf_memset::impl_call_pre): Set the LHS to the first param.
(kf_putenv::impl_call_pre): Call cd.set_any_lhs_with_defaults.
(kf_sprintf::impl_call_pre): Call cd.set_any_lhs_with_defaults.
(class kf_stack_restore): Derive from
pure_known_function_with_default_return.
(class kf_stack_save): Likewise.
(kf_strlen::impl_call_pre): Call cd.set_any_lhs_with_defaults.
* region-model-reachability.cc (reachable_regions::handle_sval):
Remove logic for symbolic regions for pointers.
* region-model.cc (region_model::canonicalize): Remove purging of
dynamic extents workaround for surplus values from
region_model::on_call_pre's default LHS code.
(const_fn_p): Move to call-details.cc.
(maybe_get_const_fn_result): Likewise.
(get_result_size_in_bytes): Likewise.
(region_model::update_for_nonzero_return): Call
cd.set_any_lhs_with_defaults.
(region_model::on_call_pre): Remove the assignment to the LHS of a
default return value, instead requiring all known_function
implementations to write to any LHS of the call.  Use
cd.set_any_lhs_with_defaults on the non-kf paths.
* sm-fd.cc (kf_socket::outcome_of_socket::update_model): Use
cd.set_any_lhs_with_defaults when failing to get at fd state.
(kf_bind::outcome_of_bind::update_model): Likewise.
(kf_listen::outcome_of_listen::update_model): Likewise.
(kf_accept::outcome_of_accept::update_model): Likewise.
(kf_connect::outcome_of_connect::update_model): Likewise.
(kf_read::impl_call_pre): Use cd.set_any_lhs_with_defaults.
* sm-file.cc (class kf_stdio_output_fn): Derive from
pure_known_function_with_default_return.
(class kf_ferror): Likewise.
(class kf_fileno): Likewise.
(kf_fgets::impl_call_pre): Use cd.set_any_lhs_with_defaults.
(kf_read::impl_call_pre): Likewise.
(class kf_getc): Derive from
pure_known_function_with_default_return.
(class kf_getchar): Likewise.
* varargs.cc (kf_va_arg::impl_call_pre): Use
cd.set_any_lhs_with_defaults.

gcc/testsuite/ChangeLog:
* gcc.dg/analyzer/allocation-size-1.c: Update expected results
to reflect splitting of allocation size 

Re: [PATCH] Add -Wdisabled-optimization warning for not optimizing sibling calls

2023-08-08 Thread David Malcolm via Gcc-patches
On Tue, 2023-08-08 at 08:05 +0200, Richard Biener wrote:
> On Mon, Aug 7, 2023 at 9:04 PM Bradley Lucier 
> wrote:
> > 
> > Thank you for your comments.  I have a few questions.
> > 
> > > I don't think this specific case qualifies for -Wdisabled-
> > > optimization.
> > > The diagnostic is for cases the user can control and was invented
> > > for limits we put up for compile-time and memory-usage issues
> > > where there exist --param XYZ to adjust limits.
> > > 
> > > It would be more appropriate to change this to
> > > 
> > >    dump_printf_loc (MSG_MISSED_OPTIMIZATION, ...)
> > > 
> > > where this was designed to diagnose cases the compiler failed to
> > > optimize for other reasons than running into some --param.
> > 
> > I'm sorry, I don't understand what dump_printf_loc does, where does
> > it
> > dump this information?  What is the form of information that is
> > usually
> > dumped, and for which purpose?
> 
> dump_printf_loc is able to direct the information to multiple places,
> for one it amends the IL/pass dump file produced with -fdump-tree-
> ,
> but it also produces information for the -fopt-info* family of
> switches.
> For example -fopt-info-vec produces a set of vectorized locations
> as diagnostics, -fopt-info-vec-missed a set of diagnostics related to
> missed vectorization opportunities.  GCC recently gained the ability
> to
> format some of the diagnostics as SARIF, I'm not sure if -fopt-info
> related
> diagnostics are covered, 

FWIW the SARIF output doesn't contain the -fopt-info stuff.

> but at least -fsave-optimization-record produces
> JSON output.
> 
> So we now have a better and more general machinery to diagnose
> optimized/missed optimized things for passes than -Wdisabled-
> optimization
> and _disabled_ optimization should now be taken literally when the
> optimization
> was disabled by the user via a (default) choice of a --param value
> for example
> (or a conflicting different optimization option).
> 
> The granularity for reporting -fopt-info is optimization groups (see
> dumpfile.h, the OPTGROUP_* enum), I believe
> maybe_complain_about_tail_call
> invocations are all from RTL expansion which currently is
> OPTGROUP_NONE which would have to change (I think it's currently
> not possible to explicitly specify an alternate optgroup at
> dump_printf time
> (David?).

IIRC, the dumping machinery tries to consolidate the dump messages into
"optinfo" instances, where a dump_*_loc call starts a new optinfo, and
it's then that the optgroup can be specified.  But it's been a while
since I last looked at this code.

> 
> So the simplest way forward would be to add, say, OPTGROUP_EXPAND,
> use dump_printf in maybe_complain_about_tail_call (which will also
> dump info to the .expand debug dump which looks useful) and recognize
> -fopt-info-expand.

[...snip...]

Dave



Re: [RFC] GCC Security policy

2023-08-08 Thread David Malcolm via Gcc-patches
On Tue, 2023-08-08 at 09:33 -0400, Paul Koning via Gcc-patches wrote:
> 
> 
> > On Aug 8, 2023, at 9:01 AM, Jakub Jelinek via Gcc-patches
> >  wrote:
> > 
> > On Tue, Aug 08, 2023 at 02:52:57PM +0200, Richard Biener via Gcc-
> > patches wrote:
> > > There's probably external tools to do this, not sure if we should
> > > replicate
> > > things in the driver for this.
> > > 
> > > But sure, I think the driver is the proper point to address any
> > > of such
> > > issues - iff we want to address them at all.  Maybe a nice little
> > > google summer-of-code project ;)
> > 
> > What I'd really like to avoid is having all compiler bugs
> > (primarily ICEs)
> > considered to be security bugs (e.g. DoS category), it would be
> > terrible to
> > release every week a new compiler because of the "security" issues.
> 
> Indeed.  But my answer would be that such things are not DoS issues. 
> DoS means that an external input, over which you have little control,
> is impairing service.  In the case of a compiler, if feeding it bad
> source code X.c causes it to crash, the answer is "well, then don't
> do that".

Agreed.

I'm not sure how to "wordsmith" this, but it seems like the sources and
options on the *host* are assumed to be trusted, and that the act of
*compiling* source on the host requires trusting them, just like the
act of executing the compiled code on the target does.  Though users
may be more familiar with sandboxing the target than the host.

We should spell this out further for libgccjit: libgccjit allows for
ahead-of-time and JIT compilation of sources - but it assumes that
those sources (and the compilation options) are trusted.

[Adding Andrea Corallo to the addressees]

For example, Emacs is using libgccjit to do ahead-of-time compilation
of Emacs bytecode.  I'm assuming that Emacs is assuming that its
bytecode is trusted, and that there isn't any attempt by Emacs to
sandbox the Emacs Lisp being processed.

However, consider a situation in which someone attempted to, say, embed
libgccjit inside a web browser to generate machine code from
JavaScript, where the JavaScript is potentially controlled by an
attacker.  I think we want to explicitly say that if you're going
to do that, you need to put some other layer of defense in, so that
you're not blithely accepting the inputs to the compilation (sources
and options) from a potentially hostile source, where crafted input
sources could potentially hit an ICE in the compiler and thus crash the
web browser.

Dave



Re: [PATCH] Add -Wdisabled-optimization warning for not optimizing sibling calls

2023-08-05 Thread David Malcolm via Gcc-patches
On Sun, 2023-08-06 at 02:28 +0530, Prathamesh Kulkarni via Gcc-patches
wrote:
> On Fri, 4 Aug 2023 at 23:28, Bradley Lucier via Gcc-patches
>  wrote:

Hi Bradley and Prathamesh...

> > 
> > The patch at the end adds a warning when a tail/sibling call cannot
> > be
> > optimized for various reasons.
> > 
> > I built and tested GCC with and without the patch with
> > configuration
> > 
> > Configured with: ../../gcc-mainline/configure --enable-languages=c
> > --disable-multilib --prefix=/pkgs/gcc-mainline --disable-werror
> > 
> > There were some changes in the test results, but I can't say that
> > they
> > look substantive:
> > 

[...]

> > 
> > to test the new warning.  The warnings are of the form, e.g.,
> > 
> > ../../../gcc-mainline/gcc/tree-vect-stmts.cc:11990:44: warning:
> > cannot
> > apply sibling-call optimization: callee required more stack slots
> > than
> > the caller [-Wdisabled-optimization]
> > 
> > These are the number of times this warning was triggered building
> > stage1:
> > 
> > grep warning: build.log | grep sibling | sed 's/^.*://' | sort |
> > uniq -c
> >  259  callee required more stack slots than the caller
> > [-Wdisabled-optimization]
> >   43  callee returns a structure [-Wdisabled-optimization]
> > 
> > If this patch is OK, someone else will need to commit it for me.
> > 
> > Brad
> > 
> > gcc/Changelog
> > 
> >     * calls.cc (maybe_complain_about_tail_call) Add warning
> > when
> >     tail or sibling call cannot be optimized.
> Hi Bradley,
> I don't have comments on the patch, but a new warning will also
> require a corresponding entry in doc/invoke.texi.

To nitpick, this isn't a new warning; the patch is extending an
existing warning.  Looking at the existing entry for that warning I
see:

@opindex Wdisabled-optimization
@opindex Wno-disabled-optimization
@item -Wdisabled-optimization
Warn if a requested optimization pass is disabled.  This warning does
not generally indicate that there is anything wrong with your code; it
merely indicates that GCC's optimizers are unable to handle the code
effectively.  Often, the problem is that your code is too big or too
complex; GCC refuses to optimize programs when the optimization
itself is likely to take inordinate amounts of time.

...which arguably fits the new functionality.  Though I don't know how
the optimizer maintainers feel about it.  Also, as we add more stuff to
this warning, would users need more fine-grained control over which
things for the optimizer to complain about?  I'm not sure.

> 
> Thanks,
> Prathamesh
> > 
> > diff --git a/gcc/calls.cc b/gcc/calls.cc
> > index 1f3a6d5c450..b95c876fda8 100644
> > --- a/gcc/calls.cc
> > +++ b/gcc/calls.cc
> > @@ -1242,10 +1242,12 @@ void
> >   maybe_complain_about_tail_call (tree call_expr, const char
> > *reason)
> >   {
> >     gcc_assert (TREE_CODE (call_expr) == CALL_EXPR);
> > -  if (!CALL_EXPR_MUST_TAIL_CALL (call_expr))
> > -    return;
> > -
> > -  error_at (EXPR_LOCATION (call_expr), "cannot tail-call: %s",
> > reason);
> > +  if (CALL_EXPR_MUST_TAIL_CALL (call_expr))
> > +    error_at (EXPR_LOCATION (call_expr), "cannot tail-call: %s",
> > reason);

The existing code uses error_at, passing it the location of the
call_expr...

> > +  else if (flag_optimize_sibling_calls)
> > +    warning (OPT_Wdisabled_optimization,
> > + "cannot apply sibling-call optimization: %s",
> > reason);

...but the warning branch uses "warning", which implicitly uses the
input_location global variable.  Is the warning reported at the correct
place?  It's better to use warning_at and pass it the location at which
the warning should be emitted.

The patch doesn't add any test cases, but I imagine any such cases
would be very target-dependent (did I add any to my libgccjit version
of this way back when?)

Thanks for the patch; hope this is constructive
Dave



> > +  return;
> >   }
> > 
> >   /* Fill in ARGS_SIZE and ARGS array based on the parameters found
> > in
> > 
> > 
> 



[pushed] analyzer: handle function attribute "alloc_size" [PR110426]

2023-08-04 Thread David Malcolm via Gcc-patches
This patch makes -fanalyzer make use of the function attribute
"alloc_size", allowing -fanalyzer to emit -Wanalyzer-allocation-size,
-Wanalyzer-out-of-bounds, and -Wanalyzer-tainted-allocation-size on
execution paths involving allocations using such functions.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3001-g021077b94741c9.

gcc/analyzer/ChangeLog:
PR analyzer/110426
* bounds-checking.cc (region_model::check_region_bounds): Handle
symbolic base regions.
* call-details.cc: Include "stringpool.h" and "attribs.h".
(call_details::lookup_function_attribute): New function.
* call-details.h (call_details::lookup_function_attribute): New
function decl.
* region-model-manager.cc
(region_model_manager::maybe_fold_binop): Add reference to
PR analyzer/110902.
* region-model-reachability.cc (reachable_regions::handle_sval):
Add symbolic regions for pointers that are conjured svalues for
the LHS of a stmt.
* region-model.cc (region_model::canonicalize): Purge dynamic
extents for regions that aren't referenced.
(get_result_size_in_bytes): New function.
(region_model::on_call_pre): Use get_result_size_in_bytes and
potentially set the dynamic extents of the region pointed to by
the return value.
(region_model::deref_rvalue): Add param "add_nonnull_constraint"
and use it to conditionalize adding the constraint.
(pending_diagnostic_subclass::dubious_allocation_size): Add "stmt"
param to both ctors and use it to initialize new "m_stmt" field.
(pending_diagnostic_subclass::operator==): Use m_stmt; don't use
m_lhs or m_rhs.
(pending_diagnostic_subclass::m_stmt): New field.
(region_model::check_region_size): Generalize to any kind of
pointer svalue by using deref_rvalue rather than checking for
region_svalue.  Pass stmt to dubious_allocation_size ctor.
* region-model.h (region_model::deref_rvalue): Add param
"add_nonnull_constraint".
* svalue.cc (conjured_svalue::lhs_value_p): New function.
* svalue.h (conjured_svalue::lhs_value_p): New decl.

gcc/testsuite/ChangeLog:
PR analyzer/110426
* gcc.dg/analyzer/allocation-size-1.c: Update expected message to
reflect consolidation of size and assignment into a single event.
* gcc.dg/analyzer/allocation-size-2.c: Likewise.
* gcc.dg/analyzer/allocation-size-3.c: Likewise.
* gcc.dg/analyzer/allocation-size-4.c: Likewise.
* gcc.dg/analyzer/allocation-size-multiline-1.c: Likewise.
* gcc.dg/analyzer/allocation-size-multiline-2.c: Likewise.
* gcc.dg/analyzer/allocation-size-multiline-3.c: Likewise.
* gcc.dg/analyzer/attr-alloc_size-1.c: New test.
* gcc.dg/analyzer/attr-alloc_size-2.c: New test.
* gcc.dg/analyzer/attr-alloc_size-3.c: New test.
* gcc.dg/analyzer/explode-4.c: New test.
* gcc.dg/analyzer/taint-size-1.c: Add test coverage for
__attribute__ alloc_size.
---
 gcc/analyzer/bounds-checking.cc   |  12 +-
 gcc/analyzer/call-details.cc  |  21 +++
 gcc/analyzer/call-details.h   |   2 +
 gcc/analyzer/region-model-manager.cc  |   2 +
 gcc/analyzer/region-model-reachability.cc |  21 +++
 gcc/analyzer/region-model.cc  | 109 ++--
 gcc/analyzer/region-model.h   |   3 +-
 gcc/analyzer/svalue.cc|  11 ++
 gcc/analyzer/svalue.h |   1 +
 .../gcc.dg/analyzer/allocation-size-1.c   |   3 +-
 .../gcc.dg/analyzer/allocation-size-2.c   |   3 +-
 .../gcc.dg/analyzer/allocation-size-3.c   |   9 +-
 .../gcc.dg/analyzer/allocation-size-4.c   |   6 +-
 .../analyzer/allocation-size-multiline-1.c|  12 +-
 .../analyzer/allocation-size-multiline-2.c|  15 +-
 .../analyzer/allocation-size-multiline-3.c|  10 +-
 .../gcc.dg/analyzer/attr-alloc_size-1.c   |  81 +
 .../gcc.dg/analyzer/attr-alloc_size-2.c   |  19 +++
 .../gcc.dg/analyzer/attr-alloc_size-3.c   |  14 ++
 gcc/testsuite/gcc.dg/analyzer/explode-4.c | 157 ++
 gcc/testsuite/gcc.dg/analyzer/taint-size-1.c  |  10 ++
 21 files changed, 458 insertions(+), 63 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/attr-alloc_size-1.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/attr-alloc_size-2.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/attr-alloc_size-3.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/explode-4.c

diff --git a/gcc/analyzer/bounds-checking.cc b/gcc/analyzer/bounds-checking.cc
index 5e8de9a7aa5..f49cf7cf2af 100644
--- a/gcc/analyzer/bounds-checking.cc
+++ b/gcc/analyzer/bounds-checking.cc
@@ -981,12 +981,6 @@ region_model::check_region_bounds (const region *reg,
   region_offset reg_offset = 

[pushed] analyzer: fix some svalue::dump_to_pp implementations

2023-08-04 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3000-g187b213ddbe7ea.

gcc/analyzer/ChangeLog:
* svalue.cc (region_svalue::dump_to_pp): Support NULL type.
(constant_svalue::dump_to_pp): Likewise.
(initial_svalue::dump_to_pp): Likewise.
(conjured_svalue::dump_to_pp): Likewise.  Fix missing print of the
type.
---
 gcc/analyzer/svalue.cc | 27 ---
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/gcc/analyzer/svalue.cc b/gcc/analyzer/svalue.cc
index 4395018dbc3..5d5c80f88c6 100644
--- a/gcc/analyzer/svalue.cc
+++ b/gcc/analyzer/svalue.cc
@@ -714,8 +714,11 @@ region_svalue::dump_to_pp (pretty_printer *pp, bool 
simple) const
   else
 {
   pp_string (pp, "region_svalue(");
-  print_quoted_type (pp, get_type ());
-  pp_string (pp, ", ");
+  if (get_type ())
+   {
+ print_quoted_type (pp, get_type ());
+ pp_string (pp, ", ");
+   }
   m_reg->dump_to_pp (pp, simple);
   pp_string (pp, ")");
 }
@@ -811,8 +814,11 @@ constant_svalue::dump_to_pp (pretty_printer *pp, bool 
simple) const
   else
 {
   pp_string (pp, "constant_svalue(");
-  print_quoted_type (pp, get_type ());
-  pp_string (pp, ", ");
+  if (get_type ())
+   {
+ print_quoted_type (pp, get_type ());
+ pp_string (pp, ", ");
+   }
   dump_tree (pp, m_cst_expr);
   pp_string (pp, ")");
 }
@@ -1029,8 +1035,11 @@ initial_svalue::dump_to_pp (pretty_printer *pp, bool 
simple) const
   else
 {
   pp_string (pp, "initial_svalue(");
-  print_quoted_type (pp, get_type ());
-  pp_string (pp, ", ");
+  if (get_type ())
+   {
+ print_quoted_type (pp, get_type ());
+ pp_string (pp, ", ");
+   }
   m_reg->dump_to_pp (pp, simple);
   pp_string (pp, ")");
 }
@@ -1910,7 +1919,11 @@ conjured_svalue::dump_to_pp (pretty_printer *pp, bool 
simple) const
   else
 {
   pp_string (pp, "conjured_svalue (");
-  pp_string (pp, ", ");
+  if (get_type ())
+   {
+ print_quoted_type (pp, get_type ());
+ pp_string (pp, ", ");
+   }
   pp_gimple_stmt_1 (pp, m_stmt, 0, (dump_flags_t)0);
   pp_string (pp, ", ");
   m_id_reg->dump_to_pp (pp, simple);
-- 
2.26.3



Re: [PATCH v2] analyzer: stash values for CPython plugin [PR107646]

2023-08-03 Thread David Malcolm via Gcc-patches
On Thu, 2023-08-03 at 11:28 -0400, Eric Feng wrote:
> On Wed, Aug 2, 2023 at 5:09 PM David Malcolm 
> wrote:
> > 
> > On Wed, 2023-08-02 at 14:46 -0400, Eric Feng wrote:
> > 

[...snip...]

> > 
> > >  Otherwise, please let me know if I should request write
> > > access first (the GettingStarted page suggested requesting
> > > someone
> > > commit the patch for the first few patches before requesting
> > > write
> > > access).
> > 
> > Please go ahead and request write access now; we should have done
> > this
> > in the "community bonding" phase of GSoC; sorry for not catching
> > this.
> Sounds good.

FWIW once you have an @gcc.gnu.org account, I'd like to set you as the
"assignee" of PR107646 in bugzilla.

[...snip...]

Dave



Re: [PATCH] mid-end: Use integral time intervals in timevar.cc

2023-08-03 Thread David Malcolm via Gcc-patches
On Thu, 2023-08-03 at 15:54 +0100, Matthew Malcomson wrote:
> On 8/3/23 15:09, David Malcolm wrote:
> > 
> > Hi Matthew.  I recently touched the timevar code (in r14-2881-
> > g75d623946d4b6e) to add support for serializing the timevar data in
> > JSON form as part of the SARIF output (PR analyzer/109361).
> > 
> > Looking at your patch, it looks like the baseline for the patch
> > seems
> > to predate r14-2881-g75d623946d4b6e.
> > 
> > I don't have a strong opinion on the implementation choices in your
> > patch, but please can you rebase to beyond my recent change and
> > make
> > sure that the SARIF serialization still works with your patch.
> > 
> > Specifically, please try compiling with
> >    -ftime-report -fdiagnostics-format=sarif-file
> > and have a look at the generated .sarif file, e.g. via
> >    python -m json.tool foo.c.sarif
> > which will pretty-print the JSON to stdout.
> > 
> > Currently I'm writing out the values as floating-point seconds, and
> > AFAIK my analyzer integration testsuite [1] is the only consumer of
> > this data.
> 
> Hi David,
> 
> Thanks for the heads-up.  Will update the patch.
> 
> I read your last paragraph as suggesting that you'd be open to
> changing 
> the format.  Is that correct?

I suppose, but I'd prefer to keep the existing format.

> 
> I would initially assume that writing out the time as floating-point 
> seconds would still be most convenient for your use since it looks to
> be 
> like something to be presented to a person.

Yes.  I may be biased in that with -fanalyzer the times tend to be
measured in seconds rather than fractions of seconds, alas.

> 
> However, since I don't know much about the intended uses of SARIF in 
> general I figured I should double-check -- does that choice to remain
> printing out floating-point seconds seem best to you?

I'd prefer to keep the JSON output as floating-point seconds, if that's
not too much of a pain.

Dave



> 
> > 
> > [...snip...]
> > 
> > Thanks
> > Dave
> > [1]
> > https://github.com/davidmalcolm/gcc-analyzer-integration-tests/issues/5
> > 
> 



[committed] testsuite, analyzer: add test case [PR108171]

2023-08-03 Thread David Malcolm via Gcc-patches
The ICE in PR analyzer/108171 appears to be a dup of the recently fixed
PR analyzer/110882 and is likewise fixed by it; adding this test case.

Successfully regrtested on x86_64-pc-linux-gnu.

Pushed to trunk as r14-2957-gf80efa49b7a163.

gcc/testsuite/ChangeLog:
PR analyzer/108171
* gcc.dg/analyzer/pr108171.c: New test.
---
 gcc/testsuite/gcc.dg/analyzer/pr108171.c | 31 
 1 file changed, 31 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr108171.c

diff --git a/gcc/testsuite/gcc.dg/analyzer/pr108171.c 
b/gcc/testsuite/gcc.dg/analyzer/pr108171.c
new file mode 100644
index 000..5f7b9fd7875
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/pr108171.c
@@ -0,0 +1,31 @@
+struct nl_context {
+  void *cmd_private;
+};
+
+struct sfeatures_context {
+  int a;
+  int req_mask[0];
+};
+
+int set_sf_req_mask_idx;
+
+extern void fill_legacy_flag();
+
+void
+fill_sfeatures_bitmap(struct nl_context *nlctx) {
+  while (nlctx) {
+fill_legacy_flag();
+struct nl_context __trans_tmp_1 = *nlctx;
+struct sfeatures_context *sfctx = __trans_tmp_1.cmd_private;
+sfctx->req_mask[set_sf_req_mask_idx] |= 1;
+  }
+}
+
+void
+nl_sfeatures() {
+  struct nl_context nlctx;
+  struct sfeatures_context *sfctx;
+  nlctx.cmd_private = 
+  sfctx = 0;
+  fill_sfeatures_bitmap();
+}
-- 
2.26.3



Re: [PATCH] mid-end: Use integral time intervals in timevar.cc

2023-08-03 Thread David Malcolm via Gcc-patches
On Thu, 2023-08-03 at 14:38 +0100, Matthew Malcomson via Gcc-patches
wrote:
> > 
> > I think this is undesirable.  With fused you mean we use FMA?
> > I think you could use -ffp-contract=off for the TU instead.
> > 
> > Note you can't use __attribute__((noinline)) literally since the
> > host compiler might not support this.
> > 
> > Richard.
> > 
> 
> 
> Trying to make the timevar store integral time intervals.
> Hope this is acceptable -- I had originally planned to use
> `-ffp-contract` as agreed until I saw the email mentioning the old
> x86
> bug in the same area which was not to do with floating point
> contraction
> of operations (PR 99903) and figured it would be better to try and
> solve
> both at the same time while making things in general a bit more
> robust.
> _
> ___
> 
> 
> On some AArch64 bootstrapped builds, we were getting a flaky test
> because the floating point operations in `get_time` were being fused
> with the floating point operations in `timevar_accumulate`.
> 
> This meant that the rounding behaviour of our multiplication with
> `ticks_to_msec` was different when used in `timer::start` and when
> performed in `timer::stop`.  These extra inaccuracies led to the
> testcase `g++.dg/ext/timevar1.C` being flaky on some hardware.
> 
> --
> Avoiding the inlining which was agreed to be undesirable.  Three
> alternative approaches:
> 1) Use `-ffp-contract=on` to avoid this particular optimisation.
> 2) Adjusting the code so that the "tolerance" is always of the order
> of
>    a "tick".
> 3) Recording times and elapsed differences in integral values.
>    - Could be in terms of a standard measurement (e.g. nanoseconds or
>  microseconds).
>    - Could be in terms of whatever integral value ("ticks" /
>  seconds / "clock ticks") is returned from the
> syscall
>  chosen at configure time.
> 
> While `-ffp-contract=on` removes the problem that I bumped into,
> there
> has been a similar bug on x86 that was to do with a different
> floating
> point problem that also happens after `get_time` and
> `timevar_accumulate` both being inlined into the same function. 
> Hence
> it seems worth choosing a different approach.
> 
> Of the two other solutions, recording measurements in integral values
> seems the most robust against slightly "off" measurements being
> presented to the user -- even though it could avoid the ICE that
> creates
> a flaky test.
> 
> I considered storing time in whatever units our syscall returns and
> normalising them at the time we print out rather than normalising
> them
> to nanoseconds at the point we record our "current time".  The logic
> being that normalisation could have some rounding effect (e.g. if
> TICKS_PER_SECOND is 3) that would be taken into account in
> calculations.
> 
> I decided against it in order to give the values recorded in
> `timevar_time_def` some interpretive value so it's easier to read the
> code.  Compared to the small rounding that would represent a tiny
> amount
> of time and AIUI can not trigger the same kind of ICE's as we are
> attempting to fix, said interpretive value seems more valuable.
> 
> Recording time in microseconds seemed reasonable since all obvious
> values for ticks and `getrusage` are at microsecond granularity or
> less
> precise.  That said, since TICKS_PER_SECOND and CLOCKS_PER_SEC are
> both
> variables given to us by the host system I was not sure of that
> enough
> to make this decision.
> 
> --
> timer::all_zero is ignoring rows which are inconsequential to the
> user
> and would be printed out as all zeros.  Since upon printing rows we
> convert to the same double value and print out the same precision as
> before, we return true/false based on the same amount of time as
> before.
> 
> timer::print_row casts to a floating point measurement in units of
> seconds as was printed out before.
> 
> timer::validate_phases -- I'm printing out nanoseconds here rather
> than
> floating point seconds since this is an error message for when things
> have "gone wrong" printing out the actual nanoseconds that have been
> recorded seems like the best approach.
> N.b. since we now print out nanoseconds instead of floating point
> value
> the padding requirements are different.  Originally we were padding
> to
> 24 characters and printing 18 decimal places.  This looked odd with
> the
> now visually smaller values getting printed.  I judged 13 characters
> (corresponding to 2 hours) to be a reasonable point at which our
> alignment could start to degrade and this provides a more compact
> output
> for the majority of cases (checked by triggering the error case via
> GDB).
> 
> --
> N.b. I use a literal 1000000000 for "NANOSEC_PER_SEC".  I believe
> this
> would fit in an integer on all hosts that GCC supports, but am not
> certain there are not strange integer sizes we 

[committed] analyzer: fix ICE on zero-sized arrays [PR110882]

2023-08-03 Thread David Malcolm via Gcc-patches
Successfully bootstrapped and regrtested on x86_64-pc-linux-gnu.

Pushed to trunk as r14-2955-gc62f93d1e0383d.

gcc/analyzer/ChangeLog:
PR analyzer/110882
* region.cc (int_size_in_bits): Fail on zero-sized types.

gcc/testsuite/ChangeLog:
PR analyzer/110882
* gcc.dg/analyzer/pr110882.c: New test.
---
 gcc/analyzer/region.cc   |  6 +-
 gcc/testsuite/gcc.dg/analyzer/pr110882.c | 18 ++
 2 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr110882.c

diff --git a/gcc/analyzer/region.cc b/gcc/analyzer/region.cc
index 9524739c7a4..730dab3d707 100644
--- a/gcc/analyzer/region.cc
+++ b/gcc/analyzer/region.cc
@@ -742,7 +742,11 @@ int_size_in_bits (const_tree type, bit_size_t *out)
 }
 
   tree sz = TYPE_SIZE (type);
-  if (sz && tree_fits_uhwi_p (sz))
+  if (sz
+  && tree_fits_uhwi_p (sz)
+  /* If the size is zero, then we may have a zero-sized
+array; handle such cases by returning false.  */
+  && !integer_zerop (sz))
 {
   *out = TREE_INT_CST_LOW (sz);
   return true;
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr110882.c 
b/gcc/testsuite/gcc.dg/analyzer/pr110882.c
new file mode 100644
index 000..80027184053
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/pr110882.c
@@ -0,0 +1,18 @@
+/* { dg-additional-options "-Wno-analyzer-too-complex" } */
+
+struct csv_row {
+  char *columns[0];
+};
+
+void
+parse_csv_line (int n_columns, const char *columns[])
+{
+  for (int n = 0; n < n_columns; n++) {
+  columns[n] = ((void *)0);
+  }
+}
+
+void parse_csv_data (int n_columns, struct csv_row *entry)
+{
+  parse_csv_line(n_columns, (const char **)entry->columns);
+}
-- 
2.26.3



Re: [PATCH v2] analyzer: stash values for CPython plugin [PR107646]

2023-08-02 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-02 at 14:46 -0400, Eric Feng wrote:
> On Wed, Aug 2, 2023 at 1:20 PM Marek Polacek 
> wrote:
> > 
> > On Wed, Aug 02, 2023 at 12:59:28PM -0400, David Malcolm wrote:
> > > On Wed, 2023-08-02 at 12:20 -0400, Eric Feng wrote:
> > > 

[Dropping Joseph and Marek from the CC]

[...snip...]

> 
> 
> Thank you, everyone. I've submitted a new patch with the described
> changes. 

Thanks.

> As I do not yet have write access, could someone please help
> me commit it?

I've pushed the v3 patch to trunk, as r14-2933-gfafe2d18f791c6; you can
see it at [1], so you're now officially a GCC contributor,
congratulations!

FWIW I had to do a little whitespace fixing on the ChangeLog entries
before the server-side hooks.commit-extra-checker would pass, as they
were indented with spaces, rather than tabs, so it complained thusly:

remote: *** The following commit was rejected by your 
hooks.commit-extra-checker script (status: 1)
remote: *** commit: 0a4a2dc7dad1dfe22be0b48fe0d8c50d216c8349
remote: *** ChangeLog format failed:
remote: *** ERR: line should start with a tab: "PR analyzer/107646"
remote: *** ERR: line should start with a tab: "* analyzer-language.cc 
(run_callbacks): New function."
remote: *** ERR: line should start with a tab: "
(on_finish_translation_unit): New function."
remote: *** ERR: line should start with a tab: "* analyzer-language.h 
(GCC_ANALYZER_LANGUAGE_H): New include."
remote: *** ERR: line should start with a tab: "(class 
translation_unit): New vfuncs."
remote: *** ERR: line should start with a tab: "PR analyzer/107646"
remote: *** ERR: line should start with a tab: "* c-parser.cc: New 
functions on stashing values for the"
remote: *** ERR: line should start with a tab: "  analyzer."
remote: *** ERR: line should start with a tab: "PR analyzer/107646"
remote: *** ERR: line should start with a tab: "* 
gcc.dg/plugin/plugin.exp: Add new plugin and test."
remote: *** ERR: line should start with a tab: "* 
gcc.dg/plugin/analyzer_cpython_plugin.c: New plugin."
remote: *** ERR: line should start with a tab: "* 
gcc.dg/plugin/cpython-plugin-test-1.c: New test."
remote: *** ERR: PR 107646 in subject but not in changelog: "analyzer: stash 
values for CPython plugin [PR107646]"
remote: *** 
remote: *** Please see: https://gcc.gnu.org/codingconventions.html#ChangeLogs
remote: *** 
remote: error: hook declined to update refs/heads/master
To git+ssh://gcc.gnu.org/git/gcc.git
 ! [remote rejected] master -> master (hook declined)
error: failed to push some refs to 'git+ssh://dmalc...@gcc.gnu.org/git/gcc.git'

...but this was a trivial fix.  You can test that patches are properly
formatted by running:

  ./contrib/gcc-changelog/git_check_commit.py HEAD

locally.


>  Otherwise, please let me know if I should request write
> access first (the GettingStarted page suggested requesting someone
> commit the patch for the first few patches before requesting write
> access).

Please go ahead and request write access now; we should have done this
in the "community bonding" phase of GSoC; sorry for not catching this.

Thanks again for the patch.  How's the followup work?  Are you close to
being able to post one or more of the simpler known_function
subclasses?

Dave

[1] 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fafe2d18f791c6b97b49af7c84b1b5703681c3af



Re: [PATCH v2] analyzer: stash values for CPython plugin [PR107646]

2023-08-02 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-02 at 12:20 -0400, Eric Feng wrote:

Hi Eric, thanks for the updated patch.

Overall, looks good to me, although I'd drop the "Exited." from the
"sorry" message (and thus from the dg-message directive), since the
compiler is not exiting, it's just the particular plugin that's giving
up (but let's not hold up the patch with a "bikeshed" discussion on the
precise wording).

If Joseph or Marek approves the C parts of the patch, this will be OK
to push to trunk.

Dave

> Revised:
> -- Fix indentation problems
> -- Add more detail to Changelog
> -- Add new test on handling non-CPython code case
> -- Turn off debugging inform by default
> -- Make on_finish_translation_unit() static
> -- Remove superfluous null checks in init_py_structs()
> 
> Changes have been bootstrapped and tested against trunk on aarch64-
> unknown-linux-gnu.
> 
> ---
> This patch adds a hook to the end of ana::on_finish_translation_unit
> which calls relevant stashing-related callbacks registered during
> plugin
> initialization. This feature is used to stash named types and global
> variables for a CPython analyzer plugin [PR107646].
> 
> gcc/analyzer/ChangeLog:
> PR analyzer/107646
>     * analyzer-language.cc (run_callbacks): New function.
>     (on_finish_translation_unit): New function.
>     * analyzer-language.h (GCC_ANALYZER_LANGUAGE_H): New include.
>     (class translation_unit): New vfuncs.
> 
> gcc/c/ChangeLog:
> PR analyzer/107646
>     * c-parser.cc: New functions on stashing values for the
>   analyzer.
> 
> gcc/testsuite/ChangeLog:
> PR analyzer/107646
>     * gcc.dg/plugin/plugin.exp: Add new plugin and test.
>     * gcc.dg/plugin/analyzer_cpython_plugin.c: New plugin.
>     * gcc.dg/plugin/cpython-plugin-test-1.c: New test.
> 
> Signed-off-by: Eric Feng 
> ---
>  gcc/analyzer/analyzer-language.cc |  22 ++
>  gcc/analyzer/analyzer-language.h  |   9 +
>  gcc/c/c-parser.cc |  26 ++
>  .../gcc.dg/plugin/analyzer_cpython_plugin.c   | 230
> ++
>  .../gcc.dg/plugin/cpython-plugin-test-1.c |   8 +
>  gcc/testsuite/gcc.dg/plugin/plugin.exp    |   2 +
>  6 files changed, 297 insertions(+)
>  create mode 100644
> gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
>  create mode 100644 gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-
> 1.c
> 
> diff --git a/gcc/analyzer/analyzer-language.cc
> b/gcc/analyzer/analyzer-language.cc
> index 2c8910906ee..85400288a93 100644
> --- a/gcc/analyzer/analyzer-language.cc
> +++ b/gcc/analyzer/analyzer-language.cc
> @@ -35,6 +35,26 @@ static GTY (()) hash_map 
> *analyzer_stashed_constants;
>  #if ENABLE_ANALYZER
>  
>  namespace ana {
> +static vec
> +    *finish_translation_unit_callbacks;
> +
> +void
> +register_finish_translation_unit_callback (
> +    finish_translation_unit_callback callback)
> +{
> +  if (!finish_translation_unit_callbacks)
> +    vec_alloc (finish_translation_unit_callbacks, 1);
> +  finish_translation_unit_callbacks->safe_push (callback);
> +}
> +
> +static void
> +run_callbacks (logger *logger, const translation_unit )
> +{
> +  for (auto const  : finish_translation_unit_callbacks)
> +    {
> +  cb (logger, tu);
> +    }
> +}
>  
>  /* Call into TU to try to find a value for NAME.
>     If found, stash its value within analyzer_stashed_constants.  */
> @@ -102,6 +122,8 @@ on_finish_translation_unit (const
> translation_unit )
>  the_logger.set_logger (new logger (logfile, 0, 0,
>    *global_dc->printer));
>    stash_named_constants (the_logger.get_logger (), tu);
> +
> +  run_callbacks (the_logger.get_logger (), tu);
>  }
>  
>  /* Lookup NAME in the named constants stashed when the frontend TU
> finished.
> diff --git a/gcc/analyzer/analyzer-language.h
> b/gcc/analyzer/analyzer-language.h
> index 00f85aba041..8deea52d627 100644
> --- a/gcc/analyzer/analyzer-language.h
> +++ b/gcc/analyzer/analyzer-language.h
> @@ -21,6 +21,8 @@ along with GCC; see the file COPYING3.  If not see
>  #ifndef GCC_ANALYZER_LANGUAGE_H
>  #define GCC_ANALYZER_LANGUAGE_H
>  
> +#include "analyzer/analyzer-logging.h"
> +
>  #if ENABLE_ANALYZER
>  
>  namespace ana {
> @@ -35,8 +37,15 @@ class translation_unit
>   have been seen).  If it is defined and an integer (e.g. either
> as a
>   macro or enum), return the INTEGER_CST value, otherwise return
> NULL.  */
>    virtual tree lookup_constant_by_id (tree id) const = 0;
> +  virtual tree lookup_type_by_id (tree id) const = 0;
> +  virtual tree lookup_global_var_by_id (tree id) const = 0;
>  };
>  
> +typedef void (*finish_translation_unit_callback)
> +   (logger *, const translation_unit &);
> +void register_finish_translation_unit_callback (
> +    finish_translation_unit_callback callback);
> +
>  /* Analyzer hook for frontends to call at the end of the TU.  */
>  
>  void on_finish_translation_unit (const translation_unit );
> diff --git 

Re: [PATCH] analyzer: stash values for CPython plugin [PR107646]

2023-08-01 Thread David Malcolm via Gcc-patches
On Tue, 2023-08-01 at 09:52 -0400, Eric Feng wrote:
> Hi all,
> 
> This patch adds a hook to the end of ana::on_finish_translation_unit
> which calls relevant stashing-related callbacks registered during
> plugin
> initialization. This feature is used to stash named types and global
> variables for a CPython analyzer plugin [PR107646].
> 
> Bootstrapped and tested on aarch64-unknown-linux-gnu. Does it look
> okay?

Hi Eric, thanks for the patch.

The patch touches the C frontend, so those parts would need approval
from the C FE maintainers/reviewers; I've CCed them.

Overall, I like the patch, but it's not ready for trunk yet; various
comments inline below...

> 
> ---
> 
> gcc/analyzer/ChangeLog:

You could add: PR analyzer/107646 to these ChangeLog entries; have a
look at how other ChangeLog entries refer to such bugzilla entries.

> 
>     * analyzer-language.cc (run_callbacks): New function.
>     (on_finish_translation_unit): New function.
>     * analyzer-language.h (GCC_ANALYZER_LANGUAGE_H): New include.
>     (class translation_unit): New vfuncs.
> 
> gcc/c/ChangeLog:
> 
>     * c-parser.cc: New functions.

I think this ChangeLog entry needs more detail.
> 
> gcc/testsuite/ChangeLog:
> 
>     * gcc.dg/plugin/analyzer_cpython_plugin.c: New test.
> 
> Signed-off-by: Eric Feng 
> ---
>  gcc/analyzer/analyzer-language.cc |  22 ++
>  gcc/analyzer/analyzer-language.h  |   9 +
>  gcc/c/c-parser.cc |  26 ++
>  .../gcc.dg/plugin/analyzer_cpython_plugin.c   | 224
> ++
>  4 files changed, 281 insertions(+)
>  create mode 100644
> gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> 
> diff --git a/gcc/analyzer/analyzer-language.cc
> b/gcc/analyzer/analyzer-language.cc
> index 2c8910906ee..fc41b9c17b8 100644
> --- a/gcc/analyzer/analyzer-language.cc
> +++ b/gcc/analyzer/analyzer-language.cc
> @@ -35,6 +35,26 @@ static GTY (()) hash_map 
> *analyzer_stashed_constants;
>  #if ENABLE_ANALYZER
> 
>  namespace ana {
> +static vec
> +    *finish_translation_unit_callbacks;
> +
> +void
> +register_finish_translation_unit_callback (
> +    finish_translation_unit_callback callback)
> +{
> +  if (!finish_translation_unit_callbacks)
> +    vec_alloc (finish_translation_unit_callbacks, 1);
> +  finish_translation_unit_callbacks->safe_push (callback);
> +}
> +
> +void
> +run_callbacks (logger *logger, const translation_unit )

This function could be "static" since it's not needed outside of
analyzer-language.cc

> +{
> +  for (auto const  : finish_translation_unit_callbacks)
> +    {
> +  cb (logger, tu);
> +    }
> +}
> 
>  /* Call into TU to try to find a value for NAME.
>     If found, stash its value within analyzer_stashed_constants.  */
> @@ -102,6 +122,8 @@ on_finish_translation_unit (const
> translation_unit )
>  the_logger.set_logger (new logger (logfile, 0, 0,
>  *global_dc->printer));
>    stash_named_constants (the_logger.get_logger (), tu);
> +
> +  run_callbacks (the_logger.get_logger (), tu);
>  }
> 
>  /* Lookup NAME in the named constants stashed when the frontend TU
> finished.
> diff --git a/gcc/analyzer/analyzer-language.h
> b/gcc/analyzer/analyzer-language.h
> index 00f85aba041..8deea52d627 100644
> --- a/gcc/analyzer/analyzer-language.h
> +++ b/gcc/analyzer/analyzer-language.h
> @@ -21,6 +21,8 @@ along with GCC; see the file COPYING3.  If not see
>  #ifndef GCC_ANALYZER_LANGUAGE_H
>  #define GCC_ANALYZER_LANGUAGE_H
> 
> +#include "analyzer/analyzer-logging.h"
> +
>  #if ENABLE_ANALYZER
> 
>  namespace ana {
> @@ -35,8 +37,15 @@ class translation_unit
>   have been seen).  If it is defined and an integer (e.g. either
> as a
>   macro or enum), return the INTEGER_CST value, otherwise return
> NULL.  */
>    virtual tree lookup_constant_by_id (tree id) const = 0;
> +  virtual tree lookup_type_by_id (tree id) const = 0;
> +  virtual tree lookup_global_var_by_id (tree id) const = 0;
>  };
> 
> +typedef void (*finish_translation_unit_callback)
> +   (logger *, const translation_unit &);
> +void register_finish_translation_unit_callback (
> +    finish_translation_unit_callback callback);
> +
>  /* Analyzer hook for frontends to call at the end of the TU.  */
> 
>  void on_finish_translation_unit (const translation_unit );
> diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
> index 80920b31f83..f0ee55e416b 100644
> --- a/gcc/c/c-parser.cc
> +++ b/gcc/c/c-parser.cc
> @@ -1695,6 +1695,32 @@ public:
>  return NULL_TREE;
>    }
> 
> +  tree
> +  lookup_type_by_id (tree id) const final override
> +  {
> +    if (tree type_decl = lookup_name (id))
> +  {
> + if (TREE_CODE (type_decl) == TYPE_DECL)
> + {
> + tree record_type = TREE_TYPE (type_decl);
> + if (TREE_CODE (record_type) == RECORD_TYPE)
> + return record_type;
> + }

It looks like something's wrong with the indentation here, but the idea
seems OK to me (but needs C FE reviewer approval).

> +  }
> +
> +    return NULL_TREE;

Re: [PATCH] analyzer: Add support of placement new and improved operator new [PR105948]

2023-07-31 Thread David Malcolm via Gcc-patches
On Mon, 2023-07-31 at 13:46 +0200, Benjamin Priour wrote:
> Hi Dave,
> 
> On Fri, Jul 21, 2023 at 10:10 PM David Malcolm 
> wrote:

[...snip...]

> > 
> > I see that we have test coverage for:
> >   noexcept-new.C: -fno-exceptions with new vs nothrow-new
> > whereas:
> >   new-2.C has (implicitly) -fexceptions with new
> > 
> > It seems that of the four combinations for:
> >   - exceptions enabled or disabled
> > and:
> >   - throwing versus non-throwing new
> > this is covering three of the cases but is missing the case of
> > nothrow-
> > new when exceptions are enabled.
> > Presumably new-2.C should gain test coverage for this case.  Or am
> > I
> > missing something here?  Am I right in thinking that it's possible
> > for
> > the user to use nothrow new when exceptions are enabled to get a
> > new
> > that can fail and return nullptr?  Or is that not possible?
> > 
> > 
> Thanks a lot for spotting that, the new test pointed out an issue
> with the
> detection of nothrow.
> It has been fixed and now both test cases behave similarly.
> However, this highlighted a faulty test case I had written.
> 
> int* y = new(std::nothrow) int();
> int z = *y + 2; /* { dg-warning "dereference of NULL 'y'" } */
> /* { dg-warning "use of uninitialized value '\\*y'" "" { xfail *-*-*
> } .-1
> } */ // (#) should be a bogus
> delete y;
> 
> The test labelled (#) is wrong and should be a bogus instead.

Am I right in thinking that by this you mean that with the patch, the
analyzer complains about "use of uninitialized value '*y'" ? (which
would be an incorrect warning)

> If "y" is null then the allocation failed and dereferencing "y" will
> cause
> a segfault, not a "use-of-uninitialized-value".
> Thus we should stick to 'dereference of NULL 'y'" only.
> If "y" is non-null then the allocation succeeded and "*y" is
> initialized
> since we are calling a default initialization with the empty
> parenthesis.

I *think* it's possible to have the region_model have y pointing to a
heap_allocated_region of sizeof(int) size that's been initialized, but
still have the malloc state machine part of the program_state say that
the pointer is maybe-null.

What does the gimple look like and what does the program_state look
like after the assignment to y?

> 
> This led me to consider having "null-dereference" supersedes
> "use-of-uninitialized-value", but
> new PR 110830 made me reexamine it.
> 
> I believe fixing PR 110830 is thus required before submitting this
> patch,
> or we would have some extra irrelevant warnings.

How bad would the problem be?  PR 110830 looks a little involved, so is
there a way to get the current patch in without dragging that extra
complexity in?


[...snip...]

Thanks
Dave



Re: [PATCH v3 0/4] diagnostics: libcpp: Overhaul locations for _Pragma tokens

2023-07-29 Thread David Malcolm via Gcc-patches
On Sat, 2023-07-29 at 10:27 -0400, Lewis Hyatt wrote:
> On Fri, Jul 28, 2023 at 6:22 PM David Malcolm 
> wrote:
> > 
> > On Fri, 2023-07-21 at 19:08 -0400, Lewis Hyatt wrote:
> > > Hello-
> > > 
> > > This is an update to the v2 patch series last sent in January:
> > > https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609473.html
> > > 
> > > While I did not receive any feedback on the v2 patches yet, they
> > > did
> > > need some
> > > rebasing on top of other recent commits to input.cc, so I thought
> > > it
> > > would be
> > > helpful to send them again now. The patches have not otherwise
> > > changed from
> > > v2, and the above-linked message explains how all the patches fit
> > > in
> > > with the
> > > original v1 series sent last November.
> > > 
> > > Dave, I would appreciate it very much if you could please let me
> > > know
> > > what you
> > > think of this approach? I feel like the diagnostics we currently
> > > output for _Pragmas are worth improving. As a reminder, say for
> > > this
> > > example:
> > > 
> > > =
> > >  #define S "GCC diagnostic ignored \"oops"
> > >  _Pragma(S)
> > > =
> > > 
> > > We currently output:
> > > 
> > > =
> > > file.cpp:2:24: warning: missing terminating " character
> > >     2 | _Pragma(S)
> > >   |    ^
> > > =
> > > 
> > > While after these patches, we would output:
> > > 
> > > ==
> > > :1:24: warning: missing terminating " character
> > >     1 | GCC diagnostic ignored "oops
> > >   |    ^
> > > file.cpp:2:1: note: in <_Pragma directive>
> > >     2 | _Pragma(S)
> > >   | ^~~
> > > ==
> > > 
> > > Thanks!
> > 
> > Hi Lewis; sorry for not responding to the v2 patches.
> > 
> > I've started looking at the v3 patches in detail, but I have some
> > high-
> > level questions about memory usage:
> > 
> > Am I right in thinking that the effect of this patch is that for
> > every
> > _Pragma in the source we will create a new line_map_ordinary, and a
> > new
> > buffer for the stringified content of that _Pragma, and that these
> > allocations will persist for the rest of the compilation?  (plus a
> > little extra allocation within the "location_t" space from 0 to
> > 0x7fff).
> > 
> > It sounds like this will probably be a rounding error that won't be
> > noticable in profiling, but did you attempt any such measurement of
> > the
> > memory usage before/after this patch on some real-world projects?
> > 
> > Thanks
> > Dave
> > 
> 
> Thanks for looking at the patches, I appreciate it whenever you have
> time to get to them.
> 
> This is a fair point about the memory usage, basically it means that
> each instance of a _Pragma has comparable memory footprint to a macro
> definition. (In addition to the overheads you mentioned, it also
> creates a macro map to generate a virtual location for the tokens, so
> that it's able to output the "in expansion of _Pragma" note. That
> part
> can be disabled with -ftrack-macro-expansion=0 at least.)
> 
> I had the sense that _Pragma isn't used often enough for that to be a
> problem, but agreed it is worth checking. (I really hope this memory
> usage isn't an issue since there are also numerous PRs complaining
> about 32-bit limitations in location tracking, that make it tempting
> to explore 64-bit line maps or some other option someday too.)
> 
> I tried one thing now, wxWidgets uses a lot of diagnostic pragmas
> wrapped up inside macros that use _Pragma. (See
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55578). The testsuite
> contains a file allheaders.cpp which includes the whole library, so I
> tried compiling this into a pch, which I believe measures the entire
> memory footprint including the ordinary and macro line maps and the
> _Pragma strings. The resulting PCH sizes were:
> 
> 279000173 bytes before the changes
> 279491345 bytes after the changes
> 
> So 0.1% bigger. Happy to check other projects too, do you have any
> standard gotos? Maybe firefox or something I take it.

Thanks for doing that test; I think that slight increase on a heavy
user of _Pragma is acceptable.
> 
> I see your other response on patch #1, I am thinking about that and
> will reply later. Thanks again!

Thanks.  Hope that my patch #1 response makes sense and that I'm not
missing something about the way this works.

Dave



Re: [PATCH v3 1/4] diagnostics: libcpp: Add LC_GEN linemaps to support in-memory buffers

2023-07-28 Thread David Malcolm via Gcc-patches
On Fri, 2023-07-21 at 19:08 -0400, Lewis Hyatt wrote:
> Add a new linemap reason LC_GEN which enables encoding the location
> of data
> that was generated during compilation and does not appear in any
> source file.
> There could be many use cases, such as, for instance, referring to
> the content
> of builtin macros (not yet implemented, but an easy lift after this
> one.) The
> first intended application is to create a place to store the input to
> a
> _Pragma directive, so that proper locations can be assigned to those
> tokens. This will be done in a subsequent commit.
> 
> The actual change needed to the line-maps API in libcpp is not too
> large and
> requires no space overhead in the line map data structures (on 64-bit
> systems
> that is; one newly added data member to class line_map_ordinary sits
> inside
> former padding bytes.) An LC_GEN map is just an ordinary map like any
> other,
> but the TO_FILE member that normally points to the file name points
> instead to
> the actual data.  This works automatically with PCH as well, for the
> same
> reason that the file name makes its way into a PCH.  In order to
> avoid
> confusion, the member has been renamed from TO_FILE to DATA, and
> associated
> accessors adjusted.
> 
> Outside libcpp, there are many small changes but most of them are to
> selftests, which are necessarily more sensitive to implementation
> details. From the perspective of the user (the "user", here, being a
> frontend
> using line maps or else the diagnostics infrastructure), the chief
> visible
> change is that the function location_get_source_line() should be
> passed an
> expanded_location object instead of a separate filename and line
> number.  This
> is not a big change because in most cases, this information came
> anyway from a
> call to expand_location and the needed expanded_location object is
> readily
> available. The new overload of location_get_source_line() uses the
> extra
> information in the expanded_location object to obtain the data from
> the
> in-memory buffer when it originated from an LC_GEN map.
> 
> Until the subsequent patch that starts using LC_GEN maps, none are
> yet
> generated within GCC, hence nothing is added to the testsuite here;
> but all
> relevant selftests have been extended to cover generated data maps in
> addition
> to normal files.

[...snip...]

Thanks for the updated patch.

Reading this patch, it felt a bit unnatural to me to have an
  (expanded location, source line) 
pair where the expanded location seems to be representing "which source
file or generated buffer", but the line/column info in that
expanded_location is to be ignored in favor of the 2nd source line.

I think we're missing a class: something that identifies either a
specific source file, or a specific generated buffer.

How about something like either:

class source_id
{
public:
  source_id (const char *filename)
  : m_filename_or_buffer (filename),
m_len (0)
  {
  }

  explicit source_id (const char *buffer, unsigned buffer_len)
  : m_filename_or_buffer (buffer),
m_len (buffer_len)
  {
linemap_assert (buffer_len > 0);
  }

private:
  const char *m_filename_or_buffer;
  unsigned m_len;  // where 0 means "it's a filename"
};

or:

class source_id
{
public:
  source_id (const char *filename)
  : m_ptr (filename),
m_is_buffer (false)
  {
  }

  explicit source_id (const linemap_ordinary *buffer_linemap)
  : m_ptr (buffer_linemap),
m_is_buffer (true)
  {
  }

private:
  const void *m_ptr;
  bool m_is_buffer;
};

and use one of these "source_id file" in place of "const char *file",
rather than replacing such things with expanded_location?

> diff --git a/gcc/c-family/c-indentation.cc b/gcc/c-family/c-indentation.cc
> index e8d3dece770..4164fa0b1ba 100644
> --- a/gcc/c-family/c-indentation.cc
> +++ b/gcc/c-family/c-indentation.cc
> @@ -50,7 +50,7 @@ get_visual_column (expanded_location exploc,
>  unsigned int *first_nws,
>  unsigned int tab_width)
>  {
> -  char_span line = location_get_source_line (exploc.file, exploc.line);
> +  char_span line = location_get_source_line (exploc);

...so this might continue to be:

  char_span line = location_get_source_line (exploc.file, exploc.line);

...but expanded_location's "file" field would become a source_id,
rather than a const char *.  It looks like doing so might make a lot of
"is this the same file or buffer?" checks turn into comparisons of
source_id instances.

So I think expanded_location would become:

typedef struct
{
  /* Either the name of the source file involved, or the
 specific generated buffer.  */
  source_id file;

  /* The line-location in the source file.  */
  int line;

  int column;

  void *data;

  /* In a system header?. */
  bool sysp;
} expanded_location;

and we wouldn't need to add these extra fields:

> +
> +  /* If generated data, the data and its length.  The data may contain 
> embedded
> +   nulls and need not be null-terminated.  */
> +  unsigned 

Re: [PATCH v3 0/4] diagnostics: libcpp: Overhaul locations for _Pragma tokens

2023-07-28 Thread David Malcolm via Gcc-patches
On Fri, 2023-07-21 at 19:08 -0400, Lewis Hyatt wrote:
> Hello-
> 
> This is an update to the v2 patch series last sent in January:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609473.html
> 
> While I did not receive any feedback on the v2 patches yet, they did
> need some
> rebasing on top of other recent commits to input.cc, so I thought it
> would be
> helpful to send them again now. The patches have not otherwise
> changed from
> v2, and the above-linked message explains how all the patches fit in
> with the
> original v1 series sent last November.
> 
> Dave, I would appreciate it very much if you could please let me know
> what you
> think of this approach? I feel like the diagnostics we currently
> output for _Pragmas are worth improving. As a reminder, say for this
> example:
> 
> =
>  #define S "GCC diagnostic ignored \"oops"
>  _Pragma(S)
> =
> 
> We currently output:
> 
> =
> file.cpp:2:24: warning: missing terminating " character
>     2 | _Pragma(S)
>   |    ^
> =
> 
> While after these patches, we would output:
> 
> ==
> :1:24: warning: missing terminating " character
>     1 | GCC diagnostic ignored "oops
>   |    ^
> file.cpp:2:1: note: in <_Pragma directive>
>     2 | _Pragma(S)
>   | ^~~
> ==
> 
> Thanks!

Hi Lewis; sorry for not responding to the v2 patches.

I've started looking at the v3 patches in detail, but I have some high-
level questions about memory usage:

Am I right in thinking that the effect of this patch is that for every
_Pragma in the source we will create a new line_map_ordinary, and a new
buffer for the stringified content of that _Pragma, and that these
allocations will persist for the rest of the compilation?  (plus a
little extra allocation within the "location_t" space from 0 to
0x7fff).

It sounds like this will probably be a rounding error that won't be
noticeable in profiling, but did you attempt any such measurement of the
memory usage before/after this patch on some real-world projects?

Thanks
Dave



[PATCH v2] SARIF and -ftime-report's output [PR109361]

2023-07-28 Thread David Malcolm via Gcc-patches
On Fri, 2023-07-28 at 08:00 +0200, Richard Biener wrote:
> On Fri, Jul 28, 2023 at 12:23 AM David Malcolm via Gcc-patches
>  wrote:
> > 
> > On Tue, 2023-04-11 at 08:43 +, Richard Biener wrote:
> > > On Tue, 4 Apr 2023, David Malcolm wrote:
> > > 
> > > > Richi, Jakub: I can probably self-approve this, but it's
> > > > technically a
> > > > new feature.  OK if I push this to trunk in stage 4?  I believe
> > > > it's
> > > > low risk, and is very useful for benchmarking -fanalyzer.
> > > 
> > > Please wait for stage1 at this point.  One comment on the patch
> > > below ...
> > > 
> > > > 
> > > > This patch adds support for embedding profiling information
> > > > about
> > > > the
> > > > compiler itself into the SARIF output.

[...snip...]

> > > 
> > > 'sarif' is currently used only with -fdiagnostics-format= it
> > > seems.
> > > We already have
> > > 
> > > ftime-report
> > > Common Var(time_report)
> > > Report the time taken by each compiler pass.
> > > 
> > > ftime-report-details
> > > Common Var(time_report_details)
> > > Record times taken by sub-phases separately.
> > > 
> > > so -fsarif-time-report is not a) -ftime-report-sarif and b) it's
> > > unclear if it applies to -ftime-report or to both -ftime-report
> > > and -ftime-report-details?  (note -ftime-report-details needs
> > > -ftime-report to be effective)
> > > 
> > > I'd rather have a -ftime-report-format= (or -freport-format in
> > > case we want to cover -fmem-report, -fmem-report-wpa,
> > > -fpre-ipa-mem-report and -fpost-ipa-mem-report as well?)
> > > 
> > > ISTR there's a summer of code project in this area as well.
> > > 
> > > Thanks,
> > > Richard.
> > 
> > Revisiting this; sorry about the delay.
> > 
> > As I understand the status quo, we currently have:
> > * -ftime-report: enable capturing of timing information (with a
> > slight
> > speed hit), and report it to stderr
> > * -ftime-report-details: tweak how that information is captured (if
> > -ftime-report is enabled), so that timevar->children is populated
> > and printed
> > 
> > There seem to be two things here:
> > - what timing data we capture
> > - where that timing data goes
> > 
> > What I need is some way to specify that the output should go to
> > the
> > SARIF file, rather than to stderr.
> > 
> > Some ways we could do this:
> > (a) simply enforce that if SARIF diagnostics were requested with
> > -fdiagnostics-format=sarif-{file|stderr} that the time report goes
> > there in JSON form, rather than to stderr
> > (b) add an option to specify where the time report goes
> > (c) add options to allow the time report to potentially go to
> > multiple
> > places (both stderr and SARIF, one or the other, neither); this
> > seems
> > overcomplex to me.
> > (d) something else?
> > 
> > The patch I posted implements a form of (b), but right now I'm
> > leaning
> > towards option (a): if the user requested SARIF output, then the
> > time
> > report goes to the SARIF output, rather than stderr.
> 
> I'm fine with (a), but -fdiagnostics-format= doesn't naturally apply
> to
> -ftime-report (or -fmem-report), those are not "diagnostics" in my
> opinion but they are auxiliary data for the compilation process
> rather than the input to it.  But yes, -ftime-report-format= would be
> too specific, maybe -faux-format=.
> 
> That said, we can go with (a) and do something else later if desired.
> I don't think preserving behavior in this area will be important so
> we
> don't have to get it right immediately.

Thanks.

Here's an updated version of the patch which implements (a).

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
As before, I've tested this with my analyzer integration testsuite and
was able to use the .sarif data to generate reports about which source
files get slowed down by the analyzer [1]. I've validated the generated
.sarif files against the SARIF schema.

OK for trunk?
Dave
[1] https://github.com/davidmalcolm/gcc-analyzer-integration-tests/issues/5


This patch adds support for embedding profiling information about the
compiler itself into the SARIF output.

Specifically, if SARIF diagnostic output is requested, via
-fdiagnostics-format=sarif-file or -fdiagnostics-format=sarif-stderr,
then any -ftime-report output is written in JS

Re: [PATCH] Add -fsarif-time-report [PR109361]

2023-07-27 Thread David Malcolm via Gcc-patches
On Tue, 2023-04-11 at 08:43 +, Richard Biener wrote:
> On Tue, 4 Apr 2023, David Malcolm wrote:
> 
> > Richi, Jakub: I can probably self-approve this, but it's
> > technically a
> > new feature.  OK if I push this to trunk in stage 4?  I believe
> > it's
> > low risk, and is very useful for benchmarking -fanalyzer.
> 
> Please wait for stage1 at this point.  One comment on the patch
> below ...
> 
> > 
> > This patch adds support for embedding profiling information about
> > the
> > compiler itself into the SARIF output.
> > 
> > In an earlier version of this patch I extended -ftime-report so
> > that
> > as well as writing to stderr, it would embed the information in any
> > SARIF output.  This turned out to be awkward to use, in that I
> > found
> > myself needing to get the data in JSON form without also having it
> > emitted on stderr (which was affecting the output of the build).
> > 
> > Hence this version of the patch adds a new -fsarif-time-report,
> > similar
> > to the existing -ftime-report for requesting GCC profile itself
> > using
> > the timevar machinery.
> > 
> > Specifically, if -fsarif-time-report is specified, the timing
> > information will be captured (as if -ftime-report were specified),
> > and
> > will be embedded in JSON form within any SARIF as a
> > "gcc/timeReport"
> > property within a property bag of the "invocation" object.
> > 
> > Here's an example of the output:
> > 
> >   "invocations": [
> >   {
> >   "executionSuccessful": true,
> >   "toolExecutionNotifications": [],
> >   "properties": {
> >   "gcc/timeReport": {
> >   "timevars": [
> >   {
> >   "name": "phase setup",
> >   "elapsed": {
> >   "user": 0.04,
> >   "sys": 0,
> >   "wall": 0.04,
> >   "ggc_mem": 1863472
> >   }
> >   },
> > 
> >   [...snip...]
> > 
> >   {
> >   "name": "analyzer: processing worklist",
> >   "elapsed": {
> >   "user": 0.06,
> >   "sys": 0,
> >   "wall": 0.06,
> >   "ggc_mem": 48
> >   }
> >   },
> >   {
> >   "name": "analyzer: emitting diagnostics",
> >   "elapsed": {
> >   "user": 0.01,
> >   "sys": 0,
> >   "wall": 0.01,
> >   "ggc_mem": 0
> >   }
> >   },
> >   {
> >   "name": "TOTAL",
> >   "elapsed": {
> >   "user": 0.21,
> >   "sys": 0.03,
> >   "wall": 0.24,
> >   "ggc_mem": 3368736
> >   }
> >   }
> >   ],
> >   "CHECKING_P": true,
> >   "flag_checking": true
> >   }
> >   }
> >   }
> >   ]
> > 
> > I have successfully used this in my analyzer integration tests to
> > get
> > timing information about which source files get slowed down by the
> > analyzer.  I've validated the generated .sarif files against the
> > SARIF
> > schema.
> > 
> > The documentation notes that the precise output format is subject
> > to change.
> > 
> > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
> > 
> > gcc/ChangeLog:
> > PR analyzer/109361
> > * common.opt (fsarif-time-report): New option.
> 
> 'sarif' is currently used only with -fdiagnostics-format= it seems.
> We already have
> 
> ftime-report
> Common Var(time_report)
> Report the time taken by each compiler pass.
> 
> ftime-report-details
> Common Var(time_report_details)
> Record times taken by sub-phases separately. 
> 
> so -fsarif-time-report is not a) -ftime-report-sarif and b) it's
> unclear if it applies to -ftime-report or to both -ftime-report
> and -ftime-report-details?  (note -ftime-report-details needs
> -ftime-report to be effective)
> 
> I'd rather have a -ftime-report-format= (or -freport-format in
> case we want to cover -fmem-report, -fmem-report-wpa,
> -fpre-ipa-mem-report and -fpost-ipa-mem-report as well?)
> 
> ISTR there's a summer of code project in this area as well.
> 
> Thanks,
> Richard.

Revisiting this; sorry about the delay.

As I understand the status quo, we currently have:
* -ftime-report: enable capturing of timing information (with a slight
speed hit), and report it to stderr
* -ftime-report-details: tweak how that information is captured (if -

[pushed] analyzer: add symbol base class, moving region id to there [PR104940]

2023-07-26 Thread David Malcolm via Gcc-patches
This patch introduces a "symbol" base class that region and svalue
both inherit from, generalizing the ID from the region class so it's
also used by svalues.  This gives a way of sorting regions and svalues
into creation order, which I've found useful in my experiments with
adding SMT support (PR analyzer/104940).

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-2793-g9d804f9b2709b3.

gcc/ChangeLog:
PR analyzer/104940
* Makefile.in (ANALYZER_OBJS): Add analyzer/symbol.o.

gcc/analyzer/ChangeLog:
PR analyzer/104940
* region-model-manager.cc
(region_model_manager::region_model_manager): Update for
generalizing region ids to also cover svalues.
(region_model_manager::get_or_create_constant_svalue): Likewise.
(region_model_manager::get_or_create_unknown_svalue): Likewise.
(region_model_manager::create_unique_svalue): Likewise.
(region_model_manager::get_or_create_initial_value): Likewise.
(region_model_manager::get_or_create_setjmp_svalue): Likewise.
(region_model_manager::get_or_create_poisoned_svalue): Likewise.
(region_model_manager::get_ptr_svalue): Likewise.
(region_model_manager::get_or_create_unaryop): Likewise.
(region_model_manager::get_or_create_binop): Likewise.
(region_model_manager::get_or_create_sub_svalue): Likewise.
(region_model_manager::get_or_create_repeated_svalue): Likewise.
(region_model_manager::get_or_create_bits_within): Likewise.
(region_model_manager::get_or_create_unmergeable): Likewise.
(region_model_manager::get_or_create_widening_svalue): Likewise.
(region_model_manager::get_or_create_compound_svalue): Likewise.
(region_model_manager::get_or_create_conjured_svalue): Likewise.
(region_model_manager::get_or_create_asm_output_svalue): Likewise.
(region_model_manager::get_or_create_const_fn_result_svalue):
Likewise.
(region_model_manager::get_region_for_fndecl): Likewise.
(region_model_manager::get_region_for_label): Likewise.
(region_model_manager::get_region_for_global): Likewise.
(region_model_manager::get_field_region): Likewise.
(region_model_manager::get_element_region): Likewise.
(region_model_manager::get_offset_region): Likewise.
(region_model_manager::get_sized_region): Likewise.
(region_model_manager::get_cast_region): Likewise.
(region_model_manager::get_frame_region): Likewise.
(region_model_manager::get_symbolic_region): Likewise.
(region_model_manager::get_region_for_string): Likewise.
(region_model_manager::get_bit_range): Likewise.
(region_model_manager::get_var_arg_region): Likewise.
(region_model_manager::get_region_for_unexpected_tree_code):
Likewise.
(region_model_manager::get_or_create_region_for_heap_alloc):
Likewise.
(region_model_manager::create_region_for_alloca): Likewise.
(region_model_manager::log_stats): Likewise.
* region-model-manager.h (region_model_manager::get_num_regions):
Replace with...
(region_model_manager::get_num_symbols): ...this.
(region_model_manager::alloc_region_id): Replace with...
(region_model_manager::alloc_symbol_id): ...this.
(region_model_manager::m_next_region_id): Replace with...
(region_model_manager::m_next_symbol_id): ...this.
* region-model.cc (selftest::test_get_representative_tree): Update
for generalizing region ids to also cover svalues.
(selftest::test_binop_svalue_folding): Likewise.
(selftest::test_state_merging): Likewise.
* region.cc (region::cmp_ids): Delete, in favor of
symbol::cmp_ids.
(region::region): Update for introduction of symbol base class.
(frame_region::get_region_for_local): Likewise.
(root_region::root_region): Likewise.
(symbolic_region::symbolic_region): Likewise.
* region.h: Replace include of "analyzer/complexity.h" with
"analyzer/symbol.h".
(class region): Make a subclass of symbol.
(region::get_id): Delete in favor of symbol::get_id.
(region::cmp_ids): Delete in favor of symbol::cmp_ids.
(region::get_complexity): Delete in favor of
symbol::get_complexity.
(region::region): Use symbol::id_t for "id" param.
(region::m_complexity): Move field to symbol base class.
(region::m_id): Likewise.
(space_region::space_region): Use symbol::id_t for "id" param.
(frame_region::frame_region): Likewise.
(globals_region::globals_region): Likewise.
(code_region::code_region): Likewise.
(function_region::function_region): Likewise.
(label_region::label_region): Likewise.
(stack_region::stack_region): Likewise.
(heap_region::heap_region): Likewise.

  1   2   3   4   5   6   7   8   9   10   >