Re: [Cocci] Determination of an usage statistic for macro calls “for_each…node…”

2020-10-18 Thread Markus Elfring
> Would you like to look for software configuration alternatives for better 
> parallel data processing?

I would like to share another source code analysis approach.
I hope that this contribution can trigger further helpful software development 
ideas.


@initialize:python@
@@
import sys

def write_identifier(source, loop):
    """Write the macro name once per matched source position.

    source -- iterable of matched positions; only the number of
              elements is used, the elements themselves are ignored.
    loop   -- name of the matched iterator macro.

    Every name is written to the standard output stream on its own line
    so that the lines can be counted by a subsequent tool.
    """
    names = [loop for _ in source]
    sys.stdout.write("\n".join(names) + "\n")

// Rule "find": inside a function implementation, match loops which are
// written with one of the declared "for_each_...node..." iterator macros.
// The matched iterator name is bound to "fe" and its source position to "pos".
// The nest "<+... ...+>" requires at least one such loop in the function body.
@find@
identifier fe, x;
iterator name for_each_node_by_name,
  for_each_node_by_type,
  for_each_node_with_property,
  for_each_matching_node,
  for_each_matching_node_and_match,
  for_each_compatible_node,
  for_each_child_of_node,
  for_each_available_child_of_node;
position pos;
statement s;
type t;
@@
 t x(...)
 {
 <+...
(for_each_child_of_node@fe@pos(...) s
|for_each_available_child_of_node@fe@pos(...) s
|for_each_compatible_node@fe@pos(...) s
|for_each_node_by_name@fe@pos(...) s
|for_each_node_by_type@fe@pos(...) s
|for_each_matching_node@fe@pos(...) s
|for_each_matching_node_and_match@fe@pos(...) s
|for_each_node_with_property@fe@pos(...) s
)
 ...+>
 }

@script:python collection@
fe << find.fe;
place << find.pos;
@@
# Print the matched iterator name once for every position bound by rule "find".
write_identifier(place, fe)


Test result:
elfring@Sonne:~/Projekte/Linux/next-patched> git checkout next-20201016 && 
XX=$(date) && time spatch --python python3 --jobs 4 --include-headers 
--no-includes --dir . 
~/Projekte/Coccinelle/janitor/report_for_each_node_macro_calls5.cocci | echo 
"$(echo 'call' && cat)" | csvsql --query 'select call, count(*) from stdin 
group by call'; YY=$(date) && echo "$XX | $YY"
…
523 files match
…
call,count(*)
for_each_available_child_of_node,158
for_each_child_of_node,359
for_each_compatible_node,80
for_each_matching_node,22
for_each_matching_node_and_match,16
for_each_node_by_name,59
for_each_node_by_type,53
for_each_node_with_property,6

real    0m47,779s
user    2m19,285s
sys     0m1,541s
So 18. Okt 13:13:02 CEST 2020 | So 18. Okt 13:13:50 CEST 2020


Can such facts further influence the specification of efficient SmPL
disjunctions?

Regards,
Markus


___
Cocci mailing list
Cocci@systeme.lip6.fr
https://systeme.lip6.fr/mailman/listinfo/cocci


Re: [Cocci] Determination of an usage statistic for macro calls “for_each…node…”

2020-10-17 Thread Markus Elfring
> …
> > +(
> > +for_each_node_by_name(n,e1) S
> > +|
> …
> > +|
> > +for_each_node_with_property(n,e1) S
> > +)
> …
> 
> 
> Do you indicate any occurrence frequencies or probabilities for the mentioned 
> macro calls
> by the ordering in this disjunction for the semantic patch language?

I would like to share another source code analysis approach.
I hope that this contribution can trigger further helpful software development 
ideas.


@initialize:python@
@@
import sys, sqlalchemy
# Report the SQLAlchemy version in use on the standard error stream.
sys.stderr.write(
    "\n".join(("Using SQLAlchemy version:", sqlalchemy.__version__)) + "\n"
)
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
# Keep the collected data in an in-memory SQLite database (no SQL echo).
engine = create_engine("sqlite:///:memory:", echo=False)
# Base class for the declarative table mapping that follows.
base = declarative_base()

class action(base):
    """One recorded use of an iterator macro (a row of the table "macros")."""

    __tablename__ = "macros"
    # All five columns together form the composite primary key.
    # name:        value of "current_element" of the matched position
    # source_file: file of the matched position
    # macro:       name of the matched iterator macro
    # line/column: location of the match (column as stored by store_position)
    name = Column(String, primary_key=True)
    source_file = Column(String, primary_key=True)
    macro = Column(String, primary_key=True)
    line = Column(Integer, primary_key=True)
    column = Column(Integer, primary_key=True)

    def __repr__(self):
        """Return a debugging representation which shows all five fields."""
        # NOTE(review): the original format string was lost (it appeared as "");
        # this text is a reconstruction with one placeholder per value.
        return ("<action(name=%r, source_file=%r, macro=%r, line=%r, column=%r)>"
                % (self.name,
                   self.source_file,
                   self.macro,
                   self.line,
                   self.column))

# Open one session which is bound to the engine, then create the table(s)
# that are registered on the declarative base.
configured_session = sessionmaker(bind=engine)
session = configured_session()
base.metadata.create_all(engine)

def store_position(source, loop):
    """Add data to an internal table.

    source -- iterable of matched positions; each element has to provide
              the attributes "current_element", "file", "line" and "column".
    loop   -- name of the matched iterator macro.

    One "action" entry is added to the session for every position.
    The entries are not committed here.
    """
    for place in source:
        entry = action(name=place.current_element,
                       source_file=place.file,
                       macro=loop,
                       # Converted like the column so that the Integer
                       # field receives a number.
                       line=int(place.line),
                       # The stored column is the reported column plus one
                       # (presumably a change to 1-based counting -- TODO confirm).
                       column=int(place.column) + 1)
        session.add(entry)

// Rule "find": inside a function implementation, match loops which are
// written with one of the declared "for_each_...node..." iterator macros.
// The matched iterator name is bound to "for_loop" and its source position
// to "pos".
// The nest "<+... ...+>" requires at least one such loop in the function body.
@find@
identifier for_loop, work;
iterator name for_each_node_by_name,
  for_each_node_by_type,
  for_each_node_with_property,
  for_each_matching_node,
  for_each_matching_node_and_match,
  for_each_compatible_node,
  for_each_child_of_node,
  for_each_available_child_of_node;
position pos;
statement s;
type t;
@@
 t work(...)
 {
 <+...
(for_each_node_by_name@for_loop@pos(...) s
|for_each_node_by_type@for_loop@pos(...) s
|for_each_matching_node@for_loop@pos(...) s
|for_each_node_with_property@for_loop@pos(...) s
|for_each_compatible_node@for_loop@pos(...) s
|for_each_matching_node_and_match@for_loop@pos(...) s
|for_each_child_of_node@for_loop@pos(...) s
|for_each_available_child_of_node@for_loop@pos(...) s
)
 ...+>
 }

@script:python collection@
fl << find.for_loop;
place << find.pos;
@@
# Record every position bound by rule "find" together with the iterator name.
store_position(place, fl)

@finalize:python@
@@
# Commit the collected entries before they are queried.
session.commit()
from sqlalchemy import func
entries = session.query(func.count()).select_from(action).scalar()

if entries > 0:
    from sqlalchemy.sql import literal_column
    delimiter = "|"
    # COUNT(*) expression which is shared by both reports.
    incidence_count = func.count(literal_column("*"))

    # First table: incidence per source file and macro, ordered by file name
    # and then by descending incidence.
    sys.stdout.write(delimiter.join(['"source file"', 'macro', 'incidence']))
    sys.stdout.write("\r\n")

    per_file = (session.query(action.source_file,
                              action.macro,
                              incidence_count)
                .group_by(action.source_file, action.macro)
                .order_by(action.source_file, incidence_count.desc()))

    for source_file, macro, incidence in per_file:
        # The file name is enclosed by double quotes in the output.
        quoted_file = ''.join(['"', source_file, '"'])
        sys.stdout.write(delimiter.join([quoted_file, macro, str(incidence)]))
        sys.stdout.write("\r\n")

    # Separator line between the two tables.
    sys.stdout.write("=\r\n")

    # Second table: total incidence per macro, most frequent first.
    sys.stdout.write(delimiter.join(['macro', 'incidence']))
    sys.stdout.write("\r\n")

    per_macro = (session.query(action.macro, incidence_count)
                 .group_by(action.macro)
                 .order_by(incidence_count.desc()))

    for macro, incidence in per_macro:
        sys.stdout.write(macro + delimiter + str(incidence))
        sys.stdout.write("\r\n")
else:
    sys.stderr.write("No result for this analysis!\n")


Test result:
elfring@Sonne:~/Projekte/Linux/next-patched> git checkout next-20201016 && 
XX=$(date) && time spatch --python $(which python3) --dir . 
~/Projekte/Coccinelle/janitor/report_for_each_node_macro_calls.cocci; 
YY=$(date) && echo "$XX | $YY"
…
Using SQLAlchemy version:
1.3.19
518 files match
…
=
macro|incidence
for_each_child_of_node|357
for_each_available_child_of_node|157
for_each_compatible_node|79
for_each_node_by_name|55
for_each_node_by_type|53
for_each_matching_node|22
for_each_matching_node_and_match|16
for_each_node_with_property|6

real    3m26,039s
user    2m3,453s
sys     0m5,041s
Sa 17. Okt 07:00:42 CEST 2020 | Sa 17. Okt 07:04:08 CEST 2020


Can such facts further influence the specification of efficient SmPL
disjunctions?


Would you like to look for software configuration alternatives for better 
parallel data processing?

Regards,
Markus