[PATCH] D82967: [analyzer][tests] Measure peak memory consumption for every project

2020-07-10 Thread Valeriy Savchenko via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG21bacc215413: [analyzer][tests] Measure peak memory 
consumption for every project (authored by vsavchenko).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82967/new/

https://reviews.llvm.org/D82967

Files:
  clang/utils/analyzer/Dockerfile
  clang/utils/analyzer/SATestBuild.py
  clang/utils/analyzer/SATestUtils.py
  clang/utils/analyzer/requirements.txt

Index: clang/utils/analyzer/requirements.txt
===
--- /dev/null
+++ clang/utils/analyzer/requirements.txt
@@ -0,0 +1,4 @@
+graphviz
+humanize
+matplotlib
+psutil
Index: clang/utils/analyzer/SATestUtils.py
===
--- clang/utils/analyzer/SATestUtils.py
+++ clang/utils/analyzer/SATestUtils.py
@@ -1,8 +1,9 @@
 import os
 import sys
+import time
 
 from subprocess import CalledProcessError, check_call
-from typing import List, IO, Optional
+from typing import List, IO, Optional, Tuple
 
 
 def which(command: str, paths: Optional[str] = None) -> Optional[str]:
@@ -47,6 +48,87 @@
 return ext in (".i", ".ii", ".c", ".cpp", ".m", "")
 
 
+def time_to_str(time: float) -> str:
+"""
+Convert given time in seconds into a human-readable string.
+"""
+return f"{time:.2f}s"
+
+
+def memory_to_str(memory: int) -> str:
+"""
+Convert given number of bytes into a human-readable string.
+"""
+if memory:
+try:
+import humanize
+return humanize.naturalsize(memory, gnu=True)
+except ImportError:
+# no formatter installed, let's keep it in bytes
+return f"{memory}B"
+
+# If memory is 0, we didn't succeed measuring it.
+return "N/A"
+
+
+def check_and_measure_call(*popenargs, **kwargs) -> Tuple[float, int]:
+"""
+Run command with arguments.  Wait for command to complete and measure
+execution time and peak memory consumption.
+If the exit code was zero then return, otherwise raise
+CalledProcessError.  The CalledProcessError object will have the
+return code in the returncode attribute.
+
+The arguments are the same as for the call and check_call functions.
+
+Return a tuple of execution time and peak memory.
+"""
+peak_mem = 0
+start_time = time.time()
+
+try:
+import psutil as ps
+
+def get_memory(process: ps.Process) -> int:
+mem = 0
+
+# we want to gather memory usage from all of the child processes
+descendants = list(process.children(recursive=True))
+descendants.append(process)
+
+for subprocess in descendants:
+try:
+mem += subprocess.memory_info().rss
+except (ps.NoSuchProcess, ps.AccessDenied):
+continue
+
+return mem
+
+with ps.Popen(*popenargs, **kwargs) as process:
+# while the process is running calculate resource utilization.
+while (process.is_running() and
+   process.status() != ps.STATUS_ZOMBIE):
+# track the peak utilization of the process
+peak_mem = max(peak_mem, get_memory(process))
+time.sleep(.5)
+
+if process.is_running():
+process.kill()
+
+if process.returncode != 0:
+cmd = kwargs.get("args")
+if cmd is None:
+cmd = popenargs[0]
+raise CalledProcessError(process.returncode, cmd)
+
+except ImportError:
+# back off to subprocess if we don't have psutil installed
+peak_mem = 0
+check_call(*popenargs, **kwargs)
+
+return time.time() - start_time, peak_mem
+
+
 def run_script(script_path: str, build_log_file: IO, cwd: str,
out=sys.stdout, err=sys.stderr, verbose: int = 0):
 """
Index: clang/utils/analyzer/SATestBuild.py
===
--- clang/utils/analyzer/SATestBuild.py
+++ clang/utils/analyzer/SATestBuild.py
@@ -43,7 +43,7 @@
 variable. It should contain a comma separated list.
 """
 import CmpRuns
-import SATestUtils
+import SATestUtils as utils
 from ProjectMap import DownloadType, ProjectInfo
 
 import glob
@@ -63,7 +63,7 @@
 # and this is we can shush that false positive
 from plistlib import InvalidFileException  # type:ignore
 from subprocess import CalledProcessError, check_call
-from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING
+from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING, Tuple
 
 
 ###
@@ -115,7 +115,7 @@
 if 'CC' in os.environ:
 cc_candidate: Optional[str] = os.environ['CC']
 else:
-cc_candidate = SATestUtils.which("clang", os.environ['PATH'])
+

[PATCH] D82967: [analyzer][tests] Measure peak memory consumption for every project

2020-07-10 Thread Valeriy Savchenko via Phabricator via cfe-commits
vsavchenko updated this revision to Diff 276944.
vsavchenko added a comment.

Add matplotlib and graphviz to requirements.txt


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82967/new/

https://reviews.llvm.org/D82967

Files:
  clang/utils/analyzer/Dockerfile
  clang/utils/analyzer/SATestBuild.py
  clang/utils/analyzer/SATestUtils.py
  clang/utils/analyzer/requirements.txt

Index: clang/utils/analyzer/requirements.txt
===
--- /dev/null
+++ clang/utils/analyzer/requirements.txt
@@ -0,0 +1,4 @@
+graphviz
+humanize
+matplotlib
+psutil
Index: clang/utils/analyzer/SATestUtils.py
===
--- clang/utils/analyzer/SATestUtils.py
+++ clang/utils/analyzer/SATestUtils.py
@@ -1,8 +1,9 @@
 import os
 import sys
+import time
 
 from subprocess import CalledProcessError, check_call
-from typing import List, IO, Optional
+from typing import List, IO, Optional, Tuple
 
 
 def which(command: str, paths: Optional[str] = None) -> Optional[str]:
@@ -47,6 +48,87 @@
 return ext in (".i", ".ii", ".c", ".cpp", ".m", "")
 
 
+def time_to_str(time: float) -> str:
+"""
+Convert given time in seconds into a human-readable string.
+"""
+return f"{time:.2f}s"
+
+
+def memory_to_str(memory: int) -> str:
+"""
+Convert given number of bytes into a human-readable string.
+"""
+if memory:
+try:
+import humanize
+return humanize.naturalsize(memory, gnu=True)
+except ImportError:
+# no formatter installed, let's keep it in bytes
+return f"{memory}B"
+
+# If memory is 0, we didn't succeed measuring it.
+return "N/A"
+
+
+def check_and_measure_call(*popenargs, **kwargs) -> Tuple[float, int]:
+"""
+Run command with arguments.  Wait for command to complete and measure
+execution time and peak memory consumption.
+If the exit code was zero then return, otherwise raise
+CalledProcessError.  The CalledProcessError object will have the
+return code in the returncode attribute.
+
+The arguments are the same as for the call and check_call functions.
+
+Return a tuple of execution time and peak memory.
+"""
+peak_mem = 0
+start_time = time.time()
+
+try:
+import psutil as ps
+
+def get_memory(process: ps.Process) -> int:
+mem = 0
+
+# we want to gather memory usage from all of the child processes
+descendants = list(process.children(recursive=True))
+descendants.append(process)
+
+for subprocess in descendants:
+try:
+mem += subprocess.memory_info().rss
+except (ps.NoSuchProcess, ps.AccessDenied):
+continue
+
+return mem
+
+with ps.Popen(*popenargs, **kwargs) as process:
+# while the process is running calculate resource utilization.
+while (process.is_running() and
+   process.status() != ps.STATUS_ZOMBIE):
+# track the peak utilization of the process
+peak_mem = max(peak_mem, get_memory(process))
+time.sleep(.5)
+
+if process.is_running():
+process.kill()
+
+if process.returncode != 0:
+cmd = kwargs.get("args")
+if cmd is None:
+cmd = popenargs[0]
+raise CalledProcessError(process.returncode, cmd)
+
+except ImportError:
+# back off to subprocess if we don't have psutil installed
+peak_mem = 0
+check_call(*popenargs, **kwargs)
+
+return time.time() - start_time, peak_mem
+
+
 def run_script(script_path: str, build_log_file: IO, cwd: str,
out=sys.stdout, err=sys.stderr, verbose: int = 0):
 """
Index: clang/utils/analyzer/SATestBuild.py
===
--- clang/utils/analyzer/SATestBuild.py
+++ clang/utils/analyzer/SATestBuild.py
@@ -43,7 +43,7 @@
 variable. It should contain a comma separated list.
 """
 import CmpRuns
-import SATestUtils
+import SATestUtils as utils
 from ProjectMap import DownloadType, ProjectInfo
 
 import glob
@@ -63,7 +63,7 @@
 # and this is we can shush that false positive
 from plistlib import InvalidFileException  # type:ignore
 from subprocess import CalledProcessError, check_call
-from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING
+from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING, Tuple
 
 
 ###
@@ -115,7 +115,7 @@
 if 'CC' in os.environ:
 cc_candidate: Optional[str] = os.environ['CC']
 else:
-cc_candidate = SATestUtils.which("clang", os.environ['PATH'])
+cc_candidate = utils.which("clang", os.environ['PATH'])
 if not cc_candidate:
 

[PATCH] D82967: [analyzer][tests] Measure peak memory consumption for every project

2020-07-02 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ accepted this revision.
NoQ added inline comments.
This revision is now accepted and ready to land.



Comment at: clang/utils/analyzer/SATestUtils.py:113
+peak_mem = max(peak_mem, get_memory(process))
+time.sleep(.5)
+

vsavchenko wrote:
> NoQ wrote:
> > Do I understand correctly that this basically adds roughly a 0.25-second 
> > delay to every invocation of the analyzer? If so, that may add up to a lot 
> > of seconds for a project with many small translation units, which can 
> > potentially skew our wall-clock measurements. I guess the ideal solution 
> > would be some sort of "select" that waits on process termination with a 
> > timeout, or we could simply decrease the sleep interval and hope it won't 
> > consume too much CPU time.
> Not really, the process starts when we call `Popen` and instead of waiting 
> for it to finish like `check_call` does, we poll the memory.
> 
> Another point (even though it is not very useful information in the context 
> of my previous statement) is that this happens on every `scan-build` 
> invocation and not for every `clang` invocation.
Oh right!



Comment at: clang/utils/analyzer/requirements.txt:1-2
+humanize
+psutil

vsavchenko wrote:
> NoQ wrote:
> > Dunno, should we bother putting every utility into its own directory with 
> > its own requirements? Like, `exploded-graph-rewriter.py` certainly has 
> > different requirements.
> Maybe we should simply put it here as well?
Maybe. Dunno, you're the boss!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82967/new/

https://reviews.llvm.org/D82967



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82967: [analyzer][tests] Measure peak memory consumption for every project

2020-07-02 Thread Valeriy Savchenko via Phabricator via cfe-commits
vsavchenko marked 2 inline comments as done.
vsavchenko added inline comments.



Comment at: clang/utils/analyzer/SATestUtils.py:113
+peak_mem = max(peak_mem, get_memory(process))
+time.sleep(.5)
+

NoQ wrote:
> Do I understand correctly that this basically adds roughly a 0.25-second delay 
> to every invocation of the analyzer? If so, that may add up to a lot of 
> seconds for a project with many small translation units, which can potentially 
> skew our wall-clock measurements. I guess the ideal solution would be some 
> sort of "select" that waits on process termination with a timeout, or we could 
> simply decrease the sleep interval and hope it won't consume too much CPU 
> time.
Not really, the process starts when we call `Popen` and instead of waiting for 
it to finish like `check_call` does, we poll the memory.

Another point (even though it is not very useful information in the context of 
my previous statement) is that this happens on every `scan-build` invocation 
and not for every `clang` invocation.



Comment at: clang/utils/analyzer/requirements.txt:1-2
+humanize
+psutil

NoQ wrote:
> Dunno, should we bother putting every utility into its own directory with its 
> own requirements? Like, `exploded-graph-rewriter.py` certainly has different 
> requirements.
Maybe we should simply put it here as well?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82967/new/

https://reviews.llvm.org/D82967



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82967: [analyzer][tests] Measure peak memory consumption for every project

2020-07-01 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added inline comments.



Comment at: clang/utils/analyzer/SATestUtils.py:113
+peak_mem = max(peak_mem, get_memory(process))
+time.sleep(.5)
+

Do I understand correctly that this basically adds roughly a 0.25-second delay to 
every invocation of the analyzer? If so, that may add up to a lot of seconds 
for a project with many small translation units, which can potentially skew our 
wall-clock measurements. I guess the ideal solution would be some sort of 
"select" that waits on process termination with a timeout, or we could simply 
decrease the sleep interval and hope it won't consume too much CPU time.



Comment at: clang/utils/analyzer/requirements.txt:1-2
+humanize
+psutil

Dunno, should we bother putting every utility into its own directory with its 
own requirements? Like, `exploded-graph-rewriter.py` certainly has different 
requirements.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82967/new/

https://reviews.llvm.org/D82967



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82967: [analyzer][tests] Measure peak memory consumption for every project

2020-07-01 Thread Valeriy Savchenko via Phabricator via cfe-commits
vsavchenko created this revision.
vsavchenko added reviewers: NoQ, xazax.hun, Szelethus.
Herald added subscribers: cfe-commits, ASDenysPetrov, Charusso, dkrupp, 
donat.nagy, mikhail.ramalho, a.sidorin, rnkovacs, szepet, baloghadamsoftware.
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82967

Files:
  clang/utils/analyzer/Dockerfile
  clang/utils/analyzer/SATestBuild.py
  clang/utils/analyzer/SATestUtils.py
  clang/utils/analyzer/requirements.txt

Index: clang/utils/analyzer/requirements.txt
===
--- /dev/null
+++ clang/utils/analyzer/requirements.txt
@@ -0,0 +1,2 @@
+humanize
+psutil
Index: clang/utils/analyzer/SATestUtils.py
===
--- clang/utils/analyzer/SATestUtils.py
+++ clang/utils/analyzer/SATestUtils.py
@@ -1,8 +1,9 @@
 import os
 import sys
+import time
 
 from subprocess import CalledProcessError, check_call
-from typing import List, IO, Optional
+from typing import List, IO, Optional, Tuple
 
 
 def which(command: str, paths: Optional[str] = None) -> Optional[str]:
@@ -47,6 +48,87 @@
 return ext in (".i", ".ii", ".c", ".cpp", ".m", "")
 
 
+def time_to_str(time: float) -> str:
+"""
+Convert given time in seconds into a human-readable string.
+"""
+return f"{time:.2f}s"
+
+
+def memory_to_str(memory: int) -> str:
+"""
+Convert given number of bytes into a human-readable string.
+"""
+if memory:
+try:
+import humanize
+return humanize.naturalsize(memory, gnu=True)
+except ImportError:
+# no formatter installed, let's keep it in bytes
+return f"{memory}B"
+
+# If memory is 0, we didn't succeed measuring it.
+return "N/A"
+
+
+def check_and_measure_call(*popenargs, **kwargs) -> Tuple[float, int]:
+"""
+Run command with arguments.  Wait for command to complete and measure
+execution time and peak memory consumption.
+If the exit code was zero then return, otherwise raise
+CalledProcessError.  The CalledProcessError object will have the
+return code in the returncode attribute.
+
+The arguments are the same as for the call and check_call functions.
+
+Return a tuple of execution time and peak memory.
+"""
+peak_mem = 0
+start_time = time.time()
+
+try:
+import psutil as ps
+
+def get_memory(process: ps.Process) -> int:
+mem = 0
+
+# we want to gather memory usage from all of the child processes
+descendants = list(process.children(recursive=True))
+descendants.append(process)
+
+for subprocess in descendants:
+try:
+mem += subprocess.memory_info().rss
+except (ps.NoSuchProcess, ps.AccessDenied):
+continue
+
+return mem
+
+with ps.Popen(*popenargs, **kwargs) as process:
+# while the process is running calculate resource utilization.
+while (process.is_running() and
+   process.status() != ps.STATUS_ZOMBIE):
+# track the peak utilization of the process
+peak_mem = max(peak_mem, get_memory(process))
+time.sleep(.5)
+
+if process.is_running():
+process.kill()
+
+if process.returncode != 0:
+cmd = kwargs.get("args")
+if cmd is None:
+cmd = popenargs[0]
+raise CalledProcessError(process.returncode, cmd)
+
+except ImportError:
+# back off to subprocess if we don't have psutil installed
+peak_mem = 0
+check_call(*popenargs, **kwargs)
+
+return time.time() - start_time, peak_mem
+
+
 def run_script(script_path: str, build_log_file: IO, cwd: str,
out=sys.stdout, err=sys.stderr, verbose: int = 0):
 """
Index: clang/utils/analyzer/SATestBuild.py
===
--- clang/utils/analyzer/SATestBuild.py
+++ clang/utils/analyzer/SATestBuild.py
@@ -43,7 +43,7 @@
 variable. It should contain a comma separated list.
 """
 import CmpRuns
-import SATestUtils
+import SATestUtils as utils
 from ProjectMap import DownloadType, ProjectInfo
 
 import glob
@@ -63,7 +63,7 @@
 # and this is we can shush that false positive
 from plistlib import InvalidFileException  # type:ignore
 from subprocess import CalledProcessError, check_call
-from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING
+from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING, Tuple
 
 
 ###
@@ -115,7 +115,7 @@
 if 'CC' in os.environ:
 cc_candidate: Optional[str] = os.environ['CC']
 else:
-cc_candidate = SATestUtils.which("clang", os.environ['PATH'])
+cc_candidate =