Author: Carl Friedrich Bolz <[email protected]>
Branch: extradoc
Changeset: r4888:4a9c26090909
Date: 2012-10-21 08:44 -0700
http://bitbucket.org/pypy/extradoc/changeset/4a9c26090909/
Log: restructure talk, add some speed numbers
diff --git a/talk/dls2012/presentation/talk.tex
b/talk/dls2012/presentation/talk.tex
--- a/talk/dls2012/presentation/talk.tex
+++ b/talk/dls2012/presentation/talk.tex
@@ -116,15 +116,20 @@
\begin{frame}
\frametitle{Optimizing traces}
\begin{itemize}
- \item Traces trivial to optimize, because there's no control flow
+ \item A trace is an extended basic block
+ \item traces are easy to optimize due to lack of control flow merges
\item most optimizations are one forward pass
\item optimizers are often like symbolic executors
\item can do optimizations that are untractable with full control flow
- \item XXX example
\end{itemize}
\end{frame}
\begin{frame}
+ \frametitle{Example}
+\end{frame}
+
+
+\begin{frame}
\frametitle{Problems with this approach}
\begin{itemize}
\item most traces actually are loops
@@ -146,6 +151,7 @@
\item apply the unchanged forward-pass optimizations
\item do some post-processing
\item pre-processing is done in such a way that the normal
optimizations become loop-aware
+ \pause
\item intuition: give the optimizations a second iteration of
context to work with
\end{itemize}
\end{block}
diff --git a/talk/vmil2012/presentation/figures/all_numbers.png
b/talk/vmil2012/presentation/figures/all_numbers.png
new file mode 100644
index
0000000000000000000000000000000000000000..9076ac193fc9ba1954e24e2ae372ec7e1e1f44e6
GIT binary patch
[cut]
diff --git a/talk/vmil2012/presentation/talk.tex
b/talk/vmil2012/presentation/talk.tex
--- a/talk/vmil2012/presentation/talk.tex
+++ b/talk/vmil2012/presentation/talk.tex
@@ -35,7 +35,7 @@
% does not look nice, try deleting the line with the fontenc.
-\title{The Efficient Handling of Guards in the Design of RPython's Tracing JIT}
+\title[Guards in RPython's Tracing JIT]{The Efficient Handling of Guards in
the Design of RPython's Tracing JIT}
\author[David Schneider, Carl Friedrich Bolz]{David Schneider \and \emph{Carl
Friedrich Bolz}}
% - Give the names in the same order as the appear in the paper.
@@ -85,6 +85,21 @@
%\section{Introduction}
\begin{frame}
+ \frametitle{RPython and PyPy}
+ \begin{itemize}
+ \item Context: RPython
+ \item a language for writing interpreters for dynamic languages
+ \item a generic tracing JIT, applicable to many languages
+ \item used to implement PyPy, an efficient Python interpreter
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{How fast is PyPy?}
+ \includegraphics[scale=0.3]{figures/all_numbers.png}
+\end{frame}
+
+\begin{frame}
\frametitle{Tracing JITs Compile by Observing an Interpreter}
\begin{itemize}
\item VM contains both an interpreter and the tracing JIT compiler
@@ -109,6 +124,7 @@
\item Operations that check whether conditions are still true
\item When a guard fails, execution of the trace stops and continues in
the interpreter
\pause
+ \item similar to deoptimization points, but more common, and patchable
\item \emph{This talk:} technology and design decisions of guards
\pause
\begin{block}{Guard Characteristics}
@@ -121,179 +137,6 @@
\end{itemize}
\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/loop03}
-\end{frame}
-
-\begin{frame}
- \frametitle{Inlining}
- Tracing automatically does (potentially deep) inlining
-\end{frame}
-
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/loop04}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/loop05}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/loop06}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/loop07}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/loop08}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/loop09}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/loop10}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/loop11}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/loop12}
-\end{frame}
-
-% this talk wants to go over a lot of details that are usually glossed over as
-% "easy" when tracing JITs are introduced.
-
-\begin{frame}
- \frametitle{Bridges}
- \begin{itemize}
- \item When a trace fails often, it becomes worth to attach a new trace
to it
- \item This is called a bridge
- \item The bridge is attached by patching the guard machine code
- \item when this guard fails in the future, the new trace is executed
instead
- \end{itemize}
-\end{frame}
-
-\begin{frame}
- \frametitle{RPython and PyPy}
- \begin{itemize}
- \item Context: RPython
- \item a generic tracing JIT, applicable to many languages
- \item main use: PyPy, an efficient Python interpreter
- \end{itemize}
-\end{frame}
-
-%\section{High-Level}
-
-\begin{frame}
- \frametitle{Symbolic Frame Capturing}
- \begin{itemize}
- \item Guard can fail deep inside inlined function
- \item when going back to the interpreter, call stack needs to be
re-created
- \item done with the help of symbolic frame stacks
- \item these show how trace variables fill the to-be-built stack frames
- \end{itemize}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/loop07}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/framechain1}
-\end{frame}
-
-
-\begin{frame}
- \frametitle{Symbolic Frame Compression}
- \begin{itemize}
- \item There are \emph{a lot of} guards
- \item Naively storing symbolic frames would be costly in terms of memory
- \item need to store them compactly
- \item observation: from one guard to the next, the non-top stack frames
don't change
- \item share these between subsequent guards
- \pause
- \item also need a byte-saving binary representation, but that's just
boring work
- \end{itemize}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/loop07}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/framechain1}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/loop08}
-\end{frame}
-
-\begin{frame}
- \includegraphics[scale=0.4]{figures/framechain2}
-\end{frame}
-
-\begin{frame}
- \frametitle{Interaction with Optimization}
- \begin{itemize}
- \item Some optimizations make it necessary to store extra information in
symbolic frames
- \pause
- \item examples:
- \begin{itemize}
- \item allocation removal (need to allocate objects before
resuming)
- \item delayed heap stores (need to do stores before resuming
interpreter)
- \end{itemize}
- \item can be compressed using similar techniques
- \end{itemize}
-\end{frame}
-
-\begin{frame}
- \frametitle{Emitting Guards}
- Guards are compiled as
- \begin{itemize}
- \item quick Check if the condition holds
- \item and a mapping of machine locations to JIT-variables % indirection
using the fail-boxes
- \end{itemize}
- \pause
- In case of failure
- \begin{itemize}
- \item execution jumps to shared compensation code, decodes and stores
mapping
- \item returns to interpreter that rebuilds state
- \end{itemize}
-\end{frame}
-
-\begin{frame}
- \frametitle{Compiling a Trace}
- \begin{figure}
- \centering
- \includegraphics[width=1\textwidth]{figures/loop.pdf}
- \end{figure}
-\end{frame}
-
-
-\begin{frame}
- \frametitle{Compiling a Bridge}
- \begin{figure}
- \centering
- \includegraphics[width=1\textwidth]{figures/bridge_compiled.pdf}
- \end{figure}
-\end{frame}
-\begin{frame}
- \frametitle{Patching Guards for Bridges}
- \begin{figure}
- \centering
- \includegraphics[width=1\textwidth]{figures/bridge_patched.pdf}
- \end{figure}
-\end{frame}
-
-
\begin{frame}
\includegraphics[scale=0.6]{figures/op_percentage_after}
\end{frame}
@@ -341,6 +184,174 @@
\end{frame}
\begin{frame}
+ \includegraphics[scale=0.4]{figures/loop03}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Inlining}
+ Tracing automatically does (potentially deep) inlining
+\end{frame}
+
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/loop04}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/loop05}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/loop06}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/loop07}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/loop08}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/loop09}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/loop10}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/loop11}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/loop12}
+\end{frame}
+
+% this talk wants to go over a lot of details that are usually glossed over as
+% "easy" when tracing JITs are introduced.
+
+%\section{High-Level}
+
+\begin{frame}
+ \frametitle{Symbolic Frame Capturing}
+ \begin{itemize}
+ \item Guard can fail deep inside inlined function
+ \item when going back to the interpreter, call stack needs to be
re-created
+ \item done with the help of symbolic frame stacks
+ \item these show how trace variables fill the to-be-built interpreter
stack frames
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/loop07}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/framechain1}
+\end{frame}
+
+
+\begin{frame}
+ \frametitle{Symbolic Frame Compression}
+ \begin{itemize}
+ \item There are \emph{a lot of} guards
+ \item Naively storing symbolic frames would be costly in terms of memory
+ \item need to store them compactly
+ \item observation: from one guard to the next, the non-top stack frames
don't change
+ \item share these between subsequent guards
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/loop07}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/framechain1}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/loop08}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[scale=0.4]{figures/framechain2}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Compact Representation}
+ also need a byte-saving binary representation, but that's just boring work
+\end{frame}
+
+
+\begin{frame}
+ \frametitle{Interaction with Optimization}
+ \begin{itemize}
+ \item Some optimizations make it necessary to store extra information in
symbolic frames
+ \pause
+ \item examples:
+ \begin{itemize}
+ \item allocation removal (need to allocate objects before
resuming)
+ \item delayed heap stores (need to do stores before resuming
interpreter)
+ \end{itemize}
+ \item can be compressed using similar techniques
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Emitting Guards}
+ Guards are compiled as
+ \begin{itemize}
+ \item quick check if the condition holds
+ \item and a mapping of machine locations to JIT-variables % indirection
using the fail-boxes
+ \end{itemize}
+ \pause
+ In case of failure
+ \begin{itemize}
+ \item execution jumps to shared compensation code, decodes and stores
mapping
+ \item returns to interpreter that rebuilds state
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Compiling a Trace}
+ \begin{figure}
+ \centering
+ \includegraphics[width=1\textwidth]{figures/loop.pdf}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Bridges}
+ \begin{itemize}
+ \item When a trace fails often, it becomes worthwhile to attach a new trace
to it
+ \item This is called a bridge
+ \item The bridge is attached by patching the guard machine code
+ \item when this guard fails in the future, the new trace is executed
instead
+ \end{itemize}
+\end{frame}
+
+
+\begin{frame}
+ \frametitle{Compiling a Bridge}
+ \begin{figure}
+ \centering
+ \includegraphics[width=1\textwidth]{figures/bridge_compiled.pdf}
+ \end{figure}
+\end{frame}
+\begin{frame}
+ \frametitle{Patching Guards for Bridges}
+ \begin{figure}
+ \centering
+ \includegraphics[width=1\textwidth]{figures/bridge_patched.pdf}
+ \end{figure}
+\end{frame}
+
+
+
+\begin{frame}
\frametitle{JIT memory overhead}
\includegraphics[width=\textwidth]{figures/jit_memory}
\end{frame}
@@ -354,12 +365,23 @@
\begin{itemize}
\item Things that sound simple still often need careful engineering
\pause
+ \item guards are a fundamental part of tracing JITs, need to be
implemented well
\item not even any direct performance gains
\item keep memory usage sane
\item allows good bridges
\end{itemize}
\end{frame}
+\begin{frame}
+ \frametitle{Thank you! Questions?}
+ \begin{itemize}
+ \item Things that sound simple still often need careful engineering
+ \item guards are a fundamental part of tracing JITs, need to be
implemented well
+ \item not even any direct performance gains
+ \item keep memory usage sane
+ \item allows good bridges
+ \end{itemize}
+\end{frame}
%\section{Evaluation}
%as in paper
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit