% plumbers2010.tex
\documentclass{beamer}
\usetheme{Warsaw}

\title{Perf Tools: Recent Improvements}
\subtitle{Recent developments and discussion about TODO}
\author{Arnaldo Carvalho de Melo}
\institute{Red Hat Inc.}
\date{\scriptsize Netconf and Linux Plumbers Conference, Cambridge\\ \vspace{.10cm}November, 2010}

\begin{document}

% Adds the presentation cover page
\begin{frame}
\titlepage
\end{frame}

% ---------------------------------------------------------------------

\begin{frame}
\tableofcontents
\end{frame}

% =====================================================================

\section{Improvements on initial set of tools}
\begin{frame}{Improvements on initial set of tools}
\begin{enumerate}
	\item Tools Integration
	\item Slang based Text User Interface
	\item Use of build ids
\end{enumerate}
\end{frame}

% ---------------------------------------------------------------------

\subsection{Tool Integration}
\begin{frame}{Tools Integration}

\begin{enumerate}
	\item One tool doesn't do it all
	\item Combine steps to achieve multiple results
	\item Allows spreading work flows over multiple machines
	\item Profiling fast path
	\item report to annotate
	\item Reuse perf.data parsing
\end{enumerate}

\end{frame}

% ---------------------------------------------------------------------

\subsection{Slang Based TUI}
\begin{frame}{Slang Based TUI}
\begin{enumerate}
	\item GUIs not necessarily better
	\item We still have mutt and pine users, after all
	\item But the changes paves the way for GUIs
	\item mutt like interface
	\item report to annotate fast path
	\item Zoom in/out DSOs/threads
	\item Keys used: arrows + ENTER mostly, TAB sometimes
	\item Still don't like it? Use --stdio
\end{enumerate}
\end{frame}

% ---------------------------------------------------------------------

\begin{frame}{perf report TUI}

\begin{center} 
  \includegraphics[width=0.9\textwidth]{perf-report-tui.png} 
\end{center} 

\end{frame}

% ---------------------------------------------------------------------

\begin{frame}{perf annotate}
\begin{enumerate}
	\item Starts at the line with most hits
	\item Tabs through ordered list of hot lines
\end{enumerate}
\end{frame}

% ---------------------------------------------------------------------

\begin{frame}{perf annotate TUI}

\begin{center} 
  \includegraphics[width=0.9\textwidth]{perf-annotate-tui.png} 
\end{center} 

\end{frame}

% ---------------------------------------------------------------------

\begin{frame}{UI - TODO}
\begin{enumerate}
	\item perf top
	\item Allow selecting events to record at any time
	\item Start with top
	\item Freeze == report
	\item Save == record
	\item perf probe
	\item Go from annotate to probe, restart top
\end{enumerate}
\end{frame}

% ---------------------------------------------------------------------

\begin{frame}[shrink=1,fragile]
\frametitle{perf top}
Considers user space symbols too:

\begin{verbatim}
 PerfTop: 155 irqs/sec kernel:83.9% [1000Hz cycles], (all, 2 CPUs)

  samples  pcnt function                               DSO
  _______ _____ ______________________________________ ___________________

   119.00 12.0% read_hpet                              [kernel]  
    43.00  4.4% __strstr_ia32                          /lib/libc-2.12.1.so
    28.00  2.8% system_call                            [kernel]  
    25.00  2.5% unix_poll                              [kernel]  
    24.00  2.4% aes_enc_blk                            [aes_i586]         
    21.00  2.1% schedule                               [kernel]  
    21.00  2.1% _raw_spin_lock_irqsave                 [kernel]  
    19.00  1.9% _raw_spin_unlock_irqrestore            [kernel]  
    19.00  1.9% aes_dec_blk                            [aes_i586]         
    18.00  1.8% probe_workqueue_insertion              [kernel]  
    17.00  1.7% hpet_next_event                        [kernel]  
    13.00  1.3% fget_light                             [kernel]  
    13.00  1.3% do_select                              [kernel]  
    12.00  1.2% audit_syscall_entry                    [kernel]  
    12.00  1.2% ktime_get                              [kernel]  
    11.00  1.1% test_ti_thread_flag                    [kernel]  
    11.00  1.1% std::_List_node_base::transfer(std::_L libstdc++.so.6.0.13
    11.00  1.1% native_sched_clock                     [kernel]  
    11.00  1.1% vsnprintf                              [kernel]  
    11.00  1.1% format_decode                          [kernel]  
    10.00  1.0% index                                  /lib/libc-2.12.1.so
\end{verbatim}

\end{frame}

% ---------------------------------------------------------------------

\begin{frame}{perf stat}
\begin{enumerate}
	\item List of CPUs to monitor
	\item Ask for precise events(PEBS) using suffix: "-e cycles:p"
	\item Multiple 'p' characters == more precise
	\item Proof of concept patch for printing counters periodically ready
	\item Merge app log output sorting by timestamps
\end{enumerate}
\end{frame}

% =====================================================================

\section{New Tools}
\begin{frame} {New Tools}
Introduced after Plumbers'2009:

\begin{enumerate}
	\item diff
	\item archive
	\item probe
	\item trace
	\item several trace ones (timechart, etc)
\end{enumerate}

\end{frame}

% ---------------------------------------------------------------------

\subsection{perf diff}
\begin{frame}{perf diff}
\begin{enumerate}
	\item Shows difference in symbol hits between two perf.data files
	\item Keyed by build-ids in the cache
	\item Should support more than two files
	\item Generating version X samples symbol plottings
	\item Read "Differential Profiling" paper by Paul McKenney on how to use it
\end{enumerate}
\end{frame}

% ---------------------------------------------------------------------

\subsection{perf archive}
\begin{frame}{perf archive}
\begin{enumerate}
	\item Looks at perf.data files for DSOs with hits
	\item Creates tarball
	\item Transfer to another machine
	\item Populate the cache
	\item Use report and annotate
	\item Handles endianness
\end{enumerate}
\end{frame}

% ---------------------------------------------------------------------

\subsection{perf probe}
\begin{frame}{perf probe}
\begin{enumerate}
	\item Inserts dynamic probes
	\item Doesn't necessarily requires debuginfo
	\item Can collect variables
	\item Struct members can be specified to any level
	\item Works with callchains
	\item Works on the core kernel and on modules
	\item Supports wildcards in probe names
	\item Together with perf trace == systemtap subset
	\item Example of use together with scripting later in this presentation
	\item Contributed by Masami Hiramatsu
\end{enumerate}
\end{frame}

% =====================================================================

\section{Scripting}

\begin{frame}{Scripting}

\begin{enumerate}
	\item Use scripting languages to process events
	\item Python and Perl
	\item Allows tapping into tons of language libraries
	\item Several scripts available
	\item Generate scripts from perf.data
	\item Contributed by Tom Zanussi
\end{enumerate}
\end{frame}

% ---------------------------------------------------------------------

\subsection{Available Scripts}
\begin{frame}[shrink=5,fragile]
\frametitle{Available Scripts}

\begin{verbatim}
[root@ana ~]# perf trace --list
List of available trace scripts:
  rw-by-pid                     system-wide r/w activity
  wakeup-latency                system-wide min/max/avg wakeup latency
  workqueue-stats               workqueue stats (ins/exe/create/destroy)
  rwtop [interval]              system-wide r/w top
  failed-syscalls [comm]        system-wide failed syscalls
  rw-by-file <comm>             r/w activity for a program, by file
  syscall-counts-by-pid [comm]  system-wide syscall counts, by pid
  netdev-times [tx] [rx] [dev=] display a process of packet and processing time
  sctop [comm] [interval]       syscall top
  futex-contention              futex contention measurement
  sched-migration               sched migration overview
  failed-syscalls-by-pid [comm] system-wide failed syscalls, by pid
  syscall-counts [comm]         system-wide syscall counts
[root@ana ~]#
\end{verbatim}

\end{frame}

% ---------------------------------------------------------------------

\subsection{Generate Scripts}

\begin{frame}{Generate Scripts}
\begin{enumerate}
	\item From the events found in perf.data file
	\item Quickly start writing event handling
	\item Creates function skeletons for each trace event
	\item With a common set of parameters
	\item Plus event specific parameters
	\item Calls methods at init, exit and for unhandled events
	\item Comes with library of tracing specific methods
\end{enumerate}
\end{frame}

% ---------------------------------------------------------------------

\begin{frame}[shrink=1,fragile]
\frametitle{Listing Possible probe points}

\begin{verbatim}
[root@ana icmp]# perf probe -L icmp_rcv
<icmp_rcv:0>
      0  int icmp_rcv(struct sk_buff *skb)
      1  {
         
     59         if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
                        /*
                         * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
                         * silently ignored (we let user decide with a sysctl).
                         * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently
                         * discarded if to broadcast/multicast.
                         */
     66                 if ((icmph->type == ICMP_ECHO ||
                             icmph->type == ICMP_TIMESTAMP) &&
                            net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
                                goto error;
                        }
     71                 if (icmph->type != ICMP_ECHO &&
\end{verbatim}

\end{frame}

% ---------------------------------------------------------------------

\begin{frame}[shrink=1,fragile]
\frametitle{Listing variables that can be collected}

\begin{verbatim}
[root@ana ~]# perf probe -V icmp_rcv:66
Available variables at icmp_rcv:66
        @<icmp_rcv+343>
                struct icmphdr* icmph
                struct net*     net
                struct rtable*  rt
                struct sk_buff* skb
[root@ana ~]#
\end{verbatim}
\end{frame}

% ---------------------------------------------------------------------

\begin{frame}[shrink=1,fragile]
\frametitle{Adding a probe}

\begin{verbatim}
[root@ana icmp]# perf probe icmp_rcv:66 'type=icmph->type'
Add new event:
  probe:icmp_rcv    (on icmp_rcv:66 with type=icmph->type)

You can now use it on all perf tools, such as:

	perf record -e probe:icmp_rcv -aR sleep 1

[root@ana ~]# perf probe --list
  probe:icmp_rcv (on icmp_rcv:66@net/ipv4/icmp.c with type)

[root@ana icmp]# perf record -a -g -e probe:icmp_rcv
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.324 MB perf.data ]
\end{verbatim}

\end{frame}

% ---------------------------------------------------------------------

\begin{frame}[shrink=5,fragile]
\frametitle{Generating a python script from perf.data}

\begin{verbatim}
[root@ana icmp]# perf trace -g python
generated Python script: perf-trace.py

[root@ana icmp]# cat perf-trace.py 

def trace_begin():
        print "in trace_begin"

def trace_end():
        print "in trace_end"

def probe__icmp_rcv(evname, cpu, secs, nsecs, pid, comm,
                    probe_ip, type):
        print "%s %u.%u type=%u" % (evname, secs, nsecs, type)
\end{verbatim}

\end{frame}

% ---------------------------------------------------------------------

\begin{frame}[shrink=1,fragile]
\frametitle{Running python script}

\begin{verbatim}
[root@ana icmp]# perf trace -s perf-trace.py 
in trace_begin
probe__icmp_rcv 71171.964568380 type=8
probe__icmp_rcv 71177.792382154 type=8
probe__icmp_rcv 71178.792236953 type=8
in trace_end
[root@ana icmp]# 
\end{verbatim}

\end{frame}

% ---------------------------------------------------------------------

\begin{frame}[shrink=0,fragile]
\frametitle{Backtraces from probes}

\begin{verbatim}
[root@ana ~]# perf report --stdio
# Events: 2 
#
# Overhead  Command      Shared Object    Symbol
# ........  .......  .................  ........
#
   100.00%     ping  [kernel.kallsyms]  [k] icmp_rcv
               |
               --- icmp_rcv
                   ip_local_deliver_finish
                   NF_HOOK.clone.1
                   ip_local_deliver
                   ip_rcv_finish
                   NF_HOOK.clone.1
                   ip_rcv
                   __netif_receive_skb
                   process_backlog
                   net_rx_action
                   __do_softirq
                   0xb7707424

[root@ana ~]#
\end{verbatim}

\end{frame}

% ---------------------------------------------------------------------

\begin{frame}{Scripting TODO List}
\begin{enumerate}
	\item Convert trace builtins to scripts (sched, kmem, etc)
	\item Convert net/ipv4/tcp\_probe.c
	\item SCTP and DCCP variants too
	\item Write more scripts for showing where IO is happening
	\item Improve passing data from record to trace
	\item Remove requirement on using netcat for dual machine use
	\item Write more scripts (you can help here!)
\end{enumerate}
\end{frame}

% =====================================================================

\section{KVM Support}

\begin{frame}{KVM Support}

\begin{enumerate}
	\item Collect guest OS statistics from host side.
	\item top, record, report, diff, buildid-list
	\item Need to specify guest vmlinux or kallsyms, /proc/modules
	\item Or --guestmount directory with sshfs mounted per pid subdirs
	\item Use --pid to specify specific guest
	\item Contributed by Zhang, Yanmin.
\end{enumerate}

\end{frame}

% ---------------------------------------------------------------------

\begin{frame}[shrink=5,fragile]
\frametitle{perf top kvm example}

\small
\begin{verbatim}
# perf kvm --host --guest --guestkallsyms=guest/kallsyms \
           --guestmodules=guest/modules top

 PerfTop: 16010 irqs/sec kernel:59.1% us: 1.5% guest
          kernel:31.9% guest us:7.5% [+1000Hz cycles]

  samples  pcnt function                  DSO
  _______ _____ _________________________ ______________

 38770.00 20.4% __ticket_spin_lock        [guest.kernel]
 22560.00 11.9% ftrace_likely_update      [kernel]
  9208.00  4.8% __lock_acquire            [kernel]
  5473.00  2.9% trace_hardirqs_off_caller [kernel]
  5222.00  2.7% copy_user_generic_string  [guest.kernel]
  4450.00  2.3% validate_chain            [kernel]
  4262.00  2.2% trace_hardirqs_on_caller  [kernel]
  4239.00  2.2% do_raw_spin_lock          [kernel]
  3548.00  1.9% do_raw_spin_unlock        [kernel]
  2487.00  1.3% lock_release              [kernel]
  2165.00  1.1% __local_bh_disable        [kernel]
  1905.00  1.0% check_chain_key           [kernel]
\end{verbatim}

\end{frame}

% =====================================================================

\section{Work in Progress}
\begin{frame}{Work in Progress}

\begin{center}
\begin{enumerate}
	\item cgroups support
	\item utrace to probe user space
	\item PerfKit GUI
	\item In addition to KernelShark and sysprof GUIs
\end{enumerate}
\end{center}

\end{frame}

% =====================================================================

\section{That is all folks!}
\begin{frame}

\begin{center}
Thanks!
\end{center}
\begin{center}
Arnaldo Carvalho de Melo
\end{center}
\begin{center}
acme@infradead.org
\end{center}
\begin{center}
acme@redhat.com
\end{center}
\begin{center}
linux-perf-users@vger.kernel.org
\end{center}

\end{frame}

% ---------------------------------------------------------------------

\end{document}

