acme@redhat.com |
---|
# perf mem record -a sleep 1 # # perf mem -t load report --sort=mem --stdio # Total Lost Samples: 0 # # Samples: 51K of event 'cpu/mem-loads,ldlat=30/P' # Total weight : 4819902 # Sort order : mem # # Overhead Samples Memory access # ........ ............ ........................ 44.87% 20217 LFB or LFB hit 27.30% 18618 L3 or L3 hit 22.53% 11712 L1 or L1 hit 4.85% 637 Local RAM or RAM hit 0.25% 1 Uncached or N/A hit 0.20% 188 L2 or L2 hit 0.00% 35 L3 miss
# perf mem record sleep 1 # # perf mem -t load report --sort=mem --stdio # Total Lost Samples: 0 # # Samples: 16 of event 'cpu/mem-loads,ldlat=30/P' # Total weight : 1556 # Sort order : mem # Overhead Samples Memory access # ........ ............ ........................ # 64.52% 8 LFB or LFB hit 14.07% 4 L1 or L1 hit 11.05% 3 L3 or L3 hit 10.35% 1 Local RAM or RAM hit
$ sudo perf mem record -- -c 10000 ^C[ perf record: Woken up 227 times to write data ] [ perf record: Captured and wrote 58.760 MB perf.data (836978 samples) ] $ sudo perf mem report -F mem,sample,snoop Samples: 836K of event 'ibs_op//', Event count (approx.): 8418762 Memory access Samples Snoop N/A 700620 N/A L1 hit 126675 N/A L2 hit 424 N/A L3 hit 664 HitM L3 hit 10 N/A Local RAM hit 2 N/A Remote RAM (1 hop) hit 8558 N/A Remote Cache (1 hop) hit 3 N/A Remote Cache (1 hop) hit 2 HitM Remote Cache (2 hops) hit 10 HitM Remote Cache (2 hops) hit 6 N/A Uncached hit 4 N/A $
$ make -j8 O=../build/allmodconfig/ make[1]: Entering directory '/home/acme/git/build/allmodconfig'# perf mem record sleep 1m [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.037 MB perf.data (20 samples) ] #
# perf mem report --stdio # Total Lost Samples: 0 # # Samples: 11 of event 'cpu/mem-loads,ldlat=30/P' # Total weight : 2155 # Sort order : local_weight,mem,sym,dso,symbol_daddr,dso_daddr # # Local Mem #Overhead Weig Access Symbol Sh Object Data Symbol Data Obj #........ .... ........ ................... ......... ................ ........... 23.94% 516 LocalRAM copy_page [kernel] 0xffff8d42228ea900 [unknown] 15.31% 330 LFB flush_signal_handle [kernel] 0xffff8d3f976020a0 [unknown] 14.66% 316 LFB strlen [kernel] 0xffffffff9b5f4cd3 [kernel].ro 13.36% 288 LFB _dl_relocate_object ld-linux.so 0x00007f6ccdc23068 libc.so.6 11.46% 247 LFB next_uptodate_page [kernel] 0xffffe401957e4df4 [unknown] 7.33% 158 LFB copy_page [kernel] 0xffff8d41f2dae920 [unknown] 4.04% 87 LFB unlock_page_memcg [kernel] 0xffffe4019333d8b8 [unknown] 3.06% 66 L1 check_preemption_di [kernel] 0xffffa8e8622ffc80 [unknown] 2.69% 58 LFB perf_output_begin [kernel] 0xffff8d3f52a1b01c [unknown] 2.13% 46 L3 task_work_run [kernel] 0xffff8d3f4a9c802c [unknown] 2.00% 43 L1 kmem_cache_alloc_tr [kernel] 0xffffa8e8622ffbc8 [unknown]
# perf c2c report --stdio ================================================= Shared Data Cache Line Table ================================================= # # -- Cacheline -- -- Load Hitm ---- Tot Total Total -- Stores -- # Idx Address Hitm Tot LclHitm rec Loads Stores L1Hit L1Miss # ... ................ ..... ... ....... ... ..... ...... ............ # 0 ffff8d449e7d6380 8.43% 53 53 510 499 11 11 0 1 ffff8d4058209340 6.20% 39 39 371 135 236 223 13 2 ffff8d449e7ff400 5.88% 37 37 501 479 22 22 0 3 ffffffff9bf53980 4.93% 31 31 233 208 25 24 1 4 ffff8d3f49ebd280 3.18% 20 20 162 153 9 9 0 5 ffff8d3f420d4880 2.86% 18 18 126 121 5 5 0
Cacheline 0xffff8d449e7ff400 -HITM- CL --- cycles --- Tot cpu LclHitm Off Code address lcl hitm load rec cnt Symbol Object Source:Line 97.30% 0x0 0xffffffff9a2d293b 113 44 454 8 __mod_node_page_state [kernel] vmstat.c:379 0.00% 0x8 0xffffffff9a2d29bb 0 112 40 8 __mod_node_page_state [kernel] atomic64_64.h:46 2.70% 0x18 0xffffffff9a2d2be5 959 103 2 2 refresh_cpu_vm_stats [kernel] atomic64_64.h:46
# tools/bpf/runqslower/.output/runqslower 200 Tracing run queue latency higher than 200 us TIME COMM PID LAT(us) 08:56:22 rcu_preempt 4105 229 08:56:22 rcu_tasks_trace 4966 525 08:56:22 podman 7238 766 08:56:22 podman 2189 728 08:56:22 swapper/13 721 244 ^C # strip tools/bpf/runqslower/.output/runqslower # ls -lah tools/bpf/runqslower/.output/runqslower -rwxr-xr-x. 1 acme acme 349K tools/bpf/runqslower/.output/runqslower #
$ cat tools/perf/util/bpf_skel/lock_contention.bpf.c SEC("tp_btf/contention_begin") int contention_begin(u64 *ctx) { struct task_struct *curr; struct tstamp_data *pelem; if (!enabled || !can_record()) return 0; curr = bpf_get_current_task_btf(); pelem = bpf_task_storage_get(&tstamp, curr, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!pelem || pelem->lock) return 0; pelem->timestamp = bpf_ktime_get_ns(); pelem->lock = (__u64)ctx[0]; pelem->flags = (__u32)ctx[1]; pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip); if (pelem->stack_id < 0) lost++; return 0; }
$ sudo perf lock contention -b ^C contended total wait max wait avg wait type caller 42 192.67 us 13.64 us 4.59 us spinlock queue_work_on+0x20 23 85.54 us 10.28 us 3.72 us spinlock worker_thread+0x14a 6 13.92 us 6.51 us 2.32 us mutex kernfs_iop_permission+0x30 3 11.59 us 10.04 us 3.86 us mutex kernfs_dop_revalidate+0x3c 1 7.52 us 7.52 us 7.52 us spinlock kthread+0x115 1 7.24 us 7.24 us 7.24 us rwlock:W sys_epoll_wait+0x148 2 7.08 us 3.99 us 3.54 us spinlock delayed_work_timer_fn+0x1b 1 6.41 us 6.41 us 6.41 us spinlock idle_balance+0xa06 2 2.50 us 1.83 us 1.25 us mutex kernfs_iop_lookup+0x2f 1 1.71 us 1.71 us 1.71 us mutex kernfs_iop_getattr+0x2c ...
# perf kwork report Total Max Max Max Kwork Name | Cpu | Runtime |Count | runtime | runtime start | runtime end | --------------------------------------------------------------------------------------- nvme0q5:130 | 004 | 1.101 ms | 49 | 0.051 ms | 26035.056403 s | 26035.056455 s | amdgpu:162 | 002 | 0.176 ms | 9 | 0.046 ms | 26035.268020 s | 26035.268066 s | nvme0q24:149 | 023 | 0.161 ms | 55 | 0.009 ms | 26035.655280 s | 26035.655288 s | nvme0q20:145 | 019 | 0.090 ms | 33 | 0.014 ms | 26035.939018 s | 26035.939032 s | nvme0q31:156 | 030 | 0.075 ms | 21 | 0.010 ms | 26035.052237 s | 26035.052247 s | nvme0q8:133 | 007 | 0.062 ms | 12 | 0.021 ms | 26035.416840 s | 26035.416861 s | nvme0q6:131 | 005 | 0.054 ms | 22 | 0.010 ms | 26035.199919 s | 26035.199929 s | nvme0q19:144 | 018 | 0.052 ms | 14 | 0.010 ms | 26035.110615 s | 26035.110625 s | nvme0q7:132 | 006 | 0.049 ms | 13 | 0.007 ms | 26035.125180 s | 26035.125187 s | nvme0q18:143 | 017 | 0.033 ms | 14 | 0.007 ms | 26035.169698 s | 26035.169705 s | nvme0q17:142 | 016 | 0.013 ms | 1 | 0.013 ms | 26035.565147 s | 26035.565160 s | enp5s0-rx-0:164 | 006 | 0.004 ms | 4 | 0.002 ms | 26035.928882 s | 26035.928884 s | enp5s0-tx-0:166 | 008 | 0.003 ms | 3 | 0.002 ms | 26035.870923 s | 26035.870925 s | ---------------------------------------------------------------------------------------