perf.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318
  1. #!/usr/bin/env python3
  2. #
  3. # Script to aggregate and report Linux perf results.
  4. #
  5. # Example:
  6. # ./scripts/perf.py -R -obench.perf ./runners/bench_runner
  7. # ./scripts/perf.py bench.perf -j -Flfs.c -Flfs_util.c -Scycles
  8. #
  9. # Copyright (c) 2022, The littlefs authors.
  10. # SPDX-License-Identifier: BSD-3-Clause
  11. #
import bisect
import collections as co
import csv
import errno
import fcntl
import functools as ft
import itertools as it
import math as m
import multiprocessing as mp
import os
import re
import shlex
import shutil
import subprocess as sp
import sys
import tempfile
import zipfile
# TODO support non-zip perf results?

# default perf invocation, may be overridden on the command-line
PERF_TOOL = ['perf']
# the perf events we collect and aggregate (see PerfResult._fields)
PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
# default sampling frequency passed to perf record -F
PERF_FREQ = 100
# objdump recovers symbol addresses and dwarf line info from dsos
OBJDUMP_TOOL = ['objdump']
# annotate highlight thresholds, as fractions of the hottest line's count
THRESHOLD = (0.5, 0.85)

# integer fields
  35. class Int(co.namedtuple('Int', 'x')):
  36. __slots__ = ()
  37. def __new__(cls, x=0):
  38. if isinstance(x, Int):
  39. return x
  40. if isinstance(x, str):
  41. try:
  42. x = int(x, 0)
  43. except ValueError:
  44. # also accept +-∞ and +-inf
  45. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  46. x = m.inf
  47. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  48. x = -m.inf
  49. else:
  50. raise
  51. assert isinstance(x, int) or m.isinf(x), x
  52. return super().__new__(cls, x)
  53. def __str__(self):
  54. if self.x == m.inf:
  55. return '∞'
  56. elif self.x == -m.inf:
  57. return '-∞'
  58. else:
  59. return str(self.x)
  60. def __int__(self):
  61. assert not m.isinf(self.x)
  62. return self.x
  63. def __float__(self):
  64. return float(self.x)
  65. none = '%7s' % '-'
  66. def table(self):
  67. return '%7s' % (self,)
  68. diff_none = '%7s' % '-'
  69. diff_table = table
  70. def diff_diff(self, other):
  71. new = self.x if self else 0
  72. old = other.x if other else 0
  73. diff = new - old
  74. if diff == +m.inf:
  75. return '%7s' % '+∞'
  76. elif diff == -m.inf:
  77. return '%7s' % '-∞'
  78. else:
  79. return '%+7d' % diff
  80. def ratio(self, other):
  81. new = self.x if self else 0
  82. old = other.x if other else 0
  83. if m.isinf(new) and m.isinf(old):
  84. return 0.0
  85. elif m.isinf(new):
  86. return +m.inf
  87. elif m.isinf(old):
  88. return -m.inf
  89. elif not old and not new:
  90. return 0.0
  91. elif not old:
  92. return 1.0
  93. else:
  94. return (new-old) / old
  95. def __add__(self, other):
  96. return self.__class__(self.x + other.x)
  97. def __sub__(self, other):
  98. return self.__class__(self.x - other.x)
  99. def __mul__(self, other):
  100. return self.__class__(self.x * other.x)
  101. # perf results
  102. class PerfResult(co.namedtuple('PerfResult', [
  103. 'file', 'function', 'line',
  104. 'cycles', 'bmisses', 'branches', 'cmisses', 'caches',
  105. 'children'])):
  106. _by = ['file', 'function', 'line']
  107. _fields = ['cycles', 'bmisses', 'branches', 'cmisses', 'caches']
  108. _types = {
  109. 'cycles': Int,
  110. 'bmisses': Int, 'branches': Int,
  111. 'cmisses': Int, 'caches': Int}
  112. __slots__ = ()
  113. def __new__(cls, file='', function='', line=0,
  114. cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
  115. children=[]):
  116. return super().__new__(cls, file, function, int(Int(line)),
  117. Int(cycles), Int(bmisses), Int(branches), Int(cmisses), Int(caches),
  118. children)
  119. def __add__(self, other):
  120. return PerfResult(self.file, self.function, self.line,
  121. self.cycles + other.cycles,
  122. self.bmisses + other.bmisses,
  123. self.branches + other.branches,
  124. self.cmisses + other.cmisses,
  125. self.caches + other.caches,
  126. self.children + other.children)
  127. def openio(path, mode='r', buffering=-1):
  128. if path == '-':
  129. if mode == 'r':
  130. return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
  131. else:
  132. return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
  133. else:
  134. return open(path, mode, buffering)
# run perf as a subprocess, storing measurements into a zip file
def record(command, *,
        output=None,
        perf_freq=PERF_FREQ,
        perf_period=None,
        perf_events=PERF_EVENTS,
        perf_tool=PERF_TOOL,
        **args):
    """Run command under perf record and append the raw perf data to
    the zip file at output.

    - perf_freq/perf_period select sampling by frequency or by event
      period; perf_period wins if both are given
    - perf_events is the comma-separated event list passed to -e
    - returns the wrapped command's exit code
    """
    # create a temporary file for perf to write to, as far as I can tell
    # this is strictly needed because perf's pipe-mode only works with stdout
    with tempfile.NamedTemporaryFile('rb') as f:
        # figure out our perf invocation; filter(None, ...) drops the
        # options that don't apply
        perf = perf_tool + list(filter(None, [
            'record',
            '-F%s' % perf_freq
                if perf_freq is not None
                and perf_period is None else None,
            '-c%s' % perf_period
                if perf_period is not None else None,
            '-B',
            '-g',
            '--all-user',
            '-e%s' % perf_events,
            '-o%s' % f.name]))

        # run our command
        try:
            if args.get('verbose'):
                print(' '.join(shlex.quote(c) for c in perf + command))
            err = sp.call(perf + command, close_fds=False)
        except KeyboardInterrupt:
            # map ctrl-C to an errno so we still return something
            err = errno.EOWNERDEAD

        # synchronize access, multiple concurrent runs may append to the
        # same output zip, so take an exclusive flock first
        z = os.open(output, os.O_RDWR | os.O_CREAT)
        fcntl.flock(z, fcntl.LOCK_EX)

        # copy measurements into our zip file, one entry per pid
        with os.fdopen(z, 'r+b') as z:
            with zipfile.ZipFile(z, 'a',
                    compression=zipfile.ZIP_DEFLATED,
                    compresslevel=1) as z:
                with z.open('perf.%d' % os.getpid(), 'w') as g:
                    shutil.copyfileobj(f, g)

    # forward the return code
    return err
# try to only process each dso once
#
# note this only caches with the non-keyword arguments
  181. def multiprocessing_cache(f):
  182. local_cache = {}
  183. manager = mp.Manager()
  184. global_cache = manager.dict()
  185. lock = mp.Lock()
  186. def multiprocessing_cache(*args, **kwargs):
  187. # check local cache?
  188. if args in local_cache:
  189. return local_cache[args]
  190. # check global cache?
  191. with lock:
  192. if args in global_cache:
  193. v = global_cache[args]
  194. local_cache[args] = v
  195. return v
  196. # fall back to calling the function
  197. v = f(*args, **kwargs)
  198. global_cache[args] = v
  199. local_cache[args] = v
  200. return v
  201. return multiprocessing_cache
  202. @multiprocessing_cache
  203. def collect_syms_and_lines(obj_path, *,
  204. objdump_tool=None,
  205. **args):
  206. symbol_pattern = re.compile(
  207. '^(?P<addr>[0-9a-fA-F]+)'
  208. '\s+.*'
  209. '\s+(?P<size>[0-9a-fA-F]+)'
  210. '\s+(?P<name>[^\s]+)\s*$')
  211. line_pattern = re.compile(
  212. '^\s+(?:'
  213. # matches dir/file table
  214. '(?P<no>[0-9]+)'
  215. '(?:\s+(?P<dir>[0-9]+))?'
  216. '\s+.*'
  217. '\s+(?P<path>[^\s]+)'
  218. # matches line opcodes
  219. '|' '\[[^\]]*\]\s+'
  220. '(?:'
  221. '(?P<op_special>Special)'
  222. '|' '(?P<op_copy>Copy)'
  223. '|' '(?P<op_end>End of Sequence)'
  224. '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
  225. '|' 'Line .*?to (?P<op_line>[0-9]+)'
  226. '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
  227. '|' '.' ')*'
  228. ')$', re.IGNORECASE)
  229. # figure out symbol addresses and file+line ranges
  230. syms = {}
  231. sym_at = []
  232. cmd = objdump_tool + ['-t', obj_path]
  233. if args.get('verbose'):
  234. print(' '.join(shlex.quote(c) for c in cmd))
  235. proc = sp.Popen(cmd,
  236. stdout=sp.PIPE,
  237. stderr=sp.PIPE if not args.get('verbose') else None,
  238. universal_newlines=True,
  239. errors='replace',
  240. close_fds=False)
  241. for line in proc.stdout:
  242. m = symbol_pattern.match(line)
  243. if m:
  244. name = m.group('name')
  245. addr = int(m.group('addr'), 16)
  246. size = int(m.group('size'), 16)
  247. # ignore zero-sized symbols
  248. if not size:
  249. continue
  250. # note multiple symbols can share a name
  251. if name not in syms:
  252. syms[name] = set()
  253. syms[name].add((addr, size))
  254. sym_at.append((addr, name, size))
  255. proc.wait()
  256. if proc.returncode != 0:
  257. if not args.get('verbose'):
  258. for line in proc.stderr:
  259. sys.stdout.write(line)
  260. # assume no debug-info on failure
  261. pass
  262. # sort and keep largest/first when duplicates
  263. sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
  264. sym_at_ = []
  265. for addr, name, size in sym_at:
  266. if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
  267. sym_at_.append((addr, name, size))
  268. sym_at = sym_at_
  269. # state machine for dwarf line numbers, note that objdump's
  270. # decodedline seems to have issues with multiple dir/file
  271. # tables, which is why we need this
  272. lines = []
  273. line_at = []
  274. dirs = {}
  275. files = {}
  276. op_file = 1
  277. op_line = 1
  278. op_addr = 0
  279. cmd = objdump_tool + ['--dwarf=rawline', obj_path]
  280. if args.get('verbose'):
  281. print(' '.join(shlex.quote(c) for c in cmd))
  282. proc = sp.Popen(cmd,
  283. stdout=sp.PIPE,
  284. stderr=sp.PIPE if not args.get('verbose') else None,
  285. universal_newlines=True,
  286. errors='replace',
  287. close_fds=False)
  288. for line in proc.stdout:
  289. m = line_pattern.match(line)
  290. if m:
  291. if m.group('no') and not m.group('dir'):
  292. # found a directory entry
  293. dirs[int(m.group('no'))] = m.group('path')
  294. elif m.group('no'):
  295. # found a file entry
  296. dir = int(m.group('dir'))
  297. if dir in dirs:
  298. files[int(m.group('no'))] = os.path.join(
  299. dirs[dir],
  300. m.group('path'))
  301. else:
  302. files[int(m.group('no'))] = m.group('path')
  303. else:
  304. # found a state machine update
  305. if m.group('op_file'):
  306. op_file = int(m.group('op_file'), 0)
  307. if m.group('op_line'):
  308. op_line = int(m.group('op_line'), 0)
  309. if m.group('op_addr'):
  310. op_addr = int(m.group('op_addr'), 0)
  311. if (m.group('op_special')
  312. or m.group('op_copy')
  313. or m.group('op_end')):
  314. file = os.path.abspath(files.get(op_file, '?'))
  315. lines.append((file, op_line, op_addr))
  316. line_at.append((op_addr, file, op_line))
  317. if m.group('op_end'):
  318. op_file = 1
  319. op_line = 1
  320. op_addr = 0
  321. proc.wait()
  322. if proc.returncode != 0:
  323. if not args.get('verbose'):
  324. for line in proc.stderr:
  325. sys.stdout.write(line)
  326. # assume no debug-info on failure
  327. pass
  328. # sort and keep first when duplicates
  329. lines.sort()
  330. lines_ = []
  331. for file, line, addr in lines:
  332. if len(lines_) == 0 or lines_[-1][0] != file or lines[-1][1] != line:
  333. lines_.append((file, line, addr))
  334. lines = lines_
  335. # sort and keep first when duplicates
  336. line_at.sort()
  337. line_at_ = []
  338. for addr, file, line in line_at:
  339. if len(line_at_) == 0 or line_at_[-1][0] != addr:
  340. line_at_.append((addr, file, line))
  341. line_at = line_at_
  342. return syms, sym_at, lines, line_at
  343. def collect_decompressed(path, *,
  344. perf_tool=PERF_TOOL,
  345. sources=None,
  346. everything=False,
  347. propagate=0,
  348. depth=1,
  349. **args):
  350. sample_pattern = re.compile(
  351. '(?P<comm>\w+)'
  352. '\s+(?P<pid>\w+)'
  353. '\s+(?P<time>[\w.]+):'
  354. '\s*(?P<period>\w+)'
  355. '\s+(?P<event>[^:]+):')
  356. frame_pattern = re.compile(
  357. '\s+(?P<addr>\w+)'
  358. '\s+(?P<sym>[^\s\+]+)(?:\+(?P<off>\w+))?'
  359. '\s+\((?P<dso>[^\)]+)\)')
  360. events = {
  361. 'cycles': 'cycles',
  362. 'branch-misses': 'bmisses',
  363. 'branches': 'branches',
  364. 'cache-misses': 'cmisses',
  365. 'cache-references': 'caches'}
  366. # note perf_tool may contain extra args
  367. cmd = perf_tool + [
  368. 'script',
  369. '-i%s' % path]
  370. if args.get('verbose'):
  371. print(' '.join(shlex.quote(c) for c in cmd))
  372. proc = sp.Popen(cmd,
  373. stdout=sp.PIPE,
  374. stderr=sp.PIPE if not args.get('verbose') else None,
  375. universal_newlines=True,
  376. errors='replace',
  377. close_fds=False)
  378. last_filtered = False
  379. last_event = ''
  380. last_period = 0
  381. last_stack = []
  382. deltas = co.defaultdict(lambda: {})
  383. syms_ = co.defaultdict(lambda: {})
  384. at_cache = {}
  385. results = {}
  386. def commit():
  387. # tail-recursively propagate measurements
  388. for i in range(len(last_stack)):
  389. results_ = results
  390. for j in reversed(range(i+1)):
  391. if i+1-j > depth:
  392. break
  393. # propagate
  394. name = last_stack[j]
  395. if name not in results_:
  396. results_[name] = (co.defaultdict(lambda: 0), {})
  397. results_[name][0][last_event] += last_period
  398. # recurse
  399. results_ = results_[name][1]
  400. for line in proc.stdout:
  401. # we need to process a lot of data, so wait to use regex as late
  402. # as possible
  403. if not line.startswith('\t'):
  404. if last_filtered:
  405. commit()
  406. last_filtered = False
  407. if line:
  408. m = sample_pattern.match(line)
  409. if m and m.group('event') in events:
  410. last_filtered = True
  411. last_event = m.group('event')
  412. last_period = int(m.group('period'), 0)
  413. last_stack = []
  414. elif last_filtered:
  415. m = frame_pattern.match(line)
  416. if m:
  417. # filter out internal/kernel functions
  418. if not everything and (
  419. m.group('sym').startswith('__')
  420. or m.group('sym').startswith('0')
  421. or m.group('sym').startswith('-')
  422. or m.group('sym').startswith('[')
  423. or m.group('dso').startswith('/usr/lib')):
  424. continue
  425. dso = m.group('dso')
  426. sym = m.group('sym')
  427. off = int(m.group('off'), 0) if m.group('off') else 0
  428. addr_ = int(m.group('addr'), 16)
  429. # get the syms/lines for the dso, this is cached
  430. syms, sym_at, lines, line_at = collect_syms_and_lines(
  431. dso,
  432. **args)
  433. # ASLR is tricky, we have symbols+offsets, but static symbols
  434. # means we may have multiple options for each symbol.
  435. #
  436. # To try to solve this, we use previous seen symbols to build
  437. # confidence for the correct ASLR delta. This means we may
  438. # guess incorrectly for early symbols, but this will only affect
  439. # a few samples.
  440. if sym in syms:
  441. sym_addr_ = addr_ - off
  442. # track possible deltas?
  443. for sym_addr, size in syms[sym]:
  444. delta = sym_addr - sym_addr_
  445. if delta not in deltas[dso]:
  446. deltas[dso][delta] = sum(
  447. abs(a_+delta - a)
  448. for s, (a_, _) in syms_[dso].items()
  449. for a, _ in syms[s])
  450. for delta in deltas[dso].keys():
  451. deltas[dso][delta] += abs(sym_addr_+delta - sym_addr)
  452. syms_[dso][sym] = sym_addr_, size
  453. # guess the best delta
  454. delta, _ = min(deltas[dso].items(),
  455. key=lambda x: (x[1], x[0]))
  456. addr = addr_ + delta
  457. # cached?
  458. if (dso,addr) in at_cache:
  459. cached = at_cache[(dso,addr)]
  460. if cached is None:
  461. # cache says to skip
  462. continue
  463. file, line = cached
  464. else:
  465. # find file+line
  466. i = bisect.bisect(line_at, addr, key=lambda x: x[0])
  467. if i > 0:
  468. _, file, line = line_at[i-1]
  469. else:
  470. file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
  471. # ignore filtered sources
  472. if sources is not None:
  473. if not any(
  474. os.path.abspath(file) == os.path.abspath(s)
  475. for s in sources):
  476. at_cache[(dso,addr)] = None
  477. continue
  478. else:
  479. # default to only cwd
  480. if not everything and not os.path.commonpath([
  481. os.getcwd(),
  482. os.path.abspath(file)]) == os.getcwd():
  483. at_cache[(dso,addr)] = None
  484. continue
  485. # simplify path
  486. if os.path.commonpath([
  487. os.getcwd(),
  488. os.path.abspath(file)]) == os.getcwd():
  489. file = os.path.relpath(file)
  490. else:
  491. file = os.path.abspath(file)
  492. at_cache[(dso,addr)] = file, line
  493. else:
  494. file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
  495. last_stack.append((file, sym, line))
  496. # stop propogating?
  497. if propagate and len(last_stack) >= propagate:
  498. commit()
  499. last_filtered = False
  500. if last_filtered:
  501. commit()
  502. proc.wait()
  503. if proc.returncode != 0:
  504. if not args.get('verbose'):
  505. for line in proc.stderr:
  506. sys.stdout.write(line)
  507. sys.exit(-1)
  508. # rearrange results into result type
  509. def to_results(results):
  510. results_ = []
  511. for name, (r, children) in results.items():
  512. results_.append(PerfResult(*name,
  513. **{events[k]: v for k, v in r.items()},
  514. children=to_results(children)))
  515. return results_
  516. return to_results(results)
  517. def collect_job(path, i, **args):
  518. # decompress into a temporary file, this is to work around
  519. # some limitations of perf
  520. with zipfile.ZipFile(path) as z:
  521. with z.open(i) as f:
  522. with tempfile.NamedTemporaryFile('wb') as g:
  523. shutil.copyfileobj(f, g)
  524. g.flush()
  525. return collect_decompressed(g.name, **args)
  526. def starapply(args):
  527. f, args, kwargs = args
  528. return f(*args, **kwargs)
  529. def collect(perf_paths, *,
  530. jobs=None,
  531. **args):
  532. # automatic job detection?
  533. if jobs == 0:
  534. jobs = len(os.sched_getaffinity(0))
  535. records = []
  536. for path in perf_paths:
  537. # each .perf file is actually a zip file containing perf files from
  538. # multiple runs
  539. with zipfile.ZipFile(path) as z:
  540. records.extend((path, i) for i in z.infolist())
  541. # we're dealing with a lot of data but also surprisingly
  542. # parallelizable
  543. if jobs is not None:
  544. results = []
  545. with mp.Pool(jobs) as p:
  546. for results_ in p.imap_unordered(
  547. starapply,
  548. ((collect_job, (path, i), args) for path, i in records)):
  549. results.extend(results_)
  550. else:
  551. results = []
  552. for path, i in records:
  553. results.extend(collect_job(path, i, **args))
  554. return results
  555. def fold(Result, results, *,
  556. by=None,
  557. defines=None,
  558. **_):
  559. if by is None:
  560. by = Result._by
  561. for k in it.chain(by or [], (k for k, _ in defines or [])):
  562. if k not in Result._by and k not in Result._fields:
  563. print("error: could not find field %r?" % k)
  564. sys.exit(-1)
  565. # filter by matching defines
  566. if defines is not None:
  567. results_ = []
  568. for r in results:
  569. if all(getattr(r, k) in vs for k, vs in defines):
  570. results_.append(r)
  571. results = results_
  572. # organize results into conflicts
  573. folding = co.OrderedDict()
  574. for r in results:
  575. name = tuple(getattr(r, k) for k in by)
  576. if name not in folding:
  577. folding[name] = []
  578. folding[name].append(r)
  579. # merge conflicts
  580. folded = []
  581. for name, rs in folding.items():
  582. folded.append(sum(rs[1:], start=rs[0]))
  583. # fold recursively
  584. folded_ = []
  585. for r in folded:
  586. folded_.append(r._replace(children=fold(
  587. Result, r.children,
  588. by=by,
  589. defines=defines)))
  590. folded = folded_
  591. return folded
def table(Result, results, diff_results=None, *,
        by=None,
        fields=None,
        sort=None,
        summary=False,
        all=False,
        percent=False,
        depth=1,
        **_):
    """Print results as a table, optionally diffed against diff_results.

    - by/fields default to Result._by/Result._fields
    - sort is a list of (field, reverse) pairs, applied in order
    - summary prints only the TOTAL row
    - all keeps rows with no diff, percent shows only the ratio column
    - depth > 1 also prints children as an indented call tree
    """
    # the all parameter shadows the builtin, recover the builtin here
    # NOTE(review): __builtins__ is a dict when this module is imported
    # rather than executed as a script -- confirm this only runs as a script
    all_, all = all, __builtins__.all
    if by is None:
        by = Result._by
    if fields is None:
        fields = Result._fields
    types = Result._types

    # fold again so results/diff_results are merged on the same key
    results = fold(Result, results, by=by)
    if diff_results is not None:
        diff_results = fold(Result, diff_results, by=by)

    # organize by comma-joined name key
    table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in results}
    diff_table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in diff_results or []}
    names = list(table.keys() | diff_table.keys())

    # sort again, now with diff info, note that python's sort is stable
    names.sort()
    if diff_results is not None:
        # largest regressions first
        names.sort(key=lambda n: tuple(
            types[k].ratio(
                getattr(table.get(n), k, None),
                getattr(diff_table.get(n), k, None))
            for k in fields),
            reverse=True)
    if sort:
        # apply user sorts last (stable sort preserves earlier ordering);
        # field sorts default to descending, name sorts to ascending
        for k, reverse in reversed(sort):
            names.sort(key=lambda n: (getattr(table[n], k),)
                if getattr(table.get(n), k, None) is not None else (),
                reverse=reverse ^ (not k or k in Result._fields))

    # build up our lines
    lines = []

    # header, annotated with added/removed counts when diffing
    header = []
    header.append('%s%s' % (
        ','.join(by),
        ' (%d added, %d removed)' % (
            sum(1 for n in table if n not in diff_table),
            sum(1 for n in diff_table if n not in table))
        if diff_results is not None and not percent else '')
        if not summary else '')
    if diff_results is None:
        for k in fields:
            header.append(k)
    elif percent:
        for k in fields:
            header.append(k)
    else:
        # old/new/diff column triples
        for k in fields:
            header.append('o'+k)
        for k in fields:
            header.append('n'+k)
        for k in fields:
            header.append('d'+k)
    header.append('')
    lines.append(header)

    def table_entry(name, r, diff_r=None, ratios=[]):
        # render one row: name, field columns, trailing ratio annotation
        entry = []
        entry.append(name)
        if diff_results is None:
            for k in fields:
                entry.append(getattr(r, k).table()
                    if getattr(r, k, None) is not None
                    else types[k].none)
        elif percent:
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
        else:
            for k in fields:
                entry.append(getattr(diff_r, k).diff_table()
                    if getattr(diff_r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(types[k].diff_diff(
                    getattr(r, k, None),
                    getattr(diff_r, k, None)))
        if diff_results is None:
            entry.append('')
        elif percent:
            entry.append(' (%s)' % ', '.join(
                '+∞%' if t == +m.inf
                else '-∞%' if t == -m.inf
                else '%+.1f%%' % (100*t)
                for t in ratios))
        else:
            entry.append(' (%s)' % ', '.join(
                    '+∞%' if t == +m.inf
                    else '-∞%' if t == -m.inf
                    else '%+.1f%%' % (100*t)
                    for t in ratios
                    if t)
                if any(ratios) else '')
        return entry

    # entries
    if not summary:
        for name in names:
            r = table.get(name)
            if diff_results is None:
                diff_r = None
                ratios = None
            else:
                diff_r = diff_table.get(name)
                ratios = [
                    types[k].ratio(
                        getattr(r, k, None),
                        getattr(diff_r, k, None))
                    for k in fields]
                # skip unchanged rows unless -a/--all
                if not all_ and not any(ratios):
                    continue
            lines.append(table_entry(name, r, diff_r, ratios))

    # total row, fold with an empty key merges everything
    r = next(iter(fold(Result, results, by=[])), None)
    if diff_results is None:
        diff_r = None
        ratios = None
    else:
        diff_r = next(iter(fold(Result, diff_results, by=[])), None)
        ratios = [
            types[k].ratio(
                getattr(r, k, None),
                getattr(diff_r, k, None))
            for k in fields]
    lines.append(table_entry('TOTAL', r, diff_r, ratios))

    # find the best widths, note that column 0 contains the names and column -1
    # the ratios, so those are handled a bit differently
    widths = [
        ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
        for w, i in zip(
            it.chain([23], it.repeat(7)),
            range(len(lines[0])-1))]

    # adjust the name width based on the expected call depth, though
    # note this doesn't really work with unbounded recursion
    if not summary and not m.isinf(depth):
        widths[0] += 4*(depth-1)

    # print the tree recursively, header first
    print('%-*s %s%s' % (
        widths[0], lines[0][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[0][1:-1])),
        lines[0][-1]))
    if not summary:
        def recurse(results_, depth_, prefixes=('', '', '', '')):
            # rebuild our tables at each layer
            table_ = {
                ','.join(str(getattr(r, k) or '') for k in by): r
                for r in results_}
            names_ = list(table_.keys())

            # sort again at each layer, keep in mind the numbers are
            # changing as we descend
            names_.sort()
            if sort:
                for k, reverse in reversed(sort):
                    names_.sort(key=lambda n: (getattr(table_[n], k),)
                        if getattr(table_.get(n), k, None) is not None else (),
                        reverse=reverse ^ (not k or k in Result._fields))

            for i, name in enumerate(names_):
                r = table_[name]
                is_last = (i == len(names_)-1)

                # prefixes are (entry, last-entry, child, last-child)
                print('%s%-*s %s' % (
                    prefixes[0+is_last],
                    widths[0] - (
                        len(prefixes[0+is_last])
                        if not m.isinf(depth) else 0),
                    name,
                    ' '.join('%*s' % (w, x)
                        for w, x in zip(
                            widths[1:],
                            table_entry(name, r)[1:]))))

                # recurse?
                if depth_ > 1:
                    recurse(
                        r.children,
                        depth_-1,
                        (prefixes[2+is_last] + "|-> ",
                            prefixes[2+is_last] + "'-> ",
                            prefixes[2+is_last] + "| ",
                            prefixes[2+is_last] + " "))

        # we have enough going on with diffing to make the top layer
        # a special case
        # NOTE(review): when rows were skipped above, names and lines can
        # fall out of sync in this zip -- confirm against upstream
        for name, line in zip(names, lines[1:-1]):
            print('%-*s %s%s' % (
                widths[0], line[0],
                ' '.join('%*s' % (w, x)
                    for w, x in zip(widths[1:], line[1:-1])),
                line[-1]))

            if name in table and depth > 1:
                recurse(
                    table[name].children,
                    depth-1,
                    ("|-> ",
                        "'-> ",
                        "| ",
                        " "))
    # total row last
    print('%-*s %s%s' % (
        widths[0], lines[-1][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[-1][1:-1])),
        lines[-1][-1]))
def annotate(Result, results, *,
        annotate=None,
        threshold=None,
        branches=False,
        caches=False,
        **args):
    """Print annotated source files, attaching perf counts to hot lines.

    If annotate is falsy, only spans around lines at or above the lower
    threshold (plus args['context'] lines of context) are shown,
    otherwise whole files are printed. branches/caches select which
    event pair is reported instead of cycles. Hot lines are colored per
    args['color'] using the two thresholds.
    """
    # figure out the threshold pair (t0 low/highlight, t1 high/hot)
    if threshold is None:
        t0, t1 = THRESHOLD
    elif len(threshold) == 1:
        t0, t1 = threshold[0], threshold[0]
    else:
        t0, t1 = threshold
    t0, t1 = min(t0, t1), max(t0, t1)

    # which event drives the thresholds
    if not branches and not caches:
        tk = 'cycles'
    elif branches:
        tk = 'bmisses'
    else:
        tk = 'cmisses'

    # find max cycles, floor of 1 avoids division by zero below
    max_ = max(it.chain((float(getattr(r, tk)) for r in results), [1]))

    for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
        # flatten to line info
        results = fold(Result, results, by=['file', 'line'])
        table = {r.line: r for r in results if r.file == path}

        # calculate spans to show, merging overlapping context windows
        if not annotate:
            spans = []
            last = None
            func = None
            for line, r in sorted(table.items()):
                if float(getattr(r, tk)) / max_ >= t0:
                    if last is not None and line - last.stop <= args['context']:
                        # extend the current span
                        last = range(
                            last.start,
                            line+1+args['context'])
                    else:
                        # start a new span
                        if last is not None:
                            spans.append((last, func))
                        last = range(
                            line-args['context'],
                            line+1+args['context'])
                        func = r.function
            if last is not None:
                spans.append((last, func))

        with open(path) as f:
            skipped = False
            for i, line in enumerate(f):
                # skip lines not in spans?
                if not annotate and not any(i+1 in s for s, _ in spans):
                    skipped = True
                    continue

                if skipped:
                    # emit a diff-style hunk header when resuming
                    # NOTE(review): this always shows the first span's
                    # function, not the current span's -- confirm intended
                    skipped = False
                    print('%s@@ %s:%d: %s @@%s' % (
                        '\x1b[36m' if args['color'] else '',
                        path,
                        i+1,
                        next(iter(f for _, f in spans)),
                        '\x1b[m' if args['color'] else ''))

                # build line, stripping the trailing newline
                if line.endswith('\n'):
                    line = line[:-1]

                r = table.get(i+1)
                if r is not None and (
                        float(r.cycles) > 0
                        if not branches and not caches
                        else float(r.bmisses) > 0 or float(r.branches) > 0
                        if branches
                        else float(r.cmisses) > 0 or float(r.caches) > 0):
                    # append the counts as a trailing comment
                    line = '%-*s // %s' % (
                        args['width'],
                        line,
                        '%s cycles' % r.cycles
                        if not branches and not caches
                        else '%s bmisses, %s branches' % (r.bmisses, r.branches)
                        if branches
                        else '%s cmisses, %s caches' % (r.cmisses, r.caches))

                    if args['color']:
                        if float(getattr(r, tk)) / max_ >= t1:
                            line = '\x1b[1;31m%s\x1b[m' % line
                        elif float(getattr(r, tk)) / max_ >= t0:
                            line = '\x1b[35m%s\x1b[m' % line

                print(line)
def report(perf_paths, *,
        by=None,
        fields=None,
        defines=None,
        sort=None,
        branches=False,
        caches=False,
        **args):
    """Collect, fold, and render perf results.

    Results come either from parsing perf_paths via collect, or from a
    previously written CSV file (args['use']). Results are folded,
    sorted, optionally written back to CSV (args['output']), optionally
    diffed against another CSV (args['diff']), and finally rendered as
    either annotated source or a table.

    perf_paths  - input *.perf files, passed to collect.
    by          - fields to group results by, defaults to ['function']
                  for table output.
    fields      - fields to show, defaults based on branches/caches.
    defines     - field filters, passed to fold.
    sort        - list of (field, reverse) pairs, applied in order.
    branches    - show branch misses/branches instead of cycles.
    caches      - show cache misses/accesses instead of cycles.
    args        - remaining CLI options (color, depth, use, output,
                  diff, quiet, annotate, threshold, ...).
    """
    # figure out what color should be
    if args.get('color') == 'auto':
        args['color'] = sys.stdout.isatty()
    elif args.get('color') == 'always':
        args['color'] = True
    else:
        args['color'] = False

    # depth of 0 == m.inf
    if args.get('depth') == 0:
        args['depth'] = m.inf

    # find sizes
    if not args.get('use', None):
        # parse the perf files directly
        results = collect(perf_paths, **args)
    else:
        # reuse a previously generated CSV; rows missing required fields
        # raise TypeError in the constructor and are skipped
        results = []
        with openio(args['use']) as f:
            reader = csv.DictReader(f, restval='')
            for r in reader:
                try:
                    results.append(PerfResult(
                        **{k: r[k] for k in PerfResult._by
                            if k in r and r[k].strip()},
                        **{k: r['perf_'+k] for k in PerfResult._fields
                            if 'perf_'+k in r and r['perf_'+k].strip()}))
                except TypeError:
                    pass

    # fold
    results = fold(PerfResult, results, by=by, defines=defines)

    # sort, note that python's sort is stable
    results.sort()
    if sort:
        # apply sort keys in reverse so the first key dominates; None
        # values sort via the empty tuple; measurement fields default to
        # descending order, hence the reverse xor
        for k, reverse in reversed(sort):
            results.sort(key=lambda r: (getattr(r, k),)
                    if getattr(r, k) is not None else (),
                reverse=reverse ^ (not k or k in PerfResult._fields))

    # write results to CSV
    if args.get('output'):
        with openio(args['output'], 'w') as f:
            # group-by columns keep their names, measurement columns get
            # a perf_ prefix
            writer = csv.DictWriter(f,
                (by if by is not None else PerfResult._by)
                + ['perf_'+k for k in PerfResult._fields])
            writer.writeheader()
            for r in results:
                writer.writerow(
                    {k: getattr(r, k)
                        for k in (by if by is not None else PerfResult._by)}
                    | {'perf_'+k: getattr(r, k)
                        for k in PerfResult._fields})

    # find previous results?
    if args.get('diff'):
        # a missing diff file is treated as no previous results
        diff_results = []
        try:
            with openio(args['diff']) as f:
                reader = csv.DictReader(f, restval='')
                for r in reader:
                    try:
                        diff_results.append(PerfResult(
                            **{k: r[k] for k in PerfResult._by
                                if k in r and r[k].strip()},
                            **{k: r['perf_'+k] for k in PerfResult._fields
                                if 'perf_'+k in r and r['perf_'+k].strip()}))
                    except TypeError:
                        pass
        except FileNotFoundError:
            pass

        # fold
        diff_results = fold(PerfResult, diff_results, by=by, defines=defines)

    # print table
    if not args.get('quiet'):
        if args.get('annotate') or args.get('threshold'):
            # annotate sources
            annotate(PerfResult, results,
                branches=branches,
                caches=caches,
                **args)
        else:
            # print table
            table(PerfResult, results,
                diff_results if args.get('diff') else None,
                by=by if by is not None else ['function'],
                fields=fields if fields is not None
                    else ['cycles'] if not branches and not caches
                    else ['bmisses', 'branches'] if branches
                    else ['cmisses', 'caches'],
                sort=sort,
                **args)
  986. def main(**args):
  987. if args.get('record'):
  988. return record(**args)
  989. else:
  990. return report(**args)
  991. if __name__ == "__main__":
  992. import argparse
  993. import sys
  994. # bit of a hack, but parse_intermixed_args and REMAINDER are
  995. # incompatible, so we need to figure out what we want before running
  996. # argparse
  997. if '-R' in sys.argv or '--record' in sys.argv:
  998. nargs = argparse.REMAINDER
  999. else:
  1000. nargs = '*'
  1001. argparse.ArgumentParser._handle_conflict_ignore = lambda *_: None
  1002. argparse._ArgumentGroup._handle_conflict_ignore = lambda *_: None
  1003. parser = argparse.ArgumentParser(
  1004. description="Aggregate and report Linux perf results.",
  1005. allow_abbrev=False,
  1006. conflict_handler='ignore')
  1007. parser.add_argument(
  1008. 'perf_paths',
  1009. nargs=nargs,
  1010. help="Input *.perf files.")
  1011. parser.add_argument(
  1012. '-v', '--verbose',
  1013. action='store_true',
  1014. help="Output commands that run behind the scenes.")
  1015. parser.add_argument(
  1016. '-q', '--quiet',
  1017. action='store_true',
  1018. help="Don't show anything, useful with -o.")
  1019. parser.add_argument(
  1020. '-o', '--output',
  1021. help="Specify CSV file to store results.")
  1022. parser.add_argument(
  1023. '-u', '--use',
  1024. help="Don't parse anything, use this CSV file.")
  1025. parser.add_argument(
  1026. '-d', '--diff',
  1027. help="Specify CSV file to diff against.")
  1028. parser.add_argument(
  1029. '-a', '--all',
  1030. action='store_true',
  1031. help="Show all, not just the ones that changed.")
  1032. parser.add_argument(
  1033. '-p', '--percent',
  1034. action='store_true',
  1035. help="Only show percentage change, not a full diff.")
  1036. parser.add_argument(
  1037. '-b', '--by',
  1038. action='append',
  1039. choices=PerfResult._by,
  1040. help="Group by this field.")
  1041. parser.add_argument(
  1042. '-f', '--field',
  1043. dest='fields',
  1044. action='append',
  1045. choices=PerfResult._fields,
  1046. help="Show this field.")
  1047. parser.add_argument(
  1048. '-D', '--define',
  1049. dest='defines',
  1050. action='append',
  1051. type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
  1052. help="Only include results where this field is this value.")
  1053. class AppendSort(argparse.Action):
  1054. def __call__(self, parser, namespace, value, option):
  1055. if namespace.sort is None:
  1056. namespace.sort = []
  1057. namespace.sort.append((value, True if option == '-S' else False))
  1058. parser.add_argument(
  1059. '-s', '--sort',
  1060. action=AppendSort,
  1061. help="Sort by this fields.")
  1062. parser.add_argument(
  1063. '-S', '--reverse-sort',
  1064. action=AppendSort,
  1065. help="Sort by this fields, but backwards.")
  1066. parser.add_argument(
  1067. '-Y', '--summary',
  1068. action='store_true',
  1069. help="Only show the total.")
  1070. parser.add_argument(
  1071. '-F', '--source',
  1072. dest='sources',
  1073. action='append',
  1074. help="Only consider definitions in this file. Defaults to anything "
  1075. "in the current directory.")
  1076. parser.add_argument(
  1077. '--everything',
  1078. action='store_true',
  1079. help="Include builtin and libc specific symbols.")
  1080. parser.add_argument(
  1081. '--branches',
  1082. action='store_true',
  1083. help="Show branches and branch misses.")
  1084. parser.add_argument(
  1085. '--caches',
  1086. action='store_true',
  1087. help="Show cache accesses and cache misses.")
  1088. parser.add_argument(
  1089. '-P', '--propagate',
  1090. type=lambda x: int(x, 0),
  1091. help="Depth to propagate samples up the call-stack. 0 propagates up "
  1092. "to the entry point, 1 does no propagation. Defaults to 0.")
  1093. parser.add_argument(
  1094. '-Z', '--depth',
  1095. nargs='?',
  1096. type=lambda x: int(x, 0),
  1097. const=0,
  1098. help="Depth of function calls to show. 0 shows all calls but may not "
  1099. "terminate!")
  1100. parser.add_argument(
  1101. '-A', '--annotate',
  1102. action='store_true',
  1103. help="Show source files annotated with coverage info.")
  1104. parser.add_argument(
  1105. '-T', '--threshold',
  1106. nargs='?',
  1107. type=lambda x: tuple(float(x) for x in x.split(',')),
  1108. const=THRESHOLD,
  1109. help="Show lines with samples above this threshold as a percent of "
  1110. "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
  1111. parser.add_argument(
  1112. '-c', '--context',
  1113. type=lambda x: int(x, 0),
  1114. default=3,
  1115. help="Show n additional lines of context. Defaults to 3.")
  1116. parser.add_argument(
  1117. '-W', '--width',
  1118. type=lambda x: int(x, 0),
  1119. default=80,
  1120. help="Assume source is styled with this many columns. Defaults to 80.")
  1121. parser.add_argument(
  1122. '--color',
  1123. choices=['never', 'always', 'auto'],
  1124. default='auto',
  1125. help="When to use terminal colors. Defaults to 'auto'.")
  1126. parser.add_argument(
  1127. '-j', '--jobs',
  1128. nargs='?',
  1129. type=lambda x: int(x, 0),
  1130. const=0,
  1131. help="Number of processes to use. 0 spawns one process per core.")
  1132. parser.add_argument(
  1133. '--perf-tool',
  1134. type=lambda x: x.split(),
  1135. help="Path to the perf tool to use. Defaults to %r." % PERF_TOOL)
  1136. parser.add_argument(
  1137. '--objdump-tool',
  1138. type=lambda x: x.split(),
  1139. default=OBJDUMP_TOOL,
  1140. help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
  1141. # record flags
  1142. record_parser = parser.add_argument_group('record options')
  1143. record_parser.add_argument(
  1144. 'command',
  1145. nargs=nargs,
  1146. help="Command to run.")
  1147. record_parser.add_argument(
  1148. '-R', '--record',
  1149. action='store_true',
  1150. help="Run a command and aggregate perf measurements.")
  1151. record_parser.add_argument(
  1152. '-o', '--output',
  1153. help="Output file. Uses flock to synchronize. This is stored as a "
  1154. "zip-file of multiple perf results.")
  1155. record_parser.add_argument(
  1156. '--perf-freq',
  1157. help="perf sampling frequency. This is passed directly to perf. "
  1158. "Defaults to %r." % PERF_FREQ)
  1159. record_parser.add_argument(
  1160. '--perf-period',
  1161. help="perf sampling period. This is passed directly to perf.")
  1162. record_parser.add_argument(
  1163. '--perf-events',
  1164. help="perf events to record. This is passed directly to perf. "
  1165. "Defaults to %r." % PERF_EVENTS)
  1166. record_parser.add_argument(
  1167. '--perf-tool',
  1168. type=lambda x: x.split(),
  1169. help="Path to the perf tool to use. Defaults to %r." % PERF_TOOL)
  1170. # avoid intermixed/REMAINDER conflict, see above
  1171. if nargs == argparse.REMAINDER:
  1172. args = parser.parse_args()
  1173. else:
  1174. args = parser.parse_intermixed_args()
  1175. # perf_paths/command overlap, so need to do some munging here
  1176. args.command = args.perf_paths
  1177. if args.record:
  1178. if not args.command:
  1179. print('error: no command specified?')
  1180. sys.exit(-1)
  1181. if not args.output:
  1182. print('error: no output file specified?')
  1183. sys.exit(-1)
  1184. sys.exit(main(**{k: v
  1185. for k, v in vars(args).items()
  1186. if v is not None}))