perf.py 44 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302
  1. #!/usr/bin/env python3
  2. #
  3. # Script to aggregate and report Linux perf results.
  4. #
  5. # Example:
  6. # ./scripts/perf.py -R -obench.perf ./runners/bench_runner
  7. # ./scripts/perf.py bench.perf -Flfs.c -Flfs_util.c -Scycles
  8. #
  9. # Copyright (c) 2022, The littlefs authors.
  10. # SPDX-License-Identifier: BSD-3-Clause
  11. #
  12. import bisect
  13. import collections as co
  14. import csv
  15. import errno
  16. import fcntl
  17. import functools as ft
  18. import glob
  19. import itertools as it
  20. import math as m
  21. import multiprocessing as mp
  22. import os
  23. import re
  24. import shlex
  25. import shutil
  26. import subprocess as sp
  27. import tempfile
  28. import zipfile
  29. # TODO support non-zip perf results?
  30. PERF_PATHS = ['*.perf']
  31. PERF_TOOL = ['perf']
  32. PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
  33. PERF_FREQ = 100
  34. OBJDUMP_TOOL = ['objdump']
  35. THRESHOLD = (0.5, 0.85)
  36. # integer fields
  37. class Int(co.namedtuple('Int', 'x')):
  38. __slots__ = ()
  39. def __new__(cls, x=0):
  40. if isinstance(x, Int):
  41. return x
  42. if isinstance(x, str):
  43. try:
  44. x = int(x, 0)
  45. except ValueError:
  46. # also accept +-∞ and +-inf
  47. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  48. x = m.inf
  49. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  50. x = -m.inf
  51. else:
  52. raise
  53. assert isinstance(x, int) or m.isinf(x), x
  54. return super().__new__(cls, x)
  55. def __str__(self):
  56. if self.x == m.inf:
  57. return '∞'
  58. elif self.x == -m.inf:
  59. return '-∞'
  60. else:
  61. return str(self.x)
  62. def __int__(self):
  63. assert not m.isinf(self.x)
  64. return self.x
  65. def __float__(self):
  66. return float(self.x)
  67. none = '%7s' % '-'
  68. def table(self):
  69. return '%7s' % (self,)
  70. diff_none = '%7s' % '-'
  71. diff_table = table
  72. def diff_diff(self, other):
  73. new = self.x if self else 0
  74. old = other.x if other else 0
  75. diff = new - old
  76. if diff == +m.inf:
  77. return '%7s' % '+∞'
  78. elif diff == -m.inf:
  79. return '%7s' % '-∞'
  80. else:
  81. return '%+7d' % diff
  82. def ratio(self, other):
  83. new = self.x if self else 0
  84. old = other.x if other else 0
  85. if m.isinf(new) and m.isinf(old):
  86. return 0.0
  87. elif m.isinf(new):
  88. return +m.inf
  89. elif m.isinf(old):
  90. return -m.inf
  91. elif not old and not new:
  92. return 0.0
  93. elif not old:
  94. return 1.0
  95. else:
  96. return (new-old) / old
  97. def __add__(self, other):
  98. return self.__class__(self.x + other.x)
  99. def __sub__(self, other):
  100. return self.__class__(self.x - other.x)
  101. def __mul__(self, other):
  102. return self.__class__(self.x * other.x)
  103. # perf results
  104. class PerfResult(co.namedtuple('PerfResult', [
  105. 'file', 'function', 'line',
  106. 'cycles', 'bmisses', 'branches', 'cmisses', 'caches',
  107. 'children'])):
  108. _by = ['file', 'function', 'line']
  109. _fields = ['cycles', 'bmisses', 'branches', 'cmisses', 'caches']
  110. _types = {
  111. 'cycles': Int,
  112. 'bmisses': Int, 'branches': Int,
  113. 'cmisses': Int, 'caches': Int}
  114. __slots__ = ()
  115. def __new__(cls, file='', function='', line=0,
  116. cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
  117. children=[]):
  118. return super().__new__(cls, file, function, int(Int(line)),
  119. Int(cycles), Int(bmisses), Int(branches), Int(cmisses), Int(caches),
  120. children)
  121. def __add__(self, other):
  122. return PerfResult(self.file, self.function, self.line,
  123. self.cycles + other.cycles,
  124. self.bmisses + other.bmisses,
  125. self.branches + other.branches,
  126. self.cmisses + other.cmisses,
  127. self.caches + other.caches,
  128. self.children + other.children)
  129. def openio(path, mode='r'):
  130. if path == '-':
  131. if mode == 'r':
  132. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  133. else:
  134. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  135. else:
  136. return open(path, mode)
  137. # run perf as a subprocess, storing measurements into a zip file
  138. def record(command, *,
  139. output=None,
  140. perf_freq=PERF_FREQ,
  141. perf_period=None,
  142. perf_events=PERF_EVENTS,
  143. perf_tool=PERF_TOOL,
  144. **args):
  145. if not command:
  146. print('error: no command specified?')
  147. sys.exit(-1)
  148. if not output:
  149. print('error: no output file specified?')
  150. sys.exit(-1)
  151. # create a temporary file for perf to write to, as far as I can tell
  152. # this is strictly needed because perf's pipe-mode only works with stdout
  153. with tempfile.NamedTemporaryFile('rb') as f:
  154. # figure out our perf invocation
  155. perf = perf_tool + list(filter(None, [
  156. 'record',
  157. '-F%s' % perf_freq
  158. if perf_freq is not None
  159. and perf_period is None else None,
  160. '-c%s' % perf_period
  161. if perf_period is not None else None,
  162. '-B',
  163. '-g',
  164. '--all-user',
  165. '-e%s' % perf_events,
  166. '-o%s' % f.name]))
  167. # run our command
  168. try:
  169. if args.get('verbose'):
  170. print(' '.join(shlex.quote(c) for c in perf + command))
  171. err = sp.call(perf + command, close_fds=False)
  172. except KeyboardInterrupt:
  173. err = errno.EOWNERDEAD
  174. # synchronize access
  175. z = os.open(output, os.O_RDWR | os.O_CREAT)
  176. fcntl.flock(z, fcntl.LOCK_EX)
  177. # copy measurements into our zip file
  178. with os.fdopen(z, 'r+b') as z:
  179. with zipfile.ZipFile(z, 'a',
  180. compression=zipfile.ZIP_DEFLATED,
  181. compresslevel=1) as z:
  182. with z.open('perf.%d' % os.getpid(), 'w') as g:
  183. shutil.copyfileobj(f, g)
  184. # forward the return code
  185. return err
  186. def collect_decompressed(path, *,
  187. perf_tool=PERF_TOOL,
  188. everything=False,
  189. propagate=0,
  190. depth=1,
  191. **args):
  192. sample_pattern = re.compile(
  193. '(?P<comm>\w+)'
  194. '\s+(?P<pid>\w+)'
  195. '\s+(?P<time>[\w.]+):'
  196. '\s*(?P<period>\w+)'
  197. '\s+(?P<event>[^:]+):')
  198. frame_pattern = re.compile(
  199. '\s+(?P<addr>\w+)'
  200. '\s+(?P<sym>[^\s]+)'
  201. '\s+\((?P<dso>[^\)]+)\)')
  202. events = {
  203. 'cycles': 'cycles',
  204. 'branch-misses': 'bmisses',
  205. 'branches': 'branches',
  206. 'cache-misses': 'cmisses',
  207. 'cache-references': 'caches'}
  208. # note perf_tool may contain extra args
  209. cmd = perf_tool + [
  210. 'script',
  211. '-i%s' % path]
  212. if args.get('verbose'):
  213. print(' '.join(shlex.quote(c) for c in cmd))
  214. proc = sp.Popen(cmd,
  215. stdout=sp.PIPE,
  216. stderr=sp.PIPE if not args.get('verbose') else None,
  217. universal_newlines=True,
  218. errors='replace',
  219. close_fds=False)
  220. last_filtered = False
  221. last_event = ''
  222. last_period = 0
  223. last_stack = []
  224. results = {}
  225. def commit():
  226. # tail-recursively propagate measurements
  227. for i in range(len(last_stack)):
  228. results_ = results
  229. for j in reversed(range(i+1)):
  230. if i+1-j > depth:
  231. break
  232. # propagate
  233. name = last_stack[j]
  234. if name not in results_:
  235. results_[name] = (co.defaultdict(lambda: 0), {})
  236. results_[name][0][last_event] += last_period
  237. # recurse
  238. results_ = results_[name][1]
  239. for line in proc.stdout:
  240. # we need to process a lot of data, so wait to use regex as late
  241. # as possible
  242. if not line:
  243. continue
  244. if not line.startswith('\t'):
  245. m = sample_pattern.match(line)
  246. if m:
  247. if last_stack:
  248. commit()
  249. last_event = m.group('event')
  250. last_filtered = last_event in events
  251. last_period = int(m.group('period'), 0)
  252. last_stack = []
  253. elif last_filtered:
  254. m = frame_pattern.match(line)
  255. if m:
  256. # filter out internal/kernel functions
  257. if not everything and (
  258. m.group('sym').startswith('__')
  259. or m.group('dso').startswith('/usr/lib')
  260. or not m.group('sym')[:1].isalpha()):
  261. continue
  262. last_stack.append((
  263. m.group('dso'),
  264. m.group('sym'),
  265. int(m.group('addr'), 16)))
  266. # stop propogating?
  267. if propagate and len(last_stack) >= propagate:
  268. last_filtered = False
  269. if last_stack:
  270. commit()
  271. proc.wait()
  272. if proc.returncode != 0:
  273. if not args.get('verbose'):
  274. for line in proc.stderr:
  275. sys.stdout.write(line)
  276. sys.exit(-1)
  277. # rearrange results into result type
  278. def to_results(results):
  279. results_ = []
  280. for name, (r, children) in results.items():
  281. results_.append(PerfResult(*name,
  282. **{events[k]: v for k, v in r.items()},
  283. children=to_results(children)))
  284. return results_
  285. return to_results(results)
  286. def collect_job(path, i, **args):
  287. # decompress into a temporary file, this is to work around
  288. # some limitations of perf
  289. with zipfile.ZipFile(path) as z:
  290. with z.open(i) as f:
  291. with tempfile.NamedTemporaryFile('wb') as g:
  292. shutil.copyfileobj(f, g)
  293. g.flush()
  294. return collect_decompressed(g.name, **args)
  295. def starapply(args):
  296. f, args, kwargs = args
  297. return f(*args, **kwargs)
  298. def collect(paths, *,
  299. jobs=None,
  300. objdump_tool=None,
  301. sources=None,
  302. everything=False,
  303. **args):
  304. symbol_pattern = re.compile(
  305. '^(?P<addr>[0-9a-fA-F]+)\s.*\s(?P<name>[^\s]+)\s*$')
  306. line_pattern = re.compile(
  307. '^\s+(?:'
  308. # matches dir/file table
  309. '(?P<no>[0-9]+)\s+'
  310. '(?:(?P<dir>[0-9]+)\s+)?'
  311. '.*\s+'
  312. '(?P<path>[^\s]+)'
  313. # matches line opcodes
  314. '|' '\[[^\]]*\]\s+'
  315. '(?:'
  316. '(?P<op_special>Special)'
  317. '|' '(?P<op_copy>Copy)'
  318. '|' '(?P<op_end>End of Sequence)'
  319. '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
  320. '|' 'Line .*?to (?P<op_line>[0-9]+)'
  321. '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
  322. '|' '.' ')*'
  323. ')$', re.IGNORECASE)
  324. records = []
  325. for path in paths:
  326. # each .perf file is actually a zip file containing perf files from
  327. # multiple runs
  328. with zipfile.ZipFile(path) as z:
  329. records.extend((path, i) for i in z.infolist())
  330. # we're dealing with a lot of data but also surprisingly
  331. # parallelizable
  332. dsos = {}
  333. results = []
  334. with mp.Pool(jobs or len(os.sched_getaffinity(0))) as p:
  335. for results_ in p.imap_unordered(
  336. starapply,
  337. ((collect_job, (path, i), dict(
  338. everything=everything,
  339. **args))
  340. for path, i in records)):
  341. # organize by dso
  342. results__ = {}
  343. for r in results_:
  344. if r.file not in results__:
  345. results__[r.file] = []
  346. results__[r.file].append(r)
  347. results_ = results__
  348. for dso, results_ in results_.items():
  349. if dso not in dsos:
  350. # find file+line ranges for dsos
  351. #
  352. # do this here so we only process each dso once
  353. syms = {}
  354. sym_at = []
  355. cmd = objdump_tool + ['-t', dso]
  356. if args.get('verbose'):
  357. print(' '.join(shlex.quote(c) for c in cmd))
  358. proc = sp.Popen(cmd,
  359. stdout=sp.PIPE,
  360. stderr=sp.PIPE if not args.get('verbose') else None,
  361. universal_newlines=True,
  362. errors='replace',
  363. close_fds=False)
  364. for line in proc.stdout:
  365. m = symbol_pattern.match(line)
  366. if m:
  367. name = m.group('name')
  368. addr = int(m.group('addr'), 16)
  369. # note multiple symbols can share a name
  370. if name not in syms:
  371. syms[name] = set()
  372. syms[name].add(addr)
  373. sym_at.append((addr, name))
  374. proc.wait()
  375. if proc.returncode != 0:
  376. if not args.get('verbose'):
  377. for line in proc.stderr:
  378. sys.stdout.write(line)
  379. # assume no debug-info on failure
  380. pass
  381. # sort and keep first when duplicates
  382. sym_at.sort()
  383. sym_at_ = []
  384. for addr, name in sym_at:
  385. if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
  386. sym_at_.append((addr, name))
  387. sym_at = sym_at_
  388. # state machine for dwarf line numbers, note that objdump's
  389. # decodedline seems to have issues with multiple dir/file
  390. # tables, which is why we need this
  391. line_at = []
  392. dirs = {}
  393. files = {}
  394. op_file = 1
  395. op_line = 1
  396. op_addr = 0
  397. cmd = objdump_tool + ['--dwarf=rawline', dso]
  398. if args.get('verbose'):
  399. print(' '.join(shlex.quote(c) for c in cmd))
  400. proc = sp.Popen(cmd,
  401. stdout=sp.PIPE,
  402. stderr=sp.PIPE if not args.get('verbose') else None,
  403. universal_newlines=True,
  404. errors='replace',
  405. close_fds=False)
  406. for line in proc.stdout:
  407. m = line_pattern.match(line)
  408. if m:
  409. if m.group('no') and not m.group('dir'):
  410. # found a directory entry
  411. dirs[int(m.group('no'))] = m.group('path')
  412. elif m.group('no'):
  413. # found a file entry
  414. dir = int(m.group('dir'))
  415. if dir in dirs:
  416. files[int(m.group('no'))] = os.path.join(
  417. dirs[dir],
  418. m.group('path'))
  419. else:
  420. files[int(m.group('no'))] = m.group('path')
  421. else:
  422. # found a state machine update
  423. if m.group('op_file'):
  424. op_file = int(m.group('op_file'), 0)
  425. if m.group('op_line'):
  426. op_line = int(m.group('op_line'), 0)
  427. if m.group('op_addr'):
  428. op_addr = int(m.group('op_addr'), 0)
  429. if (m.group('op_special')
  430. or m.group('op_copy')
  431. or m.group('op_end')):
  432. line_at.append((
  433. op_addr,
  434. files.get(op_file, '?'),
  435. op_line))
  436. if m.group('op_end'):
  437. op_file = 1
  438. op_line = 1
  439. op_addr = 0
  440. proc.wait()
  441. if proc.returncode != 0:
  442. if not args.get('verbose'):
  443. for line in proc.stderr:
  444. sys.stdout.write(line)
  445. # assume no debug-info on failure
  446. pass
  447. # sort and keep first when duplicates
  448. #
  449. # I think dwarf requires this to be sorted but just in case
  450. line_at.sort()
  451. line_at_ = []
  452. for addr, file, line in line_at:
  453. if len(line_at_) == 0 or line_at_[-1][0] != addr:
  454. line_at_.append((addr, file, line))
  455. line_at = line_at_
  456. # discard lines outside of the range of the containing
  457. # function, these are introduced by dwarf for inlined
  458. # functions but don't map to elf-level symbols
  459. sym_at_ = []
  460. for addr, sym in sym_at:
  461. i = bisect.bisect(line_at, addr, key=lambda x: x[0])
  462. if i > 0:
  463. _, file, line = line_at[i-1]
  464. sym_at_.append((file, line, sym))
  465. sym_at_.sort()
  466. line_at_ = []
  467. for addr, file, line in line_at:
  468. # only keep if sym-at-addr and sym-at-line match
  469. i = bisect.bisect(
  470. sym_at, addr, key=lambda x: x[0])
  471. j = bisect.bisect(
  472. sym_at_, (file, line), key=lambda x: (x[0], x[1]))
  473. if i > 0 and j > 0 and (
  474. sym_at[i-1][1] == sym_at_[j-1][2]):
  475. line_at_.append((addr, file, line))
  476. line_at = line_at_
  477. dsos[dso] = (syms, sym_at, line_at)
  478. syms, _, line_at = dsos[dso]
  479. # first try to reverse ASLR
  480. def deltas(r, d):
  481. if '+' in r.function:
  482. sym, off = r.function.split('+', 1)
  483. off = int(off, 0)
  484. else:
  485. sym, off = r.function, 0
  486. addr = r.line - off + d
  487. for addr_ in syms.get(sym, []):
  488. yield addr_ - addr
  489. delta = min(
  490. it.chain.from_iterable(
  491. deltas(r, 0) for r in results_),
  492. key=lambda d: sum(it.chain.from_iterable(
  493. deltas(r, d) for r in results_)),
  494. default=0)
  495. # then try to map addrs -> file+line
  496. #
  497. # note we need to do this recursively
  498. def remap(results):
  499. results_ = []
  500. for r in results:
  501. addr = r.line + delta
  502. i = bisect.bisect(line_at, addr, key=lambda x: x[0])
  503. if i > 0:
  504. _, file, line = line_at[i-1]
  505. else:
  506. file, line = re.sub('(\.o)?$', '.c', r.file, 1), 0
  507. # ignore filtered sources
  508. if sources is not None:
  509. if not any(
  510. os.path.abspath(file) == os.path.abspath(s)
  511. for s in sources):
  512. continue
  513. else:
  514. # default to only cwd
  515. if not everything and not os.path.commonpath([
  516. os.getcwd(),
  517. os.path.abspath(file)]) == os.getcwd():
  518. continue
  519. # simplify path
  520. if os.path.commonpath([
  521. os.getcwd(),
  522. os.path.abspath(file)]) == os.getcwd():
  523. file = os.path.relpath(file)
  524. else:
  525. file = os.path.abspath(file)
  526. function, *_ = r.function.split('+', 1)
  527. results_.append(r._replace(
  528. file=file, function=function, line=line,
  529. children=remap(r.children)))
  530. return results_
  531. results.extend(remap(results_))
  532. return results
  533. def fold(Result, results, *,
  534. by=None,
  535. defines=None,
  536. **_):
  537. if by is None:
  538. by = Result._by
  539. for k in it.chain(by or [], (k for k, _ in defines or [])):
  540. if k not in Result._by and k not in Result._fields:
  541. print("error: could not find field %r?" % k)
  542. sys.exit(-1)
  543. # filter by matching defines
  544. if defines is not None:
  545. results_ = []
  546. for r in results:
  547. if all(getattr(r, k) in vs for k, vs in defines):
  548. results_.append(r)
  549. results = results_
  550. # organize results into conflicts
  551. folding = co.OrderedDict()
  552. for r in results:
  553. name = tuple(getattr(r, k) for k in by)
  554. if name not in folding:
  555. folding[name] = []
  556. folding[name].append(r)
  557. # merge conflicts
  558. folded = []
  559. for name, rs in folding.items():
  560. folded.append(sum(rs[1:], start=rs[0]))
  561. # fold recursively
  562. folded_ = []
  563. for r in folded:
  564. folded_.append(r._replace(children=fold(
  565. Result, r.children,
  566. by=by,
  567. defines=defines)))
  568. folded = folded_
  569. return folded
  570. def table(Result, results, diff_results=None, *,
  571. by=None,
  572. fields=None,
  573. sort=None,
  574. summary=False,
  575. all=False,
  576. percent=False,
  577. depth=1,
  578. **_):
  579. all_, all = all, __builtins__.all
  580. if by is None:
  581. by = Result._by
  582. if fields is None:
  583. fields = Result._fields
  584. types = Result._types
  585. # fold again
  586. results = fold(Result, results, by=by)
  587. if diff_results is not None:
  588. diff_results = fold(Result, diff_results, by=by)
  589. # organize by name
  590. table = {
  591. ','.join(str(getattr(r, k) or '') for k in by): r
  592. for r in results}
  593. diff_table = {
  594. ','.join(str(getattr(r, k) or '') for k in by): r
  595. for r in diff_results or []}
  596. names = list(table.keys() | diff_table.keys())
  597. # sort again, now with diff info, note that python's sort is stable
  598. names.sort()
  599. if diff_results is not None:
  600. names.sort(key=lambda n: tuple(
  601. types[k].ratio(
  602. getattr(table.get(n), k, None),
  603. getattr(diff_table.get(n), k, None))
  604. for k in fields),
  605. reverse=True)
  606. if sort:
  607. for k, reverse in reversed(sort):
  608. names.sort(key=lambda n: (getattr(table[n], k),)
  609. if getattr(table.get(n), k, None) is not None else (),
  610. reverse=reverse ^ (not k or k in Result._fields))
  611. # build up our lines
  612. lines = []
  613. # header
  614. header = []
  615. header.append('%s%s' % (
  616. ','.join(by),
  617. ' (%d added, %d removed)' % (
  618. sum(1 for n in table if n not in diff_table),
  619. sum(1 for n in diff_table if n not in table))
  620. if diff_results is not None and not percent else '')
  621. if not summary else '')
  622. if diff_results is None:
  623. for k in fields:
  624. header.append(k)
  625. elif percent:
  626. for k in fields:
  627. header.append(k)
  628. else:
  629. for k in fields:
  630. header.append('o'+k)
  631. for k in fields:
  632. header.append('n'+k)
  633. for k in fields:
  634. header.append('d'+k)
  635. header.append('')
  636. lines.append(header)
  637. def table_entry(name, r, diff_r=None, ratios=[]):
  638. entry = []
  639. entry.append(name)
  640. if diff_results is None:
  641. for k in fields:
  642. entry.append(getattr(r, k).table()
  643. if getattr(r, k, None) is not None
  644. else types[k].none)
  645. elif percent:
  646. for k in fields:
  647. entry.append(getattr(r, k).diff_table()
  648. if getattr(r, k, None) is not None
  649. else types[k].diff_none)
  650. else:
  651. for k in fields:
  652. entry.append(getattr(diff_r, k).diff_table()
  653. if getattr(diff_r, k, None) is not None
  654. else types[k].diff_none)
  655. for k in fields:
  656. entry.append(getattr(r, k).diff_table()
  657. if getattr(r, k, None) is not None
  658. else types[k].diff_none)
  659. for k in fields:
  660. entry.append(types[k].diff_diff(
  661. getattr(r, k, None),
  662. getattr(diff_r, k, None)))
  663. if diff_results is None:
  664. entry.append('')
  665. elif percent:
  666. entry.append(' (%s)' % ', '.join(
  667. '+∞%' if t == +m.inf
  668. else '-∞%' if t == -m.inf
  669. else '%+.1f%%' % (100*t)
  670. for t in ratios))
  671. else:
  672. entry.append(' (%s)' % ', '.join(
  673. '+∞%' if t == +m.inf
  674. else '-∞%' if t == -m.inf
  675. else '%+.1f%%' % (100*t)
  676. for t in ratios
  677. if t)
  678. if any(ratios) else '')
  679. return entry
  680. # entries
  681. if not summary:
  682. for name in names:
  683. r = table.get(name)
  684. if diff_results is None:
  685. diff_r = None
  686. ratios = None
  687. else:
  688. diff_r = diff_table.get(name)
  689. ratios = [
  690. types[k].ratio(
  691. getattr(r, k, None),
  692. getattr(diff_r, k, None))
  693. for k in fields]
  694. if not all_ and not any(ratios):
  695. continue
  696. lines.append(table_entry(name, r, diff_r, ratios))
  697. # total
  698. r = next(iter(fold(Result, results, by=[])), None)
  699. if diff_results is None:
  700. diff_r = None
  701. ratios = None
  702. else:
  703. diff_r = next(iter(fold(Result, diff_results, by=[])), None)
  704. ratios = [
  705. types[k].ratio(
  706. getattr(r, k, None),
  707. getattr(diff_r, k, None))
  708. for k in fields]
  709. lines.append(table_entry('TOTAL', r, diff_r, ratios))
  710. # find the best widths, note that column 0 contains the names and column -1
  711. # the ratios, so those are handled a bit differently
  712. widths = [
  713. ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
  714. for w, i in zip(
  715. it.chain([23], it.repeat(7)),
  716. range(len(lines[0])-1))]
  717. # adjust the name width based on the expected call depth, though
  718. # note this doesn't really work with unbounded recursion
  719. if not summary and not m.isinf(depth):
  720. widths[0] += 4*(depth-1)
  721. # print the tree recursively
  722. print('%-*s %s%s' % (
  723. widths[0], lines[0][0],
  724. ' '.join('%*s' % (w, x)
  725. for w, x in zip(widths[1:], lines[0][1:-1])),
  726. lines[0][-1]))
  727. if not summary:
  728. def recurse(results_, depth_, prefixes=('', '', '', '')):
  729. # rebuild our tables at each layer
  730. table_ = {
  731. ','.join(str(getattr(r, k) or '') for k in by): r
  732. for r in results_}
  733. names_ = list(table_.keys())
  734. # sort again at each layer, keep in mind the numbers are
  735. # changing as we descend
  736. names_.sort()
  737. if sort:
  738. for k, reverse in reversed(sort):
  739. names_.sort(key=lambda n: (getattr(table_[n], k),)
  740. if getattr(table_.get(n), k, None) is not None else (),
  741. reverse=reverse ^ (not k or k in Result._fields))
  742. for i, name in enumerate(names_):
  743. r = table_[name]
  744. is_last = (i == len(names_)-1)
  745. print('%s%-*s %s' % (
  746. prefixes[0+is_last],
  747. widths[0] - (
  748. len(prefixes[0+is_last])
  749. if not m.isinf(depth) else 0),
  750. name,
  751. ' '.join('%*s' % (w, x)
  752. for w, x in zip(
  753. widths[1:],
  754. table_entry(name, r)[1:]))))
  755. # recurse?
  756. if depth_ > 1:
  757. recurse(
  758. r.children,
  759. depth_-1,
  760. (prefixes[2+is_last] + "|-> ",
  761. prefixes[2+is_last] + "'-> ",
  762. prefixes[2+is_last] + "| ",
  763. prefixes[2+is_last] + " "))
  764. # we have enough going on with diffing to make the top layer
  765. # a special case
  766. for name, line in zip(names, lines[1:-1]):
  767. print('%-*s %s%s' % (
  768. widths[0], line[0],
  769. ' '.join('%*s' % (w, x)
  770. for w, x in zip(widths[1:], line[1:-1])),
  771. line[-1]))
  772. if name in table and depth > 1:
  773. recurse(
  774. table[name].children,
  775. depth-1,
  776. ("|-> ",
  777. "'-> ",
  778. "| ",
  779. " "))
  780. print('%-*s %s%s' % (
  781. widths[0], lines[-1][0],
  782. ' '.join('%*s' % (w, x)
  783. for w, x in zip(widths[1:], lines[-1][1:-1])),
  784. lines[-1][-1]))
  785. def annotate(Result, results, *,
  786. annotate=None,
  787. threshold=None,
  788. branches=False,
  789. caches=False,
  790. **args):
  791. # figure out the threshold
  792. if threshold is None:
  793. t0, t1 = THRESHOLD
  794. elif len(threshold) == 1:
  795. t0, t1 = threshold[0], threshold[0]
  796. else:
  797. t0, t1 = threshold
  798. t0, t1 = min(t0, t1), max(t0, t1)
  799. if not branches and not caches:
  800. tk = 'cycles'
  801. elif branches:
  802. tk = 'bmisses'
  803. else:
  804. tk = 'cmisses'
  805. # find max cycles
  806. max_ = max(it.chain((float(getattr(r, tk)) for r in results), [1]))
  807. for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
  808. # flatten to line info
  809. results = fold(Result, results, by=['file', 'line'])
  810. table = {r.line: r for r in results if r.file == path}
  811. # calculate spans to show
  812. if not annotate:
  813. spans = []
  814. last = None
  815. func = None
  816. for line, r in sorted(table.items()):
  817. if float(getattr(r, tk)) / max_ >= t0:
  818. if last is not None and line - last.stop <= args['context']:
  819. last = range(
  820. last.start,
  821. line+1+args['context'])
  822. else:
  823. if last is not None:
  824. spans.append((last, func))
  825. last = range(
  826. line-args['context'],
  827. line+1+args['context'])
  828. func = r.function
  829. if last is not None:
  830. spans.append((last, func))
  831. with open(path) as f:
  832. skipped = False
  833. for i, line in enumerate(f):
  834. # skip lines not in spans?
  835. if not annotate and not any(i+1 in s for s, _ in spans):
  836. skipped = True
  837. continue
  838. if skipped:
  839. skipped = False
  840. print('%s@@ %s:%d: %s @@%s' % (
  841. '\x1b[36m' if args['color'] else '',
  842. path,
  843. i+1,
  844. next(iter(f for _, f in spans)),
  845. '\x1b[m' if args['color'] else ''))
  846. # build line
  847. if line.endswith('\n'):
  848. line = line[:-1]
  849. r = table.get(i+1)
  850. if r is not None and (
  851. float(r.cycles) > 0
  852. if not branches and not caches
  853. else float(r.bmisses) > 0 or float(r.branches) > 0
  854. if branches
  855. else float(r.cmisses) > 0 or float(r.caches) > 0):
  856. line = '%-*s // %s' % (
  857. args['width'],
  858. line,
  859. '%s cycles' % r.cycles
  860. if not branches and not caches
  861. else '%s bmisses, %s branches' % (r.bmisses, r.branches)
  862. if branches
  863. else '%s cmisses, %s caches' % (r.cmisses, r.caches))
  864. if args['color']:
  865. if float(getattr(r, tk)) / max_ >= t1:
  866. line = '\x1b[1;31m%s\x1b[m' % line
  867. elif float(getattr(r, tk)) / max_ >= t0:
  868. line = '\x1b[35m%s\x1b[m' % line
  869. print(line)
  870. def report(perf_paths, *,
  871. by=None,
  872. fields=None,
  873. defines=None,
  874. sort=None,
  875. self=False,
  876. branches=False,
  877. caches=False,
  878. **args):
  879. # figure out what color should be
  880. if args.get('color') == 'auto':
  881. args['color'] = sys.stdout.isatty()
  882. elif args.get('color') == 'always':
  883. args['color'] = True
  884. else:
  885. args['color'] = False
  886. # depth of 0 == m.inf
  887. if args.get('depth') == 0:
  888. args['depth'] = m.inf
  889. # find sizes
  890. if not args.get('use', None):
  891. # find .o files
  892. paths = []
  893. for path in perf_paths:
  894. if os.path.isdir(path):
  895. path = path + '/*.perf'
  896. for path in glob.glob(path):
  897. paths.append(path)
  898. if not paths:
  899. print("error: no .perf files found in %r?" % perf_paths)
  900. sys.exit(-1)
  901. results = collect(paths, **args)
  902. else:
  903. results = []
  904. with openio(args['use']) as f:
  905. reader = csv.DictReader(f, restval='')
  906. for r in reader:
  907. try:
  908. results.append(PerfResult(
  909. **{k: r[k] for k in PerfResult._by
  910. if k in r and r[k].strip()},
  911. **{k: r['perf_'+k] for k in PerfResult._fields
  912. if 'perf_'+k in r and r['perf_'+k].strip()}))
  913. except TypeError:
  914. pass
  915. # fold
  916. results = fold(PerfResult, results, by=by, defines=defines)
  917. # sort, note that python's sort is stable
  918. results.sort()
  919. if sort:
  920. for k, reverse in reversed(sort):
  921. results.sort(key=lambda r: (getattr(r, k),)
  922. if getattr(r, k) is not None else (),
  923. reverse=reverse ^ (not k or k in PerfResult._fields))
  924. # write results to CSV
  925. if args.get('output'):
  926. with openio(args['output'], 'w') as f:
  927. writer = csv.DictWriter(f,
  928. (by if by is not None else PerfResult._by)
  929. + ['perf_'+k for k in PerfResult._fields])
  930. writer.writeheader()
  931. for r in results:
  932. writer.writerow(
  933. {k: getattr(r, k)
  934. for k in (by if by is not None else PerfResult._by)}
  935. | {'perf_'+k: getattr(r, k)
  936. for k in PerfResult._fields})
  937. # find previous results?
  938. if args.get('diff'):
  939. diff_results = []
  940. try:
  941. with openio(args['diff']) as f:
  942. reader = csv.DictReader(f, restval='')
  943. for r in reader:
  944. try:
  945. diff_results.append(PerfResult(
  946. **{k: r[k] for k in PerfResult._by
  947. if k in r and r[k].strip()},
  948. **{k: r['perf_'+k] for k in PerfResult._fields
  949. if 'perf_'+k in r and r['perf_'+k].strip()}))
  950. except TypeError:
  951. pass
  952. except FileNotFoundError:
  953. pass
  954. # fold
  955. diff_results = fold(PerfResult, diff_results, by=by, defines=defines)
  956. # print table
  957. if not args.get('quiet'):
  958. if args.get('annotate') or args.get('threshold'):
  959. # annotate sources
  960. annotate(PerfResult, results,
  961. branches=branches,
  962. caches=caches,
  963. **args)
  964. else:
  965. # print table
  966. table(PerfResult, results,
  967. diff_results if args.get('diff') else None,
  968. by=by if by is not None else ['function'],
  969. fields=fields if fields is not None
  970. else ['cycles'] if not branches and not caches
  971. else ['bmisses', 'branches'] if branches
  972. else ['cmisses', 'caches'],
  973. sort=sort,
  974. **args)
  975. def main(**args):
  976. if args.get('record'):
  977. return record(**args)
  978. else:
  979. return report(**args)
  980. if __name__ == "__main__":
  981. import argparse
  982. import sys
  983. # bit of a hack, but parse_intermixed_args and REMAINDER are
  984. # incompatible, so we need to figure out what we want before running
  985. # argparse
  986. if '-R' in sys.argv or '--record' in sys.argv:
  987. nargs = argparse.REMAINDER
  988. else:
  989. nargs = '*'
  990. argparse.ArgumentParser._handle_conflict_ignore = lambda *_: None
  991. argparse._ArgumentGroup._handle_conflict_ignore = lambda *_: None
  992. parser = argparse.ArgumentParser(
  993. description="Aggregate and report Linux perf results.",
  994. allow_abbrev=False,
  995. conflict_handler='ignore')
  996. parser.add_argument(
  997. 'perf_paths',
  998. nargs=nargs,
  999. help="Description of where to find *.perf files. May be a directory "
  1000. "or a list of paths. Defaults to %r." % PERF_PATHS)
  1001. parser.add_argument(
  1002. '-v', '--verbose',
  1003. action='store_true',
  1004. help="Output commands that run behind the scenes.")
  1005. parser.add_argument(
  1006. '-q', '--quiet',
  1007. action='store_true',
  1008. help="Don't show anything, useful with -o.")
  1009. parser.add_argument(
  1010. '-o', '--output',
  1011. help="Specify CSV file to store results.")
  1012. parser.add_argument(
  1013. '-u', '--use',
  1014. help="Don't parse anything, use this CSV file.")
  1015. parser.add_argument(
  1016. '-d', '--diff',
  1017. help="Specify CSV file to diff against.")
  1018. parser.add_argument(
  1019. '-a', '--all',
  1020. action='store_true',
  1021. help="Show all, not just the ones that changed.")
  1022. parser.add_argument(
  1023. '-p', '--percent',
  1024. action='store_true',
  1025. help="Only show percentage change, not a full diff.")
  1026. parser.add_argument(
  1027. '-b', '--by',
  1028. action='append',
  1029. choices=PerfResult._by,
  1030. help="Group by this field.")
  1031. parser.add_argument(
  1032. '-f', '--field',
  1033. dest='fields',
  1034. action='append',
  1035. choices=PerfResult._fields,
  1036. help="Show this field.")
  1037. parser.add_argument(
  1038. '-D', '--define',
  1039. dest='defines',
  1040. action='append',
  1041. type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
  1042. help="Only include results where this field is this value.")
  1043. class AppendSort(argparse.Action):
  1044. def __call__(self, parser, namespace, value, option):
  1045. if namespace.sort is None:
  1046. namespace.sort = []
  1047. namespace.sort.append((value, True if option == '-S' else False))
  1048. parser.add_argument(
  1049. '-s', '--sort',
  1050. action=AppendSort,
  1051. help="Sort by this fields.")
  1052. parser.add_argument(
  1053. '-S', '--reverse-sort',
  1054. action=AppendSort,
  1055. help="Sort by this fields, but backwards.")
  1056. parser.add_argument(
  1057. '-Y', '--summary',
  1058. action='store_true',
  1059. help="Only show the total.")
  1060. parser.add_argument(
  1061. '-F', '--source',
  1062. dest='sources',
  1063. action='append',
  1064. help="Only consider definitions in this file. Defaults to anything "
  1065. "in the current directory.")
  1066. parser.add_argument(
  1067. '--everything',
  1068. action='store_true',
  1069. help="Include builtin and libc specific symbols.")
  1070. parser.add_argument(
  1071. '--branches',
  1072. action='store_true',
  1073. help="Show branches and branch misses.")
  1074. parser.add_argument(
  1075. '--caches',
  1076. action='store_true',
  1077. help="Show cache accesses and cache misses.")
  1078. parser.add_argument(
  1079. '-P', '--propagate',
  1080. type=lambda x: int(x, 0),
  1081. help="Depth to propagate samples up the call-stack. 0 propagates up "
  1082. "to the entry point, 1 does no propagation. Defaults to 0.")
  1083. parser.add_argument(
  1084. '-Z', '--depth',
  1085. nargs='?',
  1086. type=lambda x: int(x, 0),
  1087. const=0,
  1088. help="Depth of function calls to show. 0 shows all calls but may not "
  1089. "terminate!")
  1090. parser.add_argument(
  1091. '-A', '--annotate',
  1092. action='store_true',
  1093. help="Show source files annotated with coverage info.")
  1094. parser.add_argument(
  1095. '-T', '--threshold',
  1096. nargs='?',
  1097. type=lambda x: tuple(float(x) for x in x.split(',')),
  1098. const=THRESHOLD,
  1099. help="Show lines wth samples above this threshold as a percent of "
  1100. "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
  1101. parser.add_argument(
  1102. '-c', '--context',
  1103. type=lambda x: int(x, 0),
  1104. default=3,
  1105. help="Show n additional lines of context. Defaults to 3.")
  1106. parser.add_argument(
  1107. '-W', '--width',
  1108. type=lambda x: int(x, 0),
  1109. default=80,
  1110. help="Assume source is styled with this many columns. Defaults to 80.")
  1111. parser.add_argument(
  1112. '--color',
  1113. choices=['never', 'always', 'auto'],
  1114. default='auto',
  1115. help="When to use terminal colors. Defaults to 'auto'.")
  1116. parser.add_argument(
  1117. '-j', '--jobs',
  1118. nargs='?',
  1119. type=lambda x: int(x, 0),
  1120. const=0,
  1121. help="Number of processes to use. 0 spawns one process per core.")
  1122. parser.add_argument(
  1123. '--perf-tool',
  1124. type=lambda x: x.split(),
  1125. help="Path to the perf tool to use. Defaults to %r." % PERF_TOOL)
  1126. parser.add_argument(
  1127. '--objdump-tool',
  1128. type=lambda x: x.split(),
  1129. default=OBJDUMP_TOOL,
  1130. help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
  1131. # record flags
  1132. record_parser = parser.add_argument_group('record options')
  1133. record_parser.add_argument(
  1134. 'command',
  1135. nargs=nargs,
  1136. help="Command to run.")
  1137. record_parser.add_argument(
  1138. '-R', '--record',
  1139. action='store_true',
  1140. help="Run a command and aggregate perf measurements.")
  1141. record_parser.add_argument(
  1142. '-o', '--output',
  1143. help="Output file. Uses flock to synchronize. This is stored as a "
  1144. "zip-file of multiple perf results.")
  1145. record_parser.add_argument(
  1146. '--perf-freq',
  1147. help="perf sampling frequency. This is passed directly to perf. "
  1148. "Defaults to %r." % PERF_FREQ)
  1149. record_parser.add_argument(
  1150. '--perf-period',
  1151. help="perf sampling period. This is passed directly to perf.")
  1152. record_parser.add_argument(
  1153. '--perf-events',
  1154. help="perf events to record. This is passed directly to perf. "
  1155. "Defaults to %r." % PERF_EVENTS)
  1156. record_parser.add_argument(
  1157. '--perf-tool',
  1158. type=lambda x: x.split(),
  1159. help="Path to the perf tool to use. Defaults to %r." % PERF_TOOL)
  1160. # avoid intermixed/REMAINDER conflict, see above
  1161. if nargs == argparse.REMAINDER:
  1162. args = parser.parse_args()
  1163. else:
  1164. args = parser.parse_intermixed_args()
  1165. # perf_paths/command overlap, so need to do some munging here
  1166. args.command = args.perf_paths
  1167. args.perf_paths = args.perf_paths or PERF_PATHS
  1168. sys.exit(main(**{k: v
  1169. for k, v in vars(args).items()
  1170. if v is not None}))