#!/usr/bin/env python3
#
# Script to aggregate and report Linux perf results.
#
# Example:
# ./scripts/perf.py -R -obench.perf ./runners/bench_runner
# ./scripts/perf.py bench.perf -Flfs.c -Flfs_util.c -Scycles
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#
import bisect
import collections as co
import csv
import errno
import fcntl
import functools as ft
import glob
import itertools as it
import math as m
import multiprocessing as mp
import os
import re
import shlex
import shutil
import subprocess as sp
import sys
import tempfile
import zipfile

PERF_PATHS = ['*.perf']
PERF_TOOL = ['perf']
PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
PERF_FREQ = 100
OBJDUMP_TOOL = ['objdump']
THRESHOLD = (0.5, 0.85)

# integer fields
class Int(co.namedtuple('Int', 'x')):
    __slots__ = ()
    def __new__(cls, x=0):
        if isinstance(x, Int):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = m.inf
                elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -m.inf
                else:
                    raise
        assert isinstance(x, int) or m.isinf(x), x
        return super().__new__(cls, x)

    def __str__(self):
        if self.x == m.inf:
            return '∞'
        elif self.x == -m.inf:
            return '-∞'
        else:
            return str(self.x)

    def __int__(self):
        assert not m.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    none = '%7s' % '-'
    def table(self):
        return '%7s' % (self,)

    diff_none = '%7s' % '-'
    diff_table = table

    def diff_diff(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        if diff == +m.inf:
            return '%7s' % '+∞'
        elif diff == -m.inf:
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

    def ratio(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        if m.isinf(new) and m.isinf(old):
            return 0.0
        elif m.isinf(new):
            return +m.inf
        elif m.isinf(old):
            return -m.inf
        elif not old and not new:
            return 0.0
        elif not old:
            return 1.0
        else:
            return (new-old) / old

    def __add__(self, other):
        return self.__class__(self.x + other.x)

    def __sub__(self, other):
        return self.__class__(self.x - other.x)

    def __mul__(self, other):
        return self.__class__(self.x * other.x)

# perf results
class PerfResult(co.namedtuple('PerfResult', [
        'file', 'function', 'line',
        'self_cycles',
        'self_bmisses', 'self_branches',
        'self_cmisses', 'self_caches',
        'cycles',
        'bmisses', 'branches',
        'cmisses', 'caches',
        'children', 'parents'])):
    _by = ['file', 'function', 'line']
    _fields = [
        'self_cycles',
        'self_bmisses', 'self_branches',
        'self_cmisses', 'self_caches',
        'cycles',
        'bmisses', 'branches',
        'cmisses', 'caches']
    _types = {
        'self_cycles': Int,
        'self_bmisses': Int, 'self_branches': Int,
        'self_cmisses': Int, 'self_caches': Int,
        'cycles': Int,
        'bmisses': Int, 'branches': Int,
        'cmisses': Int, 'caches': Int}

    __slots__ = ()
    def __new__(cls, file='', function='', line=0,
            self_cycles=0,
            self_bmisses=0, self_branches=0,
            self_cmisses=0, self_caches=0,
            cycles=0,
            bmisses=0, branches=0,
            cmisses=0, caches=0,
            children=set(), parents=set()):
        return super().__new__(cls, file, function, int(Int(line)),
            Int(self_cycles),
            Int(self_bmisses), Int(self_branches),
            Int(self_cmisses), Int(self_caches),
            Int(cycles),
            Int(bmisses), Int(branches),
            Int(cmisses), Int(caches),
            children, parents)

    def __add__(self, other):
        return PerfResult(self.file, self.function, self.line,
            self.self_cycles + other.self_cycles,
            self.self_bmisses + other.self_bmisses,
            self.self_branches + other.self_branches,
            self.self_cmisses + other.self_cmisses,
            self.self_caches + other.self_caches,
            self.cycles + other.cycles,
            self.bmisses + other.bmisses,
            self.branches + other.branches,
            self.cmisses + other.cmisses,
            self.caches + other.caches,
            self.children | other.children,
            self.parents | other.parents)

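# open a file, or stdin/stdout if path is '-'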
def openio(path, mode='r'):
    if path == '-':
        if mode == 'r':
            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
        else:
            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
    else:
        return open(path, mode)

# run perf as a subprocess, storing measurements into a zip file
def record(command, *,
        output=None,
        perf_freq=PERF_FREQ,
        perf_period=None,
        perf_events=PERF_EVENTS,
        perf_tool=PERF_TOOL,
        **args):
    if not command:
        print('error: no command specified?')
        sys.exit(-1)
    if not output:
        print('error: no output file specified?')
        sys.exit(-1)

    # create a temporary file for perf to write to, as far as I can tell
    # this is strictly needed because perf's pipe-mode only works with stdout
    with tempfile.NamedTemporaryFile('rb') as f:
        # figure out our perf invocation
        perf = perf_tool + list(filter(None, [
            'record',
            '-F%s' % perf_freq
                if perf_freq is not None
                and perf_period is None else None,
            '-c%s' % perf_period
                if perf_period is not None else None,
            '-B',
            '-g',
            '--all-user',
            '-e%s' % perf_events,
            '-o%s' % f.name]))

        # run our command
        try:
            if args.get('verbose'):
                print(' '.join(shlex.quote(c) for c in perf + command))
            err = sp.call(perf + command, close_fds=False)
        except KeyboardInterrupt:
            err = errno.EOWNERDEAD

        # synchronize access
        z = os.open(output, os.O_RDWR | os.O_CREAT)
        fcntl.flock(z, fcntl.LOCK_EX)

        # copy measurements into our zip file
        with os.fdopen(z, 'r+b') as z:
            with zipfile.ZipFile(z, 'a',
                    compression=zipfile.ZIP_DEFLATED,
                    compresslevel=1) as z:
                with z.open('perf.%d' % os.getpid(), 'w') as g:
                    shutil.copyfileobj(f, g)

    # forward the return code
    return err

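# parse perf-script output from a decompressed perf file, accumulating
# self/total sample counts per dso+symbol+address for each tracked event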
def collect_decompressed(path, *,
        perf_tool=PERF_TOOL,
        everything=False,
        depth=0,
        **args):
    sample_pattern = re.compile(
        r'(?P<comm>\w+)'
        r'\s+(?P<pid>\w+)'
        r'\s+(?P<time>[\w.]+):'
        r'\s*(?P<period>\w+)'
        r'\s+(?P<event>[^:]+):')
    frame_pattern = re.compile(
        r'\s+(?P<addr>\w+)'
        r'\s+(?P<sym>[^\s]+)'
        r'\s+\((?P<dso>[^\)]+)\)')
    events = {
        'cycles': 'cycles',
        'branch-misses': 'bmisses',
        'branches': 'branches',
        'cache-misses': 'cmisses',
        'cache-references': 'caches'}

    # note perf_tool may contain extra args
    cmd = perf_tool + [
        'script',
        '-i%s' % path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)

    last_filtered = False
    last_has_frame = False
    last_event = ''
    last_period = 0
    results = co.defaultdict(lambda: co.defaultdict(lambda: (0, 0)))
    for line in proc.stdout:
        # we need to process a lot of data, so wait to use regex as late
        # as possible
        if not line:
            continue
        if not line.startswith('\t'):
            m = sample_pattern.match(line)
            if m:
                last_event = m.group('event')
                last_filtered = last_event in events
                last_period = int(m.group('period'), 0)
                last_has_frame = False
        elif last_filtered:
            m = frame_pattern.match(line)
            if m:
                # filter out internal/kernel functions
                if not everything and (
                        m.group('sym').startswith('__')
                        or m.group('dso').startswith('/usr/lib')
                        or not m.group('sym')[:1].isalpha()):
                    continue
                name = (
                    m.group('dso'),
                    m.group('sym'),
                    int(m.group('addr'), 16))
                self, total = results[name][last_event]
                if not last_has_frame:
                    results[name][last_event] = (
                        self + last_period,
                        total + last_period)
                    last_has_frame = True
                else:
                    results[name][last_event] = (
                        self,
                        total + last_period)
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        sys.exit(-1)

    # rearrange results into result type
    results_ = []
    for name, r in results.items():
        results_.append(PerfResult(*name,
            **{'self_'+events[e]: s for e, (s, _) in r.items()},
            **{ events[e]: t for e, (_, t) in r.items()}))
    results = results_

    return results

def collect_job(path, i, **args):
    # decompress into a temporary file, this is to work around
    # some limitations of perf
    with zipfile.ZipFile(path) as z:
        with z.open(i) as f:
            with tempfile.NamedTemporaryFile('wb') as g:
                shutil.copyfileobj(f, g)
                g.flush()

                return collect_decompressed(g.name, **args)

def starapply(args):
    f, args, kwargs = args
    return f(*args, **kwargs)

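# collect samples from a set of .perf files, then use objdump's symbol
# table and dwarf line info to map addresses back to file+line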
def collect(paths, *,
        jobs=None,
        objdump_tool=None,
        sources=None,
        everything=False,
        **args):
    symbol_pattern = re.compile(
        r'^(?P<addr>[0-9a-fA-F]+)\s.*\s(?P<name>[^\s]+)\s*$')
    line_pattern = re.compile(
        r'^\s+(?:'
            # matches dir/file table
            r'(?P<no>[0-9]+)\s+'
                r'(?:(?P<dir>[0-9]+)\s+)?'
                r'.*\s+'
                r'(?P<path>[^\s]+)'
            # matches line opcodes
            '|' r'\[[^\]]*\]\s+'
                '(?:'
                    '(?P<op_special>Special)'
                    '|' '(?P<op_copy>Copy)'
                    '|' '(?P<op_end>End of Sequence)'
                    '|' r'File .*?to (?:entry )?(?P<op_file>\d+)'
                    '|' 'Line .*?to (?P<op_line>[0-9]+)'
                    '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
                    '|' '.' ')*'
        ')$', re.IGNORECASE)

    records = []
    for path in paths:
        # each .perf file is actually a zip file containing perf files from
        # multiple runs
        with zipfile.ZipFile(path) as z:
            records.extend((path, i) for i in z.infolist())

    # we're dealing with a lot of data but also surprisingly
    # parallelizable
    dsos = {}
    results = []
    with mp.Pool(jobs or len(os.sched_getaffinity(0))) as p:
        for results_ in p.imap_unordered(
                starapply,
                ((collect_job, (path, i), dict(
                        everything=everything,
                        **args))
                    for path, i in records)):
            # organize by dso
            results__ = {}
            for r in results_:
                if r.file not in results__:
                    results__[r.file] = []
                results__[r.file].append(r)
            results_ = results__

            for dso, results_ in results_.items():
                if dso not in dsos:
                    # find file+line ranges for dsos
                    #
                    # do this here so we only process each dso once
                    syms = {}
                    sym_at = []
                    cmd = objdump_tool + ['-t', dso]
                    if args.get('verbose'):
                        print(' '.join(shlex.quote(c) for c in cmd))
                    proc = sp.Popen(cmd,
                        stdout=sp.PIPE,
                        stderr=sp.PIPE if not args.get('verbose') else None,
                        universal_newlines=True,
                        errors='replace',
                        close_fds=False)
                    for line in proc.stdout:
                        m = symbol_pattern.match(line)
                        if m:
                            name = m.group('name')
                            addr = int(m.group('addr'), 16)
                            # note multiple symbols can share a name
                            if name not in syms:
                                syms[name] = set()
                            syms[name].add(addr)
                            sym_at.append((addr, name))
                    proc.wait()
                    if proc.returncode != 0:
                        if not args.get('verbose'):
                            for line in proc.stderr:
                                sys.stdout.write(line)
                        # assume no debug-info on failure
                        pass

                    # sort and keep first when duplicates
                    sym_at.sort()
                    sym_at_ = []
                    for addr, name in sym_at:
                        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
                            sym_at_.append((addr, name))
                    sym_at = sym_at_

                    # state machine for dwarf line numbers, note that objdump's
                    # decodedline seems to have issues with multiple dir/file
                    # tables, which is why we need this
                    line_at = []
                    dirs = {}
                    files = {}
                    op_file = 1
                    op_line = 1
                    op_addr = 0
                    cmd = objdump_tool + ['--dwarf=rawline', dso]
                    if args.get('verbose'):
                        print(' '.join(shlex.quote(c) for c in cmd))
                    proc = sp.Popen(cmd,
                        stdout=sp.PIPE,
                        stderr=sp.PIPE if not args.get('verbose') else None,
                        universal_newlines=True,
                        errors='replace',
                        close_fds=False)
                    for line in proc.stdout:
                        m = line_pattern.match(line)
                        if m:
                            if m.group('no') and not m.group('dir'):
                                # found a directory entry
                                dirs[int(m.group('no'))] = m.group('path')
                            elif m.group('no'):
                                # found a file entry
                                dir = int(m.group('dir'))
                                if dir in dirs:
                                    files[int(m.group('no'))] = os.path.join(
                                        dirs[dir],
                                        m.group('path'))
                                else:
                                    files[int(m.group('no'))] = m.group('path')
                            else:
                                # found a state machine update
                                if m.group('op_file'):
                                    op_file = int(m.group('op_file'), 0)
                                if m.group('op_line'):
                                    op_line = int(m.group('op_line'), 0)
                                if m.group('op_addr'):
                                    op_addr = int(m.group('op_addr'), 0)

                                if (m.group('op_special')
                                        or m.group('op_copy')
                                        or m.group('op_end')):
                                    line_at.append((
                                        op_addr,
                                        files.get(op_file, '?'),
                                        op_line))

                                if m.group('op_end'):
                                    op_file = 1
                                    op_line = 1
                                    op_addr = 0
                    proc.wait()
                    if proc.returncode != 0:
                        if not args.get('verbose'):
                            for line in proc.stderr:
                                sys.stdout.write(line)
                        # assume no debug-info on failure
                        pass

                    # sort and keep first when duplicates
                    #
                    # I think dwarf requires this to be sorted but just in case
                    line_at.sort()
                    line_at_ = []
                    for addr, file, line in line_at:
                        if len(line_at_) == 0 or line_at_[-1][0] != addr:
                            line_at_.append((addr, file, line))
                    line_at = line_at_

                    # discard lines outside of the range of the containing
                    # function, these are introduced by dwarf for inlined
                    # functions but don't map to elf-level symbols
                    sym_at_ = []
                    for addr, sym in sym_at:
                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
                        if i > 0:
                            _, file, line = line_at[i-1]
                            sym_at_.append((file, line, sym))
                    sym_at_.sort()

                    line_at_ = []
                    for addr, file, line in line_at:
                        # only keep if sym-at-addr and sym-at-line match
                        i = bisect.bisect(
                            sym_at, addr, key=lambda x: x[0])
                        j = bisect.bisect(
                            sym_at_, (file, line), key=lambda x: (x[0], x[1]))
                        if i > 0 and j > 0 and (
                                sym_at[i-1][1] == sym_at_[j-1][2]):
                            line_at_.append((addr, file, line))
                    line_at = line_at_

                    dsos[dso] = (syms, sym_at, line_at)

                syms, _, line_at = dsos[dso]

                # first try to reverse ASLR
                def deltas(r, d):
                    if '+' in r.function:
                        sym, off = r.function.split('+', 1)
                        off = int(off, 0)
                    else:
                        sym, off = r.function, 0
                    addr = r.line - off + d
                    for addr_ in syms.get(sym, []):
                        yield addr_ - addr

                delta = min(
                    it.chain.from_iterable(
                        deltas(r, 0) for r in results_),
                    key=lambda d: sum(it.chain.from_iterable(
                        deltas(r, d) for r in results_)),
                    default=0)

                # then try to map addrs -> file+line
                for r in results_:
                    addr = r.line + delta
                    i = bisect.bisect(line_at, addr, key=lambda x: x[0])
                    if i > 0:
                        _, file, line = line_at[i-1]
                    else:
                        file, line = re.sub(r'(\.o)?$', '.c', r.file, 1), 0

                    # ignore filtered sources
                    if sources is not None:
                        if not any(
                                os.path.abspath(file) == os.path.abspath(s)
                                for s in sources):
                            continue
                    else:
                        # default to only cwd
                        if not everything and not os.path.commonpath([
                                os.getcwd(),
                                os.path.abspath(file)]) == os.getcwd():
                            continue

                    # simplify path
                    if os.path.commonpath([
                            os.getcwd(),
                            os.path.abspath(file)]) == os.getcwd():
                        file = os.path.relpath(file)
                    else:
                        file = os.path.abspath(file)

                    function, *_ = r.function.split('+', 1)
                    results.append(PerfResult(file, function, line,
                        **{k: getattr(r, k) for k in PerfResult._fields}))

    return results

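# fold results together by the given fields, optionally filtered by defines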
def fold(Result, results, *,
        by=None,
        defines=None,
        **_):
    if by is None:
        by = Result._by

    for k in it.chain(by or [], (k for k, _ in defines or [])):
        if k not in Result._by and k not in Result._fields:
            print("error: could not find field %r?" % k)
            sys.exit(-1)

    # filter by matching defines
    if defines is not None:
        results_ = []
        for r in results:
            if all(getattr(r, k) in vs for k, vs in defines):
                results_.append(r)
        results = results_

    # organize results into conflicts
    folding = co.OrderedDict()
    for r in results:
        name = tuple(getattr(r, k) for k in by)
        if name not in folding:
            folding[name] = []
        folding[name].append(r)

    # merge conflicts
    folded = []
    for name, rs in folding.items():
        folded.append(sum(rs[1:], start=rs[0]))

    return folded

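# render results as a table, optionally diffed against previous results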
def table(Result, results, diff_results=None, *,
        by=None,
        fields=None,
        sort=None,
        summary=False,
        all=False,
        percent=False,
        **_):
    all_, all = all, __builtins__.all

    if by is None:
        by = Result._by
    if fields is None:
        fields = Result._fields
    types = Result._types

    # fold again
    results = fold(Result, results, by=by)
    if diff_results is not None:
        diff_results = fold(Result, diff_results, by=by)

    # organize by name
    table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in results}
    diff_table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in diff_results or []}
    names = list(table.keys() | diff_table.keys())

    # sort again, now with diff info, note that python's sort is stable
    names.sort()
    if diff_results is not None:
        names.sort(key=lambda n: tuple(
                types[k].ratio(
                    getattr(table.get(n), k, None),
                    getattr(diff_table.get(n), k, None))
                for k in fields),
            reverse=True)
    if sort:
        for k, reverse in reversed(sort):
            names.sort(key=lambda n: (getattr(table[n], k),)
                if getattr(table.get(n), k, None) is not None else (),
                reverse=reverse ^ (not k or k in Result._fields))

    # build up our lines
    lines = []

    # header
    line = []
    line.append('%s%s' % (
            ','.join(by),
            ' (%d added, %d removed)' % (
                sum(1 for n in table if n not in diff_table),
                sum(1 for n in diff_table if n not in table))
                if diff_results is not None and not percent else '')
        if not summary else '')
    if diff_results is None:
        for k in fields:
            line.append(k)
    elif percent:
        for k in fields:
            line.append(k)
    else:
        for k in fields:
            line.append('o'+k)
        for k in fields:
            line.append('n'+k)
        for k in fields:
            line.append('d'+k)
    line.append('')
    lines.append(line)

    # entries
    if not summary:
        for name in names:
            r = table.get(name)
            if diff_results is not None:
                diff_r = diff_table.get(name)
                ratios = [
                    types[k].ratio(
                        getattr(r, k, None),
                        getattr(diff_r, k, None))
                    for k in fields]
                if not any(ratios) and not all_:
                    continue
            line = []
            line.append(name)
            if diff_results is None:
                for k in fields:
                    line.append(getattr(r, k).table()
                        if getattr(r, k, None) is not None
                        else types[k].none)
            elif percent:
                for k in fields:
                    line.append(getattr(r, k).diff_table()
                        if getattr(r, k, None) is not None
                        else types[k].diff_none)
            else:
                for k in fields:
                    line.append(getattr(diff_r, k).diff_table()
                        if getattr(diff_r, k, None) is not None
                        else types[k].diff_none)
                for k in fields:
                    line.append(getattr(r, k).diff_table()
                        if getattr(r, k, None) is not None
                        else types[k].diff_none)
                for k in fields:
                    line.append(types[k].diff_diff(
                        getattr(r, k, None),
                        getattr(diff_r, k, None)))
            if diff_results is None:
                line.append('')
            elif percent:
                line.append(' (%s)' % ', '.join(
                    '+∞%' if t == +m.inf
                    else '-∞%' if t == -m.inf
                    else '%+.1f%%' % (100*t)
                    for t in ratios))
            else:
                line.append(' (%s)' % ', '.join(
                        '+∞%' if t == +m.inf
                        else '-∞%' if t == -m.inf
                        else '%+.1f%%' % (100*t)
                        for t in ratios
                        if t)
                    if any(ratios) else '')
            lines.append(line)

    # total
    r = next(iter(fold(Result, results, by=[])), None)
    if diff_results is not None:
        diff_r = next(iter(fold(Result, diff_results, by=[])), None)
        ratios = [
            types[k].ratio(
                getattr(r, k, None),
                getattr(diff_r, k, None))
            for k in fields]

    line = []
    line.append('TOTAL')
    if diff_results is None:
        for k in fields:
            line.append(getattr(r, k).table()
                if getattr(r, k, None) is not None
                else types[k].none)
    elif percent:
        for k in fields:
            line.append(getattr(r, k).diff_table()
                if getattr(r, k, None) is not None
                else types[k].diff_none)
    else:
        for k in fields:
            line.append(getattr(diff_r, k).diff_table()
                if getattr(diff_r, k, None) is not None
                else types[k].diff_none)
        for k in fields:
            line.append(getattr(r, k).diff_table()
                if getattr(r, k, None) is not None
                else types[k].diff_none)
        for k in fields:
            line.append(types[k].diff_diff(
                getattr(r, k, None),
                getattr(diff_r, k, None)))
    if diff_results is None:
        line.append('')
    elif percent:
        line.append(' (%s)' % ', '.join(
            '+∞%' if t == +m.inf
            else '-∞%' if t == -m.inf
            else '%+.1f%%' % (100*t)
            for t in ratios))
    else:
        line.append(' (%s)' % ', '.join(
                '+∞%' if t == +m.inf
                else '-∞%' if t == -m.inf
                else '%+.1f%%' % (100*t)
                for t in ratios
                if t)
            if any(ratios) else '')
    lines.append(line)

    # find the best widths, note that column 0 contains the names and column -1
    # the ratios, so those are handled a bit differently
    widths = [
        ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
        for w, i in zip(
            it.chain([23], it.repeat(7)),
            range(len(lines[0])-1))]

    # print our table
    for line in lines:
        print('%-*s %s%s' % (
            widths[0], line[0],
            ' '.join('%*s' % (w, x)
                for w, x in zip(widths[1:], line[1:-1])),
            line[-1]))

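# annotate source files with sample info, highlighting the hottest lines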
def annotate(Result, results, *,
        annotate=None,
        threshold=None,
        branches=False,
        caches=False,
        **args):
    # figure out the threshold
    if threshold is None:
        t0, t1 = THRESHOLD
    elif len(threshold) == 1:
        t0, t1 = threshold[0], threshold[0]
    else:
        t0, t1 = threshold
    t0, t1 = min(t0, t1), max(t0, t1)

    if not branches and not caches:
        tk = 'self_cycles'
    elif branches:
        tk = 'self_bmisses'
    else:
        tk = 'self_cmisses'

    # find max cycles
    max_ = max(it.chain((float(getattr(r, tk)) for r in results), [1]))

    for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
        # flatten to line info
        results = fold(Result, results, by=['file', 'line'])
        table = {r.line: r for r in results if r.file == path}

        # calculate spans to show
        if not annotate:
            spans = []
            last = None
            func = None
            for line, r in sorted(table.items()):
                if float(getattr(r, tk)) / max_ >= t0:
                    if last is not None and line - last.stop <= args['context']:
                        last = range(
                            last.start,
                            line+1+args['context'])
                    else:
                        if last is not None:
                            spans.append((last, func))
                        last = range(
                            line-args['context'],
                            line+1+args['context'])
                        func = r.function
            if last is not None:
                spans.append((last, func))

        with open(path) as f:
            skipped = False
            for i, line in enumerate(f):
                # skip lines not in spans?
                if not annotate and not any(i+1 in s for s, _ in spans):
                    skipped = True
                    continue

                if skipped:
                    skipped = False
                    print('%s@@ %s:%d: %s @@%s' % (
                        '\x1b[36m' if args['color'] else '',
                        path,
                        i+1,
                        next(iter(f for _, f in spans)),
                        '\x1b[m' if args['color'] else ''))

                # build line
                if line.endswith('\n'):
                    line = line[:-1]

                r = table.get(i+1)
                if r is not None and (
                        float(r.self_cycles) > 0
                        if not branches and not caches
                        else float(r.self_bmisses) > 0
                            or float(r.self_branches) > 0
                        if branches
                        else float(r.self_cmisses) > 0
                            or float(r.self_caches) > 0):
                    line = '%-*s // %s' % (
                        args['width'],
                        line,
                        '%s cycles' % r.self_cycles
                        if not branches and not caches
                        else '%s bmisses, %s branches' % (
                            r.self_bmisses, r.self_branches)
                        if branches
                        else '%s cmisses, %s caches' % (
                            r.self_cmisses, r.self_caches))

                    if args['color']:
                        if float(getattr(r, tk)) / max_ >= t1:
                            line = '\x1b[1;31m%s\x1b[m' % line
                        elif float(getattr(r, tk)) / max_ >= t0:
                            line = '\x1b[35m%s\x1b[m' % line

                print(line)

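# collect/load results, then report them as a table, CSV, or annotated source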
def report(perf_paths, *,
        by=None,
        fields=None,
        defines=None,
        sort=None,
        self=False,
        branches=False,
        caches=False,
        tree=False,
        depth=None,
        **args):
    # figure out what color should be
    if args.get('color') == 'auto':
        args['color'] = sys.stdout.isatty()
    elif args.get('color') == 'always':
        args['color'] = True
    else:
        args['color'] = False

    # it doesn't really make sense to not have a depth with tree,
    # so assume depth=inf if tree by default
    if args.get('depth') is None:
        args['depth'] = m.inf if tree else 1
    elif args.get('depth') == 0:
        args['depth'] = m.inf

    # find results
    if not args.get('use', None):
        # find .perf files
        paths = []
        for path in perf_paths:
            if os.path.isdir(path):
                path = path + '/*.perf'

            for path in glob.glob(path):
                paths.append(path)

        if not paths:
            print("error: no .perf files found in %r?" % perf_paths)
            sys.exit(-1)

        results = collect(paths, **args)
    else:
        results = []
        with openio(args['use']) as f:
            reader = csv.DictReader(f, restval='')
            for r in reader:
                try:
                    results.append(PerfResult(
                        **{k: r[k] for k in PerfResult._by
                            if k in r and r[k].strip()},
                        **{k: r['perf_'+k] for k in PerfResult._fields
                            if 'perf_'+k in r and r['perf_'+k].strip()}))
                except TypeError:
                    pass

    # fold
    results = fold(PerfResult, results, by=by, defines=defines)

    # sort, note that python's sort is stable
    results.sort()
    if sort:
        for k, reverse in reversed(sort):
            results.sort(key=lambda r: (getattr(r, k),)
                if getattr(r, k) is not None else (),
                reverse=reverse ^ (not k or k in PerfResult._fields))

    # write results to CSV
    if args.get('output'):
        with openio(args['output'], 'w') as f:
            writer = csv.DictWriter(f,
                (by if by is not None else PerfResult._by)
                + ['perf_'+k for k in PerfResult._fields])
            writer.writeheader()
            for r in results:
                writer.writerow(
                    {k: getattr(r, k)
                        for k in (by if by is not None else PerfResult._by)}
                    | {'perf_'+k: getattr(r, k)
                        for k in PerfResult._fields})

    # find previous results?
    if args.get('diff'):
        diff_results = []
        try:
            with openio(args['diff']) as f:
                reader = csv.DictReader(f, restval='')
                for r in reader:
                    try:
                        diff_results.append(PerfResult(
                            **{k: r[k] for k in PerfResult._by
                                if k in r and r[k].strip()},
                            **{k: r['perf_'+k] for k in PerfResult._fields
                                if 'perf_'+k in r and r['perf_'+k].strip()}))
                    except TypeError:
                        pass
        except FileNotFoundError:
            pass

        # fold
        diff_results = fold(PerfResult, diff_results, by=by, defines=defines)

    # print table
    if not args.get('quiet'):
        if args.get('annotate') or args.get('threshold'):
            # annotate sources
            annotate(PerfResult, results,
                branches=branches,
                caches=caches,
                **args)
        else:
            # print table
            table(PerfResult, results,
                diff_results if args.get('diff') else None,
                by=by if by is not None else ['function'],
                fields=fields if fields is not None else [
                    'self_'+k if self else k
                    for k in (
                        ['cycles'] if not branches and not caches
                        else ['bmisses', 'branches'] if branches
                        else ['cmisses', 'caches'])],
                sort=sort,
                **args)

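# dispatch to record or report based on the -R/--record flag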
def main(**args):
    if args.get('record'):
        return record(**args)
    else:
        return report(**args)

if __name__ == "__main__":
    import argparse
    import sys

    # bit of a hack, but parse_intermixed_args and REMAINDER are
    # incompatible, so we need to figure out what we want before running
    # argparse
    if '-R' in sys.argv or '--record' in sys.argv:
        nargs = argparse.REMAINDER
    else:
        nargs = '*'

    argparse.ArgumentParser._handle_conflict_ignore = lambda *_: None
    argparse._ArgumentGroup._handle_conflict_ignore = lambda *_: None
    parser = argparse.ArgumentParser(
        description="Aggregate and report Linux perf results.",
        allow_abbrev=False,
        conflict_handler='ignore')
    parser.add_argument(
        'perf_paths',
        nargs=nargs,
        help="Description of where to find *.perf files. May be a directory "
            "or a list of paths. Defaults to %r." % PERF_PATHS)
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument(
        '-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument(
        '-u', '--use',
        help="Don't parse anything, use this CSV file.")
    parser.add_argument(
        '-d', '--diff',
        help="Specify CSV file to diff against.")
    parser.add_argument(
        '-a', '--all',
        action='store_true',
        help="Show all, not just the ones that changed.")
    parser.add_argument(
        '-p', '--percent',
        action='store_true',
        help="Only show percentage change, not a full diff.")
    parser.add_argument(
        '-b', '--by',
        action='append',
        choices=PerfResult._by,
        help="Group by this field.")
    parser.add_argument(
        '-f', '--field',
        dest='fields',
        action='append',
        choices=PerfResult._fields,
        help="Show this field.")
    parser.add_argument(
        '-D', '--define',
        dest='defines',
        action='append',
        type=lambda x: (lambda k, v: (k, set(v.split(','))))(*x.split('=', 1)),
        help="Only include results where this field is this value.")
    class AppendSort(argparse.Action):
        def __call__(self, parser, namespace, value, option):
            if namespace.sort is None:
                namespace.sort = []
            namespace.sort.append((value, True if option == '-S' else False))
    parser.add_argument(
        '-s', '--sort',
        action=AppendSort,
        help="Sort by this field.")
    parser.add_argument(
        '-S', '--reverse-sort',
        action=AppendSort,
        help="Sort by this field, but backwards.")
    parser.add_argument(
        '-Y', '--summary',
        action='store_true',
        help="Only show the total.")
    parser.add_argument(
        '-F', '--source',
        dest='sources',
        action='append',
        help="Only consider results in this file. Defaults to anything "
            "in the current directory.")
    parser.add_argument(
        '--everything',
        action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument(
        '--self',
        action='store_true',
        help="Show samples before propagation up the call-chain.")
    parser.add_argument(
        '--branches',
        action='store_true',
        help="Show branches and branch misses.")
    parser.add_argument(
        '--caches',
        action='store_true',
        help="Show cache accesses and cache misses.")
    parser.add_argument(
        '-A', '--annotate',
        action='store_true',
        help="Show source files annotated with perf info.")
    parser.add_argument(
        '-T', '--threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with samples above this threshold, as a fraction "
            "of the most-sampled line. Defaults to %s."
            % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '-c', '--context',
        type=lambda x: int(x, 0),
        default=3,
        help="Show n additional lines of context. Defaults to 3.")
    parser.add_argument(
        '-W', '--width',
        type=lambda x: int(x, 0),
        default=80,
        help="Assume source is styled with this many columns. Defaults to 80.")
    parser.add_argument(
        '--color',
        choices=['never', 'always', 'auto'],
        default='auto',
        help="When to use terminal colors. Defaults to 'auto'.")
    parser.add_argument(
        '-j', '--jobs',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Number of processes to use. 0 spawns one process per core.")
    parser.add_argument(
        '--perf-tool',
        type=lambda x: x.split(),
        help="Path to the perf tool to use. Defaults to %r." % PERF_TOOL)
    parser.add_argument(
        '--objdump-tool',
        type=lambda x: x.split(),
        default=OBJDUMP_TOOL,
        help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)

    # record flags
    record_parser = parser.add_argument_group('record options')
    record_parser.add_argument(
        'command',
        nargs=nargs,
        help="Command to run.")
    record_parser.add_argument(
        '-R', '--record',
        action='store_true',
        help="Run a command and aggregate perf measurements.")
    record_parser.add_argument(
        '-o', '--output',
        help="Output file. Uses flock to synchronize. This is stored as a "
            "zip-file of multiple perf results.")
    record_parser.add_argument(
        '--perf-freq',
        help="perf sampling frequency. This is passed directly to perf. "
            "Defaults to %r." % PERF_FREQ)
    record_parser.add_argument(
        '--perf-period',
        help="perf sampling period. This is passed directly to perf.")
    record_parser.add_argument(
        '--perf-events',
        help="perf events to record. This is passed directly to perf. "
            "Defaults to %r." % PERF_EVENTS)
    record_parser.add_argument(
        '--perf-tool',
        type=lambda x: x.split(),
        help="Path to the perf tool to use. Defaults to %r." % PERF_TOOL)

    # avoid intermixed/REMAINDER conflict, see above
    if nargs == argparse.REMAINDER:
        args = parser.parse_args()
    else:
        args = parser.parse_intermixed_args()

    # perf_paths/command overlap, so need to do some munging here
    args.command = args.perf_paths
    args.perf_paths = args.perf_paths or PERF_PATHS

    sys.exit(main(**{k: v
        for k, v in vars(args).items()
        if v is not None}))