perf.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319
  1. #!/usr/bin/env python3
  2. #
  3. # Script to aggregate and report Linux perf results.
  4. #
  5. # Example:
  6. # ./scripts/perf.py -R -obench.perf ./runners/bench_runner
  7. # ./scripts/perf.py bench.perf -j -Flfs.c -Flfs_util.c -Scycles
  8. #
  9. # Copyright (c) 2022, The littlefs authors.
  10. # SPDX-License-Identifier: BSD-3-Clause
  11. #
import bisect
import collections as co
import csv
import errno
import fcntl
import functools as ft
import itertools as it
import math as m
import multiprocessing as mp
import os
import re
import shlex
import shutil
import subprocess as sp
import sys
import tempfile
import zipfile
# TODO support non-zip perf results?

# default perf invocation, may contain extra args
PERF_TOOL = ['perf']
# events to sample; these map onto PerfResult's measurement fields
PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
# default sampling frequency, in samples/second
PERF_FREQ = 100
# default objdump invocation used to extract symbols/line info
OBJDUMP_TOOL = ['objdump']
# default annotate thresholds, as fractions of the hottest line's count
THRESHOLD = (0.5, 0.85)
  34. # integer fields
  35. class Int(co.namedtuple('Int', 'x')):
  36. __slots__ = ()
  37. def __new__(cls, x=0):
  38. if isinstance(x, Int):
  39. return x
  40. if isinstance(x, str):
  41. try:
  42. x = int(x, 0)
  43. except ValueError:
  44. # also accept +-∞ and +-inf
  45. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  46. x = m.inf
  47. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  48. x = -m.inf
  49. else:
  50. raise
  51. assert isinstance(x, int) or m.isinf(x), x
  52. return super().__new__(cls, x)
  53. def __str__(self):
  54. if self.x == m.inf:
  55. return '∞'
  56. elif self.x == -m.inf:
  57. return '-∞'
  58. else:
  59. return str(self.x)
  60. def __int__(self):
  61. assert not m.isinf(self.x)
  62. return self.x
  63. def __float__(self):
  64. return float(self.x)
  65. none = '%7s' % '-'
  66. def table(self):
  67. return '%7s' % (self,)
  68. diff_none = '%7s' % '-'
  69. diff_table = table
  70. def diff_diff(self, other):
  71. new = self.x if self else 0
  72. old = other.x if other else 0
  73. diff = new - old
  74. if diff == +m.inf:
  75. return '%7s' % '+∞'
  76. elif diff == -m.inf:
  77. return '%7s' % '-∞'
  78. else:
  79. return '%+7d' % diff
  80. def ratio(self, other):
  81. new = self.x if self else 0
  82. old = other.x if other else 0
  83. if m.isinf(new) and m.isinf(old):
  84. return 0.0
  85. elif m.isinf(new):
  86. return +m.inf
  87. elif m.isinf(old):
  88. return -m.inf
  89. elif not old and not new:
  90. return 0.0
  91. elif not old:
  92. return 1.0
  93. else:
  94. return (new-old) / old
  95. def __add__(self, other):
  96. return self.__class__(self.x + other.x)
  97. def __sub__(self, other):
  98. return self.__class__(self.x - other.x)
  99. def __mul__(self, other):
  100. return self.__class__(self.x * other.x)
  101. # perf results
  102. class PerfResult(co.namedtuple('PerfResult', [
  103. 'file', 'function', 'line',
  104. 'cycles', 'bmisses', 'branches', 'cmisses', 'caches',
  105. 'children'])):
  106. _by = ['file', 'function', 'line']
  107. _fields = ['cycles', 'bmisses', 'branches', 'cmisses', 'caches']
  108. _types = {
  109. 'cycles': Int,
  110. 'bmisses': Int, 'branches': Int,
  111. 'cmisses': Int, 'caches': Int}
  112. __slots__ = ()
  113. def __new__(cls, file='', function='', line=0,
  114. cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
  115. children=[]):
  116. return super().__new__(cls, file, function, int(Int(line)),
  117. Int(cycles), Int(bmisses), Int(branches), Int(cmisses), Int(caches),
  118. children)
  119. def __add__(self, other):
  120. return PerfResult(self.file, self.function, self.line,
  121. self.cycles + other.cycles,
  122. self.bmisses + other.bmisses,
  123. self.branches + other.branches,
  124. self.cmisses + other.cmisses,
  125. self.caches + other.caches,
  126. self.children + other.children)
  127. def openio(path, mode='r', buffering=-1):
  128. # allow '-' for stdin/stdout
  129. if path == '-':
  130. if mode == 'r':
  131. return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
  132. else:
  133. return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
  134. else:
  135. return open(path, mode, buffering)
# run perf as a subprocess, storing measurements into a zip file
def record(command, *,
        output=None,
        perf_freq=PERF_FREQ,
        perf_period=None,
        perf_events=PERF_EVENTS,
        perf_tool=PERF_TOOL,
        **args):
    """Run command under perf record, appending the raw perf data to output.

    output is a zip file shared by concurrent runs; each run adds one
    'perf.<pid>' member. Returns the command's exit code.
    """
    # create a temporary file for perf to write to, as far as I can tell
    # this is strictly needed because perf's pipe-mode only works with stdout
    with tempfile.NamedTemporaryFile('rb') as f:
        # figure out our perf invocation, filter(None, ...) drops the
        # options that don't apply
        perf = perf_tool + list(filter(None, [
            'record',
            # -F (frequency) and -c (period) are mutually exclusive
            '-F%s' % perf_freq
                if perf_freq is not None
                and perf_period is None else None,
            '-c%s' % perf_period
                if perf_period is not None else None,
            '-B',
            '-g',
            '--all-user',
            '-e%s' % perf_events,
            '-o%s' % f.name]))

        # run our command
        try:
            if args.get('verbose'):
                print(' '.join(shlex.quote(c) for c in perf + command))
            err = sp.call(perf + command, close_fds=False)

        except KeyboardInterrupt:
            # propagate an error code without a traceback on ctrl-C
            err = errno.EOWNERDEAD

        # synchronize access across concurrent runners with an exclusive
        # advisory lock on the zip file
        z = os.open(output, os.O_RDWR | os.O_CREAT)
        fcntl.flock(z, fcntl.LOCK_EX)

        # copy measurements into our zip file, one member per pid
        with os.fdopen(z, 'r+b') as z:
            with zipfile.ZipFile(z, 'a',
                    compression=zipfile.ZIP_DEFLATED,
                    compresslevel=1) as z:
                with z.open('perf.%d' % os.getpid(), 'w') as g:
                    shutil.copyfileobj(f, g)

    # forward the return code
    return err
  179. # try to only process each dso onceS
  180. #
  181. # note this only caches with the non-keyword arguments
  182. def multiprocessing_cache(f):
  183. local_cache = {}
  184. manager = mp.Manager()
  185. global_cache = manager.dict()
  186. lock = mp.Lock()
  187. def multiprocessing_cache(*args, **kwargs):
  188. # check local cache?
  189. if args in local_cache:
  190. return local_cache[args]
  191. # check global cache?
  192. with lock:
  193. if args in global_cache:
  194. v = global_cache[args]
  195. local_cache[args] = v
  196. return v
  197. # fall back to calling the function
  198. v = f(*args, **kwargs)
  199. global_cache[args] = v
  200. local_cache[args] = v
  201. return v
  202. return multiprocessing_cache
  203. @multiprocessing_cache
  204. def collect_syms_and_lines(obj_path, *,
  205. objdump_tool=None,
  206. **args):
  207. symbol_pattern = re.compile(
  208. '^(?P<addr>[0-9a-fA-F]+)'
  209. '\s+.*'
  210. '\s+(?P<size>[0-9a-fA-F]+)'
  211. '\s+(?P<name>[^\s]+)\s*$')
  212. line_pattern = re.compile(
  213. '^\s+(?:'
  214. # matches dir/file table
  215. '(?P<no>[0-9]+)'
  216. '(?:\s+(?P<dir>[0-9]+))?'
  217. '\s+.*'
  218. '\s+(?P<path>[^\s]+)'
  219. # matches line opcodes
  220. '|' '\[[^\]]*\]\s+'
  221. '(?:'
  222. '(?P<op_special>Special)'
  223. '|' '(?P<op_copy>Copy)'
  224. '|' '(?P<op_end>End of Sequence)'
  225. '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
  226. '|' 'Line .*?to (?P<op_line>[0-9]+)'
  227. '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
  228. '|' '.' ')*'
  229. ')$', re.IGNORECASE)
  230. # figure out symbol addresses and file+line ranges
  231. syms = {}
  232. sym_at = []
  233. cmd = objdump_tool + ['-t', obj_path]
  234. if args.get('verbose'):
  235. print(' '.join(shlex.quote(c) for c in cmd))
  236. proc = sp.Popen(cmd,
  237. stdout=sp.PIPE,
  238. stderr=sp.PIPE if not args.get('verbose') else None,
  239. universal_newlines=True,
  240. errors='replace',
  241. close_fds=False)
  242. for line in proc.stdout:
  243. m = symbol_pattern.match(line)
  244. if m:
  245. name = m.group('name')
  246. addr = int(m.group('addr'), 16)
  247. size = int(m.group('size'), 16)
  248. # ignore zero-sized symbols
  249. if not size:
  250. continue
  251. # note multiple symbols can share a name
  252. if name not in syms:
  253. syms[name] = set()
  254. syms[name].add((addr, size))
  255. sym_at.append((addr, name, size))
  256. proc.wait()
  257. if proc.returncode != 0:
  258. if not args.get('verbose'):
  259. for line in proc.stderr:
  260. sys.stdout.write(line)
  261. # assume no debug-info on failure
  262. pass
  263. # sort and keep largest/first when duplicates
  264. sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
  265. sym_at_ = []
  266. for addr, name, size in sym_at:
  267. if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
  268. sym_at_.append((addr, name, size))
  269. sym_at = sym_at_
  270. # state machine for dwarf line numbers, note that objdump's
  271. # decodedline seems to have issues with multiple dir/file
  272. # tables, which is why we need this
  273. lines = []
  274. line_at = []
  275. dirs = {}
  276. files = {}
  277. op_file = 1
  278. op_line = 1
  279. op_addr = 0
  280. cmd = objdump_tool + ['--dwarf=rawline', obj_path]
  281. if args.get('verbose'):
  282. print(' '.join(shlex.quote(c) for c in cmd))
  283. proc = sp.Popen(cmd,
  284. stdout=sp.PIPE,
  285. stderr=sp.PIPE if not args.get('verbose') else None,
  286. universal_newlines=True,
  287. errors='replace',
  288. close_fds=False)
  289. for line in proc.stdout:
  290. m = line_pattern.match(line)
  291. if m:
  292. if m.group('no') and not m.group('dir'):
  293. # found a directory entry
  294. dirs[int(m.group('no'))] = m.group('path')
  295. elif m.group('no'):
  296. # found a file entry
  297. dir = int(m.group('dir'))
  298. if dir in dirs:
  299. files[int(m.group('no'))] = os.path.join(
  300. dirs[dir],
  301. m.group('path'))
  302. else:
  303. files[int(m.group('no'))] = m.group('path')
  304. else:
  305. # found a state machine update
  306. if m.group('op_file'):
  307. op_file = int(m.group('op_file'), 0)
  308. if m.group('op_line'):
  309. op_line = int(m.group('op_line'), 0)
  310. if m.group('op_addr'):
  311. op_addr = int(m.group('op_addr'), 0)
  312. if (m.group('op_special')
  313. or m.group('op_copy')
  314. or m.group('op_end')):
  315. file = os.path.abspath(files.get(op_file, '?'))
  316. lines.append((file, op_line, op_addr))
  317. line_at.append((op_addr, file, op_line))
  318. if m.group('op_end'):
  319. op_file = 1
  320. op_line = 1
  321. op_addr = 0
  322. proc.wait()
  323. if proc.returncode != 0:
  324. if not args.get('verbose'):
  325. for line in proc.stderr:
  326. sys.stdout.write(line)
  327. # assume no debug-info on failure
  328. pass
  329. # sort and keep first when duplicates
  330. lines.sort()
  331. lines_ = []
  332. for file, line, addr in lines:
  333. if len(lines_) == 0 or lines_[-1][0] != file or lines[-1][1] != line:
  334. lines_.append((file, line, addr))
  335. lines = lines_
  336. # sort and keep first when duplicates
  337. line_at.sort()
  338. line_at_ = []
  339. for addr, file, line in line_at:
  340. if len(line_at_) == 0 or line_at_[-1][0] != addr:
  341. line_at_.append((addr, file, line))
  342. line_at = line_at_
  343. return syms, sym_at, lines, line_at
def collect_decompressed(path, *,
        perf_tool=PERF_TOOL,
        sources=None,
        everything=False,
        propagate=0,
        depth=1,
        **args):
    """Parse one raw perf data file into a list of PerfResults.

    Runs 'perf script' on path, maps sampled addresses back to
    file+function+line via objdump info, and accumulates event periods
    into a call tree up to 'depth' levels deep.
    """
    sample_pattern = re.compile(
        '(?P<comm>\w+)'
        '\s+(?P<pid>\w+)'
        '\s+(?P<time>[\w.]+):'
        '\s*(?P<period>\w+)'
        '\s+(?P<event>[^:]+):')
    frame_pattern = re.compile(
        '\s+(?P<addr>\w+)'
        '\s+(?P<sym>[^\s\+]+)(?:\+(?P<off>\w+))?'
        '\s+\((?P<dso>[^\)]+)\)')
    # perf event name -> PerfResult field name
    events = {
        'cycles': 'cycles',
        'branch-misses': 'bmisses',
        'branches': 'branches',
        'cache-misses': 'cmisses',
        'cache-references': 'caches'}

    # note perf_tool may contain extra args
    cmd = perf_tool + [
        'script',
        '-i%s' % path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)

    # parser state: the sample currently being accumulated
    last_filtered = False
    last_event = ''
    last_period = 0
    last_stack = []
    # per-dso ASLR delta candidates and previously-seen symbols
    deltas = co.defaultdict(lambda: {})
    syms_ = co.defaultdict(lambda: {})
    # (dso, addr) -> (file, line) or None (meaning "skip"), avoids
    # re-resolving hot addresses
    at_cache = {}
    # nested dict of name -> (event counters, children)
    results = {}

    def commit():
        # tail-recursively propagate measurements up the stack, bounded
        # by depth levels of children
        for i in range(len(last_stack)):
            results_ = results
            for j in reversed(range(i+1)):
                if i+1-j > depth:
                    break
                # propagate
                name = last_stack[j]
                if name not in results_:
                    results_[name] = (co.defaultdict(lambda: 0), {})
                results_[name][0][last_event] += last_period
                # recurse
                results_ = results_[name][1]

    for line in proc.stdout:
        # we need to process a lot of data, so wait to use regex as late
        # as possible
        if not line.startswith('\t'):
            # non-tab line = new sample header; commit the previous one
            if last_filtered:
                commit()
            last_filtered = False
            if line:
                m = sample_pattern.match(line)
                if m and m.group('event') in events:
                    last_filtered = True
                    last_event = m.group('event')
                    last_period = int(m.group('period'), 0)
                    last_stack = []
        elif last_filtered:
            # tab-indented line = one stack frame of the current sample
            m = frame_pattern.match(line)
            if m:
                # filter out internal/kernel functions
                if not everything and (
                        m.group('sym').startswith('__')
                        or m.group('sym').startswith('0')
                        or m.group('sym').startswith('-')
                        or m.group('sym').startswith('[')
                        or m.group('dso').startswith('/usr/lib')):
                    continue

                dso = m.group('dso')
                sym = m.group('sym')
                off = int(m.group('off'), 0) if m.group('off') else 0
                addr_ = int(m.group('addr'), 16)

                # get the syms/lines for the dso, this is cached
                syms, sym_at, lines, line_at = collect_syms_and_lines(
                    dso,
                    **args)

                # ASLR is tricky, we have symbols+offsets, but static symbols
                # means we may have multiple options for each symbol.
                #
                # To try to solve this, we use previous seen symbols to build
                # confidence for the correct ASLR delta. This means we may
                # guess incorrectly for early symbols, but this will only affect
                # a few samples.
                if sym in syms:
                    sym_addr_ = addr_ - off

                    # track possible deltas?
                    for sym_addr, size in syms[sym]:
                        delta = sym_addr - sym_addr_
                        if delta not in deltas[dso]:
                            # score a new delta candidate against every
                            # symbol seen so far
                            deltas[dso][delta] = sum(
                                abs(a_+delta - a)
                                for s, (a_, _) in syms_[dso].items()
                                for a, _ in syms[s])
                    for delta in deltas[dso].keys():
                        deltas[dso][delta] += abs(sym_addr_+delta - sym_addr)
                    syms_[dso][sym] = sym_addr_, size

                    # guess the best delta (lowest total error)
                    delta, _ = min(deltas[dso].items(),
                        key=lambda x: (x[1], x[0]))
                    addr = addr_ + delta

                    # cached?
                    if (dso,addr) in at_cache:
                        cached = at_cache[(dso,addr)]
                        if cached is None:
                            # cache says to skip
                            continue
                        file, line = cached
                    else:
                        # find file+line via binary search over line_at
                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
                        if i > 0:
                            _, file, line = line_at[i-1]
                        else:
                            # no line info, guess file from the dso name
                            file, line = re.sub('(\.o)?$', '.c', dso, 1), 0

                        # ignore filtered sources
                        if sources is not None:
                            if not any(
                                    os.path.abspath(file) == os.path.abspath(s)
                                    for s in sources):
                                at_cache[(dso,addr)] = None
                                continue
                        else:
                            # default to only cwd
                            if not everything and not os.path.commonpath([
                                    os.getcwd(),
                                    os.path.abspath(file)]) == os.getcwd():
                                at_cache[(dso,addr)] = None
                                continue

                        # simplify path
                        if os.path.commonpath([
                                os.getcwd(),
                                os.path.abspath(file)]) == os.getcwd():
                            file = os.path.relpath(file)
                        else:
                            file = os.path.abspath(file)

                        at_cache[(dso,addr)] = file, line
                else:
                    # unknown symbol, guess file from the dso name
                    file, line = re.sub('(\.o)?$', '.c', dso, 1), 0

                last_stack.append((file, sym, line))

                # stop propagating?
                if propagate and len(last_stack) >= propagate:
                    commit()
                    last_filtered = False
    # commit the final in-flight sample
    if last_filtered:
        commit()

    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        sys.exit(-1)

    # rearrange results into result type
    def to_results(results):
        results_ = []
        for name, (r, children) in results.items():
            results_.append(PerfResult(*name,
                **{events[k]: v for k, v in r.items()},
                children=to_results(children)))
        return results_

    return to_results(results)
  518. def collect_job(path, i, **args):
  519. # decompress into a temporary file, this is to work around
  520. # some limitations of perf
  521. with zipfile.ZipFile(path) as z:
  522. with z.open(i) as f:
  523. with tempfile.NamedTemporaryFile('wb') as g:
  524. shutil.copyfileobj(f, g)
  525. g.flush()
  526. return collect_decompressed(g.name, **args)
  527. def starapply(args):
  528. f, args, kwargs = args
  529. return f(*args, **kwargs)
  530. def collect(perf_paths, *,
  531. jobs=None,
  532. **args):
  533. # automatic job detection?
  534. if jobs == 0:
  535. jobs = len(os.sched_getaffinity(0))
  536. records = []
  537. for path in perf_paths:
  538. # each .perf file is actually a zip file containing perf files from
  539. # multiple runs
  540. with zipfile.ZipFile(path) as z:
  541. records.extend((path, i) for i in z.infolist())
  542. # we're dealing with a lot of data but also surprisingly
  543. # parallelizable
  544. if jobs is not None:
  545. results = []
  546. with mp.Pool(jobs) as p:
  547. for results_ in p.imap_unordered(
  548. starapply,
  549. ((collect_job, (path, i), args) for path, i in records)):
  550. results.extend(results_)
  551. else:
  552. results = []
  553. for path, i in records:
  554. results.extend(collect_job(path, i, **args))
  555. return results
  556. def fold(Result, results, *,
  557. by=None,
  558. defines=None,
  559. **_):
  560. if by is None:
  561. by = Result._by
  562. for k in it.chain(by or [], (k for k, _ in defines or [])):
  563. if k not in Result._by and k not in Result._fields:
  564. print("error: could not find field %r?" % k)
  565. sys.exit(-1)
  566. # filter by matching defines
  567. if defines is not None:
  568. results_ = []
  569. for r in results:
  570. if all(getattr(r, k) in vs for k, vs in defines):
  571. results_.append(r)
  572. results = results_
  573. # organize results into conflicts
  574. folding = co.OrderedDict()
  575. for r in results:
  576. name = tuple(getattr(r, k) for k in by)
  577. if name not in folding:
  578. folding[name] = []
  579. folding[name].append(r)
  580. # merge conflicts
  581. folded = []
  582. for name, rs in folding.items():
  583. folded.append(sum(rs[1:], start=rs[0]))
  584. # fold recursively
  585. folded_ = []
  586. for r in folded:
  587. folded_.append(r._replace(children=fold(
  588. Result, r.children,
  589. by=by,
  590. defines=defines)))
  591. folded = folded_
  592. return folded
def table(Result, results, diff_results=None, *,
        by=None,
        fields=None,
        sort=None,
        summary=False,
        all=False,
        percent=False,
        depth=1,
        **_):
    """Render results as a fixed-width table, optionally diffed against
    diff_results, with a call tree expanded up to 'depth' levels."""
    # 'all' the parameter shadows the builtin, so stash both
    # NOTE(review): __builtins__ is the builtins module when this file runs
    # as a script, but a plain dict when imported — this line presumably
    # assumes script use; confirm before importing this module
    all_, all = all, __builtins__.all

    if by is None:
        by = Result._by
    if fields is None:
        fields = Result._fields
    types = Result._types

    # fold again
    results = fold(Result, results, by=by)
    if diff_results is not None:
        diff_results = fold(Result, diff_results, by=by)

    # organize by name
    table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in results}
    diff_table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in diff_results or []}
    names = list(table.keys() | diff_table.keys())

    # sort again, now with diff info, note that python's sort is stable
    names.sort()
    if diff_results is not None:
        names.sort(key=lambda n: tuple(
            types[k].ratio(
                getattr(table.get(n), k, None),
                getattr(diff_table.get(n), k, None))
            for k in fields),
            reverse=True)
    if sort:
        for k, reverse in reversed(sort):
            names.sort(key=lambda n: (getattr(table[n], k),)
                if getattr(table.get(n), k, None) is not None else (),
                reverse=reverse ^ (not k or k in Result._fields))

    # build up our lines
    lines = []

    # header row; when diffing, count names added/removed
    header = []
    header.append('%s%s' % (
        ','.join(by),
        ' (%d added, %d removed)' % (
            sum(1 for n in table if n not in diff_table),
            sum(1 for n in diff_table if n not in table))
        if diff_results is not None and not percent else '')
        if not summary else '')
    if diff_results is None:
        for k in fields:
            header.append(k)
    elif percent:
        for k in fields:
            header.append(k)
    else:
        # old/new/diff columns per field
        for k in fields:
            header.append('o'+k)
        for k in fields:
            header.append('n'+k)
        for k in fields:
            header.append('d'+k)
    header.append('')
    lines.append(header)

    def table_entry(name, r, diff_r=None, ratios=[]):
        # build one row: name, field cells, trailing ratio annotation
        entry = []
        entry.append(name)
        if diff_results is None:
            for k in fields:
                entry.append(getattr(r, k).table()
                    if getattr(r, k, None) is not None
                    else types[k].none)
        elif percent:
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
        else:
            for k in fields:
                entry.append(getattr(diff_r, k).diff_table()
                    if getattr(diff_r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(types[k].diff_diff(
                    getattr(r, k, None),
                    getattr(diff_r, k, None)))
        if diff_results is None:
            entry.append('')
        elif percent:
            entry.append(' (%s)' % ', '.join(
                '+∞%' if t == +m.inf
                else '-∞%' if t == -m.inf
                else '%+.1f%%' % (100*t)
                for t in ratios))
        else:
            entry.append(' (%s)' % ', '.join(
                    '+∞%' if t == +m.inf
                    else '-∞%' if t == -m.inf
                    else '%+.1f%%' % (100*t)
                    for t in ratios
                    if t)
                if any(ratios) else '')
        return entry

    # entries
    if not summary:
        for name in names:
            r = table.get(name)
            if diff_results is None:
                diff_r = None
                ratios = None
            else:
                diff_r = diff_table.get(name)
                ratios = [
                    types[k].ratio(
                        getattr(r, k, None),
                        getattr(diff_r, k, None))
                    for k in fields]
                # skip unchanged rows unless -a/--all
                if not all_ and not any(ratios):
                    continue
            lines.append(table_entry(name, r, diff_r, ratios))

    # total row, found by folding with no 'by' fields
    r = next(iter(fold(Result, results, by=[])), None)
    if diff_results is None:
        diff_r = None
        ratios = None
    else:
        diff_r = next(iter(fold(Result, diff_results, by=[])), None)
        ratios = [
            types[k].ratio(
                getattr(r, k, None),
                getattr(diff_r, k, None))
            for k in fields]
    lines.append(table_entry('TOTAL', r, diff_r, ratios))

    # find the best widths, note that column 0 contains the names and column -1
    # the ratios, so those are handled a bit differently
    widths = [
        ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
        for w, i in zip(
            it.chain([23], it.repeat(7)),
            range(len(lines[0])-1))]

    # adjust the name width based on the expected call depth, though
    # note this doesn't really work with unbounded recursion
    if not summary and not m.isinf(depth):
        widths[0] += 4*(depth-1)

    # print the tree recursively
    print('%-*s %s%s' % (
        widths[0], lines[0][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[0][1:-1])),
        lines[0][-1]))
    if not summary:
        def recurse(results_, depth_, prefixes=('', '', '', '')):
            # rebuild our tables at each layer
            table_ = {
                ','.join(str(getattr(r, k) or '') for k in by): r
                for r in results_}
            names_ = list(table_.keys())

            # sort again at each layer, keep in mind the numbers are
            # changing as we descend
            names_.sort()
            if sort:
                for k, reverse in reversed(sort):
                    names_.sort(key=lambda n: (getattr(table_[n], k),)
                        if getattr(table_.get(n), k, None) is not None else (),
                        reverse=reverse ^ (not k or k in Result._fields))

            for i, name in enumerate(names_):
                r = table_[name]
                is_last = (i == len(names_)-1)

                print('%s%-*s %s' % (
                    prefixes[0+is_last],
                    widths[0] - (
                        len(prefixes[0+is_last])
                        if not m.isinf(depth) else 0),
                    name,
                    ' '.join('%*s' % (w, x)
                        for w, x in zip(
                            widths[1:],
                            table_entry(name, r)[1:]))))

                # recurse?
                if depth_ > 1:
                    recurse(
                        r.children,
                        depth_-1,
                        (prefixes[2+is_last] + "|-> ",
                         prefixes[2+is_last] + "'-> ",
                         prefixes[2+is_last] + "|   ",
                         prefixes[2+is_last] + "    "))

        # we have enough going on with diffing to make the top layer
        # a special case
        for name, line in zip(names, lines[1:-1]):
            print('%-*s %s%s' % (
                widths[0], line[0],
                ' '.join('%*s' % (w, x)
                    for w, x in zip(widths[1:], line[1:-1])),
                line[-1]))

            if name in table and depth > 1:
                recurse(
                    table[name].children,
                    depth-1,
                    ("|-> ",
                     "'-> ",
                     "|   ",
                     "    "))

    print('%-*s %s%s' % (
        widths[0], lines[-1][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[-1][1:-1])),
        lines[-1][-1]))
def annotate(Result, results, *,
        annotate=None,
        threshold=None,
        branches=False,
        caches=False,
        **args):
    """Print annotated source, marking hot lines with their counters.

    With annotate falsy, only spans around lines above the low threshold
    (plus args['context'] surrounding lines) are shown; with it truthy,
    whole files are printed. Requires args['context'], args['width'] and
    args['color'] to be set by the caller.
    """
    # figure out the threshold pair (t0 = show, t1 = highlight red)
    if threshold is None:
        t0, t1 = THRESHOLD
    elif len(threshold) == 1:
        t0, t1 = threshold[0], threshold[0]
    else:
        t0, t1 = threshold
    t0, t1 = min(t0, t1), max(t0, t1)

    # pick which counter drives the thresholds
    if not branches and not caches:
        tk = 'cycles'
    elif branches:
        tk = 'bmisses'
    else:
        tk = 'cmisses'

    # find max cycles, min 1 to avoid division by zero
    max_ = max(it.chain((float(getattr(r, tk)) for r in results), [1]))

    # iterate files in first-seen order
    for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
        # flatten to line info
        results = fold(Result, results, by=['file', 'line'])
        table = {r.line: r for r in results if r.file == path}

        # calculate spans to show
        if not annotate:
            spans = []
            last = None
            func = None
            for line, r in sorted(table.items()):
                if float(getattr(r, tk)) / max_ >= t0:
                    if last is not None and line - last.stop <= args['context']:
                        # close enough to the previous span, extend it
                        last = range(
                            last.start,
                            line+1+args['context'])
                    else:
                        # start a new span
                        if last is not None:
                            spans.append((last, func))
                        last = range(
                            line-args['context'],
                            line+1+args['context'])
                        func = r.function
            if last is not None:
                spans.append((last, func))

        with open(path) as f:
            skipped = False
            for i, line in enumerate(f):
                # skip lines not in spans?
                if not annotate and not any(i+1 in s for s, _ in spans):
                    skipped = True
                    continue

                # print a diff-style @@ header when resuming after a skip
                if skipped:
                    skipped = False
                    print('%s@@ %s:%d: %s @@%s' % (
                        '\x1b[36m' if args['color'] else '',
                        path,
                        i+1,
                        next(iter(f for _, f in spans)),
                        '\x1b[m' if args['color'] else ''))

                # build line, stripping the trailing newline
                if line.endswith('\n'):
                    line = line[:-1]

                r = table.get(i+1)
                if r is not None and (
                        float(r.cycles) > 0
                        if not branches and not caches
                        else float(r.bmisses) > 0 or float(r.branches) > 0
                        if branches
                        else float(r.cmisses) > 0 or float(r.caches) > 0):
                    # append the relevant counters as a // comment
                    line = '%-*s // %s' % (
                        args['width'],
                        line,
                        '%s cycles' % r.cycles
                        if not branches and not caches
                        else '%s bmisses, %s branches' % (r.bmisses, r.branches)
                        if branches
                        else '%s cmisses, %s caches' % (r.cmisses, r.caches))

                    # colorize by threshold: red above t1, magenta above t0
                    if args['color']:
                        if float(getattr(r, tk)) / max_ >= t1:
                            line = '\x1b[1;31m%s\x1b[m' % line
                        elif float(getattr(r, tk)) / max_ >= t0:
                            line = '\x1b[35m%s\x1b[m' % line

                print(line)
def report(perf_paths, *,
        by=None,
        fields=None,
        defines=None,
        sort=None,
        branches=False,
        caches=False,
        **args):
    """Collect (or load), fold, sort, and render perf results.

    Results come either from parsing perf_paths via collect(), or from a
    previously written CSV given by args['use']. Results may be written to
    CSV (args['output']), diffed against a previous CSV (args['diff']), and
    are finally rendered either as annotated source or as a table.
    """
    # figure out what color should be
    if args.get('color') == 'auto':
        args['color'] = sys.stdout.isatty()
    elif args.get('color') == 'always':
        args['color'] = True
    else:
        args['color'] = False

    # depth of 0 == m.inf
    if args.get('depth') == 0:
        args['depth'] = m.inf

    # find sizes
    if not args.get('use', None):
        results = collect(perf_paths, **args)
    else:
        # reload results from a previously emitted CSV instead of parsing
        results = []
        with openio(args['use']) as f:
            reader = csv.DictReader(f, restval='')
            for r in reader:
                try:
                    # only keep non-empty known fields; TypeError from the
                    # constructor means the row is missing required fields
                    results.append(PerfResult(
                        **{k: r[k] for k in PerfResult._by
                            if k in r and r[k].strip()},
                        **{k: r['perf_'+k] for k in PerfResult._fields
                            if 'perf_'+k in r and r['perf_'+k].strip()}))
                except TypeError:
                    pass

    # fold
    results = fold(PerfResult, results, by=by, defines=defines)

    # sort, note that python's sort is stable
    results.sort()
    if sort:
        # apply sort keys in reverse so the first key dominates; wrapping in
        # a tuple sorts None values first regardless of direction
        for k, reverse in reversed(sort):
            results.sort(key=lambda r: (getattr(r, k),)
                    if getattr(r, k) is not None else (),
                reverse=reverse ^ (not k or k in PerfResult._fields))

    # write results to CSV
    if args.get('output'):
        with openio(args['output'], 'w') as f:
            writer = csv.DictWriter(f,
                (by if by is not None else PerfResult._by)
                    + ['perf_'+k for k in PerfResult._fields])
            writer.writeheader()
            for r in results:
                writer.writerow(
                    {k: getattr(r, k)
                        for k in (by if by is not None else PerfResult._by)}
                    | {'perf_'+k: getattr(r, k)
                        for k in PerfResult._fields})

    # find previous results?
    if args.get('diff'):
        diff_results = []
        try:
            with openio(args['diff']) as f:
                reader = csv.DictReader(f, restval='')
                for r in reader:
                    try:
                        diff_results.append(PerfResult(
                            **{k: r[k] for k in PerfResult._by
                                if k in r and r[k].strip()},
                            **{k: r['perf_'+k] for k in PerfResult._fields
                                if 'perf_'+k in r and r['perf_'+k].strip()}))
                    except TypeError:
                        pass
        except FileNotFoundError:
            # missing diff file is treated as an empty baseline
            pass

        # fold
        diff_results = fold(PerfResult, diff_results, by=by, defines=defines)

    # print table
    if not args.get('quiet'):
        if args.get('annotate') or args.get('threshold'):
            # annotate sources
            annotate(PerfResult, results,
                branches=branches,
                caches=caches,
                **args)
        else:
            # print table
            table(PerfResult, results,
                diff_results if args.get('diff') else None,
                by=by if by is not None else ['function'],
                fields=fields if fields is not None
                    else ['cycles'] if not branches and not caches
                    else ['bmisses', 'branches'] if branches
                    else ['cmisses', 'caches'],
                sort=sort,
                **args)
  987. def main(**args):
  988. if args.get('record'):
  989. return record(**args)
  990. else:
  991. return report(**args)
if __name__ == "__main__":
    import argparse
    import sys

    # bit of a hack, but parse_intermixed_args and REMAINDER are
    # incompatible, so we need to figure out what we want before running
    # argparse
    if '-R' in sys.argv or '--record' in sys.argv:
        nargs = argparse.REMAINDER
    else:
        nargs = '*'

    # monkey-patch argparse so flags shared by the main parser and the
    # record group (-o, --perf-tool) don't raise conflicts
    argparse.ArgumentParser._handle_conflict_ignore = lambda *_: None
    argparse._ArgumentGroup._handle_conflict_ignore = lambda *_: None
    parser = argparse.ArgumentParser(
        description="Aggregate and report Linux perf results.",
        allow_abbrev=False,
        conflict_handler='ignore')
    parser.add_argument(
        'perf_paths',
        nargs=nargs,
        help="Input *.perf files.")
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument(
        '-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument(
        '-u', '--use',
        help="Don't parse anything, use this CSV file.")
    parser.add_argument(
        '-d', '--diff',
        help="Specify CSV file to diff against.")
    parser.add_argument(
        '-a', '--all',
        action='store_true',
        help="Show all, not just the ones that changed.")
    parser.add_argument(
        '-p', '--percent',
        action='store_true',
        help="Only show percentage change, not a full diff.")
    parser.add_argument(
        '-b', '--by',
        action='append',
        choices=PerfResult._by,
        help="Group by this field.")
    parser.add_argument(
        '-f', '--field',
        dest='fields',
        action='append',
        choices=PerfResult._fields,
        help="Show this field.")
    parser.add_argument(
        '-D', '--define',
        dest='defines',
        action='append',
        # parse "key=v1,v2,..." into (key, {v1, v2, ...})
        type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
        help="Only include results where this field is this value.")
    class AppendSort(argparse.Action):
        # accumulate (field, reverse) pairs in the order flags appear
        def __call__(self, parser, namespace, value, option):
            if namespace.sort is None:
                namespace.sort = []
            namespace.sort.append((value, True if option == '-S' else False))
    parser.add_argument(
        '-s', '--sort',
        action=AppendSort,
        help="Sort by this fields.")
    parser.add_argument(
        '-S', '--reverse-sort',
        action=AppendSort,
        help="Sort by this fields, but backwards.")
    parser.add_argument(
        '-Y', '--summary',
        action='store_true',
        help="Only show the total.")
    parser.add_argument(
        '-F', '--source',
        dest='sources',
        action='append',
        help="Only consider definitions in this file. Defaults to anything "
            "in the current directory.")
    parser.add_argument(
        '--everything',
        action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument(
        '--branches',
        action='store_true',
        help="Show branches and branch misses.")
    parser.add_argument(
        '--caches',
        action='store_true',
        help="Show cache accesses and cache misses.")
    parser.add_argument(
        '-P', '--propagate',
        type=lambda x: int(x, 0),
        help="Depth to propagate samples up the call-stack. 0 propagates up "
            "to the entry point, 1 does no propagation. Defaults to 0.")
    parser.add_argument(
        '-Z', '--depth',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Depth of function calls to show. 0 shows all calls but may not "
            "terminate!")
    parser.add_argument(
        '-A', '--annotate',
        action='store_true',
        help="Show source files annotated with coverage info.")
    parser.add_argument(
        '-T', '--threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with samples above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '-c', '--context',
        type=lambda x: int(x, 0),
        default=3,
        help="Show n additional lines of context. Defaults to 3.")
    parser.add_argument(
        '-W', '--width',
        type=lambda x: int(x, 0),
        default=80,
        help="Assume source is styled with this many columns. Defaults to 80.")
    parser.add_argument(
        '--color',
        choices=['never', 'always', 'auto'],
        default='auto',
        help="When to use terminal colors. Defaults to 'auto'.")
    parser.add_argument(
        '-j', '--jobs',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Number of processes to use. 0 spawns one process per core.")
    parser.add_argument(
        '--perf-tool',
        type=lambda x: x.split(),
        help="Path to the perf tool to use. Defaults to %r." % PERF_TOOL)
    parser.add_argument(
        '--objdump-tool',
        type=lambda x: x.split(),
        default=OBJDUMP_TOOL,
        help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)

    # record flags
    record_parser = parser.add_argument_group('record options')
    record_parser.add_argument(
        'command',
        nargs=nargs,
        help="Command to run.")
    record_parser.add_argument(
        '-R', '--record',
        action='store_true',
        help="Run a command and aggregate perf measurements.")
    record_parser.add_argument(
        '-o', '--output',
        help="Output file. Uses flock to synchronize. This is stored as a "
            "zip-file of multiple perf results.")
    record_parser.add_argument(
        '--perf-freq',
        help="perf sampling frequency. This is passed directly to perf. "
            "Defaults to %r." % PERF_FREQ)
    record_parser.add_argument(
        '--perf-period',
        help="perf sampling period. This is passed directly to perf.")
    record_parser.add_argument(
        '--perf-events',
        help="perf events to record. This is passed directly to perf. "
            "Defaults to %r." % PERF_EVENTS)
    record_parser.add_argument(
        '--perf-tool',
        type=lambda x: x.split(),
        help="Path to the perf tool to use. Defaults to %r." % PERF_TOOL)

    # avoid intermixed/REMAINDER conflict, see above
    if nargs == argparse.REMAINDER:
        args = parser.parse_args()
    else:
        args = parser.parse_intermixed_args()

    # perf_paths/command overlap, so need to do some munging here
    args.command = args.perf_paths

    # validate record mode up front so we fail before doing any work
    if args.record:
        if not args.command:
            print('error: no command specified?')
            sys.exit(-1)
        if not args.output:
            print('error: no output file specified?')
            sys.exit(-1)

    # drop None values so main/report/record see only explicit options
    sys.exit(main(**{k: v
        for k, v in vars(args).items()
        if v is not None}))