#!/usr/bin/env python3
#
# Aggregate and report call-stack propagated block-device operations
# from trace output.
#
# Example:
# ./scripts/bench.py -ttrace
# ./scripts/perfbd.py trace -j -Flfs.c -Flfs_util.c -Serased -Sproged -Sreaded
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#

import bisect
import collections as co
import csv
import functools as ft
import itertools as it
import math as m
import multiprocessing as mp
import os
import re
import shlex
import subprocess as sp


OBJDUMP_PATH = ['objdump']
THRESHOLD = (0.5, 0.85)

# integer fields
class Int(co.namedtuple('Int', 'x')):
    __slots__ = ()
    def __new__(cls, x=0):
        if isinstance(x, Int):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = m.inf
                elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -m.inf
                else:
                    raise
        assert isinstance(x, int) or m.isinf(x), x
        return super().__new__(cls, x)

    def __str__(self):
        if self.x == m.inf:
            return '∞'
        elif self.x == -m.inf:
            return '-∞'
        else:
            return str(self.x)

    def __int__(self):
        assert not m.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    none = '%7s' % '-'
    def table(self):
        return '%7s' % (self,)

    diff_none = '%7s' % '-'
    diff_table = table

    def diff_diff(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        if diff == +m.inf:
            return '%7s' % '+∞'
        elif diff == -m.inf:
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

    def ratio(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        if m.isinf(new) and m.isinf(old):
            return 0.0
        elif m.isinf(new):
            return +m.inf
        elif m.isinf(old):
            return -m.inf
        elif not old and not new:
            return 0.0
        elif not old:
            return 1.0
        else:
            return (new-old) / old

    def __add__(self, other):
        return self.__class__(self.x + other.x)

    def __sub__(self, other):
        return self.__class__(self.x - other.x)

    def __mul__(self, other):
        return self.__class__(self.x * other.x)
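
# Ints parse like python ints but also accept ±∞/±inf, and render
# right-aligned in 7-character columns, e.g. (illustrative):
#
#   Int('0x10') + Int(16)  => Int(32)
#   str(Int('inf'))        => '∞'
#   Int(3).ratio(Int(2))   => +0.5, shown as +50.0% in diffs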

# perf results
class PerfBdResult(co.namedtuple('PerfBdResult', [
        'file', 'function', 'line',
        'readed', 'proged', 'erased',
        'children'])):
    _by = ['file', 'function', 'line']
    _fields = ['readed', 'proged', 'erased']
    _sort = ['erased', 'proged', 'readed']
    _types = {'readed': Int, 'proged': Int, 'erased': Int}

    __slots__ = ()
    def __new__(cls, file='', function='', line=0,
            readed=0, proged=0, erased=0,
            children=[]):
        return super().__new__(cls, file, function, int(Int(line)),
            Int(readed), Int(proged), Int(erased),
            children)

    def __add__(self, other):
        return PerfBdResult(self.file, self.function, self.line,
            self.readed + other.readed,
            self.proged + other.proged,
            self.erased + other.erased,
            self.children + other.children)
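
# note readed/proged/erased are cumulative byte counts, and children holds
# the callee PerfBdResults folded into this frame, which is what lets
# table() render results as a call tree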

def openio(path, mode='r', buffering=-1):
    # allow '-' for stdin/stdout
    if path == '-':
        if mode == 'r':
            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
        else:
            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
    else:
        return open(path, mode, buffering)

def collect_syms_and_lines(obj_path, *,
        objdump_path=None,
        **args):
    symbol_pattern = re.compile(
        r'^(?P<addr>[0-9a-fA-F]+)'
            r'\s+.*'
            r'\s+(?P<size>[0-9a-fA-F]+)'
            r'\s+(?P<name>[^\s]+)\s*$')
    line_pattern = re.compile(
        r'^\s+(?:'
            # matches dir/file table
            r'(?P<no>[0-9]+)'
                r'(?:\s+(?P<dir>[0-9]+))?'
                r'\s+.*'
                r'\s+(?P<path>[^\s]+)'
            # matches line opcodes
            r'|' r'\[[^\]]*\]\s+'
                r'(?:'
                    r'(?P<op_special>Special)'
                    r'|' r'(?P<op_copy>Copy)'
                    r'|' r'(?P<op_end>End of Sequence)'
                    r'|' r'File .*?to (?:entry )?(?P<op_file>\d+)'
                    r'|' r'Line .*?to (?P<op_line>[0-9]+)'
                    r'|' r'(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
                    r'|' r'.' r')*'
            r')$', re.IGNORECASE)

    # figure out symbol addresses
    syms = {}
    sym_at = []
    cmd = (objdump_path or OBJDUMP_PATH) + ['-t', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = symbol_pattern.match(line)
        if m:
            name = m.group('name')
            addr = int(m.group('addr'), 16)
            size = int(m.group('size'), 16)
            # ignore zero-sized symbols
            if not size:
                continue
            # note multiple symbols can share a name
            if name not in syms:
                syms[name] = set()
            syms[name].add((addr, size))
            sym_at.append((addr, name, size))
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep largest/first when duplicates
    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
    sym_at_ = []
    for addr, name, size in sym_at:
        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
            sym_at_.append((addr, name, size))
    sym_at = sym_at_

    # state machine for dwarf line numbers, note that objdump's
    # decodedline seems to have issues with multiple dir/file
    # tables, which is why we need this
    lines = []
    line_at = []
    dirs = {}
    files = {}
    op_file = 1
    op_line = 1
    op_addr = 0
    cmd = (objdump_path or OBJDUMP_PATH) + ['--dwarf=rawline', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = line_pattern.match(line)
        if m:
            if m.group('no') and not m.group('dir'):
                # found a directory entry
                dirs[int(m.group('no'))] = m.group('path')
            elif m.group('no'):
                # found a file entry
                dir = int(m.group('dir'))
                if dir in dirs:
                    files[int(m.group('no'))] = os.path.join(
                        dirs[dir],
                        m.group('path'))
                else:
                    files[int(m.group('no'))] = m.group('path')
            else:
                # found a state machine update
                if m.group('op_file'):
                    op_file = int(m.group('op_file'), 0)
                if m.group('op_line'):
                    op_line = int(m.group('op_line'), 0)
                if m.group('op_addr'):
                    op_addr = int(m.group('op_addr'), 0)

                if (m.group('op_special')
                        or m.group('op_copy')
                        or m.group('op_end')):
                    file = os.path.abspath(files.get(op_file, '?'))
                    lines.append((file, op_line, op_addr))
                    line_at.append((op_addr, file, op_line))

                if m.group('op_end'):
                    op_file = 1
                    op_line = 1
                    op_addr = 0
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep first when duplicates
    lines.sort()
    lines_ = []
    for file, line, addr in lines:
        if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
            lines_.append((file, line, addr))
    lines = lines_

    # sort and keep first when duplicates
    line_at.sort()
    line_at_ = []
    for addr, file, line in line_at:
        if len(line_at_) == 0 or line_at_[-1][0] != addr:
            line_at_.append((addr, file, line))
    line_at = line_at_

    return syms, sym_at, lines, line_at
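
# the returned structures are sorted so collect_job can bisect them,
# e.g. (illustrative):
#
#   syms    = {'lfs_bd_read': {(0x1c34, 0x68)}, ...}  name -> {(addr, size)}
#   sym_at  = [(0x1c34, 'lfs_bd_read', 0x68), ...]    sorted by addr
#   lines   = [('/path/lfs.c', 612, 0x1c48), ...]     sorted by file+line
#   line_at = [(0x1c48, '/path/lfs.c', 612), ...]     sorted by addr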

def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
        sources=None,
        everything=False,
        propagate=0,
        depth=1,
        **args):
    trace_pattern = re.compile(
        r'^(?P<file>[^:]*):(?P<line>[0-9]+):trace:\s*(?P<prefix>[^\s]*?bd_)(?:'
            r'(?P<read>read)\('
                r'\s*(?P<read_ctx>\w+)' r'\s*,'
                r'\s*(?P<read_block>\w+)' r'\s*,'
                r'\s*(?P<read_off>\w+)' r'\s*,'
                r'\s*(?P<read_buffer>\w+)' r'\s*,'
                r'\s*(?P<read_size>\w+)' r'\s*\)'
            r'|' r'(?P<prog>prog)\('
                r'\s*(?P<prog_ctx>\w+)' r'\s*,'
                r'\s*(?P<prog_block>\w+)' r'\s*,'
                r'\s*(?P<prog_off>\w+)' r'\s*,'
                r'\s*(?P<prog_buffer>\w+)' r'\s*,'
                r'\s*(?P<prog_size>\w+)' r'\s*\)'
            r'|' r'(?P<erase>erase)\('
                r'\s*(?P<erase_ctx>\w+)' r'\s*,'
                r'\s*(?P<erase_block>\w+)'
                r'\s*\(\s*(?P<erase_size>\w+)\s*\)' r'\s*\)' r')\s*$')
    frame_pattern = re.compile(
        r'^\s+at (?P<addr>\w+)\s*$')
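
    # a trace line and its backtrace frames look roughly like this, though
    # the exact shape depends on how LFS_TRACE was configured (illustrative):
    #
    #   lfs.c:612:trace: lfs_bd_read(0x555555559040, 0x2, 8, 0x7ffd4080, 16)
    #           at 0x55555555a0b4
    #           at 0x55555555a1c8
    #
    # note frame lines start with a tab, the parse loop below leans on that
    # as a cheap pre-filter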

    # parse all of the trace files for read/prog/erase operations
    last_filtered = False
    last_file = None
    last_line = None
    last_sym = None
    last_readed = 0
    last_proged = 0
    last_erased = 0
    last_stack = []
    last_delta = None
    at_cache = {}
    results = {}

    def commit():
        # fallback to just capturing top-level measurements
        if not last_stack:
            file = last_file
            sym = last_sym
            line = last_line

            # ignore filtered sources
            if sources is not None:
                if not any(
                        os.path.abspath(file) == os.path.abspath(s)
                        for s in sources):
                    return
            else:
                # default to only cwd
                if not everything and not os.path.commonpath([
                        os.getcwd(),
                        os.path.abspath(file)]) == os.getcwd():
                    return

            # simplify path
            if os.path.commonpath([
                    os.getcwd(),
                    os.path.abspath(file)]) == os.getcwd():
                file = os.path.relpath(file)
            else:
                file = os.path.abspath(file)

            # accumulate, multiple ops can land on the same file+sym+line
            if (file, sym, line) in results:
                r, p, e, children = results[(file, sym, line)]
            else:
                r, p, e, children = 0, 0, 0, {}
            results[(file, sym, line)] = (
                r+last_readed,
                p+last_proged,
                e+last_erased,
                children)
        else:
            # tail-recursively propagate measurements
            for i in range(len(last_stack)):
                results_ = results
                for j in reversed(range(i+1)):
                    if i+1-j > depth:
                        break
                    # propagate
                    name = last_stack[j]
                    if name in results_:
                        r, p, e, children = results_[name]
                    else:
                        r, p, e, children = 0, 0, 0, {}
                    results_[name] = (
                        r+last_readed,
                        p+last_proged,
                        e+last_erased,
                        children)
                    # recurse
                    results_ = results_[name][-1]
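
    # propagation example: an op with stack [bd, cache, commit] (innermost
    # first) and depth=2 accumulates into results roughly as (illustrative):
    #
    #   {bd: ..., cache: {bd: ...}, commit: {cache: ...}}
    #
    # i.e. every frame is charged the op's bytes, with a children entry for
    # its immediate callee, bounded by depth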

    with openio(path) as f:
        # try to jump to the middle of the file? we need to step out of
        # utf8-safe mode and then resync with the next newline to avoid
        # parsing half a line
        if start is not None and start > 0:
            fd = f.fileno()
            os.lseek(fd, start, os.SEEK_SET)
            while os.read(fd, 1) not in {b'\n', b'\r', b''}:
                pass
            f = os.fdopen(fd)

        for line in f:
            # we have a lot of data, so take a few shortcuts, string search
            # is much faster than regex, so use regex as late as possible
            if not line.startswith('\t'):
                if last_filtered:
                    commit()
                last_filtered = False

                # done processing our slice?
                if stop is not None:
                    if os.lseek(f.fileno(), 0, os.SEEK_CUR) > stop:
                        break

                if 'trace' in line and 'bd' in line:
                    m = trace_pattern.match(line)
                    if m:
                        last_filtered = True
                        last_file = os.path.abspath(m.group('file'))
                        last_line = int(m.group('line'), 0)
                        last_sym = m.group('prefix')
                        last_readed = 0
                        last_proged = 0
                        last_erased = 0
                        last_stack = []
                        last_delta = None

                        if m.group('read'):
                            last_sym += m.group('read')
                            last_readed += int(m.group('read_size'))
                        elif m.group('prog'):
                            last_sym += m.group('prog')
                            last_proged += int(m.group('prog_size'))
                        elif m.group('erase'):
                            last_sym += m.group('erase')
                            last_erased += int(m.group('erase_size'))

            elif last_filtered:
                m = frame_pattern.match(line)
                if m:
                    addr_ = int(m.group('addr'), 0)

                    # before we can do anything with addr, we need to
                    # reverse ASLR, fortunately we know the file+line of
                    # the first stack frame, so we can use that as a point
                    # of reference
                    if last_delta is None:
                        i = bisect.bisect(lines, (last_file, last_line),
                            key=lambda x: (x[0], x[1]))
                        if i > 0:
                            last_delta = lines[i-1][2] - addr_
                        else:
                            # can't reverse ASLR, give up on backtrace
                            commit()
                            last_filtered = False
                            continue

                    addr = addr_ + last_delta

                    # cached?
                    if addr in at_cache:
                        cached = at_cache[addr]
                        if cached is None:
                            # cache says to skip
                            continue
                        file, sym, line = cached
                    else:
                        # find sym
                        i = bisect.bisect(sym_at, addr, key=lambda x: x[0])
                        # check that we're actually in the sym's size
                        if i > 0 and addr < sym_at[i-1][0] + sym_at[i-1][2]:
                            _, sym, _ = sym_at[i-1]
                        else:
                            sym = hex(addr)

                        # filter out internal/unknown functions
                        if not everything and (
                                sym.startswith('__')
                                or sym.startswith('0')
                                or sym.startswith('-')
                                or sym == '_start'):
                            at_cache[addr] = None
                            continue

                        # find file+line
                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
                        if i > 0:
                            _, file, line = line_at[i-1]
                        elif len(last_stack) == 0:
                            file, line = last_file, last_line
                        else:
                            # no line info for this frame, fall back to an
                            # unknown file
                            file, line = '?', 0

                        # ignore filtered sources
                        if sources is not None:
                            if not any(
                                    os.path.abspath(file)
                                        == os.path.abspath(s)
                                    for s in sources):
                                at_cache[addr] = None
                                continue
                        else:
                            # default to only cwd
                            if not everything and not os.path.commonpath([
                                    os.getcwd(),
                                    os.path.abspath(file)]) == os.getcwd():
                                at_cache[addr] = None
                                continue

                        # simplify path
                        if os.path.commonpath([
                                os.getcwd(),
                                os.path.abspath(file)]) == os.getcwd():
                            file = os.path.relpath(file)
                        else:
                            file = os.path.abspath(file)

                        at_cache[addr] = file, sym, line

                    last_stack.append((file, sym, line))

                    # stop propagating?
                    if propagate and len(last_stack) >= propagate:
                        commit()
                        last_filtered = False

        if last_filtered:
            commit()

    # rearrange results into result type
    def to_results(results):
        results_ = []
        for name, (r, p, e, children) in results.items():
            results_.append(PerfBdResult(*name,
                r, p, e,
                children=to_results(children)))
        return results_

    return to_results(results)

def starapply(args):
    f, args, kwargs = args
    return f(*args, **kwargs)
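
# mp.Pool.imap_unordered passes exactly one argument to its function, so we
# bounce through starapply with (f, args, kwargs) tuples to fan collect_job
# out over file slices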

def collect(obj_path, trace_paths, *,
        jobs=None,
        **args):
    # automatic job detection?
    if jobs == 0:
        jobs = len(os.sched_getaffinity(0))

    # find sym/line info to reverse ASLR
    syms, sym_at, lines, line_at = collect_syms_and_lines(obj_path, **args)

    if jobs is not None:
        # try to split up files so that even single files can be processed
        # in parallel
        #
        # this looks naive, since we're splitting up text files by bytes, but
        # we do proper backtrace delimiting in collect_job
        trace_ranges = []
        for path in trace_paths:
            if path == '-':
                trace_ranges.append([(None, None)])
                continue

            size = os.path.getsize(path)
            if size == 0:
                trace_ranges.append([(None, None)])
                continue

            perjob = m.ceil(size / jobs)
            trace_ranges.append(
                [(i, i+perjob) for i in range(0, size, perjob)])

        results = []
        with mp.Pool(jobs) as p:
            for results_ in p.imap_unordered(
                    starapply,
                    ((collect_job, (path, start, stop,
                            syms, sym_at, lines, line_at),
                        args)
                        for path, ranges in zip(trace_paths, trace_ranges)
                        for start, stop in ranges)):
                results.extend(results_)
    else:
        results = []
        for path in trace_paths:
            results.extend(collect_job(path, None, None,
                syms, sym_at, lines, line_at,
                **args))

    return results

def fold(Result, results, *,
        by=None,
        defines=None,
        **_):
    if by is None:
        by = Result._by

    for k in it.chain(by or [], (k for k, _ in defines or [])):
        if k not in Result._by and k not in Result._fields:
            print("error: could not find field %r?" % k)
            sys.exit(-1)

    # filter by matching defines
    if defines is not None:
        results_ = []
        for r in results:
            if all(getattr(r, k) in vs for k, vs in defines):
                results_.append(r)
        results = results_

    # organize results into conflicts
    folding = co.OrderedDict()
    for r in results:
        name = tuple(getattr(r, k) for k in by)
        if name not in folding:
            folding[name] = []
        folding[name].append(r)

    # merge conflicts
    folded = []
    for name, rs in folding.items():
        folded.append(sum(rs[1:], start=rs[0]))

    # fold recursively
    folded_ = []
    for r in folded:
        folded_.append(r._replace(children=fold(
            Result, r.children,
            by=by,
            defines=defines)))
    folded = folded_

    return folded
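
# folding sums results that share the same by-fields, e.g. folding
# by=['function'] merges every (file, line) variant of a function into a
# single PerfBdResult, recursing into children so the call tree stays
# consistent with the totals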

def table(Result, results, diff_results=None, *,
        by=None,
        fields=None,
        sort=None,
        summary=False,
        all=False,
        percent=False,
        depth=1,
        **_):
    all_, all = all, __builtins__.all

    if by is None:
        by = Result._by
    if fields is None:
        fields = Result._fields
    types = Result._types

    # fold again
    results = fold(Result, results, by=by)
    if diff_results is not None:
        diff_results = fold(Result, diff_results, by=by)

    # organize by name
    table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in results}
    diff_table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in diff_results or []}
    names = list(table.keys() | diff_table.keys())

    # sort again, now with diff info, note that python's sort is stable
    names.sort()
    if diff_results is not None:
        names.sort(key=lambda n: tuple(
                types[k].ratio(
                    getattr(table.get(n), k, None),
                    getattr(diff_table.get(n), k, None))
                for k in fields),
            reverse=True)
    if sort:
        for k, reverse in reversed(sort):
            names.sort(
                key=lambda n: tuple(
                    (getattr(table[n], k),)
                    if getattr(table.get(n), k, None) is not None else ()
                    for k in ([k] if k else [
                        k for k in Result._sort if k in fields])),
                reverse=reverse ^ (not k or k in Result._fields))

    # build up our lines
    lines = []

    # header
    header = []
    header.append('%s%s' % (
            ','.join(by),
            ' (%d added, %d removed)' % (
                    sum(1 for n in table if n not in diff_table),
                    sum(1 for n in diff_table if n not in table))
                if diff_results is not None and not percent else '')
        if not summary else '')
    if diff_results is None:
        for k in fields:
            header.append(k)
    elif percent:
        for k in fields:
            header.append(k)
    else:
        for k in fields:
            header.append('o'+k)
        for k in fields:
            header.append('n'+k)
        for k in fields:
            header.append('d'+k)
    header.append('')
    lines.append(header)

    def table_entry(name, r, diff_r=None, ratios=[]):
        entry = []
        entry.append(name)
        if diff_results is None:
            for k in fields:
                entry.append(getattr(r, k).table()
                    if getattr(r, k, None) is not None
                    else types[k].none)
        elif percent:
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
        else:
            for k in fields:
                entry.append(getattr(diff_r, k).diff_table()
                    if getattr(diff_r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(types[k].diff_diff(
                    getattr(r, k, None),
                    getattr(diff_r, k, None)))
        if diff_results is None:
            entry.append('')
        elif percent:
            entry.append(' (%s)' % ', '.join(
                '+∞%' if t == +m.inf
                else '-∞%' if t == -m.inf
                else '%+.1f%%' % (100*t)
                for t in ratios))
        else:
            entry.append(' (%s)' % ', '.join(
                    '+∞%' if t == +m.inf
                    else '-∞%' if t == -m.inf
                    else '%+.1f%%' % (100*t)
                    for t in ratios
                    if t)
                if any(ratios) else '')
        return entry

    # entries
    if not summary:
        for name in names:
            r = table.get(name)
            if diff_results is None:
                diff_r = None
                ratios = None
            else:
                diff_r = diff_table.get(name)
                ratios = [
                    types[k].ratio(
                        getattr(r, k, None),
                        getattr(diff_r, k, None))
                    for k in fields]
                if not all_ and not any(ratios):
                    continue
            lines.append(table_entry(name, r, diff_r, ratios))

    # total
    r = next(iter(fold(Result, results, by=[])), None)
    if diff_results is None:
        diff_r = None
        ratios = None
    else:
        diff_r = next(iter(fold(Result, diff_results, by=[])), None)
        ratios = [
            types[k].ratio(
                getattr(r, k, None),
                getattr(diff_r, k, None))
            for k in fields]
    lines.append(table_entry('TOTAL', r, diff_r, ratios))

    # find the best widths, note that column 0 contains the names and
    # column -1 the ratios, so those are handled a bit differently
    widths = [
        ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
        for w, i in zip(
            it.chain([23], it.repeat(7)),
            range(len(lines[0])-1))]

    # adjust the name width based on the expected call depth, though
    # note this doesn't really work with unbounded recursion
    if not summary and not m.isinf(depth):
        widths[0] += 4*(depth-1)

    # print the tree recursively
    print('%-*s %s%s' % (
        widths[0], lines[0][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[0][1:-1])),
        lines[0][-1]))

    if not summary:
        def recurse(results_, depth_, prefixes=('', '', '', '')):
            # rebuild our tables at each layer
            table_ = {
                ','.join(str(getattr(r, k) or '') for k in by): r
                for r in results_}
            names_ = list(table_.keys())

            # sort again at each layer, keep in mind the numbers are
            # changing as we descend
            names_.sort()
            if sort:
                for k, reverse in reversed(sort):
                    names_.sort(
                        key=lambda n: tuple(
                            (getattr(table_[n], k),)
                            if getattr(table_.get(n), k, None) is not None
                            else ()
                            for k in ([k] if k else [
                                k for k in Result._sort if k in fields])),
                        reverse=reverse ^ (not k or k in Result._fields))

            for i, name in enumerate(names_):
                r = table_[name]
                is_last = (i == len(names_)-1)

                print('%s%-*s %s' % (
                    prefixes[0+is_last],
                    widths[0] - (
                        len(prefixes[0+is_last])
                        if not m.isinf(depth) else 0),
                    name,
                    ' '.join('%*s' % (w, x)
                        for w, x in zip(
                            widths[1:],
                            table_entry(name, r)[1:]))))

                # recurse?
                if depth_ > 1:
                    recurse(
                        r.children,
                        depth_-1,
                        (prefixes[2+is_last] + "|-> ",
                         prefixes[2+is_last] + "'-> ",
                         prefixes[2+is_last] + "|   ",
                         prefixes[2+is_last] + "    "))

        # we have enough going on with diffing to make the top layer
        # a special case
        for name, line in zip(names, lines[1:-1]):
            print('%-*s %s%s' % (
                widths[0], line[0],
                ' '.join('%*s' % (w, x)
                    for w, x in zip(widths[1:], line[1:-1])),
                line[-1]))

            if name in table and depth > 1:
                recurse(
                    table[name].children,
                    depth-1,
                    ("|-> ",
                     "'-> ",
                     "|   ",
                     "    "))

    print('%-*s %s%s' % (
        widths[0], lines[-1][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[-1][1:-1])),
        lines[-1][-1]))
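
# the rendered tree looks roughly like this with -Z2 (illustrative):
#
#   function               readed  proged  erased
#   lfs_dir_commit          12544    1024     512
#   |-> lfs_bd_prog             0    1024       0
#   '-> lfs_bd_erase            0       0     512
#   TOTAL                   12544    1024     512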

def annotate(Result, results, *,
        annotate=None,
        threshold=None,
        read_threshold=None,
        prog_threshold=None,
        erase_threshold=None,
        **args):
    # figure out the thresholds
    if threshold is None:
        threshold = THRESHOLD
    elif len(threshold) == 1:
        threshold = threshold[0], threshold[0]

    if read_threshold is None:
        read_t0, read_t1 = threshold
    elif len(read_threshold) == 1:
        read_t0, read_t1 = read_threshold[0], read_threshold[0]
    else:
        read_t0, read_t1 = read_threshold
    read_t0, read_t1 = min(read_t0, read_t1), max(read_t0, read_t1)

    if prog_threshold is None:
        prog_t0, prog_t1 = threshold
    elif len(prog_threshold) == 1:
        prog_t0, prog_t1 = prog_threshold[0], prog_threshold[0]
    else:
        prog_t0, prog_t1 = prog_threshold
    prog_t0, prog_t1 = min(prog_t0, prog_t1), max(prog_t0, prog_t1)

    if erase_threshold is None:
        erase_t0, erase_t1 = threshold
    elif len(erase_threshold) == 1:
        erase_t0, erase_t1 = erase_threshold[0], erase_threshold[0]
    else:
        erase_t0, erase_t1 = erase_threshold
    erase_t0, erase_t1 = min(erase_t0, erase_t1), max(erase_t0, erase_t1)

    # find maxs
    max_readed = max(it.chain((float(r.readed) for r in results), [1]))
    max_proged = max(it.chain((float(r.proged) for r in results), [1]))
    max_erased = max(it.chain((float(r.erased) for r in results), [1]))

    for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
        # flatten to line info
        results = fold(Result, results, by=['file', 'line'])
        table = {r.line: r for r in results if r.file == path}

        # calculate spans to show
        if not annotate:
            spans = []
            last = None
            func = None
            for line, r in sorted(table.items()):
                if (float(r.readed) / max_readed >= read_t0
                        or float(r.proged) / max_proged >= prog_t0
                        or float(r.erased) / max_erased >= erase_t0):
                    if last is not None and line - last.stop <= args['context']:
                        last = range(
                            last.start,
                            line+1+args['context'])
                    else:
                        if last is not None:
                            spans.append((last, func))
                        last = range(
                            line-args['context'],
                            line+1+args['context'])
                        func = r.function
            if last is not None:
                spans.append((last, func))

        with open(path) as f:
            skipped = False
            for i, line in enumerate(f):
                # skip lines not in spans?
                if not annotate and not any(i+1 in s for s, _ in spans):
                    skipped = True
                    continue

                if skipped:
                    skipped = False
                    print('%s@@ %s:%d: %s @@%s' % (
                        '\x1b[36m' if args['color'] else '',
                        path,
                        i+1,
                        next((f_ for s, f_ in spans if i+1 in s), '?'),
                        '\x1b[m' if args['color'] else ''))

                # build line
                if line.endswith('\n'):
                    line = line[:-1]

                if i+1 in table:
                    r = table[i+1]
                    line = '%-*s // %s readed, %s proged, %s erased' % (
                        args['width'],
                        line,
                        r.readed,
                        r.proged,
                        r.erased)

                    if args['color']:
                        if (float(r.readed) / max_readed >= read_t1
                                or float(r.proged) / max_proged >= prog_t1
                                or float(r.erased) / max_erased >= erase_t1):
                            line = '\x1b[1;31m%s\x1b[m' % line
                        elif (float(r.readed) / max_readed >= read_t0
                                or float(r.proged) / max_proged >= prog_t0
                                or float(r.erased) / max_erased >= erase_t0):
                            line = '\x1b[35m%s\x1b[m' % line

                print(line)
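
# annotated sources look roughly like this, hot lines get their op counts
# appended as a comment and are highlighted against the thresholds
# (illustrative):
#
#   @@ lfs.c:611: lfs_bd_read @@
#       ...                            // 1234 readed, 0 proged, 0 erased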

def report(obj_path='', trace_paths=[], *,
        by=None,
        fields=None,
        defines=None,
        sort=None,
        **args):
    # figure out what color should be
    if args.get('color') == 'auto':
        args['color'] = sys.stdout.isatty()
    elif args.get('color') == 'always':
        args['color'] = True
    else:
        args['color'] = False

    # depth of 0 == m.inf
    if args.get('depth') == 0:
        args['depth'] = m.inf

    # find sizes
    if not args.get('use', None):
        results = collect(obj_path, trace_paths, **args)
    else:
        results = []
        with openio(args['use']) as f:
            reader = csv.DictReader(f, restval='')
            for r in reader:
                if not any('perfbd_'+k in r and r['perfbd_'+k].strip()
                        for k in PerfBdResult._fields):
                    continue
                try:
                    results.append(PerfBdResult(
                        **{k: r[k] for k in PerfBdResult._by
                            if k in r and r[k].strip()},
                        **{k: r['perfbd_'+k] for k in PerfBdResult._fields
                            if 'perfbd_'+k in r and r['perfbd_'+k].strip()}))
                except TypeError:
                    pass

    # fold
    results = fold(PerfBdResult, results, by=by, defines=defines)

    # sort, note that python's sort is stable
    results.sort()
    if sort:
        for k, reverse in reversed(sort):
            results.sort(
                key=lambda r: tuple(
                    (getattr(r, k),) if getattr(r, k) is not None else ()
                    for k in ([k] if k else PerfBdResult._sort)),
                reverse=reverse ^ (not k or k in PerfBdResult._fields))

    # write results to CSV
    if args.get('output'):
        with openio(args['output'], 'w') as f:
            writer = csv.DictWriter(f,
                (by if by is not None else PerfBdResult._by)
                + ['perfbd_'+k for k in (
                    fields if fields is not None else PerfBdResult._fields)])
            writer.writeheader()
            for r in results:
                writer.writerow(
                    {k: getattr(r, k) for k in (
                        by if by is not None else PerfBdResult._by)}
                    | {'perfbd_'+k: getattr(r, k) for k in (
                        fields if fields is not None
                        else PerfBdResult._fields)})
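
    # the output CSV has one row per result, by-fields as columns, and
    # measurements prefixed with perfbd_, e.g. (illustrative):
    #
    #   file,function,line,perfbd_readed,perfbd_proged,perfbd_erased
    #   lfs.c,lfs_dir_fetch,0,12544,0,0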

    # find previous results?
    if args.get('diff'):
        diff_results = []
        try:
            with openio(args['diff']) as f:
                reader = csv.DictReader(f, restval='')
                for r in reader:
                    if not any('perfbd_'+k in r and r['perfbd_'+k].strip()
                            for k in PerfBdResult._fields):
                        continue
                    try:
                        diff_results.append(PerfBdResult(
                            **{k: r[k] for k in PerfBdResult._by
                                if k in r and r[k].strip()},
                            **{k: r['perfbd_'+k] for k in PerfBdResult._fields
                                if 'perfbd_'+k in r
                                    and r['perfbd_'+k].strip()}))
                    except TypeError:
                        pass
        except FileNotFoundError:
            pass

        # fold
        diff_results = fold(PerfBdResult, diff_results, by=by, defines=defines)

    # print table
    if not args.get('quiet'):
        if (args.get('annotate')
                or args.get('threshold')
                or args.get('read_threshold')
                or args.get('prog_threshold')
                or args.get('erase_threshold')):
            # annotate sources
            annotate(PerfBdResult, results, **args)
        else:
            # print table
            table(PerfBdResult, results,
                diff_results if args.get('diff') else None,
                by=by if by is not None else ['function'],
                fields=fields,
                sort=sort,
                **args)

def main(**args):
    return report(**args)

if __name__ == "__main__":
    import argparse
    import sys
    parser = argparse.ArgumentParser(
        description="Aggregate and report call-stack propagated "
            "block-device operations from trace output.",
        allow_abbrev=False)
    parser.add_argument(
        'obj_path',
        nargs='?',
        help="Input executable for mapping addresses to symbols.")
    parser.add_argument(
        'trace_paths',
        nargs='*',
        help="Input *.trace files.")
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument(
        '-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument(
        '-u', '--use',
        help="Don't parse anything, use this CSV file.")
    parser.add_argument(
        '-d', '--diff',
        help="Specify CSV file to diff against.")
    parser.add_argument(
        '-a', '--all',
        action='store_true',
        help="Show all, not just the ones that changed.")
    parser.add_argument(
        '-p', '--percent',
        action='store_true',
        help="Only show percentage change, not a full diff.")
    parser.add_argument(
        '-b', '--by',
        action='append',
        choices=PerfBdResult._by,
        help="Group by this field.")
    parser.add_argument(
        '-f', '--field',
        dest='fields',
        action='append',
        choices=PerfBdResult._fields,
        help="Show this field.")
    parser.add_argument(
        '-D', '--define',
        dest='defines',
        action='append',
        type=lambda x: (lambda k, v: (k, set(v.split(','))))(*x.split('=', 1)),
        help="Only include results where this field is this value.")
    class AppendSort(argparse.Action):
        def __call__(self, parser, namespace, value, option):
            if namespace.sort is None:
                namespace.sort = []
            # note we need to match the long option too
            namespace.sort.append((value, option in {'-S', '--reverse-sort'}))
    parser.add_argument(
        '-s', '--sort',
        nargs='?',
        action=AppendSort,
        help="Sort by this field.")
    parser.add_argument(
        '-S', '--reverse-sort',
        nargs='?',
        action=AppendSort,
        help="Sort by this field, but backwards.")
    parser.add_argument(
        '-Y', '--summary',
        action='store_true',
        help="Only show the total.")
    parser.add_argument(
        '-F', '--source',
        dest='sources',
        action='append',
        help="Only consider definitions in this file. Defaults to anything "
            "in the current directory.")
    parser.add_argument(
        '--everything',
        action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument(
        '-P', '--propagate',
        type=lambda x: int(x, 0),
        help="Depth to propagate samples up the call-stack. 0 propagates up "
            "to the entry point, 1 does no propagation. Defaults to 0.")
    parser.add_argument(
        '-Z', '--depth',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Depth of function calls to show. 0 shows all calls but may not "
            "terminate!")
    parser.add_argument(
        '-A', '--annotate',
        action='store_true',
        help="Show source files annotated with coverage info.")
    parser.add_argument(
        '-T', '--threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with any ops above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '--read-threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with reads above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '--prog-threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with progs above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '--erase-threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with erases above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '-c', '--context',
        type=lambda x: int(x, 0),
        default=3,
        help="Show n additional lines of context. Defaults to 3.")
    parser.add_argument(
        '-W', '--width',
        type=lambda x: int(x, 0),
        default=80,
        help="Assume source is styled with this many columns. Defaults to 80.")
    parser.add_argument(
        '--color',
        choices=['never', 'always', 'auto'],
        default='auto',
        help="When to use terminal colors. Defaults to 'auto'.")
    parser.add_argument(
        '-j', '--jobs',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Number of processes to use. 0 spawns one process per core.")
    parser.add_argument(
        '--objdump-path',
        type=lambda x: x.split(),
        default=OBJDUMP_PATH,
        help="Path to the objdump executable, may include flags. "
            "Defaults to %r." % OBJDUMP_PATH)
    sys.exit(main(**{k: v
        for k, v in vars(parser.parse_intermixed_args()).items()
        if v is not None}))